## They can be recorded
    - to local files (by default to mlruns directory)
        + Launch UI: mlflow ui
    - to SQLAlchemy compatible database
        + Set up MLflow: mlflow.set_tracking_uri('sqlite:///mlflow.db')
        + Launch UI: mlflow ui --backend-store-uri sqlite:///mlflow.db
    - To show the current tracking URI: mlflow.get_tracking_uri()
    
## Manual logging > https://www.mlflow.org/docs/latest/tracking.html#logging-functions
    - Log the fitted model: mlflow.sklearn.log_model(rf, 'random-forest-model')
    - Log the model parameters:
        + One parameter at a time: mlflow.log_param('num_trees', n_estimators)
        + A dict of parameters: mlflow.log_params({'num_trees': n_estimators, 'alpha': 0.04})
    - Log the evaluation metrics: mlflow.log_metric('mse', mse)
    - Log other artifacts: mlflow.log_artifact('predictions.csv')
    
    
## Automatic logging with MLflow autolog
    - With MLflow's autologging capabilities, a single line of code automatically logs the resulting model, the parameters used to create the model, and a model score > https://www.mlflow.org/docs/latest/tracking.html#automatic-logging
    - Call mlflow.<framework>.autolog() API before running training code to log model-specific metrics, parameters, and model artifacts. Supports many ML frameworks (sklearn, tensorflow, etc).

In [24]:
import numpy as np
import pandas as pd
import mlflow
import sys
from pathlib import Path
import joblib

from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Make 'my_experiment' the active experiment for runs started after this call.
mlflow.set_experiment('my_experiment')

In [36]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

def get_dataset() -> list:
    """Load the scikit-learn diabetes data and split it into train/test sets.

    Returns:
        The list ``[X_train, X_test, y_train, y_test]`` produced by
        ``train_test_split``. The split uses the default test size and a
        fixed ``random_state`` of 42, so repeated calls give the same split.
    """
    db = load_diabetes()
    X, y = db.data, db.target
    return train_test_split(X, y, random_state=42)

# Unpack the four arrays returned by get_dataset(); the trailing expression
# shows the train/test feature shapes as the cell output.
X_train, X_test, y_train, y_test = get_dataset()
X_train.shape, X_test.shape

((331, 10), (111, 10))

In [47]:
# Write the test features to a comma-separated file in the working directory
# (np.savetxt emits no header row unless one is passed).
np.savetxt('diabetes_X_test.csv', X_test, delimiter=',')

In [19]:
# Project root is the parent of the resolved working directory; resolve()
# already yields an absolute path.
PROJECT_DIR = Path('.').resolve().parents[0]
MODELS_DIR = PROJECT_DIR / 'models'
# Allow imports of modules that live at the project root.
sys.path.append(str(PROJECT_DIR))
MODELS_DIR

WindowsPath('C:/Users/Admin/Artificial_Intelligence/Data Science in Production/ais-dsp-tien/models')

In [28]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def evaluate_model(model, X_test, y_test):
    """Score ``model`` on the test data, log the scores to MLflow and print them.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features to predict on.
        y_test: True target values corresponding to ``X_test``.

    Returns:
        The tuple ``(rmse, mae, r2)``.
    """
    predictions = model.predict(X_test)

    scores = {
        'rmse': np.sqrt(mean_squared_error(y_test, predictions)),
        'mae': mean_absolute_error(y_test, predictions),
        'r2': r2_score(y_test, predictions),
    }
    # All three metrics are sent to MLflow in a single call.
    mlflow.log_metrics(scores)

    rmse, mae, r2 = scores['rmse'], scores['mae'], scores['r2']
    print(f'RMSE = {rmse:.2f}, MAE = {mae:.2f}, R2 = {r2:.2f}')
    return rmse, mae, r2

def train_model(X_train, X_test, y_train, y_test, model_class, **model_kwargs) -> None:
    """Fit a model, track it with MLflow and save it under ``MODELS_DIR``.

    Args:
        X_train: Training features.
        X_test: Test features, forwarded to ``evaluate_model``.
        y_train: Training targets.
        y_test: Test targets, forwarded to ``evaluate_model``.
        model_class: Estimator class; instantiated as
            ``model_class(**model_kwargs)``.
        **model_kwargs: Constructor arguments for ``model_class``; also
            logged to MLflow as the run's parameters.

    Returns:
        None. The fitted model is logged to MLflow and written to
        ``MODELS_DIR / 'diabetes_model.joblib'``.
    """
    model = model_class(**model_kwargs)
    mlflow.log_params(model_kwargs)
    model.fit(X_train, y_train)
    # NOTE(review): the artifact path is fixed to 'elastic_net' regardless of
    # model_class -- confirm this is intended before training other estimators.
    mlflow.sklearn.log_model(model, 'elastic_net')
    evaluate_model(model, X_test, y_test)

    # joblib.dump does not create missing parent directories.
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, MODELS_DIR / 'diabetes_model.joblib')

In [29]:
# ElasticNet run with alpha=0.005, l1_ratio=0.8.
model_kwargs = dict(alpha=0.005, l1_ratio=0.8)
with mlflow.start_run():
    train_model(X_train, X_test, y_train, y_test, ElasticNet, **model_kwargs)

RMSE = 53.30, MAE = 42.59, R2 = 0.49


In [5]:
# Direct subsequent tracking calls to the MLflow server at this address.
# Assumes a server is already listening on local port 5000.
mlflow.set_tracking_uri('http://127.0.0.1:5000')

In [6]:
# ElasticNet run with alpha=0.01, l1_ratio=0.75.
model_kwargs = dict(alpha=0.01, l1_ratio=0.75)
with mlflow.start_run():
    train_model(X_train, X_test, y_train, y_test, ElasticNet, **model_kwargs)

RMSE = 55.11, MAE = 45.22, R2 = 0.45


In [7]:
# ElasticNet run with alpha=0.02, l1_ratio=0.7.
model_kwargs = dict(alpha=0.02, l1_ratio=0.7)
with mlflow.start_run():
    train_model(X_train, X_test, y_train, y_test, ElasticNet, **model_kwargs)

RMSE = 59.01, MAE = 49.68, R2 = 0.37


In [8]:
# ElasticNet run with alpha=0.1, l1_ratio=0.01.
model_kwargs = dict(alpha=0.1, l1_ratio=0.01)
with mlflow.start_run():
    train_model(X_train, X_test, y_train, y_test, ElasticNet, **model_kwargs)

RMSE = 72.43, MAE = 63.33, R2 = 0.05


In [10]:
# Query logged runs whose 'rmse' metric is below 60; the returned table is
# shown as the cell output.
mlflow.search_runs(filter_string="metric.rmse < 60")

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.r2,metrics.rmse,metrics.mae,params.alpha,params.l1_ratio,tags.mlflow.log-model.history,tags.mlflow.source.name,tags.mlflow.user,tags.mlflow.source.type
0,1a2c80e0fce746c09d6ec69f99ea3c8b,1,FINISHED,./mlruns/1/1a2c80e0fce746c09d6ec69f99ea3c8b/ar...,2021-06-11 20:56:33.109000+00:00,2021-06-11 20:56:33.256000+00:00,0.486288,53.297923,42.58513,0.005,0.8,"[{""run_id"": ""1a2c80e0fce746c09d6ec69f99ea3c8b""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
1,a507de3be4c64f61bbc56cf184d5c4dd,1,FINISHED,./mlruns/1/a507de3be4c64f61bbc56cf184d5c4dd/ar...,2021-06-11 20:56:32.705000+00:00,2021-06-11 20:56:32.864000+00:00,0.370336,59.007193,49.684719,0.02,0.7,"[{""run_id"": ""a507de3be4c64f61bbc56cf184d5c4dd""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
2,6859353c6e884d49a61556c1e2d0aced,1,FINISHED,./mlruns/1/6859353c6e884d49a61556c1e2d0aced/ar...,2021-06-11 20:56:32.400000+00:00,2021-06-11 20:56:32.665000+00:00,0.450811,55.107592,45.220709,0.01,0.75,"[{""run_id"": ""6859353c6e884d49a61556c1e2d0aced""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
3,7e1f633921d94f628aaec2743e14b7dc,1,FINISHED,./mlruns/1/7e1f633921d94f628aaec2743e14b7dc/ar...,2021-06-11 20:50:26.622000+00:00,2021-06-11 20:50:26.842000+00:00,0.486288,53.297923,42.58513,0.005,0.8,"[{""run_id"": ""7e1f633921d94f628aaec2743e14b7dc""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
4,cd958e552a514550a2ed8088d3333983,1,FINISHED,./mlruns/1/cd958e552a514550a2ed8088d3333983/ar...,2021-06-11 20:50:17.742000+00:00,2021-06-11 20:50:17.956000+00:00,0.370336,59.007193,49.684719,0.02,0.7,"[{""run_id"": ""cd958e552a514550a2ed8088d3333983""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
5,66c49c3e54eb4eb89eebed5a165e3d76,1,FINISHED,./mlruns/1/66c49c3e54eb4eb89eebed5a165e3d76/ar...,2021-06-11 20:49:28.308000+00:00,2021-06-11 20:49:28.541000+00:00,0.450811,55.107592,45.220709,0.01,0.75,"[{""run_id"": ""66c49c3e54eb4eb89eebed5a165e3d76""...",C:\Users\Admin\anaconda3\envs\dsp\lib\site-pac...,Admin,LOCAL
