In [31]:
import mlflow
import mlflow.sklearn as mlflow_sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

## Create New Experiment

In [None]:
# create_experiment raises if the name is already registered, which makes this
# cell fail on every re-run. Only create the experiment when it is missing.
if mlflow.get_experiment_by_name('mnist_models') is None:
    mlflow.create_experiment('mnist_models')

In [None]:
# Make 'mnist_models' the active experiment for every run started below.
mlflow.set_experiment(experiment_name='mnist_models')

## Load Dataset

In [84]:
from sklearn.datasets import load_digits

# Load the digits dataset that ships with scikit-learn.
mnist = load_digits()

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across re-runs.
train_x, test_x, train_y, test_y = train_test_split(
    mnist.data,
    mnist.target,
    test_size=0.2,
    random_state=10,
)

## Helper Function

In [69]:
def log_run(run_name, model, val_x, val_y):
    """Record one fitted model as an MLflow run.

    Inside a run named ``run_name`` this logs the model's hyperparameters,
    its micro-averaged precision / recall / F-score on the validation data,
    and the model itself.

    Args:
        run_name: Name given to the MLflow run; also used as the artifact
            path under which the model is stored.
        model: Fitted estimator exposing ``predict`` and ``get_params``.
        val_x: Validation features passed to ``model.predict``.
        val_y: True labels the predictions are scored against.

    Returns:
        None. Results are only written to MLflow.
    """
    # Use the run as a context manager so it is always closed, even when
    # prediction or logging raises. Otherwise the run would stay active and
    # the next start_run() call would fail.
    with mlflow.start_run(run_name=run_name):

        ## Get prediction on validation dataset
        val_pred = model.predict(val_x)

        ## Log all the hyperparameters
        mlflow.log_params(model.get_params())

        ## Calculate the required metrics (the support value is not used)
        precision, recall, fscore, _ = precision_recall_fscore_support(
            val_y, val_pred, average='micro'
        )

        ## Log all the required metrics
        mlflow.log_metrics(
            {'precision': precision, 'recall': recall, 'fscore': fscore}
        )

        ## This logs sklearn based models by converting them to pickle
        mlflow_sklearn.log_model(model, run_name)

## Models

In [76]:
# Baseline: random forest with every hyperparameter left at its default.
rf_model = RandomForestClassifier()
rf_model.fit(X=train_x, y=train_y)

RandomForestClassifier()

In [77]:
# Score the default random forest on the held-out split and record the run.
log_run(
    run_name='random_forest_default_param',
    model=rf_model,
    val_x=test_x,
    val_y=test_y,
)

In [85]:
# Variant: a smaller forest of 10 trees, for comparison against the default.
rf_model_2 = RandomForestClassifier(n_estimators=10)
rf_model_2.fit(X=train_x, y=train_y)

RandomForestClassifier(n_estimators=10)

In [86]:
# Record the 10-tree forest. The run name now reads "n_est" to match the
# n_estimators=10 setting it describes (it was previously spelled "e_est").
log_run('random_forest_n_est_10', rf_model_2, test_x, test_y)

In [87]:
# Baseline: logistic regression with default hyperparameters. With these
# defaults the solver stops at its iteration limit (see the warning in this
# cell's output); the settings are left untouched because this run is meant to
# be the default-parameter reference.
lg_model = LogisticRegression()
lg_model.fit(X=train_x, y=train_y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [88]:
# Score the default logistic regression on the held-out split and record the run.
log_run(
    run_name='logistic_default_param',
    model=lg_model,
    val_x=test_x,
    val_y=test_y,
)