In [64]:
import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier


# Single seed reused by the fold splitter and the classifier further down.
SEED = 42

# MLflow settings: address of the tracking server and the experiment that
# collects the runs created by this notebook.
TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "cross-validation-average"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the last expression so the cell displays the returned Experiment.
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='./mlruns/2', creation_time=1667382175272, experiment_id='2', last_update_time=1667382175272, lifecycle_stage='active', name='cross-validation-average', tags={}>

## Data preparation

In [15]:
# Load the breast-cancer dataset shipped with scikit-learn and pull out the
# feature matrix (X) and the label vector (y).
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

# Cell output: shapes of the features and of the labels.
(X.shape, y.shape)

((569, 30), (569,))

## Model training

In [55]:
# 5-fold stratified cross-validation. Every fold is trained and evaluated
# inside its own MLflow run, so the per-fold metrics can be averaged later
# from the tracking server.
skf = StratifiedKFold(n_splits=5, random_state=SEED, shuffle=True)

# Autologging is switched on once for the whole loop and switched off in
# `finally`, so a failing fold cannot leave it enabled for the rest of the
# kernel session.
mlflow.sklearn.autolog(exclusive=False)
try:
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        with mlflow.start_run():
            model = RandomForestClassifier(random_state=SEED)
            model.fit(X_train, y_train)

            # Store the raw labels/predictions of the held-out fold as a JSON
            # artifact; cast to int because numpy integers are not JSON
            # serialisable.
            y_pred = model.predict(X_test)
            mlflow.log_dict(
                {
                    "y_test": [int(x) for x in y_test],
                    "y_pred": [int(x) for x in y_pred],
                },
                "ytest-ypred.json",
            )

            test_acc = accuracy_score(y_test, y_pred)
            mlflow.log_metric("test_accuracy", test_acc)
            print("test_accuracy:", test_acc)

            test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(
                y_test,
                y_pred,
                average='binary'
            )
            mlflow.log_metric("test_precision", test_precision)
            mlflow.log_metric("test_recall", test_recall)
            mlflow.log_metric("test_f1_score", test_f1)

            # `labels=[0, 1]` forces a 2x2 matrix even if one class is missing
            # from a fold; without it `.ravel()` could return a single value
            # and the 4-way unpacking below would raise.
            tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
            mlflow.log_metric("tn", tn)
            mlflow.log_metric("fp", fp)
            mlflow.log_metric("fn", fn)
            mlflow.log_metric("tp", tp)

            # Row-normalised matrix (each true class sums to 1). Separate
            # names are used so the raw counts above are not overwritten.
            tn_norm, fp_norm, fn_norm, tp_norm = confusion_matrix(
                y_test, y_pred, labels=[0, 1], normalize="true"
            ).ravel()
            mlflow.log_metric("tn_normalized", tn_norm)
            mlflow.log_metric("fp_normalized", fp_norm)
            mlflow.log_metric("fn_normalized", fn_norm)
            mlflow.log_metric("tp_normalized", tp_norm)
finally:
    mlflow.sklearn.autolog(disable=True)

test_accuracy: 0.9649122807017544
test_accuracy: 0.9385964912280702
test_accuracy: 0.956140350877193
test_accuracy: 0.9473684210526315
test_accuracy: 0.9734513274336283


## Results

In [56]:
# Look the experiment up by name rather than by a hard-coded id: the numeric
# id is assigned by the tracking server and differs between servers.
experiment = mlflow.get_experiment_by_name("cross-validation-average")
if experiment is None:
    raise RuntimeError(
        'MLflow experiment "cross-validation-average" was not found on the tracking server'
    )

# One row per run. NOTE: no filter is applied, so runs from earlier
# executions of the training cell are returned too if they exist.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
runs.columns

Index(['run_id', 'experiment_id', 'status', 'artifact_uri', 'start_time',
       'end_time', 'metrics.test_recall', 'metrics.tn', 'metrics.tp',
       'metrics.training_precision_score', 'metrics.training_roc_auc_score',
       'metrics.test_f1_score', 'metrics.tn_normalized',
       'metrics.training_accuracy_score', 'metrics.test_accuracy',
       'metrics.test_precision', 'metrics.training_f1_score',
       'metrics.training_log_loss', 'metrics.fn_normalized',
       'metrics.tp_normalized', 'metrics.training_score', 'metrics.fp',
       'metrics.fp_normalized', 'metrics.fn', 'metrics.training_recall_score',
       'params.random_state', 'params.verbose', 'params.bootstrap',
       'params.criterion', 'params.ccp_alpha', 'params.min_samples_split',
       'params.warm_start', 'params.class_weight', 'params.oob_score',
       'params.n_estimators', 'params.max_depth', 'params.max_features',
       'params.max_samples', 'params.min_weight_fraction_leaf',
       'params.max_leaf_nodes'

In [57]:
# Raw confusion-matrix counts, one row per run.
count_columns = ["metrics.tn", "metrics.fp", "metrics.fn", "metrics.tp"]
runs[count_columns]

Unnamed: 0,metrics.tn,metrics.fp,metrics.fn,metrics.tp
0,40.0,2.0,1.0,70.0
1,42.0,0.0,6.0,66.0
2,38.0,4.0,1.0,71.0
3,37.0,6.0,1.0,70.0
4,42.0,1.0,3.0,68.0


In [58]:
# Average of each confusion-matrix count over all runs.
count_columns = ["metrics.tn", "metrics.fp", "metrics.fn", "metrics.tp"]
runs.loc[:, count_columns].mean()

metrics.tn    39.8
metrics.fp     2.6
metrics.fn     2.4
metrics.tp    69.0
dtype: float64

In [67]:
# Averaged confusion matrix, laid out as [[tn, fp], [fn, tp]].
# Computed from `runs` instead of pasting the numbers from the previous
# output, so it stays correct when the notebook is re-run.
count_columns = ["metrics.tn", "metrics.fp", "metrics.fn", "metrics.tp"]
runs[count_columns].mean().to_numpy().reshape(2, 2)

array([[39.8,  2.6],
       [ 2.4, 69. ]])

In [59]:
# Row-normalised confusion-matrix entries, one row per run.
normalized_columns = [
    "metrics.tn_normalized",
    "metrics.fp_normalized",
    "metrics.fn_normalized",
    "metrics.tp_normalized",
]
runs[normalized_columns]

Unnamed: 0,metrics.tn_normalized,metrics.fp_normalized,metrics.fn_normalized,metrics.tp_normalized
0,0.952381,0.047619,0.014085,0.985915
1,1.0,0.0,0.083333,0.916667
2,0.904762,0.095238,0.013889,0.986111
3,0.860465,0.139535,0.014085,0.985915
4,0.976744,0.023256,0.042254,0.957746


In [60]:
# Average of each normalised confusion-matrix entry over all runs.
normalized_columns = [
    "metrics.tn_normalized",
    "metrics.fp_normalized",
    "metrics.fn_normalized",
    "metrics.tp_normalized",
]
runs.loc[:, normalized_columns].mean()

metrics.tn_normalized    0.938870
metrics.fp_normalized    0.061130
metrics.fn_normalized    0.033529
metrics.tp_normalized    0.966471
dtype: float64

In [65]:
# Averaged normalised confusion matrix, laid out as [[tn, fp], [fn, tp]].
# Computed from `runs` instead of pasting the numbers from the previous
# output, so it stays correct when the notebook is re-run. Rounded to six
# decimals, the precision at which the per-metric means are displayed.
normalized_columns = [
    "metrics.tn_normalized",
    "metrics.fp_normalized",
    "metrics.fn_normalized",
    "metrics.tp_normalized",
]
runs[normalized_columns].mean().round(6).to_numpy().reshape(2, 2)

array([[0.93887 , 0.06113 ],
       [0.033529, 0.966471]])