In [None]:
import itertools
import matplotlib.pyplot as plt
import mlflow
from mlflow import log_param, log_metric, log_artifact
from mlflow.sklearn import log_model, save_model
from mlflow.tracking import MlflowClient
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, f1_score, make_scorer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

# Settings

In [None]:
# Point MLflow at the tracking server and resolve the experiment to log into.

EXPERIMENT_NAME = 'Baseline_LogReg'
MLFLOW_TRACKING_URI = 'http://mlpanel:5000'

# Configure the module-level (fluent) API and an explicit client with the
# same server address.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# Look the experiment up once and create it only when the lookup finds
# nothing. create_experiment returns the new experiment's id, so no second
# lookup is needed.
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = client.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

# Load dataset

In [None]:
# Read the iris CSV; the path is relative to the notebook's working directory.
dataset = pd.read_csv('../data/iris.csv')

# Print the (rows, columns) shape, then show the first rows as the cell output.
print(dataset.shape)
dataset.head()

In [None]:
# feature names: labels of the first four columns of the table
feature_names = dataset.columns[:4].tolist()
feature_names

In [None]:
# iris species: distinct values of the 'species' column, in order of first appearance
species = pd.unique(dataset['species']).tolist()
species

# Experiment 1 - Baseline_LogReg

## Feature engineering

In [None]:
# Overwrite each species name with its integer class code, one label at a time.
for species_name, species_code in (('setosa', 0), ('versicolor', 1), ('virginica', 2)):
    dataset.loc[dataset['species'] == species_name, 'species'] = species_code

In [None]:
# Preview the first rows again, now that 'species' holds integer codes instead of names.
dataset.head()

## Split into train/test sets

In [None]:
# Fraction of rows held out for evaluation.
test_size = 0.2

# The fixed random_state makes the split reproducible across runs.
train_dataset, test_dataset = train_test_split(
    dataset,
    test_size=test_size,
    random_state=42,
)
(train_dataset.shape, test_dataset.shape)

## Train

In [None]:
# Get X and Y
# Target: the integer-encoded species column of the training split.
y = train_dataset.loc[:, 'species'].astype(int)
# Features: every remaining column, cast to float. The `columns=` keyword is
# used because pandas 2.0 no longer accepts `axis` as a positional argument
# of DataFrame.drop.
X = train_dataset.drop(columns='species').astype(float)
print(X.shape)

X.head()

In [None]:
# Fit Logistic Regression Classifier estimator

# NOTE(review): recent scikit-learn releases report `multi_class` as
# deprecated — confirm against the installed version before upgrading.
clf = LogisticRegression(C=0.001, solver='lbfgs', multi_class='multinomial', max_iter=100, random_state=0)
clf.fit(X, y)

# Persist the fitted model to a local directory next to the notebook.
# Only MLflow's own error is caught here: save_model refuses to write into an
# already populated model directory. Any other failure (for instance a
# misspelled function name) now surfaces instead of being reported as
# "already exists".
try:
    save_model(clf, path=f'{EXPERIMENT_NAME}_model')
except mlflow.exceptions.MlflowException:
    print(f'Model {EXPERIMENT_NAME}_model ALREADY EXIST')

## Evaluate model

In [None]:
# Hold-out target and features, built the same way as the training X / y.
y_test = test_dataset.loc[:, 'species'].astype(int)
# The `columns=` keyword is used because pandas 2.0 no longer accepts `axis`
# as a positional argument of DataFrame.drop.
X_test = test_dataset.drop(columns='species').astype(float)

# Predicted class labels for the test rows (labels, not probabilities, despite the name).
scores = clf.predict(X_test)

# Macro average: unweighted mean of the per-class F1 scores.
f1 = f1_score(y_true=y_test, y_pred=scores, average='macro')
# f1 score value
f1

In [None]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Draw a confusion matrix as an annotated heat map on a new pyplot figure.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (rows are indexed by true label, columns by predicted label)

    target_names: class names used as tick labels, for example
                  ['high', 'medium', 'low']; pass None to keep the default
                  numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues; 'Blues' is used when None

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions within each row (a row with
                  no samples is shown as zeros instead of NaN)

    Returns
    -------
    The matplotlib.pyplot module the figure was drawn with, so the caller can
    continue with plt.savefig(...) or plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    cm = np.asarray(cm)

    # Accuracy is always computed from the raw counts, whatever is displayed.
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Decide what is displayed *before* drawing, so that the cell colours, the
    # printed numbers and the text-contrast threshold all use the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row without samples contains only zeros; dividing it by 1 keeps
        # it at zero instead of producing NaN and a RuntimeWarning.
        row_totals[row_totals == 0] = 1
        shown = cm.astype('float') / row_totals
        cell_format = "{:0.4f}"
        thresh = shown.max() / 1.5
    else:
        shown = cm
        cell_format = "{:,}"
        thresh = shown.max() / 2

    plt.figure(figsize=(8, 6))
    plt.imshow(shown, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Annotate every cell; white text on dark cells, black text on light ones.
    for i, j in itertools.product(range(shown.shape[0]), range(shown.shape[1])):
        plt.text(j, i, cell_format.format(shown[i, j]),
                 horizontalalignment="center",
                 color="white" if shown[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Fit the layout only after the axis labels exist so they are not clipped.
    plt.tight_layout()

    return plt

In [None]:
# confusion_matrix expects (y_true, y_pred). Keywords keep the true labels on
# the rows, which is what the plot's 'True label' / 'Predicted label' axes assume.
cm = confusion_matrix(y_true=y_test, y_pred=scores)

# plot_confusion_matrix returns the pyplot module it drew with, so the
# imported `plt` name does not need to be rebound before saving the figure.
plot_confusion_matrix(cm, species, normalize=False)
plt.savefig(f'{EXPERIMENT_NAME}_confusion_matrix.svg')

# Log metrics and model to MLPanel

In [None]:
# Record the parameters, metric, confusion-matrix image and fitted model of
# this experiment in a single MLflow run.
with mlflow.start_run(experiment_id=experiment_id) as run:

    print(run)
    print(run.info)
    print(run.info.run_uuid)

    # Read the hyper-parameters back from the estimator itself so the logged
    # values cannot drift from what the model was actually built with.
    log_param(key='C', value=clf.C)
    log_param(key='multi_class', value=clf.multi_class)
    log_param(key='max_iter', value=clf.max_iter)

    log_metric(key='f1_score', value=f1)
    # The SVG must already have been written by the confusion-matrix cell.
    log_artifact(local_path=f'{EXPERIMENT_NAME}_confusion_matrix.svg')
    log_model(clf, artifact_path=f'{EXPERIMENT_NAME}_model')