In [1]:
import os
import pickle

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import mlflow
import git

# Data

In [2]:
# Location of the local pickle cache for the Iris dataset, relative to the
# notebook's working directory.
data_file_path = "../data/iris.pkl"

In [3]:
# Load the Iris dataset from the local pickle cache, or fetch it from
# scikit-learn and create the cache on first use.
if os.path.isfile(data_file_path):
    # NOTE(security): pickle.load can execute arbitrary code. The cache is
    # assumed to have been written by this notebook; do not point
    # data_file_path at a file from an untrusted source.
    with open(data_file_path, 'rb') as p:
        iris = pickle.load(p)
else:
    iris = datasets.load_iris()
    # A fresh checkout may not contain the data directory yet; create it so
    # the write below does not fail with FileNotFoundError.
    cache_dir = os.path.dirname(data_file_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    # Write to data_file_path rather than a second hard-coded literal so the
    # path checked above and the path written here cannot drift apart.
    with open(data_file_path, 'wb') as p:
        pickle.dump(iris, p, pickle.HIGHEST_PROTOCOL)

In [4]:
# Feature matrix and class labels taken from the loaded dataset object.
X = iris.data
y = iris.target

# MLflow tracking

In [5]:
# Send every run to the local tracking server, grouped under one experiment.
mlflow.set_tracking_uri(uri="http://localhost:5000")
mlflow.set_experiment(experiment_name="Iris classifier")
# Enable scikit-learn autologging for the estimator calls made below.
mlflow.sklearn.autolog()

# Find the enclosing git repository (searching parent directories) and record
# the hash of the commit HEAD points at, so each run can be tagged with it.
repo = git.Repo(search_parent_directories=True)
sha_commit = repo.head.object.hexsha



# Models

## Support vector machine

In [6]:
# Open the run for the SVM model, then label it with the code version and the
# method name in a single call.
mlflow.start_run()
mlflow.set_tags({
    'mlflow.source.git.commit': sha_commit,
    'method': 'SVM',
})

In [7]:
# Support vector classifier; gamma is set explicitly to 'scale'.
clf_svm = svm.SVC(gamma="scale")

In [8]:
# Train on the full dataset (no held-out split). This runs inside the MLflow
# run opened for the SVM, with scikit-learn autologging enabled.
clf_svm.fit(X, y)



In [9]:
# Score on the same X, y the model was fitted on, so this is training-set
# performance, not an estimate of accuracy on unseen data.
clf_svm.score(X, y)

0.9733333333333334

In [10]:
# Close the SVM run so the next model is logged to its own run.
mlflow.end_run()

## Logistic regression

In [11]:
# Open the run for the logistic-regression model, then label it with the code
# version and the method name in a single call.
mlflow.start_run()
mlflow.set_tags({
    'mlflow.source.git.commit': sha_commit,
    'method': 'Logistic regression',
})

In [12]:
# Feature scaler with default settings; used as the first pipeline step.
scaler = StandardScaler()

In [13]:
# Logistic-regression classifier with default settings; used as the final
# pipeline step.
logistic = LogisticRegression()

In [14]:
# Chain scaling and classification so the scaler is fitted together with the
# model whenever the pipeline is fitted.
clf_logit = Pipeline(
    steps=[
        ("scaler", scaler),
        ("logistic", logistic),
    ]
)

In [15]:
# Train the scaler + logistic-regression pipeline on the full dataset (no
# held-out split), inside the MLflow run opened for this model.
clf_logit.fit(X, y)

In [16]:
# Score on the same X, y the pipeline was fitted on, so this is training-set
# performance, not an estimate of accuracy on unseen data.
clf_logit.score(X, y)

0.9733333333333334

In [17]:
# Close the logistic-regression run so the next model is logged to its own run.
mlflow.end_run()

## Random forest

In [18]:
# Open the run for the random-forest model, then label it with the code
# version and the method name in a single call.
mlflow.start_run()
mlflow.set_tags({
    'mlflow.source.git.commit': sha_commit,
    'method': 'Random forest',
})

In [19]:
# Shallow random forest (trees limited to depth 2). random_state is fixed
# because the forest is built with randomness; without a seed the fitted
# model and its logged score change on every Restart-and-Run-All.
clf_rf = RandomForestClassifier(max_depth=2, random_state=42)

In [20]:
# Train on the full dataset (no held-out split), inside the MLflow run opened
# for the random forest.
clf_rf.fit(X, y)

In [21]:
# Score on the same X, y the forest was fitted on, so this is training-set
# performance, not an estimate of accuracy on unseen data.
clf_rf.score(X, y)

0.96

In [22]:
# Close the random-forest run.
mlflow.end_run()