In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import mlflow
from mlflow.tracking import MlflowClient
import pickle

In [2]:
# mlflow configuration
remote_server_uri = "http://52.67.153.88:5000/" # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)
mlflow.set_experiment("cd4ml")
tags={'Project': 'aws-cd4ml',
       'Env': 'experimentation'}
mlflow.set_tags(tags)

mlflow.sklearn.autolog() # enabling autolog

In [3]:
# data load
inputFolder = './data/'
df = pd.read_csv(inputFolder + 'iris.csv')
print(df.head())

X = df.loc[:, df.columns != 'variety']
y = df['variety']

# data split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.25)


# modeling
model = RandomForestClassifier()
model.fit(X_train, y_train)
# print score
print("Model score",model.score(X_train,y_train))

   sepal.length  sepal.width  petal.length  petal.width variety
0           5.1          3.5           1.4          0.2  Setosa
1           4.9          3.0           1.4          0.2  Setosa
2           4.7          3.2           1.3          0.2  Setosa
3           4.6          3.1           1.5          0.2  Setosa
4           5.0          3.6           1.4          0.2  Setosa


2021/04/04 01:08:01 ERROR mlflow.utils.rest_utils: API request to http://52.67.153.88:5000/api/2.0/mlflow/runs/log-model failed with code 500 != 200, retrying up to 2 more times. API response body: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>

2021/04/04 01:08:04 ERROR mlflow.utils.rest_utils: API request to http://52.67.153.88:5000/api/2.0/mlflow/runs/log-model failed with code 500 != 200, retrying up to 1 more times. API response body: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>

2021/04/04 01:08:09 ER

Model score 1.0


In [4]:
# predict X_test data
predictions = model.predict(X_test)

# scoring
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

# saving model (local)
#pickle.dump(model, open('./output/randomforest_model.pkl', 'wb'))

0.9736842105263158
[[13  0  0]
 [ 0 14  0]
 [ 0  1 10]]
              precision    recall  f1-score   support

      Setosa       1.00      1.00      1.00        13
  Versicolor       0.93      1.00      0.97        14
   Virginica       1.00      0.91      0.95        11

    accuracy                           0.97        38
   macro avg       0.98      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



In [5]:
# mlflow magic!
# traking parameters

mv = mlflow.register_model(remote_server_uri, "RFClassifierModel")
print("Name: {}".format(mv.name))
print("Version: {}".format(mv.version))

Registered model 'RFClassifierModel' already exists. Creating a new version of this model...
2021/04/04 01:08:12 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: RFClassifierModel, version 14


Name: RFClassifierModel
Version: 14


Created version '14' of model 'RFClassifierModel'.


In [6]:
from pprint import pprint
client = MlflowClient()
for rm in client.list_registered_models():
    pprint(dict(rm),indent=4)

{   'creation_timestamp': 1617499602822,
    'description': '',
    'last_updated_timestamp': 1617509292927,
    'latest_versions': [   <ModelVersion: creation_timestamp=1617509292927, current_stage='None', description='', last_updated_timestamp=1617509292927, name='RFClassifierModel', run_id='', run_link='', source='http://52.67.153.88:5000/', status='READY', status_message='', tags={}, user_id='', version='14'>,
                           <ModelVersion: creation_timestamp=1617503243811, current_stage='Archived', description='', last_updated_timestamp=1617507539574, name='RFClassifierModel', run_id='', run_link='', source='http://52.67.153.88:5000/', status='READY', status_message='', tags={}, user_id='', version='7'>,
                           <ModelVersion: creation_timestamp=1617507365765, current_stage='Production', description='', last_updated_timestamp=1617507539574, name='RFClassifierModel', run_id='ca27eeefbfeb41b2b6301cf7bf6fccdc', run_link='', source='s3://cd4ml-bucket-01/m

In [7]:
# get model in production information
client = MlflowClient()
prod_model = client.get_latest_versions(name = "RFClassifierModel",
                           stages = ["Production"])

print("description: "+ dict(prod_model[0])["description"],
     "name: " + dict(prod_model[0])["name"],
     "run_id: " + dict(prod_model[0])["run_id"],
     "version: " + dict(prod_model[0])["version"],sep="\n")

description: 
name: RFClassifierModel
run_id: ca27eeefbfeb41b2b6301cf7bf6fccdc
version: 13


In [8]:
loaded_model = mlflow.sklearn.load_model(dict(prod_model[0])["source"])

In [9]:
mlflow.end_run()