In [7]:
!pip install -U scikit-learn
!pip install verta
!pip install Minio
!pip install pandas
!pip install alibi
!pip install dill



In [8]:
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import joblib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from datetime import datetime

from minio import Minio
from minio.error import ResponseError
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

from verta import Client
import dill
import verta.integrations.sklearn

from alibi.explainers import AnchorTabular

# Common Functions - Will be supplied via package
def get_s3_server():
    minioClient = Minio('minio-ml-workshop:9000',
                    access_key='minio',
                    secret_key='minio123',
                    secure=False)

    return minioClient

def record_metrics(classifier, expereiment_name, accuracy_score, hyperparameters):
    client = Client("http://chart-1603715522-webapp:3000")
    proj = client.set_project("HDFC DEmo PRoject")
    client.set_experiment("Iris Classifier")
    run = client.set_experiment_run(expereiment_name)
    
    for key, value in hyperparameters.items():
        run.log_hyperparameters({key : value})
    
    run.log_metric('accuracy', accuracy_score)
    run.log_tags([classifier])

def visualise_iris_data(iris):
    ''' 0 == setosa 
        1 == versicolor
        2 == viginica '''
    #     print(iris)
    
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    print(df.describe())
    df["target"] = iris.target
    print(df.head(500))

In [9]:
# Get Experiment ID
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d%Y%H%M%S%f")
experiment_id = 'IrisScikit' + timestampStr


In [10]:
def train_and_save_model():
#     model = LogisticRegression()
    model = RandomForestClassifier(n_estimators=10)
    
    print('Training model...')
    model.fit(X1, y1)
    print('Model trained!')
    
    accuracy = model.score(X2, y2)
    print("Accuracy: %.2f%%" % (accuracy*100.0))   
#     record_metrics('LogisticREgression', experiment_id, accuracy, {'regularisation': 0.5})
    # record_metrics('RandomForrests', experiment_id, accuracy, {'n_estimators': 10})                                   
    

    filename_p = 'IrisClassifier.sav'
    print('Saving model in %s' % filename_p)
    joblib.dump(model, filename_p)
    print('Model saved!')
    return model

In [11]:
def explain_model(feature_names, model, X_train, X_test_record):
    predict_fn = lambda x: model.predict_proba(x)
    explainer = AnchorTabular(predict_fn, feature_names)
    explainer.fit(X_train, disc_perc=[25, 50, 75])
    explanation = explainer.explain(X_test_record, threshold=0.80)
    print('Anchor: %s' % explanation['anchor'])
    print('Precision: %.2f' % explanation['precision'])
    print('Coverage: %.2f' % explanation['coverage'])
    return explainer

In [14]:
# if __name__ == "__main__":
print('Loading iris data set...')
iris = datasets.load_iris()
feature_names = iris.feature_names
print(feature_names)
X, y = iris.data, iris.target
X1, X2, y1, y2 = train_test_split(X, y, random_state=0, train_size=0.6)
print(X1)

print('Dataset loaded!')
model = train_and_save_model()
explainer = explain_model(feature_names, model, X1, X2[0])
with open("IrisClassifierExapliner.dill", "wb") as x_f:
    dill.dump(explainer, x_f)

Loading iris data set...
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
[[6.  3.4 4.5 1.6]
 [4.8 3.1 1.6 0.2]
 [5.8 2.7 5.1 1.9]
 [5.6 2.7 4.2 1.3]
 [5.6 2.9 3.6 1.3]
 [5.5 2.5 4.  1.3]
 [6.1 3.  4.6 1.4]
 [7.2 3.2 6.  1.8]
 [5.3 3.7 1.5 0.2]
 [4.3 3.  1.1 0.1]
 [6.4 2.7 5.3 1.9]
 [5.7 3.  4.2 1.2]
 [5.4 3.4 1.7 0.2]
 [5.7 4.4 1.5 0.4]
 [6.9 3.1 4.9 1.5]
 [4.6 3.1 1.5 0.2]
 [5.9 3.  5.1 1.8]
 [5.1 2.5 3.  1.1]
 [4.6 3.4 1.4 0.3]
 [6.2 2.2 4.5 1.5]
 [7.2 3.6 6.1 2.5]
 [5.7 2.9 4.2 1.3]
 [4.8 3.  1.4 0.1]
 [7.1 3.  5.9 2.1]
 [6.9 3.2 5.7 2.3]
 [6.5 3.  5.8 2.2]
 [6.4 2.8 5.6 2.1]
 [5.1 3.8 1.6 0.2]
 [4.8 3.4 1.6 0.2]
 [6.5 3.2 5.1 2. ]
 [6.7 3.3 5.7 2.1]
 [4.5 2.3 1.3 0.3]
 [6.2 3.4 5.4 2.3]
 [4.9 3.  1.4 0.2]
 [5.7 2.5 5.  2. ]
 [6.9 3.1 5.4 2.1]
 [4.4 3.2 1.3 0.2]
 [5.  3.6 1.4 0.2]
 [7.2 3.  5.8 1.6]
 [5.1 3.5 1.4 0.3]
 [4.4 3.  1.3 0.2]
 [5.4 3.9 1.7 0.4]
 [5.5 2.3 4.  1.3]
 [6.8 3.2 5.9 2.3]
 [7.6 3.  6.6 2.1]
 [5.1 3.5 1.4 0.2]
 [4.9 3.1 1.5 0.2]

AttributeError: 'numpy.ndarray' object has no attribute 'info'

In [None]:
minioClient = get_s3_server()
minioClient.fput_object(bucket_name='models', object_name=experiment_id  +'/IrisClassifier.sav' , file_path='./IrisClassifier.sav')