# Train and Deploy SKLearn models via MLRun & Nuclio

In [1]:
import os
import numpy as np
from mlrun import new_model_server

## Acquire dataset

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def load_dataset(test_size=0.2):
    iris = load_iris()
    y = iris['target']
    X = iris['data']
    return train_test_split(X, y, test_size=test_size)
    
X_train, X_test, y_train, y_test = load_dataset(0.2)

## Define pipeline

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ('normalizer', StandardScaler()), #Step1 - normalize data
    ('clf', LogisticRegression()) #step2 - classifier
])
pipeline.steps

[('normalizer', StandardScaler(copy=True, with_mean=True, with_std=True)),
 ('clf',
  LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
            intercept_scaling=1, max_iter=100, multi_class='warn',
            n_jobs=None, penalty='l2', random_state=None, solver='warn',
            tol=0.0001, verbose=0, warm_start=False))]

## Train with hyperparams

In [4]:
from sklearn.model_selection import GridSearchCV

cv_grid = GridSearchCV(pipeline, param_grid = {
    'normalizer__with_mean' : [True, False],
    'clf__C' : np.linspace(0.1,1.2,12)
})

cv_grid.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('normalizer', StandardScaler(copy=True, with_mean=True, with_std=True)), ('clf', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'normalizer__with_mean': [True, False], 'clf__C': array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [5]:
from sklearn.metrics import accuracy_score

y_predict = cv_grid.predict(X_test)
accuracy = accuracy_score(y_test,y_predict)
print('Accuracy of the best classifier after CV is %.3f%%\n' % (accuracy*100))
print(f'Best classification pipeline:\n{cv_grid.best_estimator_}')

Accuracy of the best classifier after CV is 83.333%

Best classification pipeline:
Pipeline(memory=None,
     steps=[('normalizer', StandardScaler(copy=True, with_mean=False, with_std=True)), ('clf', LogisticRegression(C=0.4, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])


## Save best model

In [6]:
import joblib

MODEL_PATH = '/User/models/iris_v1/model.sklearn'
MODEL_DIR = os.path.dirname(MODEL_PATH)

# Verify directory exists
os.makedirs(MODEL_DIR, exist_ok=True)

# Save model
joblib.dump(cv_grid.best_estimator_, MODEL_PATH)

['/User/models/iris_v1/model.sklearn']

## Deploy model

In [7]:
# Declare model server
fn = new_model_server('sk-server', 
                      models={'iris_v1': MODEL_PATH}, 
                      model_class='SKModel',
                      image='zilbermanor/nuclio-serving-sk:latest',
                      protocol='general')
fn.with_v3io('User','~/')  # Add v3io mount
fn.spec.base_spec['spec']['handler'] = 'main:handler'
print(f'Function definition:\n{fn.to_dict()}\n')

# Deploy
addr = fn.deploy(project='nuclio-serving')

Function definition:
{'kind': 'remote', 'metadata': {'name': 'sk-server'}, 'spec': {'command': '', 'args': [], 'image': '', 'mode': '', 'description': '', 'volumes': [{'volume': {'flexVolume': {'driver': 'v3io/fuse', 'options': {'accessKey': '460ee949-eaaa-468a-ae26-5fdb9156996e', 'container': 'users', 'subPath': '/admin'}}, 'name': 'fs'}, 'volumeMount': {'name': 'fs', 'mountPath': 'User'}}], 'env': {'SERVING_MODEL_iris_v1': '/User/models/iris_v1/model.sklearn', 'TRANSPORT_PROTOCOL': 'general', 'ENABLE_EXPLAINER': 'False', 'MODEL_CLASS': 'SKModel', 'V3IO_USERNAME': 'admin', 'V3IO_ACCESS_KEY': '460ee949-eaaa-468a-ae26-5fdb9156996e', 'V3IO_API': 'v3io-webapi.default-tenant.svc:8081'}, 'config': {'spec.triggers.http': {'kind': 'http', 'maxWorkers': 8, 'attributes': {'ingresses': {}}, 'annotations': {}}}, 'build_commands': [], 'base_spec': {'apiVersion': 'nuclio.io/v1', 'kind': 'Function', 'metadata': {'name': 'notebook', 'labels': {}, 'annotations': {}}, 'spec': {'runtime': 'python:3.6', 

## Test model

In [8]:
import requests

# Get sample event 
samples = X_train[:2]
print(f'Samples:\n{samples}\n')

# Seldon protocol event
event_seldon = {"data": {"ndarray": samples.tolist()}}
event_general = {"data": samples.tolist()}
even_tensorflow = {"instances": samples.tolist()}
event = event_general

csel = str(event).replace("\'", "\"")
print(f'Sent event:\n{csel}\n')

resp = requests.put(addr + '/predict/iris_v1', data=csel)
print(f'Prediction (Response):\n{resp.content}')

Samples:
[[6.4 2.8 5.6 2.2]
 [5.2 3.4 1.4 0.2]]

Sent event:
{"data": [[6.4, 2.8, 5.6, 2.2], [5.2, 3.4, 1.4, 0.2]]}

Prediction (Response):
b'{"predictions": [2, 0]}'
