# Train and Deploy XGBoost models via MLRun & Nuclio

In [None]:
!pip install mlrun
!pip install sklearn
!pip install xgboost
!pip install requests

In [1]:
import os
import numpy as np
from mlrun import new_model_server

## Acquire dataset

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def load_dataset(test_size=0.2):
    iris = load_iris()
    y = iris['target']
    X = iris['data']
    return train_test_split(X, y, test_size=test_size)
    
X_train, X_test, y_train, y_test = load_dataset(0.2)

## Define pipeline

In [3]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

def xgb_train(X_train,
              X_test,
              y_train,
              y_test,
              max_depth=6,
              num_class=3,
              eta=0.2,
              gamma=0.1,
              steps=20):

    # Get params from event
    param = {"max_depth": max_depth,
             "eta": eta, "nthread": 4,
             "num_class": num_class,
             "gamma": gamma,
             "objective": "multi:softprob"}
    
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test)

    # Train model
    print(f'Training model with params:\n{param}\n')
    xgb_model = xgb.train(param, dtrain, steps)
       
    preds = xgb_model.predict(dtest)
    best_preds = np.asarray([np.argmax(line) for line in preds])
    print(f'Model results:\n{float(accuracy_score(y_test, best_preds))}')
    
    return xgb_model

model = xgb_train(X_train, X_test, y_train, y_test)

Training model with params:
{'max_depth': 6, 'eta': 0.2, 'nthread': 4, 'num_class': 3, 'gamma': 0.1, 'objective': 'multi:softprob'}

Model results:
1.0


## Save the model

In [4]:
MODEL_PATH = '/User/models/iris_v2/model.xgboost'
MODEL_DIR = os.path.dirname(MODEL_PATH)

# Verify directory exists
os.makedirs(MODEL_DIR, exist_ok=True)

# Save model
model.save_model(MODEL_PATH)

## Deploy model

In [5]:
# Declare model server
fn = new_model_server('xgboost-server', 
                      models={'iris_v2': MODEL_PATH}, 
                      model_class='XGBoostModel',
                      image='zilbermanor/nuclio-serving-xgboost:latest',
                      protocol='general')
fn.with_v3io('User','~/')  # Add v3io mount
fn.spec.base_spec['spec']['handler'] = 'main:handler'
print(f'Function definition:\n{fn.to_dict()}\n')

# Deploy
addr = fn.deploy(project='nuclio-serving')

Function definition:
{'kind': 'remote', 'metadata': {'name': 'xgboost-server'}, 'spec': {'command': '', 'args': [], 'image': '', 'mode': '', 'description': '', 'volumes': [{'volume': {'flexVolume': {'driver': 'v3io/fuse', 'options': {'accessKey': '460ee949-eaaa-468a-ae26-5fdb9156996e', 'container': 'users', 'subPath': '/admin'}}, 'name': 'fs'}, 'volumeMount': {'name': 'fs', 'mountPath': 'User'}}], 'env': {'SERVING_MODEL_iris_v2': '/User/models/iris_v2/model.xgboost', 'TRANSPORT_PROTOCOL': 'general', 'ENABLE_EXPLAINER': 'False', 'MODEL_CLASS': 'XGBoostModel', 'V3IO_USERNAME': 'admin', 'V3IO_ACCESS_KEY': '460ee949-eaaa-468a-ae26-5fdb9156996e', 'V3IO_API': 'v3io-webapi.default-tenant.svc:8081'}, 'config': {'spec.triggers.http': {'kind': 'http', 'maxWorkers': 8, 'attributes': {'ingresses': {}}, 'annotations': {}}}, 'build_commands': [], 'base_spec': {'apiVersion': 'nuclio.io/v1', 'kind': 'Function', 'metadata': {'name': 'notebook', 'labels': {}, 'annotations': {}}, 'spec': {'runtime': 'pyt

## Test model

In [6]:
import requests

# Get sample event 
samples = X_train[:2]
print(f'Samples:\n{samples}\n')

# Seldon protocol event
event_seldon = {"data": {"ndarray": samples.tolist()}}
event_general = {"data": samples.tolist()}
even_tensorflow = {"instances": samples.tolist()}
event = event_general

csel = str(event).replace("\'", "\"")
print(f'Sent event:\n{csel}\n')

resp = requests.put(addr + '/predict/iris_v2', data=csel)
print(f'Prediction (Response):\n{resp.content}')

Samples:
[[4.6 3.2 1.4 0.2]
 [5.2 4.1 1.5 0.1]]

Sent event:
{"data": [[4.6, 3.2, 1.4, 0.2], [5.2, 4.1, 1.5, 0.1]]}

Prediction (Response):
b'{"predictions": [[0.973975419998169, 0.014415861107409, 0.011608744971454144], [0.973975419998169, 0.014415861107409, 0.011608744971454144]]}'
