# Server Model

#### Please be sure to run this once:

In [2]:
## ADDED ##
# !pip uninstall -y mlrun
# !pip install -U git+https://github.com/mlrun/mlrun.git#development

# !pip install -U tensorflow==1.14.0 keras sklearn pandas numpy

In [3]:
## ADDED ##
# !pip install -U kfp joblib

In [4]:
# nuclio: ignore
import nuclio

In the following cell add '-c' to the ```%%nuclio cmd``` and that will prevent the notebook from immediately installing these packages, delaying until the build step.

Also please note, I use the package ```joblib``` instead of ```pickle```. Both have portability issues.

In [5]:
%%nuclio cmd -c
pip install -U pandas numpy sklearn kfp tensorflow==1.15 joblib
pip install --upgrade git+https://github.com/fchollet/keras
pip install mlrun

In [6]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


In [7]:
import warnings
warnings.filterwarnings('ignore')

In [8]:
import time
import pandas as pd
import numpy as np
import seaborn as sn
import keras
from keras.models import Sequential
from keras.layers import Dense

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

import matplotlib.pyplot as plt

from mlrun.artifacts import TableArtifact, PlotArtifact

Using TensorFlow backend.


In [13]:
def data_generator(context, target=''):
    data=pd.read_csv('/User/dell-usecases/Credit_card_fraud_detection/creditcard.csv')
    context.logger.info('saving credit card dataframe to {}'.format(target))
    context.log_artifact(TableArtifact('Credit_card_dataset', df=Credit_card_dataset, target_path=target))

In [73]:
def Credit_train(context,dataset='',model_name='model.bst'):
    data = pd.read_csv('/User/dell-usecases/Credit_card_fraud_detection/creditcard.csv')
    corr = data.corr()
    data=data.drop(data.index[len(data)-1])
    data['normalizedAmount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1,1))
    data = data.drop(['Amount'],axis=1)
    data = data.drop(['Time'],axis=1)
    # split data into X and y
    X = data.iloc[:, data.columns != 'Class']
    y = data.iloc[:, data.columns == 'Class']
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state=0)
    classifier = Sequential()
    classifier.add(Dense(units =15 , kernel_initializer = 'uniform', activation = 'relu', input_dim = 29))
    classifier.add(Dense(units = 15, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    time0=time.time()
    # Train model
    classifier.fit(X_train, y_train, batch_size = 32, epochs = 23)
    y_pred = classifier.predict(X_test)
    y_pred = (y_pred > 0.5)
    score = classifier.evaluate(X_test, y_test)
    cm = confusion_matrix(y_test, y_pred)
    df_cm = pd.DataFrame(cm, index = (0, 1), columns = (0, 1))
    # log results and artifacts
    context.log_result("Total training time:", round(time.time()-time0, 3))
    context.log_result("score is",score)
    context.log_result("classification report is",classification_report(y_test, y_pred))
    context.log_result("Test Data Accuracy", accuracy_score(y_test, y_pred))

In [74]:
# nuclio: end-code
# marks the end of a code section

In [75]:
from mlrun import new_function, code_to_function, NewTask, mount_v3io, new_model_server, mlconf, get_run_db
# mlconf.dbpath = 'http://mlrun-db:8080'
mlconf.dbpath = '/User/Credit_run1/mlrun'

In [76]:
df_path = '/User/Credit_run1/mlrun/df.csv'
gen = new_function().run(name='Credit_gen', handler=data_generator, params={'target': df_path})

[mlrun] 2019-12-08 07:14:38,618 starting run Credit_gen uid=89c6fcd15ffb4b339470ba8211c3b6e1  -> /User/Credit_run1/mlrun
[mlrun] 2019-12-08 07:14:40,947 saving credit card dataframe to /User/Credit_run1/mlrun/df.csv



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...c3b6e1,0,Dec 08 07:14:38,completed,Credit_gen,kind=handlerowner=iguaziohost=jupyter-p3uansv432-142vj-69f44548ff-k45xg,,target=/User/Credit_run1/mlrun/df.csv,,Credit_card_dataset


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 89c6fcd15ffb4b339470ba8211c3b6e1 
[mlrun] 2019-12-08 07:14:53,590 run executed, status=completed


In [77]:
# create a task and test our function locally with multiple parameters
parameters={}
task = NewTask(handler=Credit_train, out_path='/User/Credit_run1/mlrun/data', inputs={'dataset': df_path}).with_hyper_params(parameters, 'max.accuracy')

In [78]:
run = new_function().run(task)
 

[mlrun] 2019-12-08 07:15:15,217 starting run Credit_train uid=c88d1451ecf4485a99ea999a10a48c29  -> /User/Credit_run1/mlrun
Epoch 1/23
Epoch 2/23
Epoch 3/23
Epoch 4/23
Epoch 5/23
Epoch 6/23
Epoch 7/23
Epoch 8/23
Epoch 9/23
Epoch 10/23
Epoch 11/23
Epoch 12/23
Epoch 13/23
Epoch 14/23
Epoch 15/23
Epoch 16/23
Epoch 17/23
Epoch 18/23
Epoch 19/23
Epoch 20/23
Epoch 21/23
Epoch 22/23
Epoch 23/23



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...a48c29,0,Dec 08 07:15:15,completed,Credit_train,kind=handlerowner=iguaziohost=jupyter-p3uansv432-142vj-69f44548ff-k45xg,dataset,,"Total training time:=295.014score is=[0.004002935482946554, 0.9992275238037109]classification report is= precision recall f1-score support\n\n 0 1.00 1.00 1.00 85294\n 1 0.89 0.63 0.74 148\n\n micro avg 1.00 1.00 1.00 85442\n macro avg 0.95 0.81 0.87 85442\nweighted avg 1.00 1.00 1.00 85442\nTest Data Accuracy=0.999227546171672",


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid c88d1451ecf4485a99ea999a10a48c29 
[mlrun] 2019-12-08 07:20:13,361 run executed, status=completed


In [79]:
# create the function from the notebook code + annotations, add volumes and parallel HTTP trigger. this is the function name
Creditfn = code_to_function('Credit-sql', runtime='nuclio:mlrun')
Creditfn.add_volume('User','~/').with_http(workers=4).with_v3io()

<mlrun.runtimes.function.RemoteRuntime at 0x7fc14c110f98>

In [80]:
# deploy the function to the cluster
Creditfn.deploy(project='Credit-demo')

[mlrun] 2019-12-08 07:26:51,978 deploy started


DeployError: failed to create project Credit-demo

In [None]:
import kfp
from kfp import dsl

In [None]:
artifacts_path = 'v3io:///users/admin/Credit_run1/mlrun/kfp/{{workflow.uid}}/'

In [None]:
@dsl.pipeline(
    name='My Credit card  training pipeline',
    description='Shows how to use mlrun.'
)
def Credit_pipeline(
   eta = [], gamma = []
):

    ingest = Creditfn.as_step(name='ingest_Credit', handler='data_generator',
                          params = {'target': df_path},
                          outputs=['Credit_dataset'], out_path=artifacts_path).apply(mount_v3io())

    
    train = Creditfn.as_step(name='Credit_train', handler='Credit_train',
                          selector='max.accuracy',
                          inputs = {'dataset': ingest.outputs['Credit_dataset']}, 
                          outputs=['model'], out_path=artifacts_path).apply(mount_v3io())

    
    # define a nuclio-serving functions, generated from a notebook file
    srvfn = new_model_server('Credit-serving', model_class='RNNModel', filename='credit_nuclio_serving.ipynb')
    
    # deploy the model serving function with inputs from the training stage
    deploy = srvfn.with_v3io('User','~/').deploy_step(project = 'Credit-demo', models={'Credit_v1': train.outputs['model']})

In [None]:
# for debug generate the pipeline dsl
#kfp.compiler.Compiler().compile(xgb_pipeline, 'mlrunpipe.yaml')

In [None]:
client = kfp.Client(namespace='default-tenant')
run_result = client.create_run_from_pipeline_func(Credit_pipeline, run_name='Credit-run1', experiment_name='Credit-demo')

In [None]:
# connect to the run db 
db = get_run_db().connect()

In [None]:
# query the DB with filter on workflow ID (only show this workflow) 
db.list_runs('', labels=f'workflow={run_result.run_id}').show()

In [None]:
# use this to supress XGB FutureWarning
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)