# E2E Serverless ML pipeline  - Ingest, Train, Auto Deploy Model
  --------------------------------------------------------------------


In [21]:
# nuclio: ignore
import nuclio

# Change Registry

In [22]:
%run set_env.ipynb

# NetApp volume mounts definition

In [23]:
# Mount entry and the matching PVC-backed volume for the NetApp share.
# Both are keyed by the same volume name, 'nfs-pvc'.
netapp_volume_mounts = dict(mountPath=NETAPP_MOUNT_PATH, name='nfs-pvc')
netapp_volumes = dict(
    name='nfs-pvc',
    persistentVolumeClaim=dict(claimName=NETAPP_PVC_CLAIM),
)

In [24]:
from mlrun import new_function, code_to_function, NewTask, v3io_cred, new_model_server, mlconf, get_run_db, mount_v3io
# Set the MLRun DB path to the mlrun-api service endpoint.
# For a local DB path use 'User/mlrun' instead.
mlconf.dbpath = 'http://mlrun-api:8080'

<a id="pipeline"></a>
______________________________________________
# Create a multi-stage KubeFlow Pipeline from our notebooks

In [25]:
import kfp
from kfp import dsl
from mlrun import new_project
from kubernetes import client as k8sc

**define the artifacts output path**
The pipeline outputs will be written to the artifacts path directory. The path can be a file path (requires volume mounts) or an object path (v3io://, s3://, ..).

If we specify `{{workflow.uid}}` in the path, it will be replaced with the actual workflow ID; this way every workflow run stores its artifacts in a unique location for reproducibility.

In [26]:
# Table names shared by the pipeline steps.
FEATURES_TABLE = "netops_features_parquet"
PREDICTIONS_TABLE = 'netops_predictions'
metrics_table = "netops_metrics_parquet"
# Lower-case alias carrying the same value as FEATURES_TABLE.
features_table = FEATURES_TABLE

# Target location name for saved models.
SAVE_TO = "models"

# Define function to execute NetApp volume snap

In [27]:
import copy
# Build the NetApp snapshot job once; the other functions in this notebook
# reuse its first volume mount / volume entry.
snapfn = code_to_function(
    'snap',
    project='NetApp',
    kind='job',
    filename="snap.ipynb",
).apply(mount_v3io())
# Alternative kept for reference: .apply(mount_pvc('nfsvol', NETAPP_PVC_CLAIM, NETAPP_MOUNT_PATH))
snapfn.spec.image = docker_registry + '/netapp/pipeline:latest'

# Keep the first existing mount/volume (presumably the one added by
# mount_v3io() -- confirm) and add the NetApp PVC next to it.
snapfn.spec.volume_mounts = [
    snapfn.spec.volume_mounts[0],
    netapp_volume_mounts,
]
snapfn.spec.volumes = [
    snapfn.spec.volumes[0],
    netapp_volumes,
]

In [28]:
#print(snapfn.to_yaml())

# Define function to explore feature metrics
We retrieve the function code from a Git repo

In [29]:
# Job that runs the `describe` code from the local file describe.py.
describefn = code_to_function(
    name='describe',
    project='NetApp',
    kind='job',
    filename='describe.py',
).apply(mount_v3io())
describefn.spec.image = docker_registry + '/iguazio/netapp'

### Mount NetApp volume
# The first mount/volume entry is taken from snapfn, so snapfn must already be defined.
describefn.spec.volume_mounts = [
    snapfn.spec.volume_mounts[0],
    netapp_volume_mounts,
]
describefn.spec.volumes = [
    snapfn.spec.volumes[0],
    netapp_volumes,
]

## Build functions for model training pipeline

In [30]:
# Build one MLRun job per pipeline stage from its notebook.
# Each function gets its own name: deploy_inference previously reused
# name='training', which made two different functions share one name
# in the 'NetApp' project.
data_prep = code_to_function(project='NetApp', kind='job', name='data-prep',
                             filename='data-prep.ipynb').apply(mount_v3io())
data_prep.spec.image = docker_registry + '/iguazio/netapp'

training = code_to_function(project='NetApp', kind='job', name='training',
                            filename='training.ipynb').apply(mount_v3io())
training.spec.image = docker_registry + '/iguazio/netapp'

deploy_inference = code_to_function(project='NetApp', kind='job', name='deploy-inference',
                                    filename='deploy-inference-function.ipynb').apply(mount_v3io())
deploy_inference.spec.image = docker_registry + '/iguazio/netapp'


### Mount NetApp volume
# Every function gets the same pair: the first mount/volume entry taken from
# snapfn plus the NetApp PVC. A fresh list is built per function.
for _fn in (data_prep, training, deploy_inference):
    _fn.spec.volume_mounts = [snapfn.spec.volume_mounts[0], netapp_volume_mounts]
    _fn.spec.volumes = [snapfn.spec.volumes[0], netapp_volumes]


#.apply(mount_v3io())
#.apply(mount_v3io(name='bigdata', remote='bigdata/',mount_path='/v3io/bigdata'))

In [31]:
#print(training.to_yaml())

In [32]:
# Parameters handed to the data-prep, training and deploy steps.
params = dict(
    # table names and output location
    FEATURES_TABLE=FEATURES_TABLE,
    SAVE_TO=SAVE_TO,
    metrics_table=metrics_table,
    FROM_TSDB=0,
    PREDICTIONS_TABLE=PREDICTIONS_TABLE,
    # training settings
    TRAIN_ON_LAST='1d',
    TRAIN_SIZE=0.7,
    NUMBER_OF_SHARDS=4,
    MODEL_FILENAME='netops.v3.model.pickle',
    # deployment settings
    APP_DIR=APP_DIR,
    FUNCTION_NAME='netops-inference',
    PROJECT_NAME='netops',
    # NetApp / Iguazio environment values (not assigned in this notebook's
    # visible cells -- presumably provided by set_env.ipynb)
    NETAPP_SIM=NETAPP_SIM,
    NETAPP_MOUNT_PATH=NETAPP_MOUNT_PATH,
    NETAPP_PVC_CLAIM=NETAPP_PVC_CLAIM,
    IGZ_CONTAINER_PATH=IGZ_CONTAINER_PATH,
    IGZ_MOUNT_PATH=IGZ_MOUNT_PATH,
)

**define a 5-step workflow: snapshot, describe, data-prep, train, deploy**

In [33]:
@dsl.pipeline(
    name='NetOps trainign pipeline with NetApp volume cloning',
    description='snap volume before training '
)
def xgb_pipeline(
   ontapClusterMgmtHostname = '',
   ontapClusterAdminUsername = '',
   ontapClusterAdminPassword = '',
   sourceVolumeName = ''
):
    # Workflow order: snap -> (describe, data-prep); data-prep -> xgb_train -> deploy-model.
    # The four pipeline arguments are forwarded unchanged to the 'snap' step.
    #
    # NOTE(review): `artifacts_path` is not assigned in the visible cells of this
    # notebook -- presumably it comes from set_env.ipynb; confirm before running.
    # NOTE(review): the ONTAP admin password travels as a plain pipeline
    # parameter; consider passing it through a secret instead.

    snap_params = {'ontapClusterMgmtHostname': ontapClusterMgmtHostname,
                   'ontapClusterAdminUsername': ontapClusterAdminUsername,
                   'ontapClusterAdminPassword': ontapClusterAdminPassword,
                   "FEATURES_TABLE": FEATURES_TABLE,
                   'sourceVolumeName': sourceVolumeName,
                   'NETAPP_SIM': NETAPP_SIM,
                   'NETAPP_MOUNT_PATH': NETAPP_MOUNT_PATH}

    # The step must be bound to `snap`: the later steps read snap.outputs[...]
    # and order themselves with .after(snap).
    snap = snapfn.as_step(NewTask(handler='handler', params=snap_params),
                          name='snap',
                          outputs=['snapVolumeDetails', 'training_parquet_file']).apply(mount_v3io())

    # Describe the parquet file produced by the snap step.
    describe = describefn.as_step(name='describe', handler="describe",
                                  params={"key": "summary", "label_column": "is_error", 'class_labels': [0, 1]},
                                  inputs={"table": snap.outputs['training_parquet_file']},
                                  out_path=artifacts_path).apply(mount_v3io()).after(snap)

    # Data preparation receives the snap step's 'snapVolumeDetails' output as DATA_DIR.
    prep = data_prep.as_step(name='data-prep', handler='handler', params=params,
                             inputs={'DATA_DIR': snap.outputs['snapVolumeDetails']},
                             out_path=artifacts_path).apply(mount_v3io()).after(snap)

    train = training.as_step(name='xgb_train', handler='handler', params=params,
                             out_path=artifacts_path).apply(mount_v3io()).after(prep)

    deploy = deploy_inference.as_step(name='deploy-model', handler='handler', params=params,
                                      out_path=artifacts_path).apply(mount_v3io()).after(train)



#### Create a KubeFlow client and submit the pipeline with parameters

In [34]:
# For debugging: compile the pipeline function into 'mlrunpipe.yaml'
# so the generated pipeline definition can be inspected.
kfp.compiler.Compiler().compile(xgb_pipeline, 'mlrunpipe.yaml')

In [35]:
# KubeFlow Pipelines client bound to the 'default-tenant' namespace.
client = kfp.Client(namespace='default-tenant')

In [36]:
# Values for the pipeline parameters. These names are not assigned in this
# notebook's visible cells -- presumably they come from set_env.ipynb.
arguments = dict(
    ontapClusterMgmtHostname=ontapClusterMgmtHostname,
    ontapClusterAdminUsername=ontapClusterAdminUsername,
    ontapClusterAdminPassword=ontapClusterAdminPassword,
    sourceVolumeName=sourceVolumeName,
)

# Submit the pipeline as a run under the 'NetAppXGB' experiment.
run_result = client.create_run_from_pipeline_func(
    xgb_pipeline,
    arguments,
    experiment_name='NetAppXGB',
)

**[back to top](#top)**