<a id="top"></a>
# E2E Serverless ML pipeline - Ingest, Train, Auto Deploy Model
  --------------------------------------------------------------------


In [1]:
# nuclio: ignore
import nuclio

# Change Registry

In [2]:
# Run the shared environment notebook in this kernel's namespace.
# NOTE(review): later cells use names that are not defined in this notebook
# (e.g. docker_registry, netapp_volume_mounts, netapp_volumes, NETAPP_MOUNT_PATH,
# artifacts_path) — presumably they are set by set_env.ipynb; confirm.
%run set_env.ipynb

# NetApp volume mounts definition

In [3]:
from mlrun import new_function, code_to_function, NewTask, v3io_cred, new_model_server, mlconf, get_run_db, mount_v3io
# Set the MLRun DB path to the HTTP endpoint below.
# For a local DB path use 'User/mlrun' instead.
mlconf.dbpath = 'http://mlrun-api:8080'

<a id="pipeline"></a>
______________________________________________
# Create a multi-stage KubeFlow Pipeline from our notebooks

In [4]:
import kfp
from kfp import dsl
from mlrun import new_project
from kubernetes import client as k8sc

# Define the function that takes the NetApp volume snapshot
A different function is used for on-prem NetApp and for NetApp Cloud Volumes.

In [5]:
# Initialize the NetApp snapshot function once for all functions in this notebook.
#
# NETAPP_CLOUD_VOLUME selects which snapshot notebook is used:
#   truthy -> snap_cv.ipynb  (Cloud Volume parameters)
#   falsy  -> snapshot.ipynb (on-prem ONTAP cluster parameters)
#
# The flag is tested directly. Wrapping it in a list (`if [ NETAPP_CLOUD_VOLUME ]:`)
# is always truthy, because a one-element list is never empty, which made the
# on-prem branch unreachable.
# NOTE(review): assumes NETAPP_CLOUD_VOLUME is a bool/int rather than a string
# such as 'false' (any non-empty string is truthy) — confirm where it is defined.
if NETAPP_CLOUD_VOLUME:
    snapfn = code_to_function('snap', project='NetApp', kind='job', filename="snap_cv.ipynb").apply(mount_v3io())
    # NOTE(review): 'email' / 'password' are passed as plain task parameters.
    snap_params = {
        "metrics_table": metrics_table,
        "NETAPP_MOUNT_PATH": NETAPP_MOUNT_PATH,
        'MANAGER': MANAGER,
        'svm': svm,
        'email': email,
        'password': password,
        'weid': weid,
        'volume': volume,
        "APP_DIR": APP_DIR,
    }
else:
    snapfn = code_to_function('snap', project='NetApp', kind='job', filename="snapshot.ipynb").apply(mount_v3io())
    # NOTE(review): the cluster admin password is passed as a plain task parameter.
    snap_params = {
        'ontapClusterMgmtHostname': ontapClusterMgmtHostname,
        'ontapClusterAdminUsername': ontapClusterAdminUsername,
        'ontapClusterAdminPassword': ontapClusterAdminPassword,
        "metrics_table": metrics_table,
        "FEATURES_TABLE": FEATURES_TABLE,
        'sourceVolumeName': sourceVolumeName,
        'NETAPP_SIM': NETAPP_SIM,
        'NETAPP_MOUNT_PATH': NETAPP_MOUNT_PATH,
    }

#.apply(mount_pvc('nfsvol', NETAPP_PVC_CLAIM, NETAPP_MOUNT_PATH))
snapfn.spec.image = docker_registry + '/netapp/pipeline:latest'
# Keep only the first existing mount/volume entry (presumably the one added by
# mount_v3io() above) and add the NetApp volume next to it.
snapfn.spec.volume_mounts = [snapfn.spec.volume_mounts[0], netapp_volume_mounts]
snapfn.spec.volumes = [snapfn.spec.volumes[0], netapp_volumes]

In [6]:
# Cloud Volume snap parameters


In [7]:
#print(snapfn.to_yaml())

# Define function to explore feature metrics
We retrieve the function code from a Git repo

In [8]:
# Build the 'describe' job from describe.py and apply the v3io mount to it.
describefn = code_to_function(
    project='NetApp',
    kind='job',
    name='describe',
    filename='describe.py',
).apply(mount_v3io())
describefn.spec.image = docker_registry + '/iguazio/netapp'

# Mount the NetApp volume: reuse snapfn's first mount/volume entry and add the
# NetApp mount/volume definitions alongside it.
describefn.spec.volume_mounts = [
    snapfn.spec.volume_mounts[0],
    netapp_volume_mounts,
]
describefn.spec.volumes = [
    snapfn.spec.volumes[0],
    netapp_volumes,
]

## Build functions for model training pipeline

In [9]:
# Build the jobs used by the model-training pipeline, one per source notebook.
# Every job has its own function name. deploy_inference and deploy_features were
# previously also created with name='training' (copy-paste), so three different
# functions shared a single name within the 'NetApp' project.
data_prep = code_to_function(project='NetApp', kind='job', name='data-prep', filename='data-prep.ipynb').apply(mount_v3io())
training = code_to_function(project='NetApp', kind='job', name='training', filename='training.ipynb').apply(mount_v3io())
deploy_inference = code_to_function(project='NetApp', kind='job', name='deploy-inference', filename='deploy-inference-function.ipynb').apply(mount_v3io())
deploy_features = code_to_function(project='NetApp', kind='job', name='deploy-features', filename='deploy-features-function.ipynb').apply(mount_v3io())

# All four jobs run the same image and get the same volume setup: snapfn's first
# mount/volume entry plus the NetApp mount/volume definitions.
for _job in (data_prep, training, deploy_inference, deploy_features):
    _job.spec.image = docker_registry + '/iguazio/netapp'
    _job.spec.volume_mounts = [snapfn.spec.volume_mounts[0], netapp_volume_mounts]
    _job.spec.volumes = [snapfn.spec.volumes[0], netapp_volumes]

#.apply(mount_v3io())
#.apply(mount_v3io(name='bigdata', remote='bigdata/',mount_path='/v3io/bigdata'))

In [10]:
#print(training.to_yaml())

In [11]:
# Parameters shared by the data-prep, deploy-features, training and deploy-model
# steps of the pipeline (each of those steps receives this same dict).
params = {
    # Table names and output location
    'FEATURES_TABLE': FEATURES_TABLE,
    'SAVE_TO': SAVE_TO,
    'metrics_table': metrics_table,
    'FROM_TSDB': 0,
    'PREDICTIONS_TABLE': PREDICTIONS_TABLE,
    # Training settings
    'TRAIN_ON_LAST': '1d',
    'TRAIN_SIZE': 0.7,
    'NUMBER_OF_SHARDS': 4,
    'MODEL_FILENAME': 'netops.v3.model.pickle',
    # Deployment naming
    'APP_DIR': APP_DIR,
    'FUNCTION_NAME': 'netops-inference',
    'PROJECT_NAME': 'netops',
    # NetApp / Iguazio storage settings
    'NETAPP_SIM': NETAPP_SIM,
    'NETAPP_MOUNT_PATH': NETAPP_MOUNT_PATH,
    'NETAPP_PVC_CLAIM': NETAPP_PVC_CLAIM,
    'IGZ_CONTAINER_PATH': IGZ_CONTAINER_PATH,
    'IGZ_MOUNT_PATH': IGZ_MOUNT_PATH,
}

**Define a 6-step workflow with hyper-params** (snapshot, describe, data-prep, deploy-features, train, deploy-model)

In [12]:
@dsl.pipeline(
    name='NetOps training pipeline with NetApp volume cloning',
    description='snap volume before training '
)
def xgb_pipeline(
   ontapClusterMgmtHostname = '',
   ontapClusterAdminUsername = '',
   ontapClusterAdminPassword = '',
   sourceVolumeName = ''
):
    # Pipeline: take a NetApp volume snapshot, then describe the data, prepare
    # it, train a model and deploy the serving functions.
    #
    # Step graph (from the .after() calls below):
    #   snap -> describe
    #   snap -> data-prep -> xgb_train -> deploy-model
    #   snap -> deploy-features-function
    #
    # NOTE(review): the four pipeline parameters are not referenced in this body;
    # the snapshot step takes its settings from the notebook-level snap_params dict.

    # Snapshot step; exposes two outputs that downstream steps consume.
    snap = snapfn.as_step(
        NewTask(handler='handler', params=snap_params),
        name='NetApp_Cloud_Volume_Snapshot',
        outputs=['snapVolumeDetails', 'training_parquet_file'],
    ).apply(mount_v3io())

    # Describe the parquet file produced by the snapshot step.
    describe = describefn.as_step(
        name='describe',
        handler="describe",
        params={"key": "summary", "label_column": "is_error", 'class_labels': [0, 1]},
        inputs={"table": snap.outputs['training_parquet_file']},
        out_path=artifacts_path,
    ).apply(mount_v3io()).after(snap)

    # Data preparation reads from the location reported by the snapshot step.
    prep = data_prep.as_step(
        name='data-prep',
        handler='handler',
        params=params,
        inputs={'DATA_DIR': snap.outputs['snapVolumeDetails']},
        out_path=artifacts_path,
    ).apply(mount_v3io()).after(snap)

    # Feature-function deployment uses the NetApp mount path as its data dir.
    deployfeatures = deploy_features.as_step(
        name='deploy-features-function',
        handler='handler',
        params=params,
        inputs={'DATA_DIR': NETAPP_MOUNT_PATH},
        out_path=artifacts_path,
    ).apply(mount_v3io()).after(snap)

    # Training runs once data preparation has finished.
    train = training.as_step(
        name='xgb_train',
        handler='handler',
        params=params,
        out_path=artifacts_path,
    ).apply(mount_v3io()).after(prep)

    # Model deployment runs once training has finished.
    deploy = deploy_inference.as_step(
        name='deploy-model',
        handler='handler',
        params=params,
        out_path=artifacts_path,
    ).apply(mount_v3io()).after(train)



#### Create a KubeFlow client and submit the pipeline with parameters

In [13]:
# Debug aid: compile xgb_pipeline and write the generated pipeline definition
# to mlrunpipe.yaml so it can be inspected.
kfp.compiler.Compiler().compile(
    xgb_pipeline,
    'mlrunpipe.yaml',
)

In [14]:
# Kubeflow Pipelines client created for the 'default-tenant' namespace.
client = kfp.Client(namespace='default-tenant')

In [15]:
# Values for xgb_pipeline's parameters, keyed by parameter name.
# NOTE(review): this includes the cluster admin password as a plain pipeline argument.
arguments = {
    'ontapClusterMgmtHostname': ontapClusterMgmtHostname,
    'ontapClusterAdminUsername': ontapClusterAdminUsername,
    'ontapClusterAdminPassword': ontapClusterAdminPassword,
    'sourceVolumeName': sourceVolumeName,
}

# Submit the pipeline as a run under the 'NetAppXGB' experiment.
run_result = client.create_run_from_pipeline_func(
    xgb_pipeline,
    arguments,
    experiment_name='NetAppXGB',
)

**[back to top](#top)**