# Nuclio - Generator function

## Environment

In [2]:
# nuclio: ignore
import nuclio

In [3]:
#%run set_env.ipynb

### Configurations

In [4]:
import os
import datetime

In [5]:
### NOTE
## Platform version dependent. Required only to trigger a Spark Pipeline
# Image tag used as the function's build base image ("spec.build.baseImage")
# in the deployment spec defined in the next cell.
base_image='iguazio/jupyter-all:2.8_b119_20200417000155'

In [6]:
# nuclio: ignore
# Deployment spec for the monitor function:
#   * a cron trigger named "inference" that fires every 10 minutes,
#   * a 60-second readiness timeout and at least one replica,
#   * the platform-specific base image, with mlrun and v3io installed at build time.
spec = (
    nuclio.ConfigSpec(
        config={
            "spec.triggers.inference.kind": "cron",
            "spec.triggers.inference.attributes.interval": "10m",
            "spec.readinessTimeoutSeconds": 60,
            "spec.minReplicas": 1,
            "spec.build.baseImage": base_image,
        },
        env={
            # Forward the notebook's own V3IO settings to the deployed function.
            **{
                var_name: os.getenv(var_name)
                for var_name in (
                    "V3IO_HOME",
                    "V3IO_ACCESS_KEY",
                    "V3IO_USERNAME",
                    "V3IO_HOME_URL",
                )
            },
            # Location of the KV table that holds one record per pipeline run.
            "MONITOR_CONTAINER": "bigdata",
            "MONITOR_TABLE": "kubeflow_runs/",
        },
        cmd=["pip install mlrun v3io"],
    )
    .with_v3io()
    # v3io volume built from '/User' and '/users/<V3IO_USERNAME>'.
    # NOTE(review): V3IO_USERNAME must be set, otherwise os.path.join fails.
    .add_volume(
        "/User",
        os.path.join("/users", os.getenv("V3IO_USERNAME")),
        kind="v3io",
        name="v3io",
    )
)

In [7]:
import v3io.dataplane
import kfp
import pandas as pd
import json

In [8]:
# Kubeflow Pipelines API client; the host is hard-coded to the
# ml-pipeline service address below. Shared by get_kfp_status() and handler().
Client = kfp.Client(host='http://ml-pipeline.default-tenant.svc:8888')
# V3IO data-plane client (created with max_connections=1) used to read and
# delete records in the monitoring KV table.
v3io_client = v3io.dataplane.Client(max_connections=1)

In [9]:
def delete_monitor_record(id):
    """Delete one run's record from the monitoring KV table.

    The container and table path are read from the MONITOR_CONTAINER and
    MONITOR_TABLE environment variables (defaults: 'bigdata' and
    'kubeflow_runs/'). The object path is the table path with `id` appended.

    Args:
        id: Record key as a string (the table is keyed by Kubeflow run id).

    Returns:
        None.
    """
    container = os.getenv('MONITOR_CONTAINER', 'bigdata')
    record_path = os.getenv('MONITOR_TABLE', 'kubeflow_runs/') + id
    v3io_client.delete_object(container, record_path)

In [10]:
def send_alert(id):
    """Placeholder hook called for runs whose status is 'Failed'.

    Not implemented yet: the function currently does nothing.

    Args:
        id: Identifier of the failed run (unused for now).

    Returns:
        None.
    """
    return None

In [11]:
def get_runs():
    """Return every run record stored in the monitoring KV table.

    Each pipeline writes a record to this table after deploying, and the
    table is keyed by the Kubeflow run id. The container and table path are
    read from the MONITOR_CONTAINER and MONITOR_TABLE environment variables
    (defaults: 'bigdata' and 'kubeflow_runs/').

    The table is read page by page: each response body is JSON carrying an
    'Items' list, a 'LastItemIncluded' flag ('TRUE'/'FALSE') and, when more
    pages exist, a 'NextMarker' to pass to the following request.

    Returns:
        list[dict]: One {'id': <record name>, 'status': <status or None>}
        entry per record. 'status' is None when the record has no 'status'
        attribute.

    Raises:
        KeyError: If an item has no '__name' attribute.
    """
    container = os.getenv('MONITOR_CONTAINER', 'bigdata')
    table = os.getenv('MONITOR_TABLE', 'kubeflow_runs/')

    records = []
    marker = ''
    while True:
        response = v3io_client.get_items(container, table, marker=marker)
        page = json.loads(response.body.decode('utf-8'))

        # Attribute values arrive wrapped by type, e.g. {'S': 'Failed'}.
        for item in page.get('Items', []):
            records.append({
                'id': item['__name']['S'],
                'status': item.get('status', {}).get('S'),
            })

        if page.get('LastItemIncluded') != 'FALSE':
            break

        # More pages were announced but no new marker was supplied: stop
        # rather than request (and append) the same page forever.
        next_marker = page.get('NextMarker')
        if not next_marker or next_marker == marker:
            break
        marker = next_marker

    return records
        

In [12]:
def get_kfp_status(run_items):
    """Look up the Kubeflow status of every run recorded in the KV table.

    For each record the run is fetched through the KFP client and its status
    is printed. Records of runs that 'Succeeded' are deleted from the table;
    runs that 'Failed' are passed to send_alert() (handling of failed
    records is still TBD).

    Args:
        run_items: Iterable of dicts with an 'id' key holding the run id,
            as produced by get_runs().

    Returns:
        list[dict]: One {'name': <run id>, 'status': <KFP status>} entry
        per input record, in input order.
    """
    statuses = []
    for record in run_items:
        run_id = record['id']
        status = Client.get_run(run_id).to_dict()['run']['status']
        statuses.append({'name': run_id, 'status': status})
        print(status)
        if status == 'Succeeded':
            delete_monitor_record(run_id)
        elif status == 'Failed':
            send_alert(run_id)
    return statuses
    
    

In [13]:
def handler(context, event):
    """Nuclio entry point: report Kubeflow pipeline run status.

    Two modes, selected by the event's fields:

    * 'run_id' present: look up that single run through the KFP client.
    * otherwise (e.g. the cron trigger): scan the monitoring KV table with
      get_runs() and process every record with get_kfp_status().

    Args:
        context: Nuclio function context (not used here).
        event: Nuclio event; only `event.fields` is read. A missing or empty
            `fields` value selects the full-scan mode.

    Returns:
        list[dict]: [{'name': <run id>, 'status': <KFP status>}, ...] on
        success, or the string "Not Found: <run id>" when the single-run
        lookup fails.
    """
    fields = event.fields or {}

    if 'run_id' in fields:
        run_id = str(fields['run_id'])
        try:
            run = Client.get_run(run_id)
            return [{'name': run_id, 'status': run.to_dict()['run']['status']}]
        except Exception:
            # Any lookup failure is reported as "Not Found". Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return "Not Found: %s" % run_id

    return get_kfp_status(get_runs())

## Test

In [23]:
# nuclio: ignore
# Local smoke test: call the handler in the notebook with an event that has
# no run_id, which exercises the full-scan path (get_runs + get_kfp_status).
# Swap in the commented-out event below to query a single run instead.
# NOTE(review): `context` is not defined in any visible cell; presumably it
# is provided by the nuclio notebook environment. Confirm.
#event = nuclio.Event(body='',fields={'run_id':'e121acc0-ab1a-41ec-9e0e-1df6bfcbecbe'})
event = nuclio.Event(body='')
output = handler(context, event)
output

Failed


[{'name': 'de2d3a55-db40-4f90-8ca9-b02552e1c94b', 'status': 'Failed'}]

# Deploy

In [26]:
# nuclio: ignore
# Deploy the function as 'kfpmonitor' in the 'monitoring' project using `spec`.
# No source file is passed; presumably the current notebook is deployed. Confirm.
# The return value is kept in `addr` and used as the URL by the curl cells below.
#%nuclio deploy -p netops -n generator -c
addr = nuclio.deploy_file(name='kfpmonitor',project='monitoring',spec=spec)


[nuclio] 2020-04-21 15:41:19,140 (info) Build complete
[nuclio] 2020-04-21 15:41:25,220 (info) Function deploy complete
[nuclio] 2020-04-21 15:41:25,230 done updating kfpmonitor, function address: 3.12.248.124:32420


In [44]:
# nuclio: ignore
# Call the deployed function without a run_id: the handler scans the KV table.
!curl -X GET {addr}

[{"name": "de2d3a55-db40-4f90-8ca9-b02552e1c94b", "status": "Failed"}]

In [46]:
# nuclio: ignore
# Call the deployed function with a run_id query parameter: single-run lookup.
!curl -X GET {addr}/?run_id=de2d3a55-db40-4f90-8ca9-b02552e1c94b

[{"name": "de2d3a55-db40-4f90-8ca9-b02552e1c94b", "status": "Failed"}]