# Network Operations
## Workflow

In [1]:
import json
import os
import urllib
import urllib.request

import kfp
import mlrun
import numpy as np
from mlrun import new_project, new_function, mount_v3io, import_function, NewTask, mlconf, code_to_function, new_model_server

In [4]:
# Keep an MLRun API address that is already configured (for example through the
# MLRUN_DBPATH environment variable) and only fall back to the in-cluster default.
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

## Define project

In [5]:
# Local project directory and its git remote - adjust both to your environment.
# The remote repository has to be initialized in GitHub beforehand.
project_dir = '/User/demo-network-operations'
# NOTE(review): remote_git is not referenced by any other cell shown in this notebook.
remote_git = 'https://github.com/zilbermanor/demo-network-operations.git'
newproj = new_project('network-operations', project_dir, init_git=True)

# Tag of the image used by the job functions defined further down.
MLRUN_COMMIT = 'v0.4.5'

# Artifact location template; the '{{run.uid}}' placeholder stays in the path
# so each run gets its own directory (see the artifact paths in the run logs).
ARTIFACT_PATH = os.path.join(project_dir, 'artifacts', '{{run.uid}}')
mlconf.artifact_path = ARTIFACT_PATH

In [6]:
# Pull the project content (presumably from the git remote configured for
# project_dir - TODO confirm a remote is set before running this cell).
newproj.pull()

## Setup functions

In [None]:
# Image shared by the two job functions below.
job_image = f'yjbds/mlrun-ml:{MLRUN_COMMIT}'

# Aggregate: imported from the function spec kept in the project's yaml directory.
aggregate_fn = import_function(os.path.join(project_dir, 'yaml', 'aggregate.yaml'))
newproj.set_function(aggregate_fn.apply(mount_v3io()), 'aggregate')

# Summary: job built from a remote source file.
describe_fn = code_to_function(
    name='describe',
    project='network-operations',
    filename='https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/notebooks/code/describe.py',
    image=job_image,
    kind='job',
)
newproj.set_function(describe_fn.apply(mount_v3io()), 'describe')

# Training: job built from a remote source file.
train_fn = code_to_function(
    name='train_lgbm',
    project='network-operations',
    filename='https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/notebooks/code/train-classifier.py',
    image=job_image,
    kind='job',
)
newproj.set_function(train_fn.apply(mount_v3io()), 'train_lgbm')

# Serving: model server defined by the model-server notebook in the current working directory.
serving_fn = new_model_server(
    "failure_predictor",
    model_class="ClassifierModel",
    filename=os.path.join(os.getcwd(), 'model-server.ipynb'),
)
newproj.set_function(serving_fn.apply(mount_v3io()), 'serving')

# Show the resulting project definition.
print(newproj.to_yaml())

### Test describe

In [7]:
# Task for the 'table_summary' handler, run against the aggregated table.
summ_params = {"key": "summary", "label_column": "is_error", 'class_labels': [0, 1]}
summ_inputs = {"table": os.path.join(project_dir, 'data', 'aggregate.pq')}

summ_task = NewTask("sum",
                    handler="table_summary",
                    params=summ_params,
                    inputs=summ_inputs,
                    artifact_path=ARTIFACT_PATH)

In [8]:
# Run the project's 'describe' function with the summary task and keep the returned run in summ_run.
summ_run = newproj.func('describe').run(summ_task)

[mlrun] 2020-03-12 11:06:11,218 starting run sum uid=5b509fda8fe74ed5b9fb9cab3ba24325  -> http://mlrun-api:8080
[mlrun] 2020-03-12 11:06:11,292 Job is running in the background, pod: sum-ptnjj
Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
  figarray = table.hist(ax=ax, ylabelsize=5, xlabelsize=5)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
[mlrun] 2020-03-12 11:06:24,904 log artifact summary at /User/demo-network-operations/artifacts/5b509fda8fe74ed5b9fb9cab3ba24325/summary.csv, size: None, db: Y
[mlrun] 2020-03-12 11:06:24,971 log artifact scale_pos_weight at /User/demo-network-operations/artifacts/5b509fda8fe74ed5b9fb9cab3ba24325/s

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...a24325,0,Mar 12 11:06:24,completed,sum,host=sum-ptnjjkind=jobowner=admin,table,"class_labels=[0, 1]key=summarylabel_column=is_error",,summaryscale_pos_weightimbalancecorrelationhistograms


to track results use .show() or .logs() or in CLI: 
!mlrun get run 5b509fda8fe74ed5b9fb9cab3ba24325 --project network-operations , !mlrun logs 5b509fda8fe74ed5b9fb9cab3ba24325 --project network-operations
[mlrun] 2020-03-12 11:06:30,527 run executed, status=completed


### Test train_lgbm

In [10]:
# Training table and the column holding the label.
DATA_KEY      = os.path.join(project_dir, 'data', 'aggregate.pq')
LABEL_COLUMN  = 'is_error'

# -n for random sample of n obs, -1 for entire dataset, +n for n consecutive rows
SAMPLE_SIZE      = -1

TEST_SIZE        = 0.1       # 10% set aside
TRAIN_VAL_SPLIT  = 0.75      # remainder split into train and val
RNG              = 1

# LightGBM configuration (the 'CLASS_PARAMS' and 'FIT_PARAMS' entries are used below).
# The response is read inside a `with` block so the HTTP connection is closed
# as soon as the JSON document has been parsed.
LGBM_CONF_URL = 'https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/lightgbm-conf.json'
with urllib.request.urlopen(LGBM_CONF_URL) as lgbm_url:
    LGBM = json.load(lgbm_url)

In [16]:
# Parameters handed to the 'train_model' handler.
train_params = {
    'data_key': DATA_KEY,
    'sample': SAMPLE_SIZE,
    'label_column': LABEL_COLUMN,
    'test_size': TEST_SIZE,
    'train_val_split': TRAIN_VAL_SPLIT,
    'rng': RNG,
    'class_params': LGBM['CLASS_PARAMS'],
    'fit_params': LGBM['FIT_PARAMS'],
}
train_task = NewTask('train',
                     handler='train_model',
                     params=train_params,
                     artifact_path=ARTIFACT_PATH)

# Labels attached to the run, applied in this order.
run_labels = (
    ('model', 'LightGBM'),
    ('sample', 'all' if SAMPLE_SIZE==-1 else f'{np.abs(SAMPLE_SIZE):0.0e}'),
    ('source', 'train_lgbm'),
)
for label_name, label_value in run_labels:
    train_task.set_label(label_name, label_value)

# Run the project's 'train_lgbm' function with the task.
trainer = newproj.func('train_lgbm').run(train_task)

[mlrun] 2020-03-12 11:09:56,492 starting run train uid=78d9781a6ab4408588670d64ae9580d9  -> http://mlrun-api:8080
[mlrun] 2020-03-12 11:09:56,608 Job is running in the background, pod: train-ft4pq
Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html
  'precision', 'predicted', average, warn_for)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
[mlrun] 2020-03-12 11:10:08,817 log artifact test-set at /User/demo-network-operations/artifacts/78d9781a6ab4408588670d64ae9580d9/test-set.csv, size: None, db: Y
[mlrun] 2020-03-12 11:10:08,881 log artifact model at /User/demo-network-operations/artifacts/78d9781a6ab4408588670d64ae9580d9/models/model.pkl, size: None, db: Y
[mlrun] 2020-03-12 11:10:09,092 log artifact roc at /User/d

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...9580d9,0,Mar 12 11:10:08,completed,train,host=train-ft4pqkind=jobmodel=LightGBMowner=adminsample=allsource=train_lgbm,,"class_params={'boosting_type': 'gbdt', 'colsample_bytree': 1, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 50, 'min_child_samples': 20, 'min_split_gain': 0.0, 'n_estimators': 300, 'n_jobs': 16, 'num_leaves': 300, 'objective': 'binary', 'random_state': 1, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'silent': True, 'subsample': 1}data_key=/User/demo-network-operations/data/aggregate.pqfit_params={'early_stopping_rounds': 10, 'verbose': False}label_column=is_errorrng=1sample=-1test_size=0.1train_val_split=0.75",accuracy=0.943089430894309avg_precscore=1.0f1_score=0.0rocauc=1.0,test-setmodelrocaucloglossconfusionfeatimp.pngfeatimp.csv


to track results use .show() or .logs() or in CLI: 
!mlrun get run 78d9781a6ab4408588670d64ae9580d9 --project network-operations , !mlrun logs 78d9781a6ab4408588670d64ae9580d9 --project network-operations
[mlrun] 2020-03-12 11:10:15,813 run executed, status=completed


## Pipeline

In [15]:
@kfp.dsl.pipeline(
    name='Network Operations Demo',
    description='Train a Failure Prediction LGBM Model over sensor data'
)
def kfpipeline(
        df_artifact = os.path.join(project_dir, 'data', 'metrics.pq'),
        metrics = ['cpu_utilization'],
        labels = ['is_error'],
        metric_aggs = ['mean', 'sum'],
        label_aggs = ['max'],
        suffix = 'daily',
        append_to_df = True,
        window = 5,
        center = True,
        save_to = os.path.join(project_dir, 'data', 'aggregate.pq'),
        describe_table = 'summary',
        label_column = 'is_error',
        class_labels = [1, 0],
        SAMPLE_SIZE      = -1, # -n for random sample of n obs, -1 for entire dataset, +n for n consecutive rows
        TEST_SIZE        = 0.1,       # 10% set aside
        TRAIN_VAL_SPLIT  = 0.75,      # remainder split into train and val
        RNG              = 1,
        class_params = LGBM['CLASS_PARAMS'],
        fit_params = LGBM['FIT_PARAMS'],
    ):
    """Chain the project functions: aggregate, summarise, train and deploy.

    Steps, in order:
      1. 'aggregate' runs the 'aggregate' handler with df_artifact, metrics,
         labels, metric_aggs, label_aggs, suffix, append_to_df, window, center
         and save_to, and exposes an 'aggregate' output.
      2. 'sum' runs the 'table_summary' handler on that output, using
         describe_table as the key together with label_column and class_labels.
      3. 'train' runs the 'train_model' handler on the aggregate output and the
         'scale_pos_weight' output of the summary step, parameterised by
         SAMPLE_SIZE, label_column, TEST_SIZE, TRAIN_VAL_SPLIT, RNG,
         class_params and fit_params.
      4. The 'model' output of the training step is deployed on the 'serving'
         function under the name 'predictor'.
    """

    # Run preprocessing on the data
    aggregate = newproj.func('aggregate').as_step(name='aggregate',
                                                  params={'df_artifact': df_artifact,
                                                          'metrics': metrics,
                                                          'labels': labels,
                                                          'metric_aggs': metric_aggs,
                                                          'label_aggs': label_aggs,
                                                          'suffix': suffix,
                                                          'append_to_df': append_to_df,
                                                          'window': window,
                                                          'center': center,
                                                          'save_to': save_to},
                                                  outputs=['aggregate'],
                                                  handler='aggregate',
                                                  image='docker-registry.default-tenant.app.cnyidfihnjsz.iguazio-cd0.com:80/mlrun/func-default-aggregate-latest')

    # Summarise the aggregated table
    describe = newproj.func('describe').as_step(name='sum',
                                                handler="table_summary",  
                                                params={"key": describe_table, 
                                                        "label_column": label_column, 
                                                        'class_labels': class_labels},
                                                inputs={"table": aggregate.outputs['aggregate']},
                                                outputs=["summary", "scale_pos_weight"])

    # Train on the aggregated table; the second input is named exactly like the
    # summary output it carries ('scale_pos_weight').
    # NOTE(review): confirm the 'train_model' handler reads this input under that name.
    train = newproj.func('train_lgbm').as_step(name='train',
                                               handler='train_model',
                                               params={'sample'          : SAMPLE_SIZE,
                                                       'label_column'    : label_column,
                                                       'test_size'       : TEST_SIZE,
                                                       'train_val_split' : TRAIN_VAL_SPLIT,
                                                       'rng'             : RNG,
                                                       'class_params'    : class_params,
                                                       'fit_params'      : fit_params},
                                               inputs={"data_key": aggregate.outputs['aggregate'],
                                                       "scale_pos_weight": describe.outputs["scale_pos_weight"]},
                                               outputs=['model', 'test-set'])
    

    # deploy the model using nuclio functions
    deploy = newproj.func('serving').deploy_step(project='nuclio-serving',
                                                 models={'predictor': train.outputs['model']})

In [None]:
# For debugging: compile the pipeline DSL and write it to
# <project_dir>/yaml/lgbm_pipeline.yaml.
kfp.compiler.Compiler().compile(kfpipeline, os.path.join(project_dir, 'yaml', 'lgbm_pipeline.yaml'))

In [17]:
# Submit the pipeline under the 'network-operations' experiment; no arguments are
# overridden, so the defaults declared on kfpipeline apply.
mlrun.run_pipeline(kfpipeline, arguments={}, artifact_path=ARTIFACT_PATH, experiment='network-operations')

[mlrun] 2020-03-13 07:13:39,777 Pipeline run id=90665f1b-e428-4be9-8bcb-d65eeeea7424, check UI or DB for progress


'90665f1b-e428-4be9-8bcb-d65eeeea7424'

## Test endpoint

In [None]:
import pandas as pd
import requests
import json

In [19]:
# Set model
model_name = 'predictor'

# Load pre-processed data example from the project's data directory, so the
# path follows project_dir instead of a separately hard-coded location.
df = pd.read_parquet(os.path.join(project_dir, 'data', 'aggregate.pq'))

# Set sample: first row, missing values replaced by 0, label column removed
sample = df.head(1).fillna(0).drop(columns=['is_error']).values.tolist()
msg = {'instances': sample}

# Set endpoint
# NOTE(review): hard-coded address of one specific deployment - replace it with
# the address of your own deployed serving function.
addr = 'http://3.136.215.154:32434'

In [20]:
# Send Request
# The timeout keeps the cell from hanging indefinitely when the endpoint is unreachable.
req = requests.post(f'{addr}/{model_name}/predict', data=json.dumps(msg), timeout=30)
req.__dict__

{'_content': b'[0.0]',
 '_content_consumed': True,
 '_next': None,
 'status_code': 200,
 'headers': {'Server': 'nuclio', 'Date': 'Fri, 13 Mar 2020 07:18:21 GMT', 'Content-Type': 'application/json', 'Content-Length': '5'},
 'raw': <urllib3.response.HTTPResponse at 0x7fd7736f8860>,
 'url': 'http://3.136.215.154:32434/predictor/predict',
 'encoding': None,
 'history': [],
 'reason': 'OK',
 'cookies': <RequestsCookieJar[]>,
 'elapsed': datetime.timedelta(0, 0, 665942),
 'request': <PreparedRequest [POST]>,
 'connection': <requests.adapters.HTTPAdapter at 0x7fd7738024e0>}

## Save Project yaml

In [18]:
# Save the project to project.yaml inside the project directory.
newproj.save(os.path.join(project_dir, 'project.yaml'))