# Network Operations
## Workflow

In [36]:
import json
import os
import urllib
import urllib.request

import numpy as np

from kfp import dsl
from mlrun import new_project, new_function, mount_v3io, import_function, NewTask, mlconf, code_to_function

In [37]:
# Address of the MLRun API service; the runs started below report to this endpoint.
mlconf.dbpath = "http://mlrun-api:8080"

## Define project

In [5]:
# Adjust the directory and repository below to match your own environment.
# The remote git repository must already be initialized on GitHub.
project_dir = '/User/demo-network-operations'
remote_git = 'https://github.com/zilbermanor/demo-network-operations.git'
# NOTE(review): remote_git is defined but never passed to any call in this
# notebook - confirm whether a remote needs to be registered on the project.

# Tag used to select the function images further down.
MLRUN_COMMIT = 'v0.4.5'

# '{{run.uid}}' is a literal placeholder (this is not an f-string); the logged
# artifact paths show it replaced by each run's uid.
ARTIFACT_PATH = os.path.join(project_dir, 'artifacts', '{{run.uid}}')

# Create the project in project_dir, requesting git initialization.
newproj = new_project(
    'network-operations',
    project_dir,
    init_git=True,
)

In [6]:
# Presumably syncs the local project directory from its git remote - TODO confirm
# that a remote is configured before this cell is run on a fresh checkout.
newproj.pull()

## Set up functions

In [42]:
# Aggregate: imported from the function spec kept under the project directory.
aggregate_spec = os.path.join(project_dir, 'yaml', 'aggregate.yaml')
newproj.set_function(import_function(aggregate_spec), 'aggregate')

# Summary ('describe') and training ('train_lgbm'): both are 'job' functions
# created from remote source files, on the same image, with v3io mounted.
remote_functions = (
    ('describe',
     'https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/notebooks/code/describe.py'),
    ('train_lgbm',
     'https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/notebooks/code/train-classifier.py'),
)
for fn_name, source_url in remote_functions:
    job_fn = code_to_function(name=fn_name,
                              project='network-operations',
                              filename=source_url,
                              image=f'yjbds/mlrun-ml:{MLRUN_COMMIT}',
                              kind='job')
    newproj.set_function(job_fn.apply(mount_v3io()), fn_name)

# Show the resulting project definition.
print(newproj.to_yaml())

name: network-operations
functions:
- name: aggregate
  spec:
    kind: job
    metadata:
      name: aggregate
      tag: ''
      hash: 06454ac064876899b23bc9e3128f53c84b1c2ba2
      project: network-operations
    spec:
      command: /User/demo-network-operations/src/aggregate.py
      args: []
      image: ''
      env:
      - name: V3IO_API
        value: ''
      - name: V3IO_USERNAME
        value: ''
      - name: V3IO_ACCESS_KEY
        value: ''
      description: ''
      build:
        source: ./
        commands: []
        code_origin: '#1d692b688d3a22a4a3cbcc50e4dce18faf0df859'
- name: describe
  spec:
    kind: job
    metadata:
      name: describe
      tag: ''
      project: network-operations
    spec:
      command: ''
      args: []
      image: yjbds/mlrun-ml:v0.4.5
      env:
      - name: V3IO_API
        value: ''
      - name: V3IO_USERNAME
        value: ''
      - name: V3IO_ACCESS_KEY
        value: ''
      description: ''
      build:
        functionS

### Test describe

In [8]:
# Task for the 'table_summary' handler: summarize the aggregated table,
# using 'is_error' as the label column.
summary_params = {
    "key": "summary",
    "label_column": "is_error",
    'class_labels': [0, 1],
}
summary_inputs = {
    "table": os.path.join(project_dir, 'data', 'aggregate.pq'),
}
summ_task = NewTask("sum",
                    handler="table_summary",
                    params=summary_params,
                    inputs=summary_inputs,
                    artifact_path=ARTIFACT_PATH)

In [24]:
# Look up the 'describe' function registered on the project and run the summary task.
describe_fn = newproj.func('describe')
summ_run = describe_fn.run(summ_task)

[mlrun] 2020-03-10 14:22:54,718 starting run sum uid=56205dcff97845f3925d665487213c25  -> http://mlrun-api:8080
[mlrun] 2020-03-10 14:22:54,792 Job is running in the background, pod: sum-tjxt6
Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
  figarray = table.hist(ax=ax, ylabelsize=5, xlabelsize=5)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
[mlrun] 2020-03-10 14:23:07,632 log artifact summary at /User/demo-network-operations/artifacts/56205dcff97845f3925d665487213c25/summary.csv, size: None, db: Y
[mlrun] 2020-03-10 14:23:07,698 log artifact scale_pos_weight at /User/demo-network-operations/artifacts/56205dcff97845f3925d665487213c25/s

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...213c25,0,Mar 10 14:23:07,completed,sum,host=sum-tjxt6kind=jobowner=admin,table,"class_labels=[0, 1]key=summarylabel_column=is_error",,summaryscale_pos_weightimbalancecorrelationhistograms


to track results use .show() or .logs() or in CLI: 
!mlrun get run 56205dcff97845f3925d665487213c25 --project network-operations , !mlrun logs 56205dcff97845f3925d665487213c25 --project network-operations
[mlrun] 2020-03-10 14:23:13,951 run executed, status=completed


### Test train_lgbm

In [39]:
# Training inputs: the aggregated table and the column holding the label.
DATA_KEY      = os.path.join(project_dir, 'data', 'aggregate.pq')
LABEL_COLUMN  = 'is_error'

# -n for random sample of n obs, -1 for entire dataset, +n for n consecutive rows
SAMPLE_SIZE      = -1

TEST_SIZE        = 0.1       # 10% set aside
TRAIN_VAL_SPLIT  = 0.75      # remainder split into train and val
RNG              = 1

# LightGBM configuration; its 'CLASS_PARAMS' and 'FIT_PARAMS' entries are
# passed to the training task.
LGBM_CONF_URL = 'https://raw.githubusercontent.com/yjb-ds/lightgbm-project/pre-project/lightgbm-conf.json'

# Read the response inside a context manager so the HTTP connection is closed
# once the JSON has been parsed (previously the response was left open).
with urllib.request.urlopen(LGBM_CONF_URL) as lgbm_url:
    LGBM = json.load(lgbm_url)

In [44]:
# Parameters handed to the 'train_model' handler.
train_params = {
    'data_key': DATA_KEY,
    'sample': SAMPLE_SIZE,
    'label_column': LABEL_COLUMN,
    'test_size': TEST_SIZE,
    'train_val_split': TRAIN_VAL_SPLIT,
    'rng': RNG,
    'class_params': LGBM['CLASS_PARAMS'],
    'fit_params': LGBM['FIT_PARAMS'],
}
train_task = NewTask('train',
                     handler='train_model',
                     params=train_params,
                     artifact_path=ARTIFACT_PATH)

# Label the run: 'all' when the whole dataset is used, otherwise the sample
# size in scientific notation.
if SAMPLE_SIZE == -1:
    sample_label = 'all'
else:
    sample_label = f'{np.abs(SAMPLE_SIZE):0.0e}'

run_labels = (
    ('model', 'LightGBM'),
    ('sample', sample_label),
    ('source', 'train_lgbm'),
)
for label_key, label_value in run_labels:
    train_task.set_label(label_key, label_value)

# Kept as the cell's last expression so the returned run object is displayed.
newproj.func('train_lgbm').run(train_task)

[mlrun] 2020-03-10 14:49:36,848 starting run train uid=4cb55c1fa6af44f7b24c296c0e0e1c25  -> http://mlrun-api:8080
[mlrun] 2020-03-10 14:49:36,966 Job is running in the background, pod: train-8zqzz
Intel(R) Data Analytics Acceleration Library (Intel(R) DAAL) solvers for sklearn enabled: https://intelpython.github.io/daal4py/sklearn.html
  'precision', 'predicted', average, warn_for)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
[mlrun] 2020-03-10 14:49:47,617 log artifact test-set at /User/demo-network-operations/artifacts/4cb55c1fa6af44f7b24c296c0e0e1c25/test-set.csv, size: None, db: Y
[mlrun] 2020-03-10 14:49:47,693 log artifact model at /User/demo-network-operations/artifacts/4cb55c1fa6af44f7b24c296c0e0e1c25/models/model.pkl, size: None, db: Y
[mlrun] 2020-03-10 14:49:47,905 log artifact roc at /User/d

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...0e1c25,0,Mar 10 14:49:47,completed,train,host=train-8zqzzkind=jobmodel=LightGBMowner=adminsample=allsource=train_lgbm,,"class_params={'boosting_type': 'gbdt', 'colsample_bytree': 1, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': 50, 'min_child_samples': 20, 'min_split_gain': 0.0, 'n_estimators': 300, 'n_jobs': 16, 'num_leaves': 300, 'objective': 'binary', 'random_state': 1, 'reg_alpha': 0, 'reg_lambda': 1, 'scale_pos_weight': 1, 'silent': True, 'subsample': 1}data_key=/User/demo-network-operations/data/aggregate.pqfit_params={'early_stopping_rounds': 10, 'verbose': False}label_column=is_errorrng=1sample=-1test_size=0.1train_val_split=0.75",accuracy=0.943089430894309avg_precscore=1.0f1_score=0.0rocauc=1.0,test-setmodelrocaucloglossconfusionfeatimp.pngfeatimp.csv


to track results use .show() or .logs() or in CLI: 
!mlrun get run 4cb55c1fa6af44f7b24c296c0e0e1c25 --project network-operations , !mlrun logs 4cb55c1fa6af44f7b24c296c0e0e1c25 --project network-operations
[mlrun] 2020-03-10 14:49:56,196 run executed, status=completed


<mlrun.model.RunObject at 0x7fc0a89f8048>

## Save project YAML

In [45]:
# Write the project definition to project.yaml inside the project directory.
project_yaml_path = os.path.join(project_dir, 'project.yaml')
newproj.save(project_yaml_path)