# Install pipelines SDK

#### Please wait until this cell completes before running the next cells. This only needs to be run once per active kernel.

In [1]:
# Pin the KFP SDK release; the pip command below interpolates this environment variable into the download URL.
%env RELEASE_VERSION=0.1.18
# Install (or upgrade to) the SDK tarball published for that release.
!pip install https://storage.googleapis.com/ml-pipeline/release/${RELEASE_VERSION}/kfp.tar.gz --upgrade

env: RELEASE_VERSION=0.1.18
Collecting https://storage.googleapis.com/ml-pipeline/release/0.1.18/kfp.tar.gz
  Using cached https://storage.googleapis.com/ml-pipeline/release/0.1.18/kfp.tar.gz (152 kB)
Building wheels for collected packages: kfp
  Building wheel for kfp (setup.py) ... [?25ldone
[?25h  Created wheel for kfp: filename=kfp-0.1.18-py3-none-any.whl size=242154 sha256=1c34b5e997e4d2852e27bb9ab830d7501866ce28436d86a75f4e066cd9001f58
  Stored in directory: /tmp/pip-ephem-wheel-cache-l1p4h0c4/wheels/89/83/34/24a09b34340dfb7efecae25ca19424c615d5f46654cde53432
Successfully built kfp
Installing collected packages: kfp
  Attempting uninstall: kfp
    Found existing installation: kfp 0.1.18
    Uninstalling kfp-0.1.18:
      Successfully uninstalled kfp-0.1.18
Successfully installed kfp-0.1.18


# Import kfp pkgs

In [2]:
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client

# List existing pipeline experiments

In [3]:
# Create a Pipelines API client with default arguments; later cells reuse `client`.
client = kfp.Client()
# Last expression of the cell, so the experiment listing is shown as the cell output.
client.list_experiments()

{'experiments': [{'created_at': datetime.datetime(2020, 6, 17, 4, 53, 17, tzinfo=tzlocal()),
                  'description': 'All runs created without specifying an '
                                 'experiment will be grouped here.',
                  'id': '05814128-3167-4c1f-aeff-3c3c6db8c6dc',
                  'name': 'Default'}],
 'next_page_token': None,
 'total_size': 1}

# Create a Dkube REGRESSION experiment

In [4]:
# Create the experiment that will hold this notebook's run; its `.id` is passed to
# client.run_pipeline() in the final cell.
regression_experiment = client.create_experiment(name='Dkube - steel pl')

# Define e2e regression Pipeline with Dkube components

In [5]:
import kfp.dsl as dsl
from kfp import components
from kfp.components._yaml_utils import load_yaml
from kfp.components._yaml_utils import dump_yaml
from kubernetes import client as k8s_client

import os
import json
from random import randint

def _component(stage, name):
    """Load a stage's component definition and return it under a new name.

    Reads ``../components/<stage>/component.yaml`` (resolved against the
    current working directory), replaces its top-level ``name`` entry with
    ``name``, serialises the result back to YAML text and hands that text to
    ``components.load_component_from_text``.

    Args:
        stage: Directory name under ``../components`` holding ``component.yaml``.
        name: Value written to the ``name`` key of the loaded definition.

    Returns:
        Whatever ``components.load_component_from_text`` returns for the
        renamed definition (the pipeline below calls it to create a step).
    """
    spec_path = '../components/{}/component.yaml'.format(stage)
    with open(spec_path, 'rb') as spec_file:
        spec = load_yaml(spec_file)
    # Override the name so one component file can back several distinctly named steps.
    spec['name'] = name
    return components.load_component_from_text(dump_yaml(spec))

# End-to-end steel defect detection pipeline assembled from DKube components.
# Steps are chained with .after() in this order:
#   setup -> preprocess -> split -> train -> evaluate -> serve -> inference viewer
# List- and dict-valued defaults are passed as JSON-encoded strings (json.dumps).
@dsl.pipeline(
    name='dkube-steel-pl',
    description='steel defect detection pipeline with dkube components'
)
def d3pipeline(
    access_url,
    user,
    auth_token,
    git_token,

    # --- Severstal preprocessing step ---
    severstal_preprocess_script="python steel/preprocessing/preprocessing.py",
    severstal_preprocess_datasets=json.dumps(["steel-data"]),
    severstal_preprocess_input_mounts=json.dumps(["/opt/dkube/input"]),
    severstal_preprocess_outputs=json.dumps(["steel-preprocessed"]),
    severstal_preprocess_output_mounts=json.dumps(["/opt/dkube/output"]),

    # --- Severstal train/test split step ---
    severstal_split_script="python steel/split/split.py",
    severstal_split_datasets=json.dumps(["steel-preprocessed"]),
    severstal_split_input_mounts=json.dumps(["/opt/dkube/input"]),
    severstal_split_outputs=json.dumps(["steel-train", "steel-test"]),
    severstal_split_output_mounts=json.dumps(["/opt/dkube/output/train", "/opt/dkube/output/test"]),

    # --- Training step ---
    # Container image plus registry credentials; the default is the tf-cpu v1.14 image.
    # A custom image name may be supplied; for private images fill in username/password.
    # NOTE(review): an earlier comment here said DKUBE_USER_ACCESS_TOKEN is picked up
    # from the environment inside a notebook, but auth_token is a required parameter
    # of this function -- confirm which applies.
    training_container=json.dumps({'image': 'docker.io/ocdr/d3-datascience-tf-cpu:v1.14',
                                   'username': '',
                                   'password': ''}),
    # Name of the workspace in dkube; update if the workspace was created under another name.
    training_program="steel",
    # Command run inside the training container.
    training_script="python steel/model/model.py",
    # Input datasets for training; update if the dataset was created under another name.
    training_datasets=json.dumps(["steel-train"]),
    training_input_dataset_mounts=json.dumps(["/opt/dkube/input"]),
    training_outputs=json.dumps(["resUnet"]),
    training_output_mounts=json.dumps(["/opt/dkube/output"]),
    # Number of GPUs to request; per the original note, 0 means no GPU (use a CPU image).
    training_gpus=0,
    # Environment settings forwarded to the training program.
    training_envs=json.dumps([{"steps": 100}]),

    # --- Evaluation step ---
    evaluation_script="python steel/evaluation/evaluation.py",
    evaluation_datasets=json.dumps(["steel-test"]),
    evaluation_input_dataset_mounts=json.dumps(["/opt/dkube/input"]),
    evaluation_models=json.dumps(["resUnet"]),
    evaluation_input_model_mounts=json.dumps(["/opt/dkube/model"]),

    # --- Serving step ---
    # Device used for serving. NOTE(review): the original note said a model trained on
    # GPU needs GPU serving, otherwise 'cpu' -- it referred to the mnist example; confirm
    # it still holds for this pipeline.
    serving_device='cpu',
    serving_container=json.dumps({'image': 'docker.io/ocdr/steel-example-preprocess:030720',
                                  'username': '',
                                  'password': ''})):

    # Setup runs first and has no upstream dependency.
    setup_factory = _component('setup', 'steel-setup')
    setup_op = setup_factory(access_url, auth_token, git_token, user)

    # Preprocessing reuses the training container image and workspace.
    preprocess_factory = _component('preprocess', 'steel-preprocess')
    preprocess_op = preprocess_factory(
        auth_token,
        training_container,
        program=training_program,
        run_script=severstal_preprocess_script,
        datasets=severstal_preprocess_datasets,
        outputs=severstal_preprocess_outputs,
        input_dataset_mounts=severstal_preprocess_input_mounts,
        output_mounts=severstal_preprocess_output_mounts,
    ).after(setup_op)

    # The split step is built from the same 'preprocess' component file, renamed.
    split_factory = _component('preprocess', 'steel-split')
    split_op = split_factory(
        auth_token,
        training_container,
        program=training_program,
        run_script=severstal_split_script,
        datasets=severstal_split_datasets,
        outputs=severstal_split_outputs,
        input_dataset_mounts=severstal_split_input_mounts,
        output_mounts=severstal_split_output_mounts,
    ).after(preprocess_op)

    train_factory = _component('training', 'steel-model-training')
    train_op = train_factory(
        auth_token,
        training_container,
        program=training_program,
        run_script=training_script,
        datasets=training_datasets,
        outputs=training_outputs,
        input_dataset_mounts=training_input_dataset_mounts,
        output_mounts=training_output_mounts,
        ngpus=training_gpus,
        envs=training_envs,
    ).after(split_op)

    # Evaluation is built from the 'training' component file; it takes models as
    # input and shares the training GPU count and envs.
    eval_factory = _component('training', 'steel-model-eval')
    eval_op = eval_factory(
        auth_token,
        training_container,
        program=training_program,
        run_script=evaluation_script,
        datasets=evaluation_datasets,
        input_dataset_mounts=evaluation_input_dataset_mounts,
        models=evaluation_models,
        input_model_mounts=evaluation_input_model_mounts,
        ngpus=training_gpus,
        envs=training_envs,
    ).after(eval_dependency := train_op) if False else eval_factory(
        auth_token,
        training_container,
        program=training_program,
        run_script=evaluation_script,
        datasets=evaluation_datasets,
        input_dataset_mounts=evaluation_input_dataset_mounts,
        models=evaluation_models,
        input_model_mounts=evaluation_input_model_mounts,
        ngpus=training_gpus,
        envs=training_envs,
    ).after(train_op)

    # Serving consumes the training step's 'artifact' output but is ordered after evaluation.
    serving_factory = _component('serving', 'model-serving')
    serving_op = serving_factory(
        auth_token,
        train_op.outputs['artifact'],
        device=serving_device,
        serving_container=serving_container,
    ).after(eval_op)

    # The viewer is pointed at the serving step's 'servingurl' output.
    # NOTE(review): the positional 'digits' argument looks carried over from another
    # example -- confirm it is what the viewer component expects for this model.
    viewer_factory = _component('viewer', 'model-inference')
    inference_op = viewer_factory(
        auth_token,
        serving_op.outputs['servingurl'],
        'digits',
        viewtype='inference',
    ).after(serving_op)

# Compile and generate tarball

In [6]:
# Compile the d3pipeline function into a pipeline package written to
# 'dkube_steel_pl_full.tar.gz' (a relative path, so it lands in the working directory).
compiler.Compiler().compile(d3pipeline, 'dkube_steel_pl_full.tar.gz')
# Optionally upload this generated tarball through the Pipelines UI; the final
# cell submits the same file through the client instead.

# Create and Run pipeline

#### Click the dkube-training stage to see the enhanced Dkube Datascience dashboard, metrics and graphs. Click the dkube-viewer stage for the simple UI to test the model predictions.

In [25]:
# Submit the compiled package as a run named 'steel_defect_pipeline' in the
# experiment created earlier in this notebook.
# NOTE(review): params={} supplies no values, yet d3pipeline declares access_url, user,
# auth_token and git_token without defaults -- confirm how those are provided at run time.
run = client.run_pipeline(regression_experiment.id, 'steel_defect_pipeline', 'dkube_steel_pl_full.tar.gz', params={})