# Install pipelines SDK

#### Please wait until this cell completes before running the next cells. It only needs to be run once per active kernel.

In [1]:
# Set the Kubeflow Pipelines SDK release to install; the pip command below reads it from the environment.
%env RELEASE_VERSION=0.1.18
# Install (or upgrade) the kfp SDK tarball for that release into the user site-packages (--user).
!pip install https://storage.googleapis.com/ml-pipeline/release/${RELEASE_VERSION}/kfp.tar.gz --upgrade --user

env: RELEASE_VERSION=0.1.18
Collecting https://storage.googleapis.com/ml-pipeline/release/0.1.18/kfp.tar.gz
  Using cached https://storage.googleapis.com/ml-pipeline/release/0.1.18/kfp.tar.gz
Building wheels for collected packages: kfp
  Building wheel for kfp (setup.py) ... done
  Stored in directory: /tmp/pip-ephem-wheel-cache-jjhqnq0e/wheels/a8/be/46/964c241809fd50c2a15d63d0d61e4c48596c075dce5a83265c
Successfully built kfp
Installing collected packages: kfp
  Found existing installation: kfp 0.1.18
    Uninstalling kfp-0.1.18:
      Successfully uninstalled kfp-0.1.18
Successfully installed kfp-0.1.18
You should consider upgrading via the 'pip install --upgrade pip' command.


# Import kfp pkgs

In [2]:
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client

# List existing pipeline experiments

In [3]:
# Create the Kubeflow Pipelines API client; later cells reuse `client`.
client = kfp.Client()
# Bare last expression so the notebook displays the experiment listing.
client.list_experiments()

{'experiments': [{'created_at': datetime.datetime(2020, 2, 21, 6, 43, 2, tzinfo=tzutc()),
                  'description': 'All runs created without specifying an '
                                 'experiment will be grouped here.',
                  'id': '4728fb43-7096-454f-b98d-c915ae28f8f8',
                  'name': 'Default'},
                 {'created_at': datetime.datetime(2020, 2, 23, 10, 32, 53, tzinfo=tzutc()),
                  'description': None,
                  'id': '0eac974c-fa9c-4082-9479-c6878d17a30e',
                  'name': 'Dkube - Nucleoplasm_Cytosol pl min'}],
 'next_page_token': None,
 'total_size': 2}

# Create a Dkube Nucleoplasm_Cytosol experiment

In [4]:
# Create the experiment named 'Dkube - NC pl' and keep the returned object for later cells.
Nucleoplasm_Cytosol_experiment = client.create_experiment(name='Dkube - NC pl')

# Define e2e Nucleoplasm_Cytosol Pipeline with Dkube components

In [7]:
import kfp.dsl as dsl
from kfp import components
from kubernetes import client as k8s_client

import os
import json
from random import randint

def _load_dkube_component(stage_dir):
    """Load the component definition stored at ../components/<stage_dir>/component.yaml."""
    return components.load_component_from_file(
        "../components/{}/component.yaml".format(stage_dir)
    )


# One component factory per pipeline stage, loaded from the local component.yaml files.
dkube_preprocess_op = _load_dkube_component("preprocess")
dkube_split_op = _load_dkube_component("split")
dkube_training_op = _load_dkube_component("training")
dkube_evaluate_op = _load_dkube_component("evaluation")
dkube_serving_op = _load_dkube_component("serving")
dkube_viewer_op = _load_dkube_component("viewer")

@dsl.pipeline(
    name='dkube-NC-pl',
    description='sample protien cell classification pipeline with dkube components'
)
def d3pipeline(
    # ---- Preprocess stage ----
    cellular_preprocess_script="python cell-classification/preprocessing/merge.py",
    cellular_preprocess_datasets=json.dumps(["cellular-images"]),
    cellular_preprocess_input_mounts=json.dumps(["/opt/dkube/input"]),
    cellular_preprocess_outputs=json.dumps(["cellular-preprocessed"]),
    cellular_preprocess_output_mounts=json.dumps(["/opt/dkube/output"]),

    # ---- Split stage ----
    cellular_split_script="python cell-classification/split/annot_split.py",
    cellular_split_datasets=json.dumps(["cellular-preprocessed"]),
    cellular_split_input_mounts=json.dumps(["/opt/dkube/input"]),
    cellular_split_outputs=json.dumps(["cellular-train", "cellular-test"]),
    cellular_split_output_mounts=json.dumps(["/opt/dkube/output/train", "/opt/dkube/output/test"]),

    # ---- Training stage ----
    # Read from the DKUBE_USER_ACCESS_TOKEN environment variable when this cell is executed.
    auth_token=os.getenv("DKUBE_USER_ACCESS_TOKEN"),
    # Container image for the jobs (default is the tf v1.14 CPU image). Any custom image
    # name can be supplied; for private images fill in username/password.
    training_container=json.dumps({"image": "docker.io/ocdr/d3-datascience-tf-cpu:v1.14", "username": "", "password": ""}),
    # Name of the workspace in dkube; update if a different name was used when creating it.
    training_program="cellular",
    # Command executed inside the training container.
    training_script="python cell-classification/model/train_frcnn.py -o simple -p /opt/dkube/input/annot.txt --hf --vf --rot --num_epochs 1",
    # Input datasets for training; update if a different dataset name was used in dkube.
    training_datasets=json.dumps(["cellular-train"]),
    training_input_dataset_mounts=json.dumps(["/opt/dkube/input/"]),
    training_outputs=json.dumps(["faster-rcnn"]),
    training_output_mounts=json.dumps(["/opt/dkube/output"]),
    # Number of GPUs to request; 0 means no GPU (use a CPU image in training_container).
    training_gpus=0,
    # Environment settings passed to the training program.
    training_envs=json.dumps([{"steps": 100}]),

    # ---- Evaluation stage ----
    evaluation_script="python cell-classification/model/evaluate.py --path /opt/dkube/input/annot.txt",
    evaluation_datasets=json.dumps(["cellular-test"]),
    evaluation_input_dataset_mounts=json.dumps(["/opt/dkube/input/"]),
    evaluation_models=json.dumps(["faster-rcnn"]),
    evaluation_input_model_mounts=json.dumps(["/opt/dkube/model"]),

    # ---- Serving stage (only consumed by the disabled serving step below) ----
    # Device to be used for serving; set to 'cpu' unless the model needs a GPU to serve.
    serving_device="cpu",
    serving_container=json.dumps({"image": "docker.io/ocdr/new-preprocess:satish", "username": "", "password": ""}),
):
    """Wire the Dkube components into a preprocess -> split -> train -> evaluate chain.

    Each stage is created from its loaded component and ordered with ``.after()``
    so it runs once the previous stage has finished. Every stage receives
    ``auth_token`` and ``training_container`` as its first two positional inputs
    and ``training_program`` as its program.

    The serving and viewer stages are commented out, so ``serving_device`` and
    ``serving_container`` are accepted but currently unused.
    """
    # Stage 1: preprocess the raw dataset.
    preprocess_step = dkube_preprocess_op(
        auth_token, training_container,
        program=training_program,
        run_script=cellular_preprocess_script,
        datasets=cellular_preprocess_datasets,
        outputs=cellular_preprocess_outputs,
        input_dataset_mounts=cellular_preprocess_input_mounts,
        output_mounts=cellular_preprocess_output_mounts,
    )

    # Stage 2: split the preprocessed dataset into train and test outputs.
    split_step = dkube_split_op(
        auth_token, training_container,
        program=training_program,
        run_script=cellular_split_script,
        datasets=cellular_split_datasets,
        outputs=cellular_split_outputs,
        input_dataset_mounts=cellular_split_input_mounts,
        output_mounts=cellular_split_output_mounts,
    ).after(preprocess_step)

    # Stage 3: run the training script on the training dataset.
    train_step = dkube_training_op(
        auth_token, training_container,
        program=training_program,
        run_script=training_script,
        datasets=training_datasets,
        outputs=training_outputs,
        input_dataset_mounts=training_input_dataset_mounts,
        output_mounts=training_output_mounts,
        ngpus=training_gpus,
        envs=training_envs,
    ).after(split_step)

    # Stage 4: run the evaluation script; reuses the training GPU and env settings.
    evaluate_step = dkube_evaluate_op(
        auth_token, training_container,
        program=training_program,
        run_script=evaluation_script,
        datasets=evaluation_datasets,
        input_dataset_mounts=evaluation_input_dataset_mounts,
        models=evaluation_models,
        input_model_mounts=evaluation_input_model_mounts,
        ngpus=training_gpus,
        envs=training_envs,
    ).after(train_step)

    # Disabled stages, kept for reference:
    # serving     = dkube_serving_op(auth_token, train.outputs['artifact'], device=serving_device, serving_container=serving_container).after(evaluate)
    # inference   = dkube_viewer_op(auth_token, serving.outputs['servingurl'],
    #                               'digits', viewtype='inference').after(serving)

# Compile and generate tarball

In [8]:
# Compile the d3pipeline function into a package file in the current directory.
compiler.Compiler().compile(
    pipeline_func=d3pipeline,
    package_path='dkube_NC_pl.tar.gz',
)
# Upload this generated tarball into the Pipelines UI

# Create and Run pipeline

#### Click the dkube-training stage to see the enhanced Dkube Datascience dashboard, metrics and graphs. Click the dkube-viewer stage for the simple UI to test the model predictions.

In [50]:
# Submit the compiled package as a run under the experiment created earlier in this
# notebook (Nucleoplasm_Cytosol_experiment). The package path must match the file
# written by the compile step ('dkube_NC_pl.tar.gz'); no parameter overrides are passed.
run = client.run_pipeline(Nucleoplasm_Cytosol_experiment.id, 'Nucleoplasm_Cytosol_pipeline', 'dkube_NC_pl.tar.gz', params={})