# Install pipelines SDK

#### Please wait until this cell completes before running the next cells. This only needs to be run once per active kernel.

In [None]:
# Set the KFP SDK release to install as an environment variable, then
# pip-install (or upgrade to) the SDK tarball published for that release.
%env RELEASE_VERSION=1.0.0
!pip install https://storage.googleapis.com/ml-pipeline/release/${RELEASE_VERSION}/kfp.tar.gz --upgrade

# Import kfp pkgs

In [None]:
import os
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client

# List existing pipeline experiments

In [None]:
# Create a Kubeflow Pipelines client with default connection settings
# (no host is passed) and list the experiments it can see.
# NOTE(review): presumably this relies on running inside the cluster's
# notebook environment -- confirm the endpoint resolution for your setup.
client = kfp.Client()
client.list_experiments()

# Define e2e MNIST Pipeline with Dkube components

In [None]:
import kfp.dsl as dsl
from kfp import components
from kubernetes import client as k8s_client

import os
import json
from random import randint

# Build one op factory per Dkube component from its component.yaml.
# The paths are relative, so they resolve against the notebook's current
# working directory.
# Note: dkube_viewer_op is loaded here but is not referenced by d3pipeline.
dkube_preprocessing_op      = components.load_component_from_file("../components/preprocess/component.yaml")
dkube_training_op           = components.load_component_from_file("../components/training/component.yaml")
dkube_serving_op            = components.load_component_from_file("../components/serving/component.yaml")
dkube_viewer_op             = components.load_component_from_file('../components/viewer/component.yaml')

# End-to-end MNIST pipeline built from three Dkube components, chained as
# preprocessing -> training -> serving.
@dsl.pipeline(
    name='mnist',
    description='sample mnist digits pipeline with dkube components'
)
def d3pipeline(
    # Authentication token; handed to every Dkube component below.
    auth_token,
    # Name of the project in dkube. Passed as `program` to the preprocessing
    # and training steps and as `transformer_project` to the serving step.
    training_program,
    # Dataset consumed by the preprocessing step.
    preprocessing_dataset,
    # Featureset written by preprocessing and read by training.
    training_featureset,
    # Model produced by the training step.
    training_output_model,
    # Command run by the preprocessing step.
    data_preprocess_script = "pip3 install pyarrow --user && python featureset.py",
    # Mount path for the preprocessing input dataset.
    data_preprocess_input_mounts = "/opt/dkube/input",
    # Mount path for the preprocessing output.
    data_preprocess_output_mounts = "/opt/dkube/output",
    # Job group for the training run; 'default' unless overridden.
    job_group = 'default',
    # Framework. One of tensorflow, pytorch, sklearn
    framework = "tensorflow",
    # Framework version
    version = "1.14",
    # JSON-encoded container spec (image, username, password). The tf v1.14
    # CPU image is the default; any custom image name can be supplied, and
    # username/password are for private images. This same container is also
    # passed to the preprocessing step.
    training_container=json.dumps({'image':'docker.io/ocdr/d3-datascience-tf-cpu:v1.14', 'username':'', 'password': ''}),
    # Command run inside the training container.
    training_script="pip3 install pyarrow --user && python model.py",
    # Mount path for the training input (the featureset).
    training_input_dataset_mount="/opt/dkube/input",
    # Mount path for the training output (the model).
    training_output_mount="/opt/dkube/output",
    # Number of GPUs to request for training; 0 means no GPU, in which case
    # training_container should point at a CPU image.
    training_gpus=0,
    # JSON-encoded envs passed to the training program.
    training_envs=json.dumps([{"steps": 100}]),
    # JSON-encoded hyperparameter tuning info; empty object by default.
    tuning=json.dumps({}),
    # Device used for serving. A model trained on gpu needs gpu for serving;
    # otherwise leave this as 'cpu'.
    serving_device='cpu',
    # JSON-encoded serving image spec (image, username, password).
    serving_image=json.dumps({'image':'ocdr/tensorflowserver:1.14', 'username':'', 'password': ''}),
    # JSON-encoded transformer image spec (image, username, password).
    transformer_image=json.dumps({'image':'docker.io/ocdr/d3-datascience-tf-cpu:v1.14', 'username':'', 'password': ''}),
    # Path of the transformer script to execute.
    transformer_code="tensorflow/classification/mnist-fs/digits/transformer/transformer.py"):

    def as_json_list(value):
        # Dataset, output and mount arguments are passed to the components
        # as a JSON-encoded single-element list of the stringified value.
        return json.dumps([str(value)])

    # Step 1: preprocessing -- turns the raw dataset into the featureset.
    preprocess_datasets = as_json_list(preprocessing_dataset)
    preprocess_outputs = as_json_list(training_featureset)
    preprocess_in_mounts = as_json_list(data_preprocess_input_mounts)
    preprocess_out_mounts = as_json_list(data_preprocess_output_mounts)
    preprocess_step = dkube_preprocessing_op(
        auth_token,
        training_container,
        program=training_program,
        run_script=data_preprocess_script,
        datasets=preprocess_datasets,
        outputs=preprocess_outputs,
        input_dataset_mounts=preprocess_in_mounts,
        output_mounts=preprocess_out_mounts,
    )

    # Step 2: training -- reads the featureset, writes the model.
    train_datasets = as_json_list(training_featureset)
    train_outputs = as_json_list(training_output_model)
    train_in_mounts = as_json_list(training_input_dataset_mount)
    train_out_mounts = as_json_list(training_output_mount)
    train_step = dkube_training_op(
        auth_token,
        training_container,
        program=training_program,
        run_script=training_script,
        datasets=train_datasets,
        outputs=train_outputs,
        input_dataset_mounts=train_in_mounts,
        output_mounts=train_out_mounts,
        ngpus=training_gpus,
        envs=training_envs,
        tuning=tuning,
        job_group=job_group,
        framework=framework,
        version=version,
    )
    # No output of preprocessing is passed to training directly, so the
    # ordering between the two steps has to be declared explicitly.
    train_step.after(preprocess_step)

    # Step 3: serving -- deploys the 'artifact' output of the training step.
    trained_artifact = train_step.outputs['artifact']
    dkube_serving_op(
        auth_token,
        trained_artifact,
        device=serving_device,
        serving_image=serving_image,
        transformer_image=transformer_image,
        transformer_project=training_program,
        transformer_code=transformer_code,
    ).after(train_step)

# Compile and generate the tarball

In [None]:
# Compile the d3pipeline function into a pipeline package and write it to
# 'dkube_mnist_pl.tar.gz' (relative path, so it lands in the current
# working directory).
compiler.Compiler().compile(d3pipeline, 'dkube_mnist_pl.tar.gz')
# Upload this generated tarball into the Pipelines UI