# Installing kubeflow pipelines client

In [None]:
import sys,json, os, random, string
# Install the pinned Kubeflow Pipelines client packages only when
# DKUBE_JOB_CLASS is unset or empty (presumably it is set when this notebook
# runs as a DKube job that already has them -- TODO confirm).
job_class = os.getenv("DKUBE_JOB_CLASS")
if not job_class:
    # IPython shell escape: runs pip with this kernel's interpreter; pip's
    # stdout is appended to /dev/null, so only stderr output is shown.
    !{sys.executable} -m pip install kfp==1.4.0 kfp-server-api==1.2.0 --upgrade >> /dev/null

# Import kfp pkgs

In [None]:
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler
from kubernetes import client as k8s_client
import os

In [None]:
# Inline KFP component specification (YAML text) for the setup step.
# It declares a component named `create_dkube_resource` with three required
# string inputs (token, user, project_id) and runs
# `python3 regressionsetup.py --auth_token ... --user ... --project_id ...`
# in the docker.io/ocdr/dkube-examples-setup:cli-reg image.
# The text is turned into an op factory (`setup_op`) later in the notebook.
setup_component = '''
name: create_dkube_resource
description: |
    creates dkube resources required for pipeline.
metadata:
  annotations: {platform: 'Dkube'}
  labels: {stage: 'create_dkube_resource', logger: 'dkubepl', wfid: '{{workflow.uid}}', runid: '{{pod.name}}'}
inputs:
  - {name: token,      type: String,   optional: false,
    description: 'Required. Dkube authentication token.'}
  - {name: user,      type: String,   optional: false,
    description: 'Required. Dkube Logged in User name.'}
  - {name: project_id,      type: String,   optional: false,
    description: 'Required. Dkube Project name.'}
implementation:
  container:
    image: docker.io/ocdr/dkube-examples-setup:cli-reg
    command: ['python3', 'regressionsetup.py']
    args: [
      --auth_token, {inputValue: token},
      --user, {inputValue: user},
      --project_id, {inputValue: project_id}
    ]
'''

## Check for the project and create it if it doesn't exist

In [None]:
# Resolve the DKube project used by this notebook.
#
# * If DKUBE_PROJECT_ID is set, use it and take the display name from
#   DKUBE_PROJECT_NAME.
# * Otherwise create a project named "clinical-reg" through the DKube SDK and
#   use the id returned by create_project().
#
# Any failure to create the project is re-raised: there is no project id to
# continue with, so stopping here is clearer than failing later on an
# undefined (or stale) `res`.
project_id = os.environ.get("DKUBE_PROJECT_ID", "")
if not project_id:
    project_name = "clinical-reg"
    # Star import kept as in the original cell; it supplies DkubeApi and
    # DkubeProject used below.
    from dkube.sdk import *
    DKUBE_URL = os.getenv("DKUBE_URL")
    DKUBE_TOKEN = os.getenv("DKUBE_USER_ACCESS_TOKEN", "")
    api = DkubeApi(URL=DKUBE_URL, token=DKUBE_TOKEN)
    try:
        project = DkubeProject(project_name)
        res = api.create_project(project)
    except Exception as e:
        # Not every exception carries a `reason` attribute, so read it
        # defensively instead of raising AttributeError inside the handler.
        reason = str(getattr(e, "reason", "") or "")
        if reason.lower() == "conflict":
            print(f"Project {project_name} already exists kindly use a different name")
        raise
    else:
        # Only read the id when create_project() actually succeeded.
        project_id = res["id"]
else:
    project_name = os.environ.get("DKUBE_PROJECT_NAME")
# Counter embedded in the run name built later in the notebook.
run_id = 0

# Define e2e regression Pipeline with Dkube components

In [None]:
import kfp.dsl as dsl
from kfp import components
from kfp.components._yaml_utils import load_yaml
from kfp.components._yaml_utils import dump_yaml
from kubernetes import client as k8s_client

import os
import json
from random import randint

def _component(stage, name):
    """Load a DKube stage component spec and return it under a custom name.

    The stage's ``component.yaml`` is read from the DKube pipeline components
    directory, its ``name`` field is replaced with *name*, and the modified
    spec is re-serialised and loaded as a KFP component.

    Args:
        stage: Sub-directory of ``/mnt/dkube/pipeline/components`` that holds
            the ``component.yaml`` to read.
        name: Value written to the spec's ``name`` key.

    Returns:
        The result of ``components.load_component_from_text`` for the renamed
        spec.
    """
    spec_path = '/mnt/dkube/pipeline/components/{}/component.yaml'.format(stage)
    with open(spec_path, 'rb') as spec_file:
        spec = load_yaml(spec_file)
    spec['name'] = name
    renamed_yaml = dump_yaml(spec)
    return components.load_component_from_text(renamed_yaml)
        
# Op factory for the setup step, built from the inline YAML spec text.
setup_op = components.load_component_from_text(setup_component)

# End-to-end regression pipeline built from DKube components.
#
# Step graph wired up below:
#   create_dkube_resource
#     -> clinical-preprocess -> clinical-split --\
#     -> images-preprocess   -> images-split   ---+-> regression-model-training
#     -> rna-split ------------------------------/        -> model-serving
#                                                            -> model-inference
#
# `user`, `auth_token` and `project_id` feed the setup step; `tags` is passed
# to every preprocess/split/training step. The evaluation_* parameters are
# declared but not consumed by any step in this function.
#
# NOTE(review): documented with comments rather than a docstring because the
# KFP compiler may read the function docstring into pipeline metadata -- confirm.
@dsl.pipeline(
    name='dkube-regression-pl',
    description='sample regression pipeline with dkube components',
)
def d3pipeline(
    user,
    auth_token,
    project_id,
    tags,

    # --- Clinical preprocessing ---
    clinical_preprocess_script="python cli-pre-processing.py",
    clinical_preprocess_datasets=json.dumps(["clinical"]),
    clinical_preprocess_input_mounts=json.dumps(["/opt/dkube/input"]),
    clinical_preprocess_outputs=json.dumps(["clinical-preprocessed"]),
    clinical_preprocess_output_mounts=json.dumps(["/opt/dkube/output"]),

    # --- Image preprocessing ---
    image_preprocess_script="python img-pre-processing.py",
    image_preprocess_datasets=json.dumps(["images"]),
    image_preprocess_input_mounts=json.dumps(["/opt/dkube/input"]),
    image_preprocess_outputs=json.dumps(["images-preprocessed"]),
    image_preprocess_output_mounts=json.dumps(["/opt/dkube/output"]),

    # --- Clinical train/test/val split ---
    clinical_split_script="python split.py --datatype clinical",
    clinical_split_datasets=json.dumps(["clinical-preprocessed"]),
    clinical_split_input_mounts=json.dumps(["/opt/dkube/input"]),
    clinical_split_outputs=json.dumps(["clinical-train", "clinical-test", "clinical-val"]),
    clinical_split_output_mounts=json.dumps(
        ["/opt/dkube/outputs/train", "/opt/dkube/outputs/test", "/opt/dkube/outputs/val"]
    ),

    # --- Image train/test/val split ---
    image_split_script="python split.py --datatype image",
    image_split_datasets=json.dumps(["images-preprocessed"]),
    image_split_input_mounts=json.dumps(["/opt/dkube/input"]),
    image_split_outputs=json.dumps(["images-train", "images-test", "images-val"]),
    image_split_output_mounts=json.dumps(
        ["/opt/dkube/outputs/train", "/opt/dkube/outputs/test", "/opt/dkube/outputs/val"]
    ),

    # --- RNA train/test/val split ---
    rna_split_script="python split.py --datatype rna",
    rna_split_datasets=json.dumps(["rna"]),
    rna_split_input_mounts=json.dumps(["/opt/dkube/input"]),
    rna_split_outputs=json.dumps(["rna-train", "rna-test", "rna-val"]),
    rna_split_output_mounts=json.dumps(
        ["/opt/dkube/outputs/train", "/opt/dkube/outputs/test", "/opt/dkube/outputs/val"]
    ),

    # --- Training ---
    job_group='default',
    # Framework: one of tensorflow, pytorch, sklearn.
    framework="tensorflow",
    # Framework version.
    version="2.3.0",
    # Container image for the preprocess/split/training steps. Any custom image
    # name can be supplied; for custom private images, also provide
    # username/password.
    training_container=json.dumps({'image': 'ocdr/dkube-datascience-tf-cpu:v2.3.0-17'}),
    # Name of the workspace (code repo) in DKube; update it if a different
    # name was used when the workspace was created.
    training_program="regression",
    # Script to run inside the training container.
    training_script="python train_nn.py --epochs 5",
    # Input datasets for training; update if different dataset names were used
    # when creating them in DKube.
    training_datasets=json.dumps(
        ["clinical-train", "clinical-val", "images-train", "images-val", "rna-train", "rna-val"]
    ),
    training_input_dataset_mounts=json.dumps(
        [
            "/opt/dkube/inputs/train/clinical", "/opt/dkube/inputs/val/clinical",
            "/opt/dkube/inputs/train/images", "/opt/dkube/inputs/val/images",
            "/opt/dkube/inputs/train/rna", "/opt/dkube/inputs/val/rna",
        ]
    ),
    training_outputs=json.dumps(["regression-model"]),
    training_output_mounts=json.dumps(["/opt/dkube/output"]),
    # Number of GPUs to request; 0 means no GPU (use a CPU training image).
    training_gpus=0,
    # Environment settings passed to the training program.
    training_envs=json.dumps([{"steps": 100}]),

    tuning=json.dumps({}),

    # --- Evaluation (declared, not used by any step below) ---
    evaluation_script="python evaluate.py",
    evaluation_datasets=json.dumps(["clinical-test", "images-test", "rna-test"]),
    evaluation_input_dataset_mounts=json.dumps(
        ["/opt/dkube/inputs/test/clinical", "/opt/dkube/inputs/test/images", "/opt/dkube/inputs/test/rna"]
    ),
    evaluation_models=json.dumps(["regression-model"]),
    evaluation_input_model_mounts=json.dumps(["/opt/dkube/inputs/model"]),

    # --- Serving ---
    # Device used for serving; per the original note, a model trained on GPU
    # needs GPU serving, otherwise set this to 'cpu'.
    serving_device='cpu',
    # Serving image.
    serving_image=json.dumps({'image': 'ocdr/tensorflowserver:2.3.0'}),
    # Transformer image.
    transformer_image=json.dumps({'image': 'ocdr/dkube-datascience-tf-cpu:v2.3.0-17'}),
    # Script that implements the transformer.
    transformer_code="clinical_reg/transformer.py",
):
    # Setup step: creates the DKube resources the later steps rely on.
    setup_step = setup_op(user=user, token=auth_token, project_id=project_id)
    # Zero-duration staleness: this step should not reuse a cached result.
    setup_step.execution_options.caching_strategy.max_cache_staleness = "P0D"

    # Arguments shared by every preprocess, split and training step.
    shared_job_args = dict(
        container=training_container,
        tags=tags,
        program=training_program,
    )

    # Preprocessing steps run right after setup.
    clinical_prep = _component('preprocess', 'clinical-preprocess')(
        **shared_job_args,
        run_script=clinical_preprocess_script,
        datasets=clinical_preprocess_datasets,
        outputs=clinical_preprocess_outputs,
        input_dataset_mounts=clinical_preprocess_input_mounts,
        output_mounts=clinical_preprocess_output_mounts,
    ).after(setup_step)

    image_prep = _component('preprocess', 'images-preprocess')(
        **shared_job_args,
        run_script=image_preprocess_script,
        datasets=image_preprocess_datasets,
        outputs=image_preprocess_outputs,
        input_dataset_mounts=image_preprocess_input_mounts,
        output_mounts=image_preprocess_output_mounts,
    ).after(setup_step)

    # Split steps reuse the 'preprocess' component with split scripts.
    clinical_split = _component('preprocess', 'clinical-split')(
        **shared_job_args,
        run_script=clinical_split_script,
        datasets=clinical_split_datasets,
        outputs=clinical_split_outputs,
        input_dataset_mounts=clinical_split_input_mounts,
        output_mounts=clinical_split_output_mounts,
    ).after(clinical_prep)

    image_split = _component('preprocess', 'images-split')(
        **shared_job_args,
        run_script=image_split_script,
        datasets=image_split_datasets,
        outputs=image_split_outputs,
        input_dataset_mounts=image_split_input_mounts,
        output_mounts=image_split_output_mounts,
    ).after(image_prep)

    # RNA data has no preprocessing step; it is split directly after setup.
    rna_split = _component('preprocess', 'rna-split')(
        **shared_job_args,
        run_script=rna_split_script,
        datasets=rna_split_datasets,
        outputs=rna_split_outputs,
        input_dataset_mounts=rna_split_input_mounts,
        output_mounts=rna_split_output_mounts,
    ).after(setup_step)

    # Training waits for all three splits.
    train = _component('training', 'regression-model-training')(
        **shared_job_args,
        run_script=training_script,
        datasets=training_datasets,
        outputs=training_outputs,
        input_dataset_mounts=training_input_dataset_mounts,
        output_mounts=training_output_mounts,
        ngpus=training_gpus,
        envs=training_envs,
        tuning=tuning,
        job_group=job_group,
        framework=framework,
        version=version,
    ).after(clinical_split).after(image_split).after(rna_split)

    # Serve the trained model artifact.
    serving = _component('serving', 'model-serving')(
        model=train.outputs['artifact'],
        device=serving_device,
        serving_image=serving_image,
        transformer_image=transformer_image,
        transformer_project=training_program,
        transformer_code=transformer_code,
    ).after(train)

    # Inference viewer pointed at the serving URL; the op itself is not
    # referenced again, so it is not bound to a name.
    _component('viewer', 'model-inference')(
        servingurl=serving.outputs['servingurl'],
        servingexample='regression',
        viewtype='inference',
    ).after(serving)

# Compile and generate the tarball

In [None]:
# Display name used when the package is uploaded, and the archive the
# compiler writes the pipeline definition to.
pipeline_name = 'Regression Pipeline'
pipeline_filename = 'dkube_regression_pl_full.tar.gz'

# Compile the pipeline function into the .tar.gz package.
compiler.Compiler().compile(pipeline_func=d3pipeline, package_path=pipeline_filename)

## Upload Pipeline

In [None]:
# Connect to the Kubeflow Pipelines API using the DKube access token, then
# upload the compiled package under `pipeline_name`.
existing_token = os.getenv("DKUBE_USER_ACCESS_TOKEN")
client = kfp.Client(
    host=os.getenv("KF_PIPELINES_ENDPOINT"),
    existing_token=existing_token,
    namespace=os.getenv("USERNAME"),
)
try:
    client.upload_pipeline(
        pipeline_package_path=pipeline_filename,
        pipeline_name=pipeline_name,
        description=None,
    )
except Exception as e:
    # Best effort: report the failure and let the notebook continue
    # (presumably the usual cause is a pipeline of this name already
    # existing -- verify). Catching Exception rather than BaseException keeps
    # kernel interrupts (KeyboardInterrupt) and SystemExit working.
    print(e)

In [None]:
# Random 4-character suffix (lowercase letters and digits). It is not used in
# the names below; kept because it is a notebook-level variable.
res = ''.join(random.choices(string.ascii_lowercase + string.digits, k=4))
# Tag passed to the pipeline so its jobs are associated with the project id.
tags = json.dumps([f"project:{project_id}"])
# Prefer DKUBE_PROJECT_NAME, but keep the name chosen earlier in the notebook
# (e.g. when the project was created here) if the variable is unset, so the
# names below do not become "[None] ...".
project_name = os.environ.get("DKUBE_PROJECT_NAME") or project_name
run_name = f"[{project_name}] Clinical Reg {run_id}"
experiment = f"[{project_name}] experiment"

## Create regression experiment

In [None]:
# The listing result is discarded (it is not the last expression of the
# cell, so nothing is displayed).
client.list_experiments()
# Create a new experiment
try:
    clinical_experiment = client.create_experiment(name=experiment)
except Exception as e:
    # Best effort: print the error and continue. Exception (not BaseException)
    # so that a kernel interrupt or SystemExit is not swallowed.
    print(e)

## Create a run

In [None]:
# Submit a run of the pipeline function into the experiment, passing the four
# required pipeline arguments; all other parameters use their defaults.
user = os.getenv("USER")
auth_token = existing_token

try:
    run = client.create_run_from_pipeline_func(
        d3pipeline,
        run_name=run_name,
        experiment_name=experiment,
        arguments={
            "user": user,
            "auth_token": auth_token,
            "project_id": project_id,
            "tags": tags,
        },
    )
except Exception as e:
    # Best effort: print the error and continue. Exception (not BaseException)
    # so that a kernel interrupt or SystemExit is not swallowed.
    print(e)