In [None]:
import sys
# Install the Kubeflow Pipelines SDK using the same interpreter that runs this
# kernel (sys.executable), so the package lands in the kernel's environment.
# pip's stdout is redirected to /dev/null; stderr is still shown.
# NOTE(review): the kfp version is not pinned -- consider pinning it to the
# version this notebook was written against so re-runs are reproducible.
!{sys.executable} -m pip install kfp >/dev/null

In [None]:
import os, json
import kfp
import kfp.compiler as compiler

In [None]:
# Access token is taken from the environment; it is None when the variable is unset.
existing_token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")

# Kubeflow Pipelines client authenticated with that token.
client = kfp.Client(
    existing_token=existing_token,
)

# Input and output parameters for the pipeline run

In [None]:
# ---- Project owner resources ----
project_id = "pjti25"
project_owner = "ocdkube"
project_name = "titanic-" + project_owner
ptrain_dataset = "titanic-train-ds"
ptest_dataset = "titanic-test-ds"

# ---- User specific resources ----
# NOTE(review): the featureset names are suffixed with `project_owner`, not
# with `user` (defined at the end of this cell) -- confirm this is intended.
train_fs = "titanic-train-fs-" + project_owner
test_fs = "titanic-test-fs-" + project_owner
training_program = "titanic-code-user"
model = "titanic-model-user"

# ---- Program specific variables ----
# Container image used by both pipeline stages.
image = "docker.io/ocdr/dkube-datascience-tf-cpu:v2.0.0"

# Preprocessing stage: mount paths for the input datasets and output featuresets,
# and the command line that is run.
dataset_mount_points = [
    "/opt/dkube/input/train",
    "/opt/dkube/input/test",
]
output_featureset_mount_points = [
    "/opt/dkube/output/train",
    "/opt/dkube/output/test",
]
preprocessing_script = "python preprocessing.py --train_fs {} --test_fs {}".format(train_fs, test_fs)

# Training stage: command line plus mount paths for the input featuresets and the output model.
training_script = "python training.py --train_fs {}".format(train_fs)
train_inp_mount_points = [
    "/titanic-train",
    "/titanic-test",
]
train_out_mount_points = ["/model"]

# Login name from the environment (None when USERNAME is unset); not referenced
# by the other cells visible in this notebook.
user = os.environ.get("USERNAME")

# Loading components

In [None]:
# Counter appended to run names so that successive submissions get distinct names.
runid = 0

# Base location of the DKube component definitions; each component is
# described by a component.yaml fetched from below this URL.
components_url = "https://raw.githubusercontent.com/oneconvergence/dkube/master/components/"

dkube_preprocessing_op = kfp.components.load_component_from_url(
    f"{components_url}preprocess/component.yaml"
)
dkube_training_op = kfp.components.load_component_from_url(
    f"{components_url}training/component.yaml"
)

# Pipeline Definition

In [None]:
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    # Two-stage pipeline: a preprocessing step that turns the train/test
    # datasets into featuresets, followed by a training step that consumes
    # those featuresets and writes the model output.
    #
    # token      -- access token forwarded to both DKube components.
    # project_id -- pipeline parameter.
    # NOTE(review): `project_id` is not passed to either component below --
    # confirm whether it should be.

    # Arguments shared by both stages, serialised once.
    container_spec = json.dumps({"image": image})
    featureset_names = json.dumps([train_fs, test_fs])

    preprocessing = dkube_preprocessing_op(
        token,
        container_spec,
        program=training_program,
        run_script=preprocessing_script,
        datasets=json.dumps([ptrain_dataset, ptest_dataset]),
        output_featuresets=featureset_names,
        input_dataset_mounts=json.dumps(dataset_mount_points),
        output_featureset_mounts=json.dumps(output_featureset_mount_points),
    )

    train = dkube_training_op(
        token,
        container_spec,
        framework="sklearn",
        version="0.23.2",
        program=training_program,
        run_script=training_script,
        featuresets=featureset_names,
        outputs=json.dumps([model]),
        input_featureset_mounts=json.dumps(train_inp_mount_points),
        output_mounts=json.dumps(train_out_mount_points),
    )
    # Training must not start until preprocessing has finished.
    train.after(preprocessing)

# Compiling pipeline into tar file

In [None]:
# Name under which the pipeline is registered when it is uploaded.
pl_name = "[titanic] pipeline demo"
# Archive that the compiled pipeline is written to.
pl_file_name = 'titanic_pre_train_pl.tar.gz'

compiler.Compiler().compile(
    pipeline_func=titanic_pipeline,
    package_path=pl_file_name,
)
# The generated tarball can now be uploaded into the Pipelines UI

# Uploading Pipeline

In [None]:
# Re-create the client and register the compiled package under `pl_name`.
client = kfp.Client(existing_token=existing_token)
try:
    client.upload_pipeline(pipeline_package_path=pl_file_name, pipeline_name=pl_name, description=None)
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still propagate and the cell can be interrupted.
    print(e)

# Creating experiment

In [None]:
# Create the experiment that the pipeline runs are submitted under.
try:
    titanic_experiment = client.create_experiment(name='[titanic] experiment')
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still propagate and the cell can be interrupted.
    # Note that `titanic_experiment` is left unassigned when this fails.
    print(e)

# Creating run from pipeline under the titanic experiment

In [None]:
# Submit a run of the compiled pipeline under the titanic experiment.
try:
    run = client.run_pipeline(
        titanic_experiment.id,
        "[titanic] Run" + str(runid),
        # Use the archive produced by the compile step; the previous hard-coded
        # 'train_pl.tar.gz' is not a file this notebook creates.
        pipeline_package_path=pl_file_name,
        params={"token": existing_token, "project_id": project_id},
    )
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still propagate and the cell can be interrupted.
    print(e)
# Advance the counter so that re-running this cell uses a new run name.
runid += 1

In [None]:
# client = kfp.Client(existing_token=existing_token)
# #Replace [titanic] & ynli7c with your project name and id respectively.
# client.create_run_from_pipeline_func(titanic_pipeline, run_name="[titanic] Run" + str(runid), arguments={"token":existing_token,"project_id":"ynli7c"}, experiment_name = titanic_experiment.name)
# runid += 1