In [None]:
import sys
!{sys.executable} -m pip install kfp >/dev/null

In [None]:
import os, json
import kfp
import kfp.compiler as compiler
import random
import string

def generate(hint):
    """Return ``hint`` followed by a dash and four random decimal digits.

    Used to build names such as ``"pl-4821"`` that are unlikely to clash
    between executions.

    Args:
        hint: Prefix placed in front of the random suffix.

    Returns:
        The string ``"<hint>-<dddd>"``.
    """
    # One random.choice() call per digit, in order, so a seeded `random`
    # produces the same suffix as before.
    suffix = ''.join(random.choice(string.digits) for _ in range(4))
    return "{}-{}".format(hint, suffix)

In [None]:
# Access token taken from the environment; None when
# DKUBE_USER_ACCESS_TOKEN is not set.
existing_token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")
# KFP client constructed with that token.
client = kfp.Client(existing_token=existing_token)

# Input and output parameters for the pipeline run

In [None]:
# Project owner resources
project_id = os.getenv("DKUBE_PROJECT_ID")
project_owner = os.getenv("DKUBE_PROJECT_OWNER")
project_name = os.getenv("DKUBE_PROJECT_NAME")
ptrain_dataset = 'titanic-train-ds'
ptest_dataset = 'titanic-test-ds'

# os.getenv() returns None (not "") for an unset variable, so a comparison
# against "" never fails. Test truthiness to catch both unset and empty values.
assert project_id, "Please launch IDE under project or set above variables manually"

# User specific resources
# Featureset names are suffixed with the project owner.
train_fs = f"titanic-train-fs-{project_owner}"
test_fs = f"titanic-test-fs-{project_owner}"
training_program = 'titanic-code-user'
model = 'titanic-model-user'

# Program specific variables
# Container image passed to the preprocessing, training and predict stages.
image = "docker.io/ocdr/dkube-datascience-tf-cpu:v2.0.0"
# Mount paths are listed in the same order as the datasets / featuresets
# they are paired with in the pipeline definition.
dataset_mount_points = ["/opt/dkube/input/train", "/opt/dkube/input/test"]
output_featureset_mount_points = ["/opt/dkube/output/train", "/opt/dkube/output/test"]
# Command lines run by the individual stages.
preprocessing_script = f"python preprocessing.py --train_fs {train_fs} --test_fs {test_fs}"
training_script = f"python training.py --train_fs {train_fs}"
predict_script = "python predict.py"
train_inp_mount_points = ["/titanic-train","/titanic-test"]
train_out_mount_points = ["/model"]

# Used as part of the pipeline, experiment and run names; None when USERNAME
# is not set in the environment.
user = os.getenv('USERNAME')

# Loading components

In [None]:
# Directory holding the component definitions; each component lives in its
# own sub-directory as component.yaml.
components_url = "/mnt/dkube/pipeline/components/"
dkube_preprocessing_op = kfp.components.load_component_from_file(
    os.path.join(components_url, "preprocess/component.yaml")
)
dkube_training_op = kfp.components.load_component_from_file(
    os.path.join(components_url, "training/component.yaml")
)
dkube_job_op = kfp.components.load_component_from_file(
    os.path.join(components_url, "job/component.yaml")
)
dkube_submit_op = kfp.components.load_component_from_file(
    os.path.join(components_url, "submit/component.yaml")
)

# Pipeline Definition

In [None]:
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    """Chain preprocessing, training, prediction and submission.

    The stages run strictly one after another. Preprocessing, training and
    prediction use the notebook-level ``image`` and ``training_program`` and
    are tagged with the project id.

    Args:
        token: Access token passed to every component.
        project_id: Project id used in the stage tags and passed to the
            submit component.
    """
    # Arguments shared by the DKube stages, serialized once.
    container = json.dumps({"image": image})
    tags = json.dumps([f"project:{project_id}"])

    # Datasets -> featuresets.
    preprocessing = dkube_preprocessing_op(
        token, container,
        tags=tags,
        program=training_program, run_script=preprocessing_script,
        datasets=json.dumps([ptrain_dataset, ptest_dataset]),
        output_featuresets=json.dumps([train_fs, test_fs]),
        input_dataset_mounts=json.dumps(dataset_mount_points),
        output_featureset_mounts=json.dumps(output_featureset_mount_points),
    ).set_display_name("Preprocessing")

    # Featuresets -> model.
    train = dkube_training_op(
        token, container,
        tags=tags,
        framework="sklearn", version="0.23.2",
        program=training_program, run_script=training_script,
        featuresets=json.dumps([train_fs, test_fs]), outputs=json.dumps([model]),
        input_featureset_mounts=json.dumps(train_inp_mount_points),
        output_mounts=json.dumps(train_out_mount_points),
    ).after(preprocessing).set_display_name("Training")

    # Test featureset + model -> prediction file exposed as the "output" artifact.
    predict_op = dkube_job_op(
        "predict", token, container,
        tags=tags,
        program=training_program, run_script=predict_script,
        featuresets=json.dumps([test_fs]), input_featureset_mounts=json.dumps(["/data"]),
        models=json.dumps([model]), input_model_mounts=json.dumps(["/model"]),
        file_outputs={"output": "/output/prediction.csv"},
    ).after(train).set_display_name("Predict")

    # The prediction output is handed to the submit component directly; the
    # previously built InputArgumentPath wrapper was never used and is gone.
    dkube_submit_op(
        token, project_id, predictions=predict_op.outputs["output"]
    ).after(predict_op).set_display_name("Submit")

# Compiling pipeline into tar file

In [None]:
# Package file written by the compiler, and the randomized name used when the
# pipeline is uploaded.
pl_file_name = 'train_pl.tar.gz'
pl_name = f"[{project_name}]-{user}-{generate('pl')}"

# Compile the pipeline function into the package file.
compiler.Compiler().compile(titanic_pipeline, pl_file_name)
# Upload this generated tarball into the Pipelines UI

# Creating experiment

In [None]:
experiment_name = f"[{project_name}]-{user}-exp"
# Start from None so a failed call cannot leave an experiment object from an
# earlier execution of this cell alive in the kernel.
titanic_experiment = None
try:
    titanic_experiment = client.create_experiment(name=experiment_name)
except Exception as e:
    # Exception rather than BaseException: kernel interrupts (KeyboardInterrupt)
    # and SystemExit must not be swallowed.
    print(f"Could not create experiment {experiment_name!r}: {e}")

# Uploading Pipeline

In [None]:
# Fresh client built from the same token.
client = kfp.Client(existing_token=existing_token)
# Start from None so a failed upload cannot leave a pipeline object from an
# earlier execution of this cell alive in the kernel.
pipeline = None
try:
    pipeline = client.upload_pipeline(
        pipeline_package_path=pl_file_name,
        pipeline_name=pl_name,
        description=None,
    )
except Exception as e:
    # Exception rather than BaseException: kernel interrupts (KeyboardInterrupt)
    # and SystemExit must not be swallowed.
    print(f"Could not upload pipeline {pl_name!r}: {e}")

# Creating run from pipeline under the titanic experiment

In [None]:
run_name = f"[{project_name}]-{user}-{generate('run')}"
# Start from None so a failed submission cannot leave a run object from an
# earlier execution of this cell alive in the kernel.
run = None
try:
    # Runs the uploaded pipeline under the titanic experiment; the params
    # fill the pipeline function's `token` and `project_id` arguments.
    run = client.run_pipeline(
        titanic_experiment.id, run_name, pipeline_id=pipeline.id,
        params={"token": existing_token, "project_id": project_id},
    )
except Exception as e:
    # Exception rather than BaseException: kernel interrupts (KeyboardInterrupt)
    # and SystemExit must not be swallowed.
    print(f"Could not start run {run_name!r}: {e}")
