In [None]:
import sys
!{sys.executable} -m pip install kfp >/dev/null

In [None]:
import os, json
import kfp
import kfp.compiler as compiler

In [None]:
# Read the DKube access token from the environment (None when the variable is
# unset) and hand it to the Kubeflow Pipelines client.
existing_token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")
client = kfp.Client(existing_token=existing_token)

# Input and output parameters for the pipeline run

In [None]:
# --- Project, user and container image -------------------------------------
project_id = "ynli7c"
user = os.getenv("USERNAME")  # None when USERNAME is not set in the environment
image = "docker.io/ocdr/dkube-datascience-tf-cpu:fs-v2.0.0"

# --- Raw datasets consumed by the preprocessing step ------------------------
ptrain_dataset = "titanic-train"
ptest_dataset = "titanic-test"
dataset_mount_points = [
    "/opt/dkube/input/train",
    "/opt/dkube/input/test",
]

# --- Featuresets written by preprocessing and read by training --------------
train_fs = "titanic-train-fs"
test_fs = "titanic-test-fs"
output_featureset_mount_points = [
    "/opt/dkube/output/train",
    "/opt/dkube/output/test",
]

# --- Code repo name and the commands run by each step -----------------------
training_program = "titanic"
preprocessing_script = f"python preprocessing.py --train_fs {train_fs} --test_fs {test_fs}"
training_script = "python training.py"

# --- Training inputs / outputs ----------------------------------------------
train_inp_mount_points = [
    "/titanic-train",
    "/titanic-test",
]
train_out_mount_points = ["/model"]
model = "titanic"

# Loading components

In [None]:
components_url = "https://raw.githubusercontent.com/oneconvergence/dkube/master/components/"


def _load_dkube_component(relative_path):
    """Load a pipeline component from `relative_path` under `components_url`."""
    return kfp.components.load_component_from_url(components_url + relative_path)


# One factory per DKube component used by the pipeline definition.
dkube_preprocessing_op = _load_dkube_component("preprocess/component.yaml")
dkube_training_op = _load_dkube_component("training/component.yaml")
dkube_storage_op = _load_dkube_component("storage/component.yaml")
dkube_submit_op = _load_dkube_component("submit/component.yaml")

# Counter appended to run names; incremented each time a run is requested.
runid = 0

# Pipeline Definition

In [None]:
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    """Titanic pipeline: preprocess -> train -> export volumes -> predict -> submit.

    The step configuration (image, dataset/featureset names, mount points,
    scripts, model name, user) is read from the notebook's module-level
    variables at compile time.

    Args:
        token: Passed as the auth token argument to every DKube component.
        project_id: Passed to the submit component together with the predictions.

    Raises:
        ValueError: If the module-level ``user`` is empty or None, since it is
            needed to build the dataset volume reference.
    """
    # `user` comes from os.getenv and may be None; fail with a clear message
    # instead of a TypeError from the string concatenation below.
    if not user:
        raise ValueError("USERNAME environment variable is not set; "
                         "it is required to reference the test dataset")

    # Step 1: turn the raw train/test datasets into the two featuresets.
    preprocessing = dkube_preprocessing_op(token, json.dumps({"image": image}),
                                           program=training_program, run_script=preprocessing_script,
                                           datasets=json.dumps([ptrain_dataset, ptest_dataset]),
                                           output_featuresets=json.dumps([train_fs, test_fs]),
                                           input_dataset_mounts=json.dumps(dataset_mount_points),
                                           output_featureset_mounts=json.dumps(output_featureset_mount_points)
                                           )

    # Everything inside the handler is followed by the storage "reclaim" op,
    # which runs when the enclosed steps exit.
    with kfp.dsl.ExitHandler(exit_op=dkube_storage_op("reclaim", token, namespace="kubeflow",uid="{{workflow.uid}}")):
        # Volume specs for the storage "export" op, written as "<name>@<kind>://<ref>".
        # The <name> parts match the PVC names mounted by the predict step below.
        input_volumes = json.dumps(["{{workflow.uid}}-dataset@dataset://" + user + ":" + ptest_dataset,
                                    "{{workflow.uid}}-model@model://" + model,
                                    "{{workflow.uid}}-code@program://" + training_program
                                    ])

        # Step 2: train on the featuresets once preprocessing has finished.
        train = dkube_training_op(token, json.dumps({"image": image}),
                                  framework="sklearn", version="0.23.2",
                                  program=training_program, run_script=training_script,
                                  featuresets=json.dumps([train_fs, test_fs]), outputs=json.dumps([model]),
                                  input_featureset_mounts=json.dumps(train_inp_mount_points),
                                  output_mounts=json.dumps(train_out_mount_points)).after(preprocessing)

        # Step 3: export the dataset, model and code volumes after training.
        storage = dkube_storage_op("export", token, namespace="kubeflow", input_volumes=input_volumes).after(train)

        # Step 4: run predict.py from the code volume with the test data and
        # model mounted; the file it writes to /tmp/prediction.csv becomes the
        # step's "output".
        predict_op = kfp.dsl.ContainerOp(
            name="predict",
            image=image,
            command=["python", "/code/predict.py"],
            pvolumes={"/titanic-test/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-dataset"),
                      "/model/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-model"),
                      "/code/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-code")
                      },
            file_outputs={"output": "/tmp/prediction.csv"},
        ).after(storage)

        # Step 5: hand the predictions to the submit component.
        dkube_submit_op(token, project_id, predictions=predict_op.outputs["output"]).after(predict_op)

# Compiling pipeline into tar file

In [None]:
# Name the pipeline and the archive it is compiled into.
pl_name = '[titanic] pipeline demo full'
pl_file_name = 'train_pl.tar.gz'

# Compile the pipeline function into the archive on disk.
compiler.Compiler().compile(pipeline_func=titanic_pipeline, package_path=pl_file_name)
# The generated tarball can also be uploaded manually through the Pipelines UI.

# Uploading Pipeline

In [None]:
# Re-create the client with the token read earlier, then upload the compiled archive.
client = kfp.Client(existing_token=existing_token)
try:
    client.upload_pipeline(pipeline_package_path=pl_file_name, pipeline_name=pl_name, description=None)
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still stop execution.
    print(e)

# Creating experiment

In [None]:
try:
    titanic_experiment = client.create_experiment(name='[titanic] experiment')
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still stop execution. Note that `titanic_experiment` stays
    # undefined if this call fails.
    print(e)

# Creating run from pipeline under the titanic experiment

In [None]:
try:
    # Look up the uploaded pipeline by name and start a run in the titanic experiment.
    pipeline_id = client.get_pipeline_id(pl_name)
    run = client.run_pipeline(titanic_experiment.id, "[titanic] Run" + str(runid), None, pipeline_id=pipeline_id,
                              params={"token":existing_token, "project_id":project_id})
except Exception as e:
    # Best effort: report the failure instead of aborting the notebook.
    # Exception (not BaseException) is caught so that KeyboardInterrupt and
    # SystemExit still stop execution.
    print(e)
# Advance the counter whether or not the run was created, so the next attempt
# gets a new run name.
runid += 1

In [None]:
# client = kfp.Client(existing_token=existing_token)
# # Replace [titanic] and ynli7c with your project name and id, respectively.
# client.create_run_from_pipeline_func(titanic_pipeline, run_name="[titanic] Run" + str(runid), arguments={"token":existing_token,"project_id":"ynli7c"}, experiment_name = titanic_experiment.name)
# runid += 1