In [None]:
import os, json
import kfp
from dkube.sdk import *
from dkube.pipelines import dkube_training_op, dkube_preprocessing_op, dkube_serving_op, dkube_storage_op, dkube_submit_op

In [None]:
# Read the DKube access token from the environment (None when the variable is unset).
token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")

# The same token authenticates both the Kubeflow Pipelines client and the DKube API handle.
client = kfp.Client(existing_token=token)
api = DkubeApi(token=token)

In [None]:
# --- Resources that belong to the project owner ---
# Each value is read from the environment, falling back to the literal default.
project_id = os.getenv("DKUBE_PROJECT_ID", "wprz8s")
project_name = os.getenv("DKUBE_PROJECT_NAME", "titanic")
project_owner = os.getenv("DKUBE_PROJECT_OWNER", "ocdkube")
username = os.environ.get("USERNAME", "ocdkube")

# Datasets are addressed as "<owner>:<dataset-name>".
ptrain_dataset = f'{project_owner}:titanic-train'
ptest_dataset = f'{project_owner}:titanic-test'

# --- Resources created for the current user ---
code_name = f'{project_name}-code'
model_name = f'{project_name}-model'
# Featureset names carry the username as a suffix.
train_fs_name = f"{project_name}-train-fs-{username}"
test_fs_name = f"{project_name}-test-fs-{username}"

# --- Program-specific settings ---
image = "docker.io/ocdr/dkube-datascience-tf-cpu:fs-v2.0.0"

# Preprocessing stage: dataset inputs, featureset outputs and the command to run.
dataset_mount_points = ["/dataset/train", "/dataset/test"]
output_featureset_mount_points = ["/featureset/train", "/featureset/test"]
preprocessing_script = f"python titanic/preprocess.py --train_fs {train_fs_name} --test_fs {test_fs_name}"

# Training stage: command plus input/output mount paths.
training_script = "python training.py"
train_inp_mount_points = ["/titanic-train","/titanic-test"]
train_out_mount_points = ["/model"]

# Counter appended to the run name; incremented after each submission.
runid = 0

In [None]:
# Register the code repo, pointing it at the example repository's "tensorflow" branch.
code = DkubeCode(username, code_name)
code.update_git_details(
    url="https://github.com/oneconvergence/dkube-examples-2.0.git",
    branch="tensorflow",
)
api.create_code(code)

# Create the train featureset first, then the test featureset.
for fs_name in (train_fs_name, test_fs_name):
    api.create_featureset(DkubeFeatureSet(fs_name))

# Create the model resource.
api.create_model(DkubeModel(username, model_name))

In [None]:
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    """Titanic pipeline: preprocess, train, export volumes, predict, submit.

    Stages, in execution order:
      1. Preprocessing run reading the two owner datasets and writing the
         train/test featuresets.
      2. Training run reading those featuresets and writing the model.
      3. Storage "export" op for the model, code and test featureset volumes.
      4. A plain container step running ``/code/titanic/predict.py`` with the
         three exported volumes mounted.
      5. Submit op that receives the prediction file output.

    Stages 2-5 are wrapped in an exit handler whose exit op is the storage
    "reclaim" op.

    Args:
        token: DKube access token forwarded to every DKube op.
        project_id: Project identifier; used in the run tags and passed to the submit op.
    """
    # The same container image is used for the preprocessing and training runs.
    container = json.dumps({"image": image})
    project_tags = json.dumps([f"project:{project_id}"])

    # Stage 1: datasets -> featuresets.
    preprocessing = dkube_preprocessing_op(
        token, container,
        tags=project_tags,
        program=code_name, run_script=preprocessing_script,
        datasets=json.dumps([ptrain_dataset, ptest_dataset]),
        output_featuresets=json.dumps([train_fs_name, test_fs_name]),
        input_dataset_mounts=json.dumps(dataset_mount_points),
        output_featureset_mounts=json.dumps(output_featureset_mount_points),
    )

    # The "reclaim" storage op is registered as the exit op of this handler.
    with kfp.dsl.ExitHandler(exit_op=dkube_storage_op("reclaim", token)):
        # Stage 2: featuresets -> model; explicitly ordered after preprocessing.
        train = dkube_training_op(
            token, container,
            tags=project_tags,
            framework="sklearn", version="0.23.2",
            program=code_name, run_script=training_script,
            featuresets=json.dumps([train_fs_name, test_fs_name]),
            outputs=json.dumps([model_name]),
            input_featureset_mounts=json.dumps(train_inp_mount_points),
            output_mounts=json.dumps(train_out_mount_points),
        ).after(preprocessing)

        # Stage 3: volume specs of the form "<claim name>@<kind>://<resource>".
        # The claim names are prefixed with the workflow uid and are reused
        # below as the PVC names mounted into the predict step.
        input_volumes = json.dumps([
            "{{workflow.uid}}-model@model://" + model_name,
            "{{workflow.uid}}-code@program://" + code_name,
            "{{workflow.uid}}-featureset@featureset://" + test_fs_name,
        ])
        storage = dkube_storage_op("export", token, input_volumes=input_volumes).after(train)

        # Stage 4: run the prediction script with the exported volumes mounted.
        predict_op = kfp.dsl.ContainerOp(
            name="predict", image=image,
            command=["python", "/code/titanic/predict.py"],
            pvolumes={
                "/model/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-model"),
                "/code/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-code"),
                "/test_fs/": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-featureset"),
            },
            file_outputs={"output": "/tmp/prediction.csv"},
        ).after(storage)

        # Stage 5: hand the prediction output to the submit op.
        # NOTE(review): the original built an unused
        # kfp.dsl.InputArgumentPath(predict_op.outputs["output"]) local; it was
        # dropped as dead code. Confirm whether it was meant to be the
        # `predictions` argument instead of the raw output.
        submit = dkube_submit_op(token, project_id, predictions=predict_op.outputs["output"]).after(predict_op)

In [None]:
# Submit the pipeline as a run; the run name ends with the current counter value.
run_label = f"[titanic] Run{runid}"
client.create_run_from_pipeline_func(
    titanic_pipeline,
    run_name=run_label,
    arguments={"token": token, "project_id": project_id},
)
# Advance the counter so re-running this cell produces a new run name.
runid = runid + 1

In [None]:
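# Optional: uncomment to compile the pipeline into "titanic_pipeline.zip" instead of submitting a run.
#kfp.compiler.Compiler().compile(titanic_pipeline, "titanic_pipeline.zip")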