In [None]:
import os, json
import kfp
from dkube.sdk import *
from dkube.pipelines import dkube_training_op, dkube_preprocessing_op, dkube_serving_op, dkube_storage_op, dkube_submit_op

In [None]:
# Read the DKube access token from the environment and hand the same token to
# both the Kubeflow Pipelines client and the DKube API client.
token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")
api = DkubeApi(token=token)
client = kfp.Client(existing_token=token)

In [None]:
# ---- Project-owner resources -------------------------------------------------
# Each value can be overridden through the environment; the literals are fallbacks.
project_id = os.environ.get("DKUBE_PROJECT_ID", "wprz8s")
project_name = os.environ.get("DKUBE_PROJECT_NAME", "titanic")
project_owner = os.environ.get("DKUBE_PROJECT_OWNER", "ocdkube")
username = os.environ.get("USERNAME", "ocdkube")

# Datasets are addressed as "<owner>:<dataset name>".
ptrain_dataset = project_owner + ":titanic-train"
ptest_dataset = project_owner + ":titanic-test"

# ---- Per-user resources ------------------------------------------------------
code_name = project_name + "-code"
model_name = project_name + "-model"
# Featureset names carry the username as a suffix.
train_fs_name = "-".join((project_name, "train-fs", username))
test_fs_name = "-".join((project_name, "test-fs", username))

# ---- Program-specific settings -----------------------------------------------
image = "docker.io/ocdr/dkube-datascience-tf-cpu:fs-v2.0.0"
preprocessing_script = (
    "python titanic/preprocess.py"
    " --train_fs " + train_fs_name +
    " --test_fs " + test_fs_name
)
training_script = "python training.py"

# NOTE(review): the mount-point lists and training_script are not read by the
# later cells, which pass literal paths/commands — confirm they are still needed.
dataset_mount_points = ["/dataset/train", "/dataset/test"]
output_featureset_mount_points = ["/featureset/train", "/featureset/test"]
train_inp_mount_points = ["/titanic-train", "/titanic-test"]
train_out_mount_points = ["/model"]

# Counter appended to the pipeline run name; incremented after each submission.
runid = 0

In [None]:
# Describe the code repository (git URL + branch) under the user's name.
code = DkubeCode(username, code_name)
code.update_git_details(
    url="https://github.com/oneconvergence/dkube-examples-2.0.git",
    branch="test",
)

# The registration calls below are left disabled in this notebook
# (presumably one-time setup — confirm before enabling).
# Register the code repo:
#api.create_code(code)
# Create the train/test featuresets:
#api.create_featureset(DkubeFeatureSet(train_fs_name))
#api.create_featureset(DkubeFeatureSet(test_fs_name))

In [None]:
# Preprocessing job definition: container image, code repo, startup command,
# two input datasets and two output featuresets.
preprocess = DkubePreprocessing(username, name=train_fs_name)
preprocess.update_container(image_url=image)
preprocess.add_code(code_name, commitid="")
preprocess.update_startupscript(preprocessing_script)

# Inputs: (dataset, mount path) pairs, attached in train-then-test order.
for input_dataset, input_mount in (
    (ptrain_dataset, '/dataset/train'),
    (ptest_dataset, '/dataset/test'),
):
    preprocess.add_input_dataset(input_dataset, mountpath=input_mount)

# Outputs: (featureset, mount path) pairs, attached in train-then-test order.
for output_featureset, output_mount in (
    (train_fs_name, '/featureset/train'),
    (test_fs_name, '/featureset/test'),
):
    preprocess.add_output_featureset(output_featureset, mountpath=output_mount)

In [None]:
# Training job definition. The run name comes from generate(project_name) and
# the run is tagged with the project id.
training = DkubeTraining(
    username,
    name=generate(project_name),
    tags=[f"project:{project_id}"],
)
training.update_container(framework="scikit_0.2.2", image_url=image)
training.add_code(code_name, commitid="")
training.update_startupscript("python titanic/train.py")

# Inputs: the two featuresets, mounted at the given paths.
for input_featureset, featureset_mount in (
    (train_fs_name, '/featureset/train'),
    (test_fs_name, '/featureset/test'),
):
    training.add_input_featureset(input_featureset, mountpath=featureset_mount)

# Output: the trained model is written under /model.
training.add_output_model(model_name, mountpath='/model')

In [None]:
from dkube.sdk.rsrcs.storage import DkubeResourcePVC

# PVC descriptors for the code repo, the trained model and the test featureset.
# The pipeline passes them to the "export" storage op and mounts their claim
# names into the predict container.
code_pvc = DkubeResourcePVC(name=code_name, type="program", kind="input")
model_pvc = DkubeResourcePVC(name=model_name, type="model", kind="input")
test_fs_pvc = DkubeResourcePVC(name=test_fs_name, type="featureset", kind="input")

In [None]:
# Pipeline stages, in dependency order:
#   preprocess -> train -> export (PVCs) -> predict -> submit
# Parameters:
#   token      - auth token forwarded to every DKube op.
#   project_id - project the predictions are submitted to.
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    # Preprocessing is created outside the exit-handler group below.
    preprocess_step = dkube_preprocessing_op("preprocess", authtoken=token, preprocessing=preprocess)

    # "reclaim" storage op, registered as the exit op for the enclosed steps.
    reclaim_step = dkube_storage_op("reclaim", authtoken=token, command="reclaim")

    with kfp.dsl.ExitHandler(exit_op=reclaim_step):
        train_step = dkube_training_op("train", authtoken=token, training=training).after(preprocess_step)

        # Export code, model and test featureset as PVCs once training is done.
        export_step = dkube_storage_op(
            "export",
            authtoken=token,
            command="export",
            claims=[code_pvc, model_pvc, test_fs_pvc],
        ).after(train_step)

        # Mount path -> volume backed by the corresponding exported claim.
        predict_volumes = {
            "/model/": kfp.dsl.PipelineVolume(pvc=model_pvc.claimname),
            "/test_fs/": kfp.dsl.PipelineVolume(pvc=test_fs_pvc.claimname),
            "/code/": kfp.dsl.PipelineVolume(pvc=code_pvc.claimname),
        }
        predict_step = kfp.dsl.ContainerOp(
            name="predict",
            image=image,
            command=["python", "/code/titanic/predict.py"],
            pvolumes=predict_volumes,
            # The prediction CSV is exposed as the op's "output".
            file_outputs={"output": "/tmp/prediction.csv"},
        ).after(export_step)

        # Pass the prediction file to the submit op as a path argument.
        prediction_file = kfp.dsl.InputArgumentPath(predict_step.outputs["output"])
        dkube_submit_op(
            "submit",
            authtoken=token,
            project_id=project_id,
            predictions=prediction_file,
        ).after(predict_step)

In [None]:
# Submit one run of the pipeline. The project id is taken from the
# configuration cell (DKUBE_PROJECT_ID, with its fallback) rather than a
# literal, so the submission targets the same project the training run is
# tagged with.
client.create_run_from_pipeline_func(
    titanic_pipeline,
    run_name="[titanic] Run" + str(runid),
    arguments={"token": token, "project_id": project_id},
)
# Bump the counter so re-running this cell produces a distinct run name.
runid += 1

In [None]:
#kfp.compiler.Compiler().compile(titanic_pipeline, "titanic_pipeline.zip")