In [None]:
import os, json
import kfp
from dkube.sdk import *
from dkube.pipelines import dkube_training_op, dkube_preprocessing_op, dkube_serving_op, dkube_storage_op, dkube_submit_op

In [None]:
# The access token is read from the environment (None when the variable is unset)
# and shared by both service handles below.
token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")

# kfp client used later to submit the pipeline run.
client = kfp.Client(existing_token=token)

# DKube API handle used later to create the code/featureset/model resources.
api = DkubeApi(token=token)

In [None]:
# Directory holding the pipeline component definitions (one sub-directory per component).
components_url = "/mnt/dkube/pipeline/components/"

# Short alias for the loader; each call turns a component.yaml into a callable op factory.
_load_component = kfp.components.load_component_from_file

# NOTE: the preprocessing/training/submit names rebind the ops imported from
# dkube.pipelines at the top of the notebook; from here on the file-based versions are used.
dkube_preprocessing_op = _load_component(components_url + "preprocess/component.yaml")
dkube_training_op = _load_component(components_url + "training/component.yaml")
dkube_job_op = _load_component(components_url + "job/component.yaml")
dkube_submit_op = _load_component(components_url + "submit/component.yaml")

In [None]:
# Project owner resources
project_id = os.environ.get("DKUBE_PROJECT_ID")
project_name = os.environ.get("DKUBE_PROJECT_NAME")
project_owner = os.environ.get("DKUBE_PROJECT_OWNER")

# os.environ.get() yields None (not "") for an unset variable, so check truthiness:
# this rejects both a missing and an empty project id.
assert project_id, "Please launch IDE under project or set above variables manually"

# Falls back to "ocdkube" when USERNAME is not set in the environment.
username = os.getenv("USERNAME","ocdkube")
# Datasets are referenced as "<owner>:<dataset name>".
ptrain_dataset = f'{project_owner}:titanic-train'
ptest_dataset = f'{project_owner}:titanic-test'

# User resources
code_name = f'{project_name}-code'
# Featureset names carry the username as a suffix.
train_fs_name = f"{project_name}-train-fs-{username}"
test_fs_name = f"{project_name}-test-fs-{username}"
model_name = f'{project_name}-model'

# Program specific variables
image = "docker.io/ocdr/dkube-datascience-tf-cpu:v2.0.0-3"
# Mount-point lists are ordered [train, test], matching the order in which the
# datasets/featuresets are passed to the pipeline steps.
dataset_mount_points = ["/dataset/train", "/dataset/test"]
featureset_mount_points = ["/featureset/train", "/featureset/test"]
preprocessing_script = f"python titanic/preprocess.py --train_fs {train_fs_name} --test_fs {test_fs_name}"
training_script = "python titanic/train.py"
predict_script = "python titanic/predict.py"
train_out_mount_points = ["/model"]
# Counter appended to the run name; incremented each time a run is submitted.
runid = 0

In [None]:
def _run_setup_step(step, action):
    """Run one resource-creation step, reporting a failure instead of raising.

    Args:
        step: Short human-readable label included in the error message.
        action: Zero-argument callable that performs the step.

    Returns:
        True when the step completed, False when it raised.
    """
    try:
        action()
    except Exception as e:
        print(f"ERROR [{step}]:", str(e))
        return False
    return True


def _create_code_repo():
    """Create the code resource pointing at the dkube-examples git repository."""
    code = DkubeCode(username, code_name)
    code.update_git_details(url="https://github.com/oneconvergence/dkube-examples.git", branch="tensorflow")
    api.create_code(code)


# Each step is attempted independently so that one failure (for instance when this
# cell is executed a second time) does not silently skip the remaining resources.
# Selecting the project still gates everything: nothing is created if it fails.
if _run_setup_step("set active project", lambda: api.set_active_project(project_id)):
    # Create code repo
    _run_setup_step("create code repo", _create_code_repo)
    # Create train/test featureset
    _run_setup_step("create train featureset",
                    lambda: api.create_featureset(DkubeFeatureSet(train_fs_name)))
    _run_setup_step("create test featureset",
                    lambda: api.create_featureset(DkubeFeatureSet(test_fs_name)))
    _run_setup_step("upload train featurespec",
                    lambda: api.upload_featurespec(train_fs_name, "train-fs-spec.yaml"))
    _run_setup_step("upload test featurespec",
                    lambda: api.upload_featurespec(test_fs_name, "test-fs-spec.yaml"))
    # Create model
    _run_setup_step("create model",
                    lambda: api.create_model(DkubeModel(username, model_name)))

In [None]:
@kfp.dsl.pipeline(
    name='dkube-titanic-pl',
    description='example titanic pipeline to submit to leaderboard'
)
def titanic_pipeline(token, project_id):
    """Chain the preprocessing, training, prediction and submission steps.

    Each step is explicitly ordered after the previous one with ``.after()``.

    Args:
        token: Access token forwarded to every pipeline component.
        project_id: Project identifier; used to tag each job and passed to the submit step.
    """
    # Identical for all three job stages, so build them once.
    container = json.dumps({"image": image})
    project_tags = json.dumps([f"project:{project_id}"])

    # Stage 1: read the two input datasets and write the train/test featuresets.
    preprocessing = dkube_preprocessing_op(
        token, container,
        tags=project_tags,
        program=code_name, run_script=preprocessing_script,
        datasets=json.dumps([ptrain_dataset, ptest_dataset]),
        output_featuresets=json.dumps([train_fs_name, test_fs_name]),
        input_dataset_mounts=json.dumps(dataset_mount_points),
        output_featureset_mounts=json.dumps(featureset_mount_points),
    )

    # Stage 2: train on the featuresets and write the model output.
    train = dkube_training_op(
        token, container,
        tags=project_tags,
        framework="tensorflow", version="2.0.0",
        program=code_name, run_script=training_script,
        featuresets=json.dumps([train_fs_name, test_fs_name]),
        outputs=json.dumps([model_name]),
        input_featureset_mounts=json.dumps(featureset_mount_points),
        output_mounts=json.dumps(train_out_mount_points),
    ).after(preprocessing)

    # Stage 3: run the prediction script with the test featureset and the model mounted;
    # file_outputs maps the files it writes to named step outputs.
    predict_op = dkube_job_op(
        "predict", token, container,
        tags=project_tags,
        program=code_name, run_script=predict_script,
        featuresets=json.dumps([test_fs_name]), input_featureset_mounts=json.dumps(["/test_fs"]),
        models=json.dumps([model_name]), input_model_mounts=json.dumps(["/model"]),
        file_outputs={"output": "/output/prediction.csv", "mlpipeline-ui-metadata": "/output/metrics.json"},
    ).after(predict_op_dependency := train) if False else dkube_job_op(
        "predict", token, container,
        tags=project_tags,
        program=code_name, run_script=predict_script,
        featuresets=json.dumps([test_fs_name]), input_featureset_mounts=json.dumps(["/test_fs"]),
        models=json.dumps([model_name]), input_model_mounts=json.dumps(["/model"]),
        file_outputs={"output": "/output/prediction.csv", "mlpipeline-ui-metadata": "/output/metrics.json"},
    ).after(train)

    # Stage 4: hand the prediction output to the submit component.
    submit = dkube_submit_op(token, project_id, predictions=predict_op.outputs["output"]).after(predict_op)

In [None]:
# The counter keeps run names distinct when this cell is executed more than once.
run_name = f"[{project_name}] Run{runid}"

# Build and submit the pipeline in one call, passing the pipeline parameters explicitly.
client.create_run_from_pipeline_func(
    titanic_pipeline,
    run_name=run_name,
    arguments={"token": token, "project_id": project_id},
)

runid = runid + 1
print(f"RUN NAME:{run_name}")

In [None]:
# Optional: uncomment to compile the pipeline into titanic_pipeline.zip.
#kfp.compiler.Compiler().compile(titanic_pipeline, "titanic_pipeline.zip")