In [1]:
import kfp
import json
import os
from dkube.sdk import *
from dkube.sdk import DkubeApi

In [2]:
# Directory that holds the DKube pipeline component definitions. Despite the
# "_url" suffix this is a local filesystem path used as a plain string prefix,
# so the trailing slash is required.
components_url = "/mnt/dkube/pipeline/components/"

# One op factory per pipeline stage, each loaded from its component.yaml.
# Every path is derived from components_url so that relocating the components
# directory is a single-line change that cannot leave one loader behind.
storage_op = kfp.components.load_component_from_file(components_url + "storage/component.yaml")
dkube_preprocessing_op = kfp.components.load_component_from_file(components_url + "preprocess/component.yaml")
dkube_training_op = kfp.components.load_component_from_file(components_url + "training/component.yaml")
dkube_serving_op = kfp.components.load_component_from_file(components_url + "serving/component.yaml")

# Access token taken from the environment; os.getenv yields None when the
# variable is not set, and that None is then passed on to the DKube/KFP clients.
token = os.getenv("DKUBE_USER_ACCESS_TOKEN")

In [3]:
# ---------------------------------------------------------------------------
# Settings shared by several pipeline steps
# ---------------------------------------------------------------------------
# Container image handed to both the preprocessing and the training component.
image = "ocdr/d3-datascience-sklearn:v0.23.2-1"
# Submitted as the pipeline's `code` argument, which the pipeline forwards to
# the components as `program` (presumably a DKube code repo name -- confirm).
code = "arvados-changes-1"
# Feature set produced by preprocessing and consumed by training.
train_fs_name = "arv-s3-fs-train"
featureset_mount_points = ["/featureset/train"]

# ---------------------------------------------------------------------------
# Preprocessing step
# ---------------------------------------------------------------------------
# `dataset` is submitted as a run argument; `ptrain_dataset` is the value the
# preprocessing component actually receives as its input dataset list.
dataset = "ARVV"
ptrain_dataset = "ARVV"
dataset_mount_points = ["/opt/dkube/input"]
preprocessing_script = f"python arvados-s3/preprocessing.py --train_fs {train_fs_name}"
# Output dataset written by preprocessing and the path it is mounted at.
output_dataset = "output-ARVV"
output_mount_path = "/output-ARVV"

# ---------------------------------------------------------------------------
# Training / serving steps
# ---------------------------------------------------------------------------
# Name used both as the training output and as the model to serve.
model = "arv-s3"
training_script = "python arvados-s3/arvados-training.py"
train_out_mount_points = ["/model"]

In [None]:
# Register the feature set that the pipeline's preprocessing step lists as its
# output and the training step lists as its input.
# NOTE(review): whether re-running this cell against an already existing
# feature set raises is not visible from this notebook -- confirm before
# relying on Restart & Run All.
api = DkubeApi(token=token)
train_featureset = DkubeFeatureSet(train_fs_name)
api.create_featureset(train_featureset)

In [11]:
def arv_pipeline(token, code, dataset):
    """Assemble the preprocess -> export -> list / train -> serve pipeline graph.

    The function only declares pipeline ops; it returns nothing. Apart from its
    parameters, every setting (image, scripts, dataset / feature set / model
    names, mount points) is read from module-level configuration variables.

    Ops are created in this order:
      1. storage "reclaim" op, installed as the exit handler for all other steps
      2. preprocessing op
      3. storage "export" op, scheduled after preprocessing
      4. a container op that runs ``ls`` on the mounted dataset volume, after export
      5. training op, scheduled after preprocessing
      6. serving op, scheduled after training

    Args:
        token: Passed to every DKube component op (preprocessing, storage,
            training and serving).
        code: Passed as ``program`` to the preprocessing and training ops.
        dataset: Accepted so the run arguments match the signature, but never
            read here; the preprocessing input comes from the module-level
            ``ptrain_dataset`` instead.
    """
    # JSON-encoded arguments shared by several ops are built once up front.
    job_container = json.dumps({"image": image})
    dataset_volume_spec = "{{workflow.uid}}-dataset@dataset://" + output_dataset
    export_inputs = json.dumps(
        [
            "{{workflow.uid}}-featureset@featureset://" + train_fs_name,
            dataset_volume_spec,
        ]
    )
    export_outputs = json.dumps([dataset_volume_spec])

    reclaim_step = storage_op(
        "reclaim",
        auth_token=token,
        namespace="kubeflow",
        uid="{{workflow.uid}}",
    )

    with kfp.dsl.ExitHandler(exit_op=reclaim_step):
        preprocess_step = dkube_preprocessing_op(
            token,
            job_container,
            program=code,
            run_script=preprocessing_script,
            datasets=json.dumps([ptrain_dataset]),
            output_featuresets=json.dumps([train_fs_name]),
            input_dataset_mounts=json.dumps(dataset_mount_points),
            output_featureset_mounts=json.dumps(featureset_mount_points),
            outputs=json.dumps([output_dataset]),
            output_mounts=json.dumps([output_mount_path]),
        )

        export_step = storage_op(
            "export",
            auth_token=token,
            namespace="kubeflow",
            input_volumes=export_inputs,
            output_volumes=export_outputs,
        ).after(preprocess_step)

        # Lists the contents of the dataset volume; the op registers itself
        # with the pipeline on creation, so no reference to it is kept.
        kfp.dsl.ContainerOp(
            name="container-op",
            image="docker.io/ocdr/dkube-datascience-tf-cpu:v2.0.0-3",
            command="bash",
            arguments=["-c", "ls /output-ARVV"],
            pvolumes={
                "/output-ARVV": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-dataset")
            },
        ).after(export_step)

        # Training is ordered after preprocessing only, not after the
        # export / list steps.
        train_step = dkube_training_op(
            token,
            job_container,
            framework="sklearn",
            version="0.23.2",
            program=code,
            run_script=training_script,
            featuresets=json.dumps([train_fs_name]),
            outputs=json.dumps([model]),
            input_featureset_mounts=json.dumps(featureset_mount_points),
            output_mounts=json.dumps(train_out_mount_points),
        ).after(preprocess_step)

        dkube_serving_op(
            token,
            model=model,
            device="cpu",
            serving_image='{"image":"ocdr/sklearnserver:0.23.2"}',
        ).after(train_step)


In [12]:
# Run arguments are keyed by the parameter names of arv_pipeline.
run_arguments = {"token": token, "code": code, "dataset": dataset}

client = kfp.Client(existing_token=token)
# Kept as the cell's final expression so the returned run result is displayed.
client.create_run_from_pipeline_func(arv_pipeline, arguments=run_arguments)

RunPipelineResult(run_id=ce070940-e17b-406b-ad47-1f7accce12a0)