In [None]:
## Read dataset from arv-s3

In [61]:
# Shell escape: list the entries present under /opt/dkube/input.
!ls /opt/dkube/input

config.json  credentials


In [63]:
import json
import os
import kfp

In [64]:
# Base directory from which the pipeline component definitions are loaded.
components_url = "/mnt/dkube/pipeline/components/"


def _load_dkube_component(name):
    """Load ``<components_url><name>/component.yaml`` as a kfp component factory."""
    return kfp.components.load_component_from_file(
        f"{components_url}{name}/component.yaml"
    )


# Component factories used to create the pipeline steps.
dkube_training_op = _load_dkube_component("training")
storage_op = _load_dkube_component("storage")
dkube_serving_op = _load_dkube_component("serving")

In [65]:
# Both values are read from the environment; each is None when its
# variable is not set.
token = os.environ.get("DKUBE_USER_ACCESS_TOKEN")
username = os.environ.get("USERNAME")

# Kubeflow Pipelines client that reuses the access token read above.
client = kfp.Client(existing_token=token)

In [74]:
# Names of the code, dataset and model entries supplied to the pipeline run.
code = "arv-s3-pallavi"
dataset = model = "arv-s3"

# Mount paths forwarded to the training step for its input dataset and
# its output model respectively.
dataset_mount_path, model_mount_path = "/opt/dkube/input", "/opt/dkube/output"

# Command forwarded to the training step as its run script.
run_script = "python arv-s3.py"

In [77]:
@kfp.dsl.pipeline(name='arv-s3',description='arvados-s3-pipeline')
def arv_pipeline(auth_token,code,dataset,model,dataset_mount_path,model_mount_path,run_script):
    """Pipeline: export the dataset to a volume, list it, train, then serve.

    Every DKube step authenticates with the ``auth_token`` pipeline parameter.
    Earlier revisions captured the notebook-level ``token`` variable instead,
    which embedded the secret in the compiled workflow, ignored the value
    supplied in the run arguments, and made the function depend on kernel
    state.

    Args:
        auth_token: Access token handed to the storage, training and serving
            components.
        code: Passed to the training component as ``program``.
        dataset: Dataset name; used for the exported volume and as the
            training input.
        model: Model name; used as the training output and handed to the
            serving component.
        dataset_mount_path: Passed to training as the input dataset mount.
        model_mount_path: Passed to training as the output mount.
        run_script: Passed to the training component as ``run_script``.
    """
    # Exit op: per the kfp ExitHandler contract it runs once the grouped
    # steps have exited, so the "reclaim" storage command always executes.
    reclaim = storage_op(
        "reclaim",
        auth_token=auth_token,
        namespace="kubeflow",
        uid="{{workflow.uid}}",
    )

    with kfp.dsl.ExitHandler(exit_op=reclaim):

        # JSON list with one volume spec, named after the workflow uid so
        # the listing step below can reference the same claim name.
        dataset_volume = json.dumps(["{{workflow.uid}}-dataset@dataset://" + str(dataset)])

        storage = storage_op(
            "export",
            auth_token=auth_token,
            namespace="kubeflow",
            input_volumes=dataset_volume,
        )

        # Sanity-check step: mount the exported claim at /dataset and list
        # one directory of it. Must run after the export step.
        list_dataset = kfp.dsl.ContainerOp(
            name="container-op",
            image="docker.io/ocdr/dkube-datascience-tf-cpu:v2.0.0-3",
            command="bash",
            arguments=["-c", "ls /dataset/CMU-1"],
            pvolumes={"/dataset": kfp.dsl.PipelineVolume(pvc="{{workflow.uid}}-dataset")},
        ).after(storage)

        # The training component receives its list-valued inputs as JSON
        # strings; str() turns each pipeline parameter into its placeholder.
        train = dkube_training_op(
            auth_token=auth_token,
            container='{"image":"docker.io/ocdr/d3-datascience-sklearn:v0.23.2"}',
            framework="sklearn",
            version="0.23.2",
            program=str(code),
            run_script=str(run_script),
            datasets=json.dumps([str(dataset)]),
            outputs=json.dumps([str(model)]),
            input_dataset_mounts=json.dumps([str(dataset_mount_path)]),
            output_mounts=json.dumps([str(model_mount_path)]),
        ).after(storage)

        # Serving is scheduled only after training has finished.
        serving = dkube_serving_op(
            auth_token=auth_token,
            model=model,
            device='cpu',
            serving_image='{"image":"ocdr/sklearnserver:0.23.2"}',
        ).after(train)
        

In [78]:
# Values for the pipeline parameters, keyed by arv_pipeline's argument names.
run_arguments = {
    "auth_token": token,
    "code": code,
    "dataset": dataset,
    "model": model,
    "dataset_mount_path": dataset_mount_path,
    "model_mount_path": model_mount_path,
    "run_script": run_script,
}

# Submit the pipeline function as a run; the result is displayed as the
# cell output.
client.create_run_from_pipeline_func(arv_pipeline, arguments=run_arguments)

RunPipelineResult(run_id=1b9e56d6-531b-4546-8f91-469058b327b5)