In [None]:
!pip install kfp --upgrade --user --quiet

In [1]:
# confirm the kfp sdk
! pip show kfp

Name: kfp
Version: 1.6.3
Summary: KubeFlow Pipelines SDK
Home-page: 
Author: google
Author-email: 
License: 
Location: /opt/conda/lib/python3.8/site-packages
Requires: absl-py, click, cloudpickle, Deprecated, docstring-parser, fire, google-api-python-client, google-auth, google-cloud-storage, jsonschema, kfp-pipeline-spec, kfp-server-api, kubernetes, protobuf, PyYAML, requests-toolbelt, strip-hints, tabulate
Required-by: 


In [2]:
import kfp
import kfp.components as comp
import kfp.dsl as dsl
from kfp.components import OutputPath
from typing import NamedTuple
from kubernetes import client

In [12]:
def data_download_from_url(dataurl, datapath):
    """Build the pipeline step that downloads the dataset.

    A 10Gi volume is requested through ``dsl.VolumeOp`` (access mode
    ``dsl.VOLUME_MODE_RWO``) and mounted at ``/VOCdevkit`` in a container that
    runs ``python3 ingest_pipeline.py --dataurl <dataurl> --datapath <datapath>``.

    Args:
        dataurl: Value forwarded to the script's ``--dataurl`` flag.
        datapath: Value forwarded to the script's ``--datapath`` flag.

    Returns:
        The ``dsl.ContainerOp`` for the download step; later steps can reuse
        the volume through its ``pvolumes['/VOCdevkit']`` entry.
    """
    # Request the storage volume first so it can be attached to the container.
    volume_op = dsl.VolumeOp(
        name="create_helmet_data_storage_volume",
        resource_name="helmet_data_storage_volume",
        size='10Gi',
        modes=dsl.VOLUME_MODE_RWO,
    )

    mounts = {'/VOCdevkit': volume_op.volume}
    cli_args = ['--dataurl', dataurl, '--datapath', datapath]

    download_op = dsl.ContainerOp(
        name='Download Data',
        image='harbor-repo.vmware.com/juanl/helmet_detection_pipeline:v1',
        command=['python3', 'ingest_pipeline.py'],
        arguments=cli_args,
        pvolumes=mounts,
    )
    return download_op

In [13]:
def data_process(comp1):
    """Build the pipeline step that prepares the downloaded data.

    Runs ``python3 prepare.py`` with the ``/VOCdevkit`` volume taken from the
    upstream step mounted at the same path.

    Args:
        comp1: Upstream step whose ``pvolumes['/VOCdevkit']`` entry is reused.

    Returns:
        The ``dsl.ContainerOp`` for the processing step.
    """
    # Reuse the upstream step's volume so this step sees the same files.
    shared_volume = comp1.pvolumes['/VOCdevkit']
    return dsl.ContainerOp(
        name='Process Data',
        image='harbor-repo.vmware.com/juanl/helmet_detection_pipeline:v1',
        command=['python3', 'prepare.py'],
        pvolumes={'/VOCdevkit': shared_volume},
    )

In [18]:
def model_train(comp2, epoch, device, workers_num):
    """Build the pipeline step that trains the model.

    Runs ``python3 train_pipeline.py --epoch <epoch> --device <device>
    --workers <workers_num>`` with the upstream step's ``/VOCdevkit`` volume
    mounted, then applies a GPU limit of 1, a CPU request of '2' and a memory
    request of '8G' to the step.

    Args:
        comp2: Upstream step whose ``pvolumes['/VOCdevkit']`` entry is reused.
        epoch: Value forwarded to the script's ``--epoch`` flag.
        device: Value forwarded to the script's ``--device`` flag.
        workers_num: Value forwarded to the script's ``--workers`` flag.

    Returns:
        The result of the chained resource setters on the training
        ``dsl.ContainerOp``.
    """
    train_args = [
        '--epoch', epoch,
        '--device', device,
        '--workers', workers_num,
    ]
    train_op = dsl.ContainerOp(
        name='Model Training',
        image='harbor-repo.vmware.com/juanl/helmet_detection_pipeline:v1',
        command=['python3', 'train_pipeline.py'],
        arguments=train_args,
        pvolumes={'/VOCdevkit': comp2.pvolumes['/VOCdevkit']},
    )
    # Resource settings are chained exactly as before; the chain's result is returned.
    return train_op.set_gpu_limit(1).set_cpu_request('2').set_memory_request('8G')

In [21]:
# Pipeline definition: download the data, prepare it, then train the model.
# Each step receives the previous step's op so it can reuse that step's
# /VOCdevkit volume entry.
@dsl.pipeline(
    name='helmet detection pipeline',
    description='pipeline to detect helmet',
)
def generate_pipeline(dataurl, datapath, epoch, device, workers_num):
    # dataurl/datapath feed the download step; epoch/device/workers_num feed training.
    download_op = data_download_from_url(dataurl, datapath)
    process_op = data_process(download_op)
    model_train(process_op, epoch, device, workers_num)

In [None]:
!mkdir generated_yaml_files

In [22]:
if __name__ == '__main__':
    import os

    import kfp.compiler as compiler

    # Make sure the output directory exists before compiling, so this cell
    # does not depend on the separate mkdir cell having been run first.
    output_dir = './generated_yaml_files'
    os.makedirs(output_dir, exist_ok=True)

    # Compile the pipeline function into a YAML package in the output directory.
    compiler.Compiler().compile(
        generate_pipeline,
        os.path.join(output_dir, 'helmet_detection_pipeline.yaml'),
    )