In [171]:
import sys
sys.path.insert(1, '/home/jovyan/.local/lib/python3.6/site-packages')
#See https://www.kubeflow.org/docs/pipelines/sdk/component-development/
import kfp
from kfp import compiler
import kfp.components as comp
import kfp.dsl as dsl
from kfp import gcp

In [172]:
#Component definition

# Tag suffix appended to the preprocess image reference in preprocess_op.
version = "v25"
def preprocess_op(prep_input, prep_output):
  """Build the pipeline step that runs ``preprocess.py`` in its container.

  Args:
    prep_input: value passed to the script's ``--input-bucket`` flag.
    prep_output: value passed to the script's ``--output-bucket`` flag.

  Returns:
    The result of applying ``use_gcp_secret('user-gcp-sa')`` to the
    ``dsl.ContainerOp``. The op declares a single file output,
    ``data_file``, read from ``/src/final_df.csv``.
  """
  # The image tag is the literal "preprocess" followed by the module-level
  # `version` string, which is looked up each time this factory is called.
  image_ref = 'rio05docker/activity_classification:preprocess' + version

  script_args = ["preprocess.py"]
  script_args += ['--input-bucket', prep_input]
  script_args += ['--output-bucket', prep_output]

  step = dsl.ContainerOp(
      name='preprocess-op',
      image=image_ref,
      command="python3",
      arguments=script_args,
      file_outputs={'data_file': '/src/final_df.csv'},
  )
  return step.apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))


def training_op(input,
                output,
               ):
  """Build the GPU pipeline step that runs ``train.py`` in its container.

  Args:
    input: value passed to the script's ``--input-bucket`` flag. (The name
      shadows the ``input`` builtin; it is kept for caller compatibility.)
    output: value passed to the script's ``--output-bucket`` flag.

  Returns:
    The ``dsl.ContainerOp`` after a GPU limit of 1 has been set and
    ``use_gcp_secret('user-gcp-sa')`` has been applied. The op declares a
    single file output, ``model``, read from ``/activity_classification.h5``.
  """
  return dsl.ContainerOp(
    name='gpu-op',
    image='rio05docker/activity_classification:trainv2',
    command="python3",
    arguments=[
        "train.py",
        '--input-bucket', input,
        # Previously this was a second '--input-bucket', so the output value
        # was sent under the input flag. Use the same flag name as the
        # preprocess and convert components.
        '--output-bucket', output,
    ],
    file_outputs={
        'model': '/activity_classification.h5',
    }
  ).set_gpu_limit(1).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

def convert_op(prep_input, prep_output):
  """Build the pipeline step that runs ``convert.py`` in its container.

  Args:
    prep_input: value passed to the script's ``--input-bucket`` flag.
    prep_output: value passed to the script's ``--output-bucket`` flag.

  Returns:
    The result of applying ``use_gcp_secret('user-gcp-sa')`` to the
    ``dsl.ContainerOp``. The op declares a single file output,
    ``model_file``, read from ``/activity_classification.tflite``.
  """
  script_args = ["convert.py"]
  script_args += ['--input-bucket', prep_input]
  script_args += ['--output-bucket', prep_output]

  declared_outputs = {'model_file': '/activity_classification.tflite'}

  step = dsl.ContainerOp(
      name='convert_op-op',
      image='rio05docker/activity_classification:convertv2',
      command="python3",
      arguments=script_args,
      file_outputs=declared_outputs,
  )
  return step.apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

In [173]:
#Pipeline definition
@dsl.pipeline(
  name='Kubeflow Test Pipeline',
  description='Performs preprocessing, training and deployment.'
)
def pipeline(
    prep_input='ai-vqc',
    prep_output='ai-vqc',
    ):
    """Chain the preprocess, training and convert steps in sequence.

    Args:
        prep_input: value forwarded as the input argument of every step.
        prep_output: value forwarded as the output argument of every step.

    Ordering is expressed only through ``.after()``; no step consumes another
    step's declared file outputs.
    """
    #Pipeline component instances
    prep_step = preprocess_op(prep_input, prep_output)

    # Keep a handle on the training step so the convert step can depend on the
    # op instance. Previously the `training_op` factory function itself was
    # passed to `.after()`, which is not a pipeline op.
    train_step = training_op(prep_input, prep_output).after(prep_step)
    convert_op(prep_input, prep_output).after(train_step)

In [174]:
#Compile the pipeline
import kfp.compiler as compiler

pipeline_func = pipeline
# The package file is named after the pipeline function,
# e.g. "pipeline.pipeline.zip" for a function called `pipeline`.
pipeline_filename = '{}.pipeline.zip'.format(pipeline_func.__name__)

# Compile the Python DSL definition into a single static package file that
# the Kubeflow Pipelines service can process.
compiler.Compiler().compile(
    pipeline_func,
    pipeline_filename,
)

In [175]:
#Create Kubeflow experiment
# Name of the experiment to reuse or create. This was previously not defined
# anywhere in the notebook, so the cell only worked with leftover kernel
# state. The value matches the experiment name shown in the recorded output.
EXPERIMENT_NAME = 'activity_classification'

client = kfp.Client()
try:
    # Reuse the experiment if it already exists.
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    # Lookup failed, so create the experiment. `Exception` replaces the bare
    # `except:` so that KeyboardInterrupt and SystemExit are no longer
    # swallowed.
    experiment = client.create_experiment(EXPERIMENT_NAME)

print(experiment)

{'created_at': datetime.datetime(2020, 4, 7, 15, 21, tzinfo=tzlocal()),
 'description': None,
 'id': '1a72d687-9bac-44af-8cdd-4190322fea3f',
 'name': 'activity_classification',
 'resource_references': None}


In [176]:
#Run the pipeline
# No parameter overrides: the pipeline runs with its declared defaults.
arguments = {}
run_name = '{} run'.format(pipeline_func.__name__)

# Submit the compiled package as a new run inside the experiment.
run_result = client.run_pipeline(
    experiment.id,
    run_name,
    pipeline_package_path=pipeline_filename,
    params=arguments,
)

# Echo what was submitted, one value per line.
for submitted_value in (experiment.id, run_name, pipeline_filename, arguments):
    print(submitted_value)

1a72d687-9bac-44af-8cdd-4190322fea3f
pipeline run
pipeline.pipeline.zip
{}
