In [1]:
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
# Install the Pipelines SDK - this only needs to be run once in the environment.
!pip3 install kfp --upgrade

# Kubeflow Pipelines basic component build

In this notebook, we will demo: 

* Defining a Kubeflow pipeline with the Python KFP SDK
* Creating an experiment and submitting pipelines to the KFP runtime environment using the KFP SDK

Reference documentation: 
* https://www.kubeflow.org/docs/pipelines/sdk/build-component/
* https://www.kubeflow.org/docs/pipelines/sdk/sdk-overview/

## Setup

In [3]:
# Set your output and project. !!!You must do this before you can proceed!!!
# PROJECT_NAME is interpolated into both OUTPUT_DIR and TARGET_IMAGE below,
# so changing it here updates the bucket path and the image path together.
EXPERIMENT_NAME = 'basic_component'                          # Experiment that is looked up (or created) later in this notebook
PROJECT_NAME =  'Your-Gcp-Project-Name'                      # Replace with your own value; presumably the GCP project ID, since it is used in the gcr.io path - confirm
OUTPUT_DIR = 'gs://%s-basic-component' % PROJECT_NAME        # A path for asset outputs
BASE_IMAGE = 'google/cloud-sdk:latest'                       # Base image used in various steps of the pipeline
TARGET_IMAGE = 'gcr.io/%s/component:latest' % PROJECT_NAME   # Target image that will include our final code

In [None]:
# Create the Cloud Storage bucket named by OUTPUT_DIR.
# NOTE(review): presumably this reports an error if the bucket already exists,
# in which case the cell can be skipped on re-runs - confirm.
!gsutil mb {OUTPUT_DIR}

## Create an Experiment in the Pipeline System

The pipeline system requires an "Experiment" to group pipeline runs. You can create a new experiment, or call `client.list_experiments()` to list the existing ones.

In [5]:
# Get the experiment named EXPERIMENT_NAME, creating it if the lookup fails.
import kfp

client = kfp.Client()

try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    # The lookup failed - presumably because no experiment with this name
    # exists yet (the exact exception type depends on the installed kfp
    # version; confirm before narrowing further) - so create it.
    # `except Exception` is used instead of a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate when the user
    # interrupts the cell.
    experiment = client.create_experiment(EXPERIMENT_NAME)

## Create a Python function

In [6]:
def add(a: float, b: float) -> float:
    """Add two numbers and return their sum."""
    operands = (a, b)
    # Log the operands before computing the result.
    print("Adding two values %s and %s" % operands)

    total = a + b
    return total

## Build a Component With the Above Function
The return value "add_op" represents a step that can be used directly in a pipeline function. 

In [7]:
from kfp import compiler

# Build a component from the `add` function: BASE_IMAGE is the starting image,
# TARGET_IMAGE is the image to produce, and OUTPUT_DIR is the GCS staging path.
# The result, `add_op`, is what the pipeline function calls to create steps.
add_op = compiler.build_python_component(
    component_func=add,
    staging_gcs_path=OUTPUT_DIR,
    dependency=[
        compiler.VersionedDependency(
            name='google-api-python-client',
            version='1.7.0',
        ),
    ],
    base_image=BASE_IMAGE,
    target_image=TARGET_IMAGE,
)

2019-08-07 18:25:48:INFO:Build an image that is based on google/cloud-sdk:latest and push the image to gcr.io/chavoshi-dev-2/pusher:latest
2019-08-07 18:25:48:INFO:Checking path: gs://chavoshi-dev-2-basic-component...
2019-08-07 18:25:48:INFO:Generate entrypoint and serialization codes.
2019-08-07 18:25:48:INFO:Generate build files.




2019-08-07 18:25:50:INFO:Start a kaniko job for build.
2019-08-07 18:25:50:INFO:Cannot Find local kubernetes config. Trying in-cluster config.
2019-08-07 18:25:50:INFO:Initialized with in-cluster config.
2019-08-07 18:25:55:INFO:5 seconds: waiting for job to complete
2019-08-07 18:26:00:INFO:10 seconds: waiting for job to complete
2019-08-07 18:26:05:INFO:15 seconds: waiting for job to complete
2019-08-07 18:26:10:INFO:20 seconds: waiting for job to complete
2019-08-07 18:26:15:INFO:25 seconds: waiting for job to complete
2019-08-07 18:26:20:INFO:30 seconds: waiting for job to complete
2019-08-07 18:26:25:INFO:35 seconds: waiting for job to complete
2019-08-07 18:26:30:INFO:40 seconds: waiting for job to complete
2019-08-07 18:26:35:INFO:45 seconds: waiting for job to complete
2019-08-07 18:26:40:INFO:50 seconds: waiting for job to complete
2019-08-07 18:26:45:INFO:55 seconds: waiting for job to complete
2019-08-07 18:26:50:INFO:60 seconds: waiting for job to complete
2019-08-07 18:26:



2019-08-07 18:27:26:INFO:Build component complete.


### Build a pipeline using this component

In [8]:
import kfp.dsl as dsl


@dsl.pipeline(
    name='Calculation pipeline',
    description='A sample pipeline that performs arithmetic calculations.',
)
def calc_pipeline(a='1', b='7', c='17'):
    # Pipeline parameters are passed straight through as operation arguments.
    # Per the original author's note, each add_op(...) call returns a
    # dsl.ContainerOp instance.
    sum_ab_task = add_op(a, b)
    sum_bc_task = add_op(b, c)

    # The third step consumes the outputs of the first two steps.
    # An explicit dependency between tasks can also be declared with
    # xyz_task.after(abc_task).
    add_op(sum_ab_task.output, sum_bc_task.output)

### Compile the pipeline

In [9]:
import kfp.compiler as compiler

# The package file name is derived from the pipeline function's name.
pipeline_func = calc_pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'

# Compile the pipeline function into the package file named above.
compiler.Compiler().compile(
    pipeline_func=pipeline_func,
    package_path=pipeline_filename,
)

## Submit the pipeline for execution

In [10]:
# Argument values for this run; only 'a' and 'b' are supplied here.
arguments = dict(a='7', b='8')

# Submit a run of the compiled package under the experiment obtained earlier.
run_name = pipeline_func.__name__ + ' run'
run_result = client.run_pipeline(
    experiment_id=experiment.id,
    job_name=run_name,
    pipeline_package_path=pipeline_filename,
    params=arguments,
)

#This link leads to the run information page. 
#Note: There is a bug in JupyterLab that modifies the URL and makes the link stop working