# Introduction to lightweight components, pipelines and the client

In [None]:
# Install the SDK
!pip3 install kfp --upgrade

In [None]:
import kfp.components as comp
import kfp.dsl as dsl

In [None]:
#Define a Python function
def add(a: int, b: int) -> int:
    """Return the sum of the two arguments."""
    total = a + b
    return total

We can convert the function ```add``` to a component using the function ```comp.func_to_container_op```

In [None]:
# Convert `add` into a component factory; base_image names the container image passed to the SDK.
add_op = comp.func_to_container_op(add, base_image='tensorflow/tensorflow:1.13.2-py3')

In [None]:
# Show what kind of object func_to_container_op returned.
type(add_op)

In [None]:
# func_to_component_text expects the original Python function, not the op
# factory built from it, so pass `add` together with the base image that was
# used when creating add_op.
print(comp.func_to_component_text(add, base_image='tensorflow/tensorflow:1.13.2-py3'))

There are several requirements for the function:
* The function should be stand-alone. It should not use any code declared outside of the function definition. Any imports should be added inside the main function. Any helper functions should also be defined inside the main function.
* The function can only import packages that are available in the base image. If you need to import a package that's not available you can try to find a container image that already includes the required packages. (As a workaround you can use the module subprocess to run pip install for the required package.)
* If the function operates on numbers, the parameters need to have type hints. Supported types are ```[int, float, bool]```. Everything else is passed as string.
* To build a component with multiple output values, use the typing.NamedTuple type hint syntax: ```NamedTuple('MyFunctionOutputs', [('output_name_1', type), ('output_name_2', float)])```

- create the second component to read from GCS and build the first pipeline
- run pipeline

In [None]:
from typing import NamedTuple

In [None]:
def divide(x: int, y: int) -> NamedTuple('MyDivmodOutput', [('quotient', int), ('remainder', int)]):
    """Divide x by y and return both the quotient and the remainder.

    The result is a named tuple with the fields ``quotient`` and
    ``remainder``. The import is kept inside the function body so the
    function remains self-contained.
    """
    from collections import namedtuple

    outputs_type = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])
    whole = x // y
    rest = x % y
    return outputs_type(quotient=whole, remainder=rest)

In [None]:
# Convert `divide` into a component factory; unlike add_op, no base_image is passed here.
divide_op = comp.func_to_container_op(divide)

In [None]:
def print_value(x: str) -> None:
    """Helper function that prints the value of a parameter."""
    value = x
    print(value)

In [None]:
# Convert `print_value` into a component factory so it can be used as a pipeline step.
print_value_op = comp.func_to_container_op(print_value)

In [None]:
#Define first pipeline
@dsl.pipeline(
    name='A simple pipeline',
    description='A simple pipeline to illustrate the kfp main concepts'
)
def simple_pipeline(
    x_value: int = 5,
    y_value: int = 4,
    z_value: int = 2
):
    """Add x_value and y_value, divide the sum by z_value, and print the results.

    The pipeline parameters are declared with plain type annotations and
    default values. PipelineParam objects placed in annotation position
    do not provide names, defaults or types for the parameters.

    Args:
        x_value: First addend.
        y_value: Second addend.
        z_value: Divisor applied to the sum of x_value and y_value.
    """
    add_step = add_op(a=x_value, b=y_value)
    add_step.set_display_name('Add a and b')
    add_result = add_step.output

    divide_step = divide_op(x=add_result, y=z_value)
    divide_step.set_display_name('Divide sum by z')

    # print_value takes a str, so each integer output is formatted into a
    # string before being handed to the print step.
    print_value_quotient_step = print_value_op('%s' % divide_step.outputs['quotient'])
    print_value_quotient_step.set_display_name('Print quotient')
    print_value_remainder_step = print_value_op('%s' % divide_step.outputs['remainder'])
    print_value_remainder_step.set_display_name('Print remainder')

To introduce an `if` condition into a pipeline, see ```kfp.dsl.Condition```.

The pipeline needs to be compiled before it can be uploaded to Kubeflow Pipelines.

In [None]:
import kfp.compiler as compiler

In [None]:
# Compile the pipeline function into a package file named after the function
# (simple_pipeline.__name__ + '.pipeline.zip').
compiler.Compiler().compile(simple_pipeline, simple_pipeline.__name__ + '.pipeline.zip')

In [None]:
!ls

If running outside of the cluster with Kubeflow, set `GOOGLE_APPLICATION_CREDENTIALS` for dealing with authorisation. The service account needs to have the role `IAP-secured Web App User`.

In [None]:
# import os
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '' # path to the JSON key file of the service account used to log in; it needs to have the role IAP-secured Web App User
# HOST = '' # url of the cluster e.g. https://demo-kubeflow.endpoints.lf-ml-demo.cloud.goog/pipeline
# CLIENT_ID = '' # The client ID used by Identity-Aware Proxy
# NAMESPACE = '' # Kubeflow user namespace (a namespace name, not a URL)

In [None]:
from kfp import Client as KfpClient

In [None]:
client = KfpClient(
# No arguments are passed because this notebook runs inside the same Kubeflow cluster.
# To connect from outside the cluster, uncomment the arguments below
# (HOST, CLIENT_ID and NAMESPACE must be defined first):
#     host=HOST,
#     client_id=CLIENT_ID,
#     namespace=NAMESPACE  
)

In [None]:
# Submit a run from the compiled package. pipeline_file is built with the same
# expression as the output path of the compile step, and the argument keys are
# the parameter names of simple_pipeline.
client.create_run_from_pipeline_package(
    pipeline_file=simple_pipeline.__name__ + '.pipeline.zip',
    arguments={'x_value': '4', 'y_value': '5', 'z_value': 2},
    experiment_name='01_lightweight_components',
    run_name='001'
)

A few additional comments:
- ```kfp.Client.create_run_from_pipeline_func``` would have allowed us to compile and run the pipeline ```simple_pipeline``` in a single step
- ```kfp.dsl.Condition``` can be used to create branches based on *if statements*