In [1]:
# !pip install kfp==1.6.3

In [2]:
from typing import NamedTuple

import kfp
from kfp.components import InputPath, InputTextFile, OutputPath, OutputTextFile
from kfp.components import func_to_container_op

from datetime import datetime

import sys
sys.path.insert(0, "..")
from constants import NAMESPACE, HOST
from utils.auth import get_session_cookie
from utils import helpers

### Define several constants

In [3]:
# Names used for the experiment and the pipeline, plus a human-readable description.
EXPERIMENT_NAME = "tutorial"
PIPELINE_NAME = "tutorial"
PIPELINE_DESCRIPTION = "This is a tutorial pipeline"

# Version label; it is also embedded in the compiled package file name and the run name.
# Remember to change it on every run.
PIPELINE_VERSION = "0.0.1"

### Create components from Python functions

In [4]:
@func_to_container_op
def produce_one_small_output() -> str:
    """Produce a single small string output."""
    greeting = 'Hello world'
    return greeting

@func_to_container_op
def produce_two_small_outputs() -> NamedTuple('Outputs', [('text', str), ('number', int)]):
    """Produce two small outputs: a string named ``text`` and an integer named ``number``."""
    # The tuple order must match the field order declared in the return annotation.
    text = "data 1"
    number = 42
    return (text, number)

@func_to_container_op
def consume_two_arguments(text: str, number: int):
    """Print the received text and number, one labelled line each."""
    labelled_values = (
        ('Text={}', text),
        ('Number={}', str(number)),
    )
    for template, value in labelled_values:
        print(template.format(value))

### Create pipelines by connecting components

In [5]:
def producers_to_consumers_pipeline(text: str = "Hello world"):
    '''Pipeline that passes data from producer to consumer'''
    # The docstring above is kept verbatim: it shows up as the pipeline
    # description in the compiled pipeline spec.

    # Producers: one task with a single output, one task with two named outputs.
    single_producer = produce_one_small_output()
    pair_producer = produce_two_small_outputs()
    produced_text = pair_producer.outputs['text']
    produced_number = pair_producer.outputs['number']

    # Consumer 1: single producer output plus a constant.
    consume_two_arguments(single_producer.output, 42)
    # Consumer 2: the pipeline parameter plus one named output.
    consume_two_arguments(text, produced_number)
    # Consumer 3: both named outputs of the same producer.
    consume_two_arguments(produced_text, produced_number)

### Run pipelines

1. First, we create the client used to interact with the Kubeflow Pipelines API. In this case, we authenticate with a session cookie.

In [6]:
# Fetch a session cookie through the project helper and use it to authenticate.
session_cookie = get_session_cookie()
pipeline_api_host = f"{HOST}/pipeline"
auth_cookie = f"authservice_session={session_cookie}"

# Client bound to the pipeline endpoint and the configured namespace.
client = kfp.Client(
    host=pipeline_api_host,
    cookies=auth_cookie,
    namespace=NAMESPACE,
)

{'authservice_session': '<REDACTED>'}


2. Next, compile the pipeline into YAML, upload it to the pipeline store, and run it.

In [7]:
# Compile the pipeline function into a YAML package named after the version.
pipeline_package_path = f"pipeline_{PIPELINE_VERSION}.yaml"
compiler = kfp.compiler.Compiler()
compiler.compile(
    pipeline_func=producers_to_consumers_pipeline,
    package_path=pipeline_package_path,
)

# Look up (or create) the experiment and the pipeline through the project helpers.
experiment = helpers.get_or_create_experiment(client, name=EXPERIMENT_NAME)
pipeline = helpers.get_or_create_pipeline(
    client,
    pipeline_name=PIPELINE_NAME,
    version=PIPELINE_VERSION,
    pipeline_description=PIPELINE_DESCRIPTION,
)

# Timestamp suffix (YYYYMMDDHHMMSS) appended to the run name.
# `now` is reused by the recurring-run cell further down.
now = datetime.now().strftime("%Y%m%d%H%M%S")
run_name = f"{PIPELINE_NAME} {PIPELINE_VERSION} {now}"

# Start a run in the experiment; the returned run object is the cell output.
client.run_pipeline(
    experiment_id=experiment.id,
    job_name=run_name,
    version_id=pipeline.id,
)

{'created_at': datetime.datetime(2022, 2, 13, 9, 56, 39, tzinfo=tzlocal()),
 'description': None,
 'error': None,
 'finished_at': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=tzlocal()),
 'id': '51449c8c-dddc-4e93-9c55-1b1e8344cae4',
 'metrics': None,
 'name': 'tutorial 0.0.1 20220213095638',
 'pipeline_spec': {'parameters': None,
                   'pipeline_id': None,
                   'pipeline_manifest': None,
                   'pipeline_name': None,
                   'workflow_manifest': '{"kind":"Workflow","apiVersion":"argoproj.io/v1alpha1","metadata":{"generateName":"producers-to-consumers-pipeline-","creationTimestamp":null,"labels":{"pipelines.kubeflow.org/kfp_sdk_version":"1.6.3"},"annotations":{"pipelines.kubeflow.org/kfp_sdk_version":"1.6.3","pipelines.kubeflow.org/pipeline_compilation_time":"2022-02-13T09:56:33.902195","pipelines.kubeflow.org/pipeline_spec":"{\\"description\\": '
                                        '\\"Pipeline that passes data from '
               

3. Another way is to run the pipeline directly from the notebook (not recommended for production).

In [8]:
# Submit the pipeline function itself; no YAML package is compiled explicitly here.
# `arguments` is empty, so no pipeline parameter is overridden.
client.create_run_from_pipeline_func(
    producers_to_consumers_pipeline,
    arguments={},
    experiment_name=EXPERIMENT_NAME,
)

RunPipelineResult(run_id=4a2f56a2-72b1-475d-9e71-8ea54ac21b7c)

4. Create a recurring run with a single command

In [9]:
# Don't forget to disable the recurring run once you no longer need it.
# Relies on `experiment`, `pipeline` and `now` defined in the earlier run cell.
recurring_job_name = f"{PIPELINE_NAME} {PIPELINE_VERSION} {now}"
hourly_cron = "0 0 * * * *"  # hourly (six-field cron expression)

client.create_recurring_run(
    experiment_id=experiment.id,
    job_name=recurring_job_name,
    cron_expression=hourly_cron,
    version_id=pipeline.id,
)

{'created_at': datetime.datetime(2022, 2, 13, 9, 56, 44, tzinfo=tzlocal()),
 'description': None,
 'enabled': True,
 'error': None,
 'id': 'adbe5654-ce06-419b-aa53-86e6d1d27ed5',
 'max_concurrency': '1',
 'mode': None,
 'name': 'tutorial 0.0.1 20220213095638',
 'no_catchup': None,
 'pipeline_spec': {'parameters': None,
                   'pipeline_id': None,
                   'pipeline_manifest': None,
                   'pipeline_name': None,
                   'workflow_manifest': '{"kind":"Workflow","apiVersion":"argoproj.io/v1alpha1","metadata":{"generateName":"producers-to-consumers-pipeline-","creationTimestamp":null,"labels":{"pipelines.kubeflow.org/kfp_sdk_version":"1.6.3"},"annotations":{"pipelines.kubeflow.org/kfp_sdk_version":"1.6.3","pipelines.kubeflow.org/pipeline_compilation_time":"2022-02-13T09:56:33.902195","pipelines.kubeflow.org/pipeline_spec":"{\\"description\\": '
                                        '\\"Pipeline that passes data from '
                         