# Data passing tutorial
This tutorial shows how to create Python components that produce, consume, and transform data.
It shows how to create data passing pipelines by instantiating components and connecting them together.

In [None]:
from typing import NamedTuple

import kfp
from kfp.components import InputPath, InputTextFile, InputBinaryFile, OutputPath, OutputTextFile, OutputBinaryFile
from kfp.components import func_to_container_op

## Small data

### Consuming small data

In [None]:
@func_to_container_op
def consume_one_argument(text: str):
    '''Write the given small text value to standard output.

    Args:
        text: The text to print.
    '''
    print(text)

def constant_to_consumer_pipeline():
    '''Pipeline that passes a small constant string to the consumer.'''
    # A plain Python constant can be given directly as a component argument.
    consume_task = consume_one_argument('Hello world')

# Create a run from the pipeline function using a default-constructed client;
# the pipeline declares no parameters, so `arguments` is empty.
kfp.Client().create_run_from_pipeline_func(constant_to_consumer_pipeline, arguments={})

In [None]:
def pipeline_parameter_to_consumer_pipeline(text: str):
    '''Pipeline that passes a small pipeline parameter string to the consumer.

    Args:
        text: Pipeline parameter that is forwarded to the consumer.
    '''
    # The pipeline parameter is handed to the consumer as its argument.
    consume_task = consume_one_argument(text)

# Create a run, supplying a value for the pipeline's `text` parameter.
kfp.Client().create_run_from_pipeline_func(
    pipeline_parameter_to_consumer_pipeline,
    arguments={'text': 'Hello world'}
)

### Producing small data

In [None]:
@func_to_container_op
def produce_one_small_output() -> str:
    '''Return a short constant string as the single output.'''
    greeting = 'Hello world'
    return greeting

def task_output_to_consumer_pipeline():
    '''Pipeline that feeds a producer task's small output into two consumers.'''
    producer = produce_one_small_output()

    # `task.output` is a shortcut that only works for single-output components.
    first_consumer = consume_one_argument(producer.output)

    # `task.outputs[...]` looks the output up by name and always works.
    second_consumer = consume_one_argument(producer.outputs['output'])

# Create a run of the producer-to-consumer pipeline; it takes no parameters.
kfp.Client().create_run_from_pipeline_func(task_output_to_consumer_pipeline, arguments={})

### Producing and consuming multiple arguments

In [None]:
@func_to_container_op
def produce_two_small_outputs() -> NamedTuple('Outputs', [('text', str), ('number', int)]):
    '''Return a text value and a number as two separate outputs.'''
    text, number = "data 1", 42
    return (text, number)

@func_to_container_op
def consume_two_arguments(text: str, number: int):
    '''Print the text and the number, each on its own labelled line.

    Args:
        text: Value printed after the "Text=" label.
        number: Value printed after the "Number=" label.
    '''
    labelled_values = (
        ('Text={}', text),
        ('Number={}', str(number)),
    )
    for template, value in labelled_values:
        print(template.format(value))

def producers_to_consumers_pipeline(text: str = "Hello world"):
    '''Pipeline that wires producer outputs, a constant and a pipeline parameter into consumers.

    Args:
        text: Pipeline parameter passed to one of the consumers.
    '''
    single_output_producer = produce_one_small_output()
    two_output_producer = produce_two_small_outputs()

    # Producer output combined with a constant number.
    first_consumer = consume_two_arguments(
        text=single_output_producer.output,
        number=42,
    )
    # Pipeline parameter combined with one named output of the second producer.
    second_consumer = consume_two_arguments(
        text=text,
        number=two_output_producer.outputs['number'],
    )
    # Both named outputs of the second producer.
    third_consumer = consume_two_arguments(
        text=two_output_producer.outputs['text'],
        number=two_output_producer.outputs['number'],
    )


# Create a run without arguments; the pipeline's `text` parameter has a declared default.
kfp.Client().create_run_from_pipeline_func(producers_to_consumers_pipeline, arguments={})

### Consuming and producing data at the same time

In [None]:
@func_to_container_op
def get_item_from_list(list: list, index: int) -> str:
    '''Return the element of `list` found at position `index`.

    Args:
        list: The sequence to pick from.
        index: Position of the wanted element.
    '''
    item = list[index]
    return item

@func_to_container_op
def truncate_text(text: str, max_length: int) -> str:
    '''Return the leading part of `text`, cut off by slicing at `max_length`.

    Args:
        text: The text to shorten.
        max_length: Slice end index applied to `text`.
    '''
    shortened = text[:max_length]
    return shortened

def processing_pipeline(text: str = "Hello world"):
    '''Pipeline that truncates the text and then picks it back out of a list.

    Args:
        text: Pipeline parameter holding the text to truncate.
    '''
    shortened = truncate_text(text=text, max_length=5)

    # A task output can sit inside a list argument next to plain constants;
    # index 2 is the position of the truncated text.
    items = [3, 1, shortened.output, 1, 5, 9, 2, 6, 7]
    picked = get_item_from_list(list=items, index=2)


# Create a run without overriding the pipeline's `text` parameter.
kfp.Client().create_run_from_pipeline_func(processing_pipeline, arguments={})

## Bigger data (files)


### Writing and reading bigger data

In [None]:
# Writing bigger data
@func_to_container_op
def repeat_line(line: str, output_text_path: OutputPath(str), count: int = 10):
    '''Write `line` to the output file `count` times, one copy per line.

    Args:
        line: Text to repeat; a newline is appended to every copy.
        output_text_path: Path of the file to write.
        count: Number of copies to write.
    '''
    with open(output_text_path, 'w') as output_file:
        output_file.writelines(line + '\n' for _ in range(count))


# Reading bigger data
@func_to_container_op
def print_text(text_path: InputPath(str)):
    '''Print the contents of a text file line by line.

    Args:
        text_path: Path of the text file to read.
    '''
    with open(text_path, 'r') as source:
        for text_line in source:
            # Lines keep their own newline, so print() must not add another.
            print(text_line, end='')

def print_repeating_lines_pipeline():
    '''Pipeline that writes a repeated line to a file output and prints it.'''
    repeat_task = repeat_line(line='Hello', count=5)
    # Pass the task's `.output`, not the task object itself.
    print_text(repeat_task.output)

# Create a run of the file-passing pipeline; it takes no parameters.
kfp.Client().create_run_from_pipeline_func(print_repeating_lines_pipeline, arguments={})

### Processing bigger data

In [None]:
@func_to_container_op
def split_text_lines(source_path: InputPath(str), odd_lines_path: OutputPath(str), even_lines_path: OutputPath(str)):
    '''Split a text file into its odd-numbered and even-numbered lines.

    Every value read is also echoed with print(), including the final empty
    read that marks the end of the file.

    Args:
        source_path: Path of the text file to read.
        odd_lines_path: Path of the file that receives lines 1, 3, 5, ...
        even_lines_path: Path of the file that receives lines 2, 4, 6, ...
    '''
    with open(source_path, 'r') as reader, \
            open(odd_lines_path, 'w') as odd_writer, \
            open(even_lines_path, 'w') as even_writer:
        # Alternate between the two writers, starting with the odd one.
        writers = (odd_writer, even_writer)
        lines_written = 0
        while True:
            line = reader.readline()
            print(line)
            if line == "":
                # readline() returns an empty string only at end of file.
                break
            writers[lines_written % 2].write(line)
            lines_written += 1

def text_splitting_pipeline():
    '''Pipeline that splits ten lines of text into odd and even lines and prints each part.'''
    words = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
    text = '\n'.join(words)
    split_text_task = split_text_lines(text)

    # NOTE(review): later KFP SDK releases reportedly strip the `_path` suffix
    # from output names, so these keys may need to be 'odd_lines' and
    # 'even_lines' - confirm against the installed SDK version.
    odd_lines = split_text_task.outputs['odd_lines_path']
    print_text(odd_lines)
    even_lines = split_text_task.outputs['even_lines_path']
    print_text(even_lines)

# Create a run of the line-splitting pipeline; it takes no parameters.
kfp.Client().create_run_from_pipeline_func(text_splitting_pipeline, arguments={})

### Processing bigger data with pre-opened files

In [None]:
@func_to_container_op
def split_text_lines2(source_file: InputTextFile(str), odd_lines_file: OutputTextFile(str), even_lines_file: OutputTextFile(str)):
    '''Split text into odd-numbered and even-numbered lines using file objects.

    The function works on file objects directly instead of opening paths
    itself. Every value read is also echoed with print(), including the final
    empty read that marks the end of the input.

    Args:
        source_file: File object to read lines from.
        odd_lines_file: File object that receives lines 1, 3, 5, ...
        even_lines_file: File object that receives lines 2, 4, 6, ...
    '''
    # Alternate between the two destinations, starting with the odd one.
    destinations = (odd_lines_file, even_lines_file)
    lines_written = 0
    while True:
        line = source_file.readline()
        print(line)
        if line == "":
            # readline() returns an empty string only at end of input.
            break
        destinations[lines_written % 2].write(line)
        lines_written += 1

def text_splitting_pipeline2():
    '''Pipeline that splits ten lines of text with the file-object splitter and prints each part.'''
    words = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
    text = '\n'.join(words)
    split_text_task = split_text_lines2(text)

    # NOTE(review): later KFP SDK releases reportedly strip the `_file` suffix
    # from output names, so these keys may need to be 'odd_lines' and
    # 'even_lines' - confirm against the installed SDK version.
    odd_printer = print_text(split_text_task.outputs['odd_lines_file'])
    odd_printer.set_display_name('Odd lines')
    even_printer = print_text(split_text_task.outputs['even_lines_file'])
    even_printer.set_display_name('Even lines')

# Create a run of the second line-splitting pipeline; it takes no parameters.
kfp.Client().create_run_from_pipeline_func(text_splitting_pipeline2, arguments={})