### TFX Components

This notebook shows how to create a pipeline that uses the following TFX components:

* CsvExampleGen
* StatisticsGen
* SchemaGen

In [None]:
import kfp

# Create the Kubeflow Pipelines client; it is used at the end of the notebook
# to submit the pipeline run.
client = kfp.Client()

# NOTE: when working from GCP notebooks (or local notebooks), pass the endpoint
# explicitly instead: kfp.Client(host='https://xxxxx.notebooks.googleusercontent.com/')

In [None]:
import json
from kfp.components import load_component_from_url

# Load the component definitions (component.yaml files) used by the pipeline below.
# Every URL points at a specific commit of the Ark-kun/pipelines repository
# (290fa55 for the GCS download component, 3a1159a for the three TFX components).
# NOTE(review): loading happens over the network when this cell runs — presumably
# the pinned commits keep the definitions stable; confirm they are still reachable.
download_from_gcs_op = load_component_from_url('https://raw.githubusercontent.com/Ark-kun/pipelines/290fa55/components/google-cloud/storage/download/component.yaml')
CsvExampleGen_op = load_component_from_url('https://raw.githubusercontent.com/Ark-kun/pipelines/3a1159a/components/tfx/ExampleGen/CsvExampleGen/component.yaml')
StatisticsGen_op = load_component_from_url('https://raw.githubusercontent.com/Ark-kun/pipelines/3a1159a/components/tfx/StatisticsGen/component.yaml')
SchemaGen_op = load_component_from_url('https://raw.githubusercontent.com/Ark-kun/pipelines/3a1159a/components/tfx/SchemaGen/component.yaml')

def tfx_pipeline(
    input_data_uri,
):
    """Download data from GCS and chain CsvExampleGen, StatisticsGen and SchemaGen.

    Args:
        input_data_uri: Value handed to the GCS download component; the
            downloaded output becomes the CsvExampleGen input base.
    """
    # Stage the input data first; its output feeds CsvExampleGen below.
    gcs_download = download_from_gcs_op(input_data_uri)

    # A single input split named 'data' that matches every *.csv file.
    csv_input_config = {
        'splits': [
            {'name': 'data', 'pattern': '*.csv'},
        ],
    }
    # Two output splits: 'train' with 2 hash buckets and 'eval' with 1.
    csv_output_config = {
        'splitConfig': {
            'splits': [
                {'name': 'train', 'hash_buckets': 2},
                {'name': 'eval', 'hash_buckets': 1},
            ],
        },
    }
    example_gen = CsvExampleGen_op(
        input_base=gcs_download.output,
        input_config=json.dumps(csv_input_config),
        output_config=json.dumps(csv_output_config),
    )

    # Each remaining stage consumes the output of the stage before it.
    stats_gen = StatisticsGen_op(example_gen.output)

    # Last stage of the pipeline; nothing downstream needs its task handle.
    SchemaGen_op(stats_gen.output)
    
# Create a run directly from the pipeline function, supplying its single
# parameter. This is left as a bare expression so the cell shows the call's result.
# NOTE(review): the gs://avolkov/... URI is hard-coded; point 'input_data_uri' at
# data you can read if that bucket is not accessible to you — confirm before running.
client.create_run_from_pipeline_func(
    tfx_pipeline,
    arguments={
        'input_data_uri': 'gs://avolkov/tensorflow-tfx/tfx/components/testdata/external/csv',
    },
)