# Component Test: Convert Speech To Text

## Authors
- Natalie Jann <natalie.jann@ibm.com>
- Sebastian Lehrig <sebastian.lehrig1@ibm.com>


## License
Apache-2.0 License

## Imports & Constants

In [1]:
import kfp
import kfp.dsl as dsl

# Client used further down to submit the pipeline run.
KFP_CLIENT = kfp.Client()

# Read the namespace from the Kubernetes service-account mount of this pod.
# Surrounding whitespace (e.g. a trailing newline) is stripped so the value
# can be passed as-is as the `namespace` argument when creating the run.
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
    NAMESPACE = f.read().strip()
NAMESPACE

# Load the `lab_black` IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats code cells with Black and needs
# the extension installed in the kernel environment — confirm.
%load_ext lab_black

## Create pipeline

In [4]:
# Dataset loader component, taken from the sibling
# data-collection/load-huggingface-dataset directory.
load_dataset_comp = kfp.components.load_component_from_file(
    "../../data-collection/load-huggingface-dataset/component.yaml"
)

# Component under test, described by the component.yaml next to this notebook.
convert_speech_to_text_comp = kfp.components.load_component_from_file(
    "component.yaml"
)

In [5]:
@dsl.pipeline(
    name="Component Test - Convert Speech To Text",
    description="A simple component test",
)
def train_pipeline():
    """Two-step test pipeline: load a dataset, then convert its speech to text.

    Step 1 runs ``load_dataset_comp`` for the ``validation`` split of the
    ``en_us`` configuration of ``google/fleurs``.
    Step 2 runs ``convert_speech_to_text_comp`` on the ``dataset_dir`` output
    of step 1 and is constrained to nodes labelled
    ``worker_type=baremetal_worker``.
    """
    load_dataset_task = load_dataset_comp(
        path="google/fleurs",
        configuration="en_us",
        split="validation",
    )

    # Bind the task so the node selector can be applied to it. The original
    # referenced `convert_speech_to_text_task` before (and without ever)
    # assigning it, which raises NameError on a fresh kernel.
    convert_speech_to_text_task = convert_speech_to_text_comp(
        audio_dir=load_dataset_task.outputs["dataset_dir"]
    )

    # Apply the constraint once, after the task exists.
    convert_speech_to_text_task.add_node_selector_constraint(
        "worker_type", "baremetal_worker"
    )

## Run the pipeline within an experiment

In [6]:
# Submit `train_pipeline` as a run in the namespace read earlier. The pipeline
# takes no parameters, hence the empty arguments dict. The call stays the last
# expression of the cell so the returned run result is displayed.
KFP_CLIENT.create_run_from_pipeline_func(
    pipeline_func=train_pipeline,
    arguments={},
    namespace=NAMESPACE,
)

RunPipelineResult(run_id=2589462d-4e21-49fe-9f04-9fec38560931)