# Data Science Pipelines Example

## Install the prerequisites

In [None]:
!pip install -U pandas

## Python imports

In [None]:
import kfp
import kfp.components as components
import kfp.dsl as dsl

import glob
import pandas as pd
import requests
import io
import zipfile
import re

## Python functions to process data and train models

In [None]:
def ingest_data() -> str:
    """Return the fixed placeholder message emitted by the ingestion step."""
    message = "Ingest Data"
    return message

def preprocess_data(msg: str) -> str:
    """Return the fixed placeholder result of the preprocessing step.

    ``msg`` is accepted but not read; the returned text is always the same.
    """
    result = "preprocessed data"
    return result

def train_model_A(msg: str):
    """Print the label of model A; ``msg`` is accepted but not read."""
    label = "Model A"
    print(label)

def train_model_B(msg: str):
    """Print the label of model B; ``msg`` is accepted but not read."""
    label = "Model B"
    print(label)

## Creating the pipeline components

In [None]:
# Wrap each plain Python function as a pipeline component. All four
# components are built with the same base image and the same extra package,
# so they are created in one pass and unpacked in declaration order.
(
    ingest_data_step,
    preprocess_data_step,
    train_model_A_step,
    train_model_B_step,
) = (
    components.create_component_from_func(
        func=step_func,
        base_image="registry.redhat.io/ubi8/python-39",
        packages_to_install=['pandas'],
    )
    for step_func in (
        ingest_data,
        preprocess_data,
        train_model_A,
        train_model_B,
    )
)

## Structure the Pipeline DAG

In [None]:
# Pipeline DAG: ingest -> preprocess -> (train model A, train model B).
@dsl.pipeline(name="pipeline-test")
def world_cup_pipeline():
    # Root step: takes no inputs.
    ingest_task = ingest_data_step()

    # Preprocessing consumes the ingestion step's output.
    preprocess_task = preprocess_data_step(ingest_task.output)

    # Both training steps are fed the same preprocessed output; neither one
    # consumes anything from the other.
    processed = preprocess_task.output
    train_model_A_step(processed)
    train_model_B_step(processed)

## Deploy the pipeline

### Creating the pipeline definition from the compiler

In [None]:
from kfp_tekton.compiler import TektonCompiler

# Compile the pipeline function and write the resulting definition to
# 'data-science-pipelines.yaml' (a relative path).
TektonCompiler().compile(
    pipeline_func=world_cup_pipeline,
    package_path='data-science-pipelines.yaml',
)

### Connect to the Data Science Pipelines server and submit a run

In [None]:
from kfp_tekton import TektonClient

# Client for the Data Science Pipelines server. An alternative endpoint was
# previously left disabled at this spot:
#   http://ds-pipeline-kfpv1.apps.dsp-demo.i4g9.p1.openshiftapps.com
kfp_client = TektonClient(
    'https://ds-pipeline-ui-kfpv1.apps.dsp-demo.i4g9.p1.openshiftapps.com/'
)

# Submit the pipeline with no arguments and keep the ID of the created run.
submitted_run = kfp_client.create_run_from_pipeline_func(
    world_cup_pipeline,
    arguments={},
)
run_id = submitted_run.run_id
print("Run ID: ", run_id)