### Importing Modules

In [1]:
from typing import NamedTuple

import kfp
import kfp.components as components
import kfp.dsl as dsl
from kfp.components import InputPath, OutputPath


# Namespace passed to `client.create_experiment` when the experiment is created.
NAMESPACE = "kubeflow-user-example-com"

# S3 bucket name; used as the default `input_path` of `sample_pipeline`.
BUCKET_NAME = "pranava-data"

# Disabled dataset URL (not referenced by any code in this notebook):
# DATASET_URL = "https://www.kaggle.com/uciml/iris/download"

# Container image used as `base_image` for every component built with
# `components.func_to_container_op`.
BASE_IMAGE = "mesosphere/kubeflow:1.2.0-1.1.0-tensorflow-2.4.0"

### Building Pipeline Components

In [2]:
def get_data(input_path: str) -> str:
    """
    Download ``Iris.csv`` from an S3 bucket into the current working directory.

    :param input_path: name of the S3 bucket that holds ``Iris.csv``
    :return: the same bucket name, unchanged, for use by the next step
    """

    import boto3

    bucket = input_path
    dataset_key = "Iris.csv"

    # The object key doubles as the local file name.
    s3_client = boto3.client('s3')
    s3_client.download_file(Bucket=bucket, Key=dataset_key, Filename=dataset_key)

    return bucket

In [3]:
def pre_process_data(input_path: str) -> str:
    """
    Train a decision-tree classifier on the Iris dataset and store it in S3.

    Downloads ``Iris.csv`` from the bucket, drops the ``Id`` column, fits a
    ``DecisionTreeClassifier`` on the remaining columns against ``Species``,
    serialises the fitted model with joblib and uploads it to the same bucket
    under the key ``iris-model-1``.

    :param input_path: name of the S3 bucket holding ``Iris.csv``; the model is uploaded to this same bucket
    :return: the bucket name, unchanged, so the next step can fetch the model
    """

    import boto3
    import joblib
    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier

    dataset_file = "Iris.csv"
    model_file = "iris-model-1"

    s3 = boto3.client('s3')
    s3.download_file(input_path, dataset_file, dataset_file)

    # Build a new frame instead of mutating the loaded one in place.
    iris = pd.read_csv(dataset_file).drop(columns='Id')

    features = iris.drop(columns='Species')
    labels = iris['Species']

    # Fixed seed: scikit-learn permutes the features at every split, so an
    # unseeded tree can differ between otherwise identical pipeline runs.
    classifier = DecisionTreeClassifier(random_state=0)
    classifier.fit(features, labels)

    # The local file name is reused as the S3 object key.
    joblib.dump(classifier, model_file)
    s3.upload_file(model_file, input_path, model_file)

    return input_path

In [4]:
def predict_model(input_path: str):
    """
    Download the trained model from S3 and print predictions for sample rows.

    :param input_path: name of the S3 bucket that holds the ``iris-model-1`` object
    :return: None
    """

    import boto3
    import joblib

    model_file = "iris-model-1"

    s3 = boto3.client('s3')
    s3.download_file(input_path, model_file, model_file)

    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code; only point this step at a bucket whose contents are trusted.
    # scikit-learn must be installed in the image for unpickling to succeed.
    model = joblib.load(model_file)

    # Numeric feature rows (previously passed as strings, which relied on
    # implicit string-to-float coercion inside scikit-learn).
    # Assumes the values follow the training column order -- TODO confirm.
    samples = [
        [5.1, 3.5, 1.4, 0.2],
        [6.4, 3.2, 4.5, 1.5],
        [6.7, 3.3, 5.7, 2.5],
    ]

    # One predict call and one printed line per sample, as before.
    for sample in samples:
        print(model.predict([sample]))

### Creating Components from the Above Functions

In [5]:
# Wrap each step function as a container component; all three share BASE_IMAGE.
getDataOP, preProcessDataOP, predictModelOP = (
    components.func_to_container_op(step_func, base_image=BASE_IMAGE)
    for step_func in (get_data, pre_process_data, predict_model)
)

### Defining the Pipeline

In [6]:
@dsl.pipeline(
    name="Sample Hello world pipeline",
    description="A sample pipeline to demonstrate multi-step model training, evaluation, export using Iris data classification",
)
def sample_pipeline(input_path: str = BUCKET_NAME):
    # Three steps chained through their outputs: each step receives the
    # `.output` of the previous one as its `input_path`.
    fetch_step = getDataOP(input_path)
    train_step = preProcessDataOP(input_path=fetch_step.output)
    predictModelOP(input_path=train_step.output)

### Connecting to the KFP Client

In [7]:
# Client used below to create the experiment and submit the run. No arguments
# are given, so connection settings come from kfp's defaults -- presumably the
# in-cluster configuration when run from a Kubeflow notebook; verify.
client = kfp.Client()

### Creating an Experiment

In [8]:
# Create the experiment that groups the pipeline runs, inside NAMESPACE.
experiment = client.create_experiment(
    name="sample-iris-pipeline",
    description="A sample pipeline to demonstrate multi-step model training, evaluation, export using Iris data classification",
    namespace=NAMESPACE,
)

### Creating a Run

In [9]:
# Compile `sample_pipeline` and submit it as a run of the
# "sample-iris-pipeline" experiment. The namespace is passed explicitly so the
# run targets the same namespace the experiment was created in, instead of
# whatever default namespace the client would otherwise fall back to.
client.create_run_from_pipeline_func(
    sample_pipeline,
    arguments={},  # no overrides: the pipeline uses its default `input_path`
    run_name="sample-iris-demo-run-1",
    experiment_name="sample-iris-pipeline",
    namespace=NAMESPACE,
)

RunPipelineResult(run_id=aa7a38d1-a751-413f-8474-2bde54442c7e)