# MNIST classification

In [15]:
EXPERIMENT_NAME = 'MNIST-Classification'

## Import package

In [16]:
import kfp
from kfp.components import InputPath, InputTextFile, OutputPath, OutputTextFile
import kfp.dsl as dsl
from kfp.components import func_to_container_op
from constants import NAMESPACE, HOST
from utils import helpers

## Load commponents

In [35]:
download_data_op = kfp.components.load_component_from_file('../components/download_data/component.yaml')
help(download_data_op)


load_data_op = kfp.components.load_component_from_file('../components/load_data/component.yaml')
help(load_data_op)


preprocess_data_op = kfp.components.load_component_from_file('../components/preprocess_data/component.yaml')
help(preprocess_data_op)

train_op = kfp.components.load_component_from_file('../components/train/component.yaml')
help(train_op)

eval_op = kfp.components.load_component_from_file('../components/eval/component.yaml')
help(eval_op)

Help on function download_data:

download_data(download_link: str)
    download_data
    Download data from URL and save

Help on function load_data:

load_data(data_path: str)
    load_data
    Load data and combine

Help on function preprocess_data:

preprocess_data(load_data_path: str, test_size: float)
    preprocess_data
    preprocess data

Help on function train:

train(preprocess_data_path: str, hidden_dim_1: int, hidden_dim_2: int, dropout: float, learning_rate: float, epochs: int, batch_size: int)
    train
    train the model with input data

Help on function eval:

eval(preprocess_data_path: str, model_path: 'Model')
    eval
    evaluate the model with test data



## Build the pipeline

In [45]:
@dsl.pipeline(
  name='MNIST Classification Pipeline',
  description='Performs download, load, preprocessing and training'
)
def pipeline(hidden_dim_1, hidden_dim_2, dropout,
  learning_rate, epochs, batch_size, test_size):

  download_task = download_data_op(
  download_link='https://github.com/kubeflow/examples/blob/master/digit-recognition-kaggle-competition/data/{file}.csv.zip?raw=true'
  )
  
  load_task = load_data_op(
    data_path=download_task.outputs['data_path']
  )
  
  preprocess_task = preprocess_data_op(
    load_data_path=load_task.outputs['load_data_path'],
    test_size=test_size
  )
  
  train_task = train_op(
    preprocess_data_path=preprocess_task.outputs['preprocess_data_path'],
    hidden_dim_1=hidden_dim_1,
    hidden_dim_2=hidden_dim_2,
    dropout=dropout,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=batch_size
  )
  
  print(train_task.outputs.keys())
  
  eval_task = eval_op(
    preprocess_data_path=preprocess_task.outputs['preprocess_data_path'],
    model_path=train_task.outputs['model_path']
  )

## Compile the pipeline

In [46]:
pipeline_func = pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'
import kfp.compiler as compiler
compiler.Compiler().compile(pipeline_func, pipeline_filename)

dict_keys(['Model Path', 'model_path'])


## Create a Kubeflow Experiment

In [47]:
client = kfp.Client(host=HOST, namespace=NAMESPACE)

try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except:
    experiment = client.create_experiment(EXPERIMENT_NAME)
    
print(experiment)

{'created_at': datetime.datetime(2022, 12, 17, 16, 15, 21, tzinfo=tzutc()),
 'description': None,
 'id': '163e318b-4e6f-4c85-99bc-36a7b157dd2f',
 'name': 'MNIST-Classification',
 'resource_references': None,
 'storage_state': 'STORAGESTATE_AVAILABLE'}


In [48]:
arguments = {
    'hidden_dim_1': 56,
    'hidden_dim_2': 100,
    'dropout': 0.5,
    'learning_rate': 0.001,
    'epochs': 2,
    'batch_size': 64, 
    'test_size': 0.2
}

run_name = pipeline_func.__name__ + ' run'
run_result = client.run_pipeline(experiment.id, 
                                 run_name, 
                                 pipeline_filename, 
                                 arguments)

print(experiment.id)
print(run_name)
print(pipeline_filename)
print(arguments)

163e318b-4e6f-4c85-99bc-36a7b157dd2f
pipeline run
pipeline.pipeline.zip
{'hidden_dim_1': 56, 'hidden_dim_2': 100, 'dropout': 0.5, 'learning_rate': 0.001, 'epochs': 2, 'batch_size': 64, 'test_size': 0.2}
