### **02_model_training.ipynb**
### **Model Training Pipeline**

* ##### 01 - Install packages
* ##### 02 - Import packages
* ##### 03 - Create tasks
* ##### 04 - Create pipeline
* ##### 05 - Create pipeline yaml
* ##### 06 - Create pipeline run
* ##### 07 - Run inference

### 01 - Install packages

In [None]:
!pip install kfp-tekton==1.5.9 requests==2.31.0 tensorflow==2.15.0

### 02 - Import packages

In [None]:
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))

import json
import kfp
import kfp_tekton
import requests
import tensorflow as tf

from PIL import Image

from components.create_model     import create_model
from components.delete_artifacts import delete_artifacts
from components.download_dataset import download_dataset
from components.evaluate_model   import evaluate_model
from components.prepare_dataset  import prepare_dataset
from components.train_model      import train_model
from components.upload_artifacts import upload_artifacts
from components.upload_model     import upload_model

### 03 - Create tasks

In [None]:
# Container image used as `base_image` by every component created in this section.
# NOTE(review): the reference carries no tag or digest - consider pinning one for reproducible runs.
task_base_image = 'registry.access.redhat.com/ubi9/python-311'

In [None]:
# Turn the `download_dataset` function into a pipeline component running on the shared base image.
download_dataset_op = kfp.components.create_component_from_func(
    func       = download_dataset,
    base_image = task_base_image
)

In [None]:
# Turn the `prepare_dataset` function into a pipeline component running on the shared base image.
prepare_dataset_op = kfp.components.create_component_from_func(
    func       = prepare_dataset,
    base_image = task_base_image
)

In [None]:
# Turn the `create_model` function into a pipeline component; TensorFlow is requested via
# `packages_to_install`, pinned to the same version as the notebook install in section 01.
create_model_op = kfp.components.create_component_from_func(
    func                = create_model,
    base_image          = task_base_image,
    packages_to_install = ['tensorflow==2.15.0']
)

In [None]:
# Turn the `train_model` function into a pipeline component; TensorFlow is requested via
# `packages_to_install`, pinned to the same version as the notebook install in section 01.
train_model_op = kfp.components.create_component_from_func(
    func                = train_model,
    base_image          = task_base_image,
    packages_to_install = ['tensorflow==2.15.0']
)

In [None]:
# Turn the `evaluate_model` function into a pipeline component; TensorFlow is requested via
# `packages_to_install`, pinned to the same version as the notebook install in section 01.
evaluate_model_op = kfp.components.create_component_from_func(
    func                = evaluate_model,
    base_image          = task_base_image,
    packages_to_install = ['tensorflow==2.15.0']
)

In [None]:
# Turn the `upload_artifacts` function into a pipeline component; a pinned boto3 is requested
# via `packages_to_install` (presumably for the S3 upload - see components/upload_artifacts).
upload_artifacts_op = kfp.components.create_component_from_func(
    func                = upload_artifacts,
    base_image          = task_base_image,
    packages_to_install = ['boto3==1.34.28']
)

In [None]:
# Turn the `upload_model` function into a pipeline component; pinned boto3 and OpenVINO are
# requested via `packages_to_install` (presumably for model conversion and S3 upload - see
# components/upload_model).
upload_model_op = kfp.components.create_component_from_func(
    func                = upload_model,
    base_image          = task_base_image,
    packages_to_install = ['boto3==1.34.28', 'openvino==2023.3.0']
)

In [None]:
# Turn the `delete_artifacts` function into a pipeline component running on the shared base image.
delete_artifacts_op = kfp.components.create_component_from_func(
    func       = delete_artifacts,
    base_image = task_base_image
)

### 04 - Create pipeline

In [None]:
# Passed to the `@kfp.dsl.pipeline` decorator below; the name is also reused as the
# file name of the compiled YAML in section 05.
pipeline_name        = '02_model_training'
pipeline_description = 'Model Training Pipeline'

In [None]:
# Model training pipeline. One PVC is created and mounted at /pipeline in every task;
# the execution order is expressed solely through `.after()` on that shared volume:
#
#   create_pvc -> download_dataset -> prepare_dataset -+
#   create_pvc -> create_model ------------------------+-> train_model -> evaluate_model
#   evaluate_model -> upload_artifacts + upload_model -> delete_artifacts
#
# The six s3_* parameters are forwarded unchanged to both upload tasks.
@kfp.dsl.pipeline(name = pipeline_name, description = pipeline_description)
def pipeline(
    s3_service_name      : str,
    s3_endpoint_url      : str,
    s3_access_key_id     : str,
    s3_secret_access_key : str,
    s3_region            : str,
    s3_bucket            : str
):

    import os

    # Scratch volume every task reads from and writes to.
    create_pvc_task = kfp.dsl.VolumeOp(
        modes         = kfp.dsl.VOLUME_MODE_RWO,
        size          = '1Gi',
        resource_name = 'pvc',
        name          = 'create_pvc'
    )

    shared_volume = create_pvc_task.volume
    mount_path    = os.path.join('/', 'pipeline')

    def mount_shared_volume(task, *upstream_tasks):
        # Chain one `.after()` call per upstream task, in the order given, then
        # mount the resulting volume into `task`.
        ordered_volume = shared_volume
        for upstream_task in upstream_tasks:
            ordered_volume = ordered_volume.after(upstream_task)
        task.add_pvolumes({ mount_path : ordered_volume })
        return task

    # Connection settings shared by the two upload tasks.
    s3_settings = {
        's3_service_name'      : s3_service_name,
        's3_endpoint_url'      : s3_endpoint_url,
        's3_access_key_id'     : s3_access_key_id,
        's3_secret_access_key' : s3_secret_access_key,
        's3_region'            : s3_region,
        's3_bucket'            : s3_bucket
    }

    # Dataset branch.
    download_dataset_task = mount_shared_volume(download_dataset_op(), create_pvc_task)
    prepare_dataset_task  = mount_shared_volume(prepare_dataset_op(), download_dataset_task)

    # Model branch; only needs the volume to exist.
    create_model_task = mount_shared_volume(create_model_op(), create_pvc_task)

    # Training waits for both branches, evaluation for training.
    train_model_task    = mount_shared_volume(train_model_op(), prepare_dataset_task, create_model_task)
    evaluate_model_task = mount_shared_volume(evaluate_model_op(), train_model_task)

    # Both uploads start once evaluation has finished.
    upload_artifacts_task = mount_shared_volume(upload_artifacts_op(**s3_settings), evaluate_model_task)
    upload_model_task     = mount_shared_volume(upload_model_op(**s3_settings), evaluate_model_task)

    # Clean-up runs last, after both uploads.
    mount_shared_volume(delete_artifacts_op(), upload_artifacts_task, upload_model_task)

### 05 - Create pipeline yaml

In [None]:
# Location of the compiled pipeline definition: yaml/<pipeline_name>.yaml
pipeline_package_path = os.path.join('yaml', f'{ pipeline_name }.yaml')

# Create the output directory up front so compiling does not fail on a checkout without `yaml/`.
os.makedirs(os.path.dirname(pipeline_package_path), exist_ok = True)

In [None]:
# Compile the `pipeline` function with the Tekton compiler into the package path chosen above.
tekton_compiler = kfp_tekton.compiler.TektonCompiler()
tekton_compiler.compile(pipeline_func = pipeline, package_path = pipeline_package_path)

### 06 - Create pipeline run

In [None]:
# Placeholders - replace with the Kubeflow Pipelines host and an access token before running.
# Both are handed to `kfp_tekton.TektonClient` in the run cell below.
kubeflow_host  = '<kubeflow_host>'
kubeflow_token = '<kubeflow_token>'

In [None]:
# Run arguments; the keys match the parameters of the `pipeline` function one-to-one.
# The bracketed values are placeholders to fill in before submitting a run.
# NOTE(review): avoid committing real S3 credentials with the notebook.
pipeline_arguments = {
    's3_service_name'      : 's3',
    's3_endpoint_url'      : '<s3_endpoint_url>',
    's3_access_key_id'     : '<s3_access_key_id>',
    's3_secret_access_key' : '<s3_secret_access_key>',
    's3_region'            : '<s3_region>',
    's3_bucket'            : '<s3_bucket>',
}

In [None]:
# Connect to the Kubeflow host with the token, then submit the compiled package as a new run.
tekton_client = kfp_tekton.TektonClient(host = kubeflow_host, existing_token = kubeflow_token)
tekton_client.create_run_from_pipeline_package(
    arguments     = pipeline_arguments,
    pipeline_file = pipeline_package_path
)

### 07 - Run inference

In [None]:
# Placeholders - replace with the model's inference URL and its access token before running.
# `run_inference` posts to the URL and sends the token as a Bearer `Authorization` header.
inference_endpoint       = '<inference_endpoint>'
inference_endpoint_token = '<inference_endpoint_token>'

In [None]:
def run_inference(image, *, timeout = 60):
    """
    Show an image, send it to the inference endpoint and print the predicted label.

    Reads the module-level `inference_endpoint` and `inference_endpoint_token`.

    Args:
        image   : path of the image file to classify.
        timeout : seconds to wait for the endpoint before `requests` gives up.

    Raises:
        requests.exceptions.HTTPError : the endpoint answered with a 4xx/5xx status.
        requests.exceptions.Timeout   : the endpoint did not answer within `timeout` seconds.
    """

    # Release the preview's file handle once the viewer has been launched.
    with Image.open(image) as preview:
        preview.show()

    # Resize to 160x160 so the data matches the 'shape' declared in the payload;
    # the wrapping list adds the batch dimension of size 1.
    pixels = tf.keras.utils.load_img(image, target_size = (160, 160))
    pixels = tf.keras.utils.img_to_array(pixels)
    batch  = [pixels.tolist()]

    headers = {
        'Content-Type'  : 'application/json',
        'Authorization' : f'Bearer { inference_endpoint_token }'
    }

    payload = json.dumps({
        'inputs' : [
            {
                'name'     : 'layer_0_input',
                'datatype' : 'FP32',
                'data'     : batch,
                'shape'    : [1, 160, 160, 3]
            }
        ]
    })

    # Without a timeout `requests` waits indefinitely on an unresponsive endpoint.
    response = requests.post(
        url     = inference_endpoint,
        headers = headers,
        data    = payload,
        timeout = timeout
    )

    # Surface HTTP errors (bad token, wrong URL, ...) directly instead of failing
    # later while parsing an error page as a prediction.
    response.raise_for_status()
    result = response.json()

    # NOTE(review): presumably a single score in [0, 1] where values below 0.5 mean
    # 'cat' - confirm against the training code.
    prediction_score = result['outputs'][0]['data'][0]
    prediction       = 'cat' if prediction_score < 0.5 else 'dog'

    print(f'prediction : { prediction }')
    print(f'score      : { prediction_score }')

In [None]:
# Classify the sample image at images/cat.png.
run_inference('images/cat.png')

In [None]:
# Classify the sample image at images/dog.png.
run_inference('images/dog.png')