# In [None]:
import kfp
from kfp import dsl
from kfp.dsl import Output, Metrics, HTML

from typing import NamedTuple


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy', 'tensorflow']
)
def upload_data_batch(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
) -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """
    Load MNIST through Keras and upload it to a MinIO bucket as .npy files.

    The four arrays returned by ``keras.datasets.mnist.load_data()`` are
    written to ``/tmp`` and uploaded as ``x_train.npy``, ``y_train.npy``,
    ``x_test.npy`` and ``y_test.npy``.

    Args:
        minio_endpoint: ``host:port`` of the MinIO service (plain HTTP is used).
        minio_access_key: MinIO access key.
        minio_secret_key: MinIO secret key.
        minio_bucket: Name of the bucket that receives the four objects.

    Returns:
        A named tuple with the number of training samples, the number of
        test samples (both as floats) and the dataset version string.

    Raises:
        Exception: Whatever the MinIO client raises when an upload fails.
    """
    print("uploading data")
    # All imports stay inside the function: the component body is executed
    # on its own inside the container image.
    from collections import namedtuple

    from tensorflow import keras
    from minio import Minio
    import numpy as np

    minio_client = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    # Load MNIST dataset directly from Keras
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    arrays = {
        "x_train.npy": x_train,
        "y_train.npy": y_train,
        "x_test.npy": x_test,
        "y_test.npy": y_test,
    }

    for object_name, array in arrays.items():
        local_path = f"/tmp/{object_name}"
        np.save(local_path, array)
        try:
            minio_client.fput_object(minio_bucket, object_name, local_path)
        except Exception as e:
            # Re-raise so the pipeline stops here instead of letting the
            # following steps read missing or stale objects from the bucket.
            print(f"Failed to upload {object_name} to bucket {minio_bucket}: {e}")
            raise

    dataset_version = "1.0"

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy']
)
def get_data_batch(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
) -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """
    Download the dataset .npy files from the MinIO bucket and report their sizes.

    Fetches ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and
    ``y_test.npy`` into ``/tmp``, loads them with numpy and prints their
    shapes.

    Args:
        minio_endpoint: ``host:port`` of the MinIO service (plain HTTP is used).
        minio_access_key: MinIO access key.
        minio_secret_key: MinIO secret key.
        minio_bucket: Name of the bucket holding the four objects.

    Returns:
        A named tuple with the number of training samples, the number of
        test samples (both as floats) and the dataset version string.
    """
    print("getting data")
    # Only minio and numpy are needed here, so TensorFlow is neither
    # installed nor imported for this step.
    from collections import namedtuple

    from minio import Minio
    import numpy as np

    minio_client = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    files = ["x_train.npy", "y_train.npy", "x_test.npy", "y_test.npy"]
    for file in files:
        minio_client.fget_object(
            bucket_name=minio_bucket,
            object_name=file,
            file_path=f"/tmp/{file}"
        )

    # Load data from numpy files
    x_train = np.load("/tmp/x_train.npy")
    y_train = np.load("/tmp/y_train.npy")
    x_test = np.load("/tmp/x_test.npy")
    y_test = np.load("/tmp/y_test.npy")

    dataset_version = "1.0"

    print(f"x_train shape: {x_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    print(f"x_test shape: {x_test.shape}")
    print(f"y_test shape: {y_test.shape}")

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)
    
@dsl.component(base_image='python:3.10')
def get_latest_data():
    """
    Placeholder step for showcasing; it only prints a message.
    """
    message = "Adding latest data"
    print(message)
    

@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy']
)
def reshape_data(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
):
    """
    Prepare the image arrays for model building.

    Downloads x_train.npy and x_test.npy from the bucket, reshapes each to
    (-1, 28, 28, 1), divides by 255 and uploads the results under the same
    object names, replacing the previous objects.
    """
    print("reshaping data")

    from minio import Minio
    import numpy as np

    storage = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    image_objects = ("x_train.npy", "x_test.npy")

    # Phase 1: fetch both arrays before anything in the bucket is modified.
    images = {}
    for object_name in image_objects:
        local_path = "/tmp/" + object_name
        storage.fget_object(
            bucket_name=minio_bucket,
            object_name=object_name,
            file_path=local_path
        )
        images[object_name] = np.load(local_path)

    # Phase 2: add the single greyscale channel axis expected by the Keras
    # API (28x28 pixel images, channel = 1).
    for object_name in image_objects:
        images[object_name] = images[object_name].reshape(-1, 28, 28, 1)

    # Phase 3: scale pixel values from the 0-255 range down to 0-1.
    for object_name in image_objects:
        images[object_name] = images[object_name] / 255

    # Phase 4: save locally and upload back to MinIO.
    for object_name in image_objects:
        local_path = "/tmp/" + object_name
        np.save(local_path, images[object_name])
        storage.fput_object(
            bucket_name=minio_bucket,
            object_name=object_name,
            file_path=local_path
        )

@dsl.component(
    base_image='tensorflow/tensorflow:2.13.0',
    packages_to_install=['minio', 'pandas', 'numpy']
)
def model_building(
    metrics: Output[Metrics],
    ui_metadata: Output[HTML],
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str,
    no_epochs:int = 1,
    optimizer: str = "adam"):
    """
    Train, evaluate and export a small CNN digit classifier with the Keras API.

    The network is Conv2D(64, 3x3) -> MaxPool(2x2) -> Flatten -> Dense(64)
    -> Dense(32) -> Dense(10, softmax). Training and test arrays are read
    from the MinIO bucket. The trained model is saved to
    ``/tmp/detect-digits`` and uploaded under ``models/detect-digits/1/``
    in the same bucket.

    Args:
        metrics: Output artifact that receives accuracy and loss.
        ui_metadata: Output artifact that receives the confusion matrix and
            a markdown model report as JSON.
        minio_endpoint: ``host:port`` of the MinIO service (plain HTTP is used).
        minio_access_key: MinIO access key.
        minio_secret_key: MinIO secret key.
        minio_bucket: Bucket holding the dataset and receiving the model.
        no_epochs: Number of training epochs.
        optimizer: Optimizer identifier passed to ``model.compile``.
    """
    # Everything the step needs (imports and helpers) is defined inside the
    # function so that the component body is self-contained.
    import json
    import os

    from tensorflow import keras
    import tensorflow as tf
    from minio import Minio
    import numpy as np
    import pandas as pd

    minio_client = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    def fetch_array(object_name):
        """Download ``object_name`` from the bucket into /tmp and load it with numpy."""
        local_path = f"/tmp/{object_name}"
        minio_client.fget_object(
            bucket_name=minio_bucket,
            object_name=object_name,
            file_path=local_path
        )
        return np.load(local_path)

    def upload_local_directory_to_minio(local_path, bucket_name, minio_path):
        """Upload every file below ``local_path`` to ``bucket_name`` under ``minio_path``."""
        if not os.path.isdir(local_path):
            raise NotADirectoryError(local_path)

        # Drop the trailing slash so joined keys never contain "//".
        prefix = minio_path.rstrip("/")
        for dirpath, _dirnames, filenames in os.walk(local_path):
            relative_dir = os.path.relpath(dirpath, local_path).replace(os.sep, "/")
            key_dir = prefix if relative_dir == "." else f"{prefix}/{relative_dir}"
            for filename in sorted(filenames):
                minio_client.fput_object(
                    bucket_name,
                    f"{key_dir}/{filename}",
                    os.path.join(dirpath, filename)
                )

    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28,28,1)))
    model.add(keras.layers.MaxPool2D(2, 2))

    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))

    model.add(keras.layers.Dense(32, activation='relu'))

    model.add(keras.layers.Dense(10, activation='softmax')) #output are 10 classes, numbers from 0-9

    # capture the model summary as text for the markdown report
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    metric_model_summary = "\n".join(stringlist)

    # compile the model - multi-class classification with integer labels
    model.compile(optimizer=optimizer,
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])

    x_train = fetch_array("x_train.npy")
    y_train = fetch_array("y_train.npy")

    # fit the model; the returned training history is not used afterwards
    model.fit(
      x=x_train,
      y=y_train,
      epochs=no_epochs,
      batch_size=20,
    )

    x_test = fetch_array("x_test.npy")
    y_test = fetch_array("y_test.npy")

    # Test the model against the test dataset
    # Returns the loss value & metrics values for the model in test mode.
    model_loss, model_accuracy = model.evaluate(x=x_test,y=y_test)

    # Confusion Matrix

    # Generates output predictions for the input samples.
    test_predictions = model.predict(x=x_test)

    # The prediction outputs 10 values per sample; the index of the highest
    # value is the predicted digit.
    test_predictions = np.argmax(test_predictions,axis=1)

    # generate confusion matrix and flatten it to (target, predicted, count) rows
    confusion_matrix = tf.math.confusion_matrix(labels=y_test,predictions=test_predictions)
    confusion_matrix = confusion_matrix.numpy()
    vocab = list(np.unique(y_test))
    data = []
    for target_index, target_row in enumerate(confusion_matrix):
        for predicted_index, count in enumerate(target_row):
            data.append((vocab[target_index], vocab[predicted_index], count))

    df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])
    cm_csv = df_cm.to_csv(header=False, index=False)

    markdown_report = '''# Model Overview
## Model Summary

```
{}
```

## Model Performance

**Accuracy**: {}
**Loss**: {}

'''.format(metric_model_summary,model_accuracy,model_loss)

    # NOTE(review): this payload is JSON although the artifact is declared as
    # HTML - confirm the pipeline UI renders it as intended.
    metadata_dict = {
        "outputs": [
            {
                "type": "confusion_matrix",
                "format": "csv",
                "schema": [
                    {'name': 'target', 'type': 'CATEGORY'},
                    {'name': 'predicted', 'type': 'CATEGORY'},
                    {'name': 'count', 'type': 'NUMBER'},
                  ],
                "target_col" : "actual",
                "predicted_col" : "predicted",
                "source": cm_csv,
                "storage": "inline",
                "labels": [0,1,2,3,4,5,6,7,8,9]
            },
            {
                'storage': 'inline',
                'source': markdown_report,
                'type': 'markdown',
            }
        ]
    }

    metrics_dict = {
      'metrics': [{
          'name': 'model_accuracy',
          'numberValue':  float(model_accuracy),
          'format' : "PERCENTAGE"
        },{
          # The loss is not a ratio, so it is reported as a raw number.
          'name': 'model_loss',
          'numberValue':  float(model_loss),
          'format' : "RAW"
        }]}

    ### Save model to minIO

    keras.models.save_model(model,"/tmp/detect-digits")

    upload_local_directory_to_minio("/tmp/detect-digits",minio_bucket,"models/detect-digits/1/")

    print("Saved model to minIO")

    # Record the scalars on the Metrics artifact itself, in addition to the
    # JSON file written below.
    metrics.log_metric("model_accuracy", float(model_accuracy))
    metrics.log_metric("model_loss", float(model_loss))

    with open(metrics.path, "w") as f:
        json.dump(metrics_dict, f)

    with open(ui_metadata.path, "w") as f:
        json.dump(metadata_dict, f)

@dsl.component(
    base_image='python:3.10',
    packages_to_install=['kubernetes', 'kserve']
)
def model_serving():
    """
    Create the KServe InferenceService that serves the digit model.

    Builds a v1beta1 InferenceService named 'digits-recognizer-epochs' in
    the default target namespace, with a TensorFlow predictor whose storage
    URI is s3://mlpipeline/models/detect-digits/, and submits it through
    the KServe client.
    """
    from kubernetes import client as k8s_client
    from kserve import (
        KServeClient,
        V1beta1InferenceService,
        V1beta1InferenceServiceSpec,
        V1beta1PredictorSpec,
        V1beta1TFServingSpec,
        constants,
        utils,
    )

    target_namespace = utils.get_default_target_namespace()

    service_name = 'digits-recognizer-epochs'
    api_version = f"{constants.KSERVE_GROUP}/v1beta1"

    # Predictor: TensorFlow Serving reading the model from object storage
    # with the dedicated service account.
    tf_predictor = V1beta1PredictorSpec(
        service_account_name="sa-minio-kserve",
        tensorflow=V1beta1TFServingSpec(
            storage_uri="s3://mlpipeline/models/detect-digits/"
        ),
    )

    # Istio sidecar injection is switched off through the annotation.
    service_metadata = k8s_client.V1ObjectMeta(
        name=service_name,
        namespace=target_namespace,
        annotations={'sidecar.istio.io/inject': 'false'},
    )

    inference_service = V1beta1InferenceService(
        api_version=api_version,
        kind="InferenceService",
        metadata=service_metadata,
        spec=V1beta1InferenceServiceSpec(predictor=tf_predictor),
    )

    kserve_client = KServeClient()
    kserve_client.create(inference_service)



@dsl.pipeline(
    name='digits-recognizer-pipeline-sequential',
    description='Detect digits'
)
def output_test(
    no_epochs:int, 
    optimizer:str,
    minio_endpoint: str = "minio-service.kubeflow:9000",
    minio_access_key: str = "minio",
    minio_secret_key: str = "minio123",
    minio_bucket: str = "mlpipeline"
):
    # Sequential pipeline: upload -> fetch -> latest data -> reshape ->
    # train -> serve. The steps exchange data through the MinIO bucket, not
    # through task outputs, so the order is enforced explicitly below.

    # Connection settings shared by every step that talks to MinIO.
    minio_settings = {
        "minio_endpoint": minio_endpoint,
        "minio_access_key": minio_access_key,
        "minio_secret_key": minio_secret_key,
        "minio_bucket": minio_bucket,
    }

    upload_task = upload_data_batch(**minio_settings)
    fetch_task = get_data_batch(**minio_settings)
    latest_data_task = get_latest_data()
    reshape_task = reshape_data(**minio_settings)
    training_task = model_building(
        no_epochs=no_epochs,
        optimizer=optimizer,
        **minio_settings
    )
    serving_task = model_serving()

    # Chain the tasks so that each one runs after its predecessor.
    ordered_tasks = [
        upload_task,
        fetch_task,
        latest_data_task,
        reshape_task,
        training_task,
        serving_task,
    ]
    for upstream, downstream in zip(ordered_tasks, ordered_tasks[1:]):
        downstream.after(upstream)


if __name__ == "__main__":
    # Script entry point: either submit the pipeline as a run right away, or
    # compile it to YAML and upload it as a new pipeline version.
    import kfp
    # Imported explicitly so that the compile branch does not depend on
    # another kfp module having loaded the submodule as a side effect.
    from kfp import compiler

    try:
        client = kfp.Client()

        experiment_name = "epochs"
        try:
            experiment = client.create_experiment(name=experiment_name)
        except Exception:
            # Fall back to looking the experiment up by name when it cannot
            # be created.
            experiment = client.get_experiment(experiment_name=experiment_name)

        # The attribute holding the id is looked up under both names.
        exp_id = getattr(experiment, 'experiment_id', getattr(experiment, 'id', None))
        print(f"Using Experiment ID: {exp_id}")

        arguments = {
            "no_epochs": 2,
            "optimizer": "adam",
            "minio_endpoint": "minio-service.kubeflow:9000",
            "minio_access_key": "minio",
            "minio_secret_key": "minio123",
            "minio_bucket": "mlpipeline"
        }

        # True: submit a run immediately. False: compile and upload instead.
        run_directly = True

        if run_directly:
            run_result = client.create_run_from_pipeline_func(
                output_test,
                arguments=arguments,
                experiment_id=exp_id,
                run_name="digits-recognizer-pipeline-epochs",
                enable_caching=False,
                service_account="default-editor"
            )
            print(f"Run created successfully. Run ID: {getattr(run_result, 'run_id', 'unknown')}")

        else:
            compiler.Compiler().compile(pipeline_func=output_test, package_path='output_test_epochs.yaml')
            client.upload_pipeline_version(
                pipeline_package_path='output_test_epochs.yaml',
                pipeline_version_name="0.5",
                pipeline_name='digits-recognizer-pipeline-epochs',
                description="just for testing"
            )
            print("Pipeline uploaded successfully.")

    except Exception as e:
        print(f"CRITICAL FAILURE: {e}")
        # Bare raise keeps the original traceback intact.
        raise
