# In [None]:
import kfp
from kfp import dsl
from kfp.dsl import Output, Metrics, HTML

from typing import NamedTuple


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy', 'tensorflow']
)
def upload_test_data(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
) -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """Download MNIST through Keras and upload it to a MinIO bucket as .npy files.

    The four arrays are first written to /tmp and then uploaded under the
    object names x_train.npy, y_train.npy, x_test.npy and y_test.npy.

    Args:
        minio_endpoint: Address of the MinIO service; the client is created
            with secure=False.
        minio_access_key: Access key used to authenticate against MinIO.
        minio_secret_key: Secret key used to authenticate against MinIO.
        minio_bucket: Name of the bucket that receives the four objects.

    Returns:
        A named tuple holding the number of training samples, the number of
        test samples (both as float) and the dataset version string.

    Raises:
        Exception: Any error raised by the MinIO client during upload is
            logged and re-raised so the pipeline step fails visibly.
    """
    print("uploading test data")
    from collections import namedtuple

    from tensorflow import keras
    from minio import Minio
    import numpy as np

    minio_client = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    # Load the MNIST dataset directly from Keras.
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Object name -> array; the same name is used for the local file in /tmp.
    arrays = {
        "x_train.npy": x_train,
        "y_train.npy": y_train,
        "x_test.npy": x_test,
        "y_test.npy": y_test,
    }
    for object_name, data in arrays.items():
        np.save(f"/tmp/{object_name}", data)

    try:
        for object_name in arrays:
            minio_client.fput_object(minio_bucket, object_name, f"/tmp/{object_name}")
    except Exception as e:
        # fput_object overwrites existing objects, so an exception here is a
        # genuine failure (connectivity, credentials, missing bucket, ...).
        # Continuing would let later steps read stale objects from the bucket.
        print(f"Failed to upload datasets to bucket '{minio_bucket}': {e}")
        raise

    dataset_version = "1.0"

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy']
)
def get_test_data(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
) -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """Download the dataset .npy files from a MinIO bucket and report their sizes.

    The objects x_train.npy, y_train.npy, x_test.npy and y_test.npy are
    fetched into /tmp, loaded with NumPy and their shapes are printed.

    Args:
        minio_endpoint: Address of the MinIO service; the client is created
            with secure=False.
        minio_access_key: Access key used to authenticate against MinIO.
        minio_secret_key: Secret key used to authenticate against MinIO.
        minio_bucket: Name of the bucket that holds the four objects.

    Returns:
        A named tuple holding the number of training samples, the number of
        test samples (both as float) and the dataset version string.
    """
    print("getting test data")
    from collections import namedtuple

    from minio import Minio
    import numpy as np

    minio_client = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    files = ["x_train.npy", "y_train.npy", "x_test.npy", "y_test.npy"]
    for file in files:
        minio_client.fget_object(
            bucket_name=minio_bucket,
            object_name=file,
            file_path=f"/tmp/{file}"
        )

    # Load the data from the local files.
    x_train = np.load("/tmp/x_train.npy")
    y_train = np.load("/tmp/y_train.npy")
    x_test = np.load("/tmp/x_test.npy")
    y_test = np.load("/tmp/y_test.npy")

    dataset_version = "1.0"

    print(f"x_train shape: {x_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    print(f"x_test shape: {x_test.shape}")
    print(f"y_test shape: {y_test.shape}")

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy']
)
def test_models(
    ui_metadata: Output[HTML],
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    minio_bucket: str
):
    """Reshape and scale the image arrays stored in MinIO, then write them back.

    x_train.npy and x_test.npy are downloaded, reshaped to (-1, 28, 28, 1),
    divided by 255 and uploaded again under the same object names, replacing
    the previous objects. An empty JSON object is written to the
    ``ui_metadata`` artifact path.

    Args:
        ui_metadata: Output artifact whose path receives the JSON document.
        minio_endpoint: Address of the MinIO service; the client is created
            with secure=False.
        minio_access_key: Access key used to authenticate against MinIO.
        minio_secret_key: Secret key used to authenticate against MinIO.
        minio_bucket: Name of the bucket that holds the image arrays.
    """
    print("testing models")

    from minio import Minio
    import numpy as np
    import json

    storage = Minio(
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=False
    )

    image_objects = ("x_train.npy", "x_test.npy")

    # Phase 1: fetch each object from MinIO and load it into memory.
    images = {}
    for object_name in image_objects:
        local_path = f"/tmp/{object_name}"
        storage.fget_object(
            bucket_name=minio_bucket,
            object_name=object_name,
            file_path=local_path
        )
        images[object_name] = np.load(local_path)

    # Phase 2: reshape to 28x28 greyscale images with one channel, which is
    # the layout the Keras API expects.
    images = {
        object_name: pixels.reshape(-1, 28, 28, 1)
        for object_name, pixels in images.items()
    }

    # Phase 3: pixel values range from 0 to 255; dividing by 255 maps them
    # onto the interval 0-1.
    images = {
        object_name: pixels / 255
        for object_name, pixels in images.items()
    }

    # Phase 4: save each processed array locally and upload it back to MinIO.
    for object_name, pixels in images.items():
        local_path = f"/tmp/{object_name}"
        np.save(local_path, pixels)
        storage.fput_object(
            bucket_name=minio_bucket,
            object_name=object_name,
            file_path=local_path
        )

    # The metadata document is currently empty.
    with open(ui_metadata.path, "w") as metadata_file:
        json.dump({}, metadata_file)
        

@dsl.pipeline(
    name='test_digits-recognizer-pipeline',
    description='Test suite for detect digits'
)
def output_test(
    minio_endpoint: str = "minio-service.kubeflow:9000",
    minio_access_key: str = "minio",
    minio_secret_key: str = "minio123",
    minio_bucket: str = "mlpipeline"
):
    """Run upload_test_data, get_test_data and test_models strictly in sequence.

    Every step receives the same four MinIO connection settings that are
    passed to the pipeline.
    """
    # All three components take exactly the same MinIO settings.
    minio_settings = {
        "minio_endpoint": minio_endpoint,
        "minio_access_key": minio_access_key,
        "minio_secret_key": minio_secret_key,
        "minio_bucket": minio_bucket,
    }

    upload_task = upload_test_data(**minio_settings)
    fetch_task = get_test_data(**minio_settings)
    check_task = test_models(**minio_settings)

    # No data flows between the tasks, so the execution order has to be
    # declared explicitly: upload -> fetch -> check.
    fetch_task.after(upload_task)
    check_task.after(fetch_task)


if __name__ == "__main__":
    # Script entry point: either submit the pipeline as a run right away or
    # compile it and upload the package as a new pipeline version.
    client = kfp.Client()

    arguments = {
        "minio_endpoint": "minio-service.kubeflow:9000",
        "minio_access_key": "minio",
        "minio_secret_key": "minio123",
        "minio_bucket": "mlpipeline"
    }

    # 1 -> create a run directly; any other value -> compile and upload.
    run_directly = 1

    if run_directly == 1:
        experiment_name = "test_suite"
        try:
            experiment = client.create_experiment(name=experiment_name)
        except Exception:
            # Fall back to looking the experiment up by name when it cannot
            # be created.
            experiment = client.get_experiment(experiment_name=experiment_name)

        # The attribute holding the ID is looked up under both names.
        exp_id = getattr(experiment, 'experiment_id', getattr(experiment, 'id', None))
        print(f"Using Experiment ID: {exp_id}")

        client.create_run_from_pipeline_func(
            output_test,
            arguments=arguments,
            experiment_id=exp_id,
            run_name="test-digits-recognizer-pipeline",
            service_account="default-editor",
            enable_caching=False
        )
    else:
        # Compile and upload must use the same file; previously the upload
        # pointed at a different path than the one just compiled.
        package_path = 'output_test_sequential.yaml'
        kfp.compiler.Compiler().compile(
            pipeline_func=output_test,
            package_path=package_path
        )
        client.upload_pipeline_version(
            pipeline_package_path=package_path,
            pipeline_version_name="0.4",
            pipeline_name='test-digits-recognizer-pipeline',
            description="just for testing"
        )
