In [None]:
import kfp
from kfp import dsl
from kfp.dsl import Output, Metrics, ClassificationMetrics

from typing import NamedTuple


@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy', 'tensorflow']
)
def upload_data_batch() -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """
    Download MNIST through Keras and publish the four arrays to MinIO.

    The train/test images and labels are written to ``/tmp`` as ``.npy``
    files and then uploaded to the ``mlpipeline`` bucket under the object
    names ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and ``y_test.npy``.

    An upload failure is reported on stdout but does not fail the step.

    Returns:
        A named tuple ``Outputs`` with ``datapoints_training`` and
        ``datapoints_test`` (sample counts converted to float) and
        ``dataset_version`` (the hard-coded string ``"1.0"``).
    """
    print("uploading data")
    # Imports are kept local to the component function.
    from collections import namedtuple

    from tensorflow import keras
    from minio import Minio
    import numpy as np

    minio_client = Minio(
        "10.80.0.197:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False
    )
    minio_bucket = "mlpipeline"

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Object name in the bucket -> array; the same name is used for the
    # temporary file under /tmp.
    arrays = {
        "x_train.npy": x_train,
        "y_train.npy": y_train,
        "x_test.npy": x_test,
        "y_test.npy": y_test,
    }
    for object_name, array in arrays.items():
        np.save(f"/tmp/{object_name}", array)

    try:
        for object_name in arrays:
            minio_client.fput_object(minio_bucket, object_name, f"/tmp/{object_name}")
    except Exception as e:
        # An existing object does not make fput_object raise (the upload
        # overwrites it), so reaching this branch means a genuine failure
        # such as an unreachable server or a missing bucket. The step is kept
        # non-fatal as before, but the message now says what happened.
        # NOTE(review): consider re-raising so the pipeline stops here instead
        # of continuing with whatever is already stored in the bucket.
        print(f"Could not upload datasets to MinIO: {e}")

    dataset_version = "1.0"

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)


@dsl.component(
    base_image='python:3.10',
    # tensorflow is not listed: this step only needs minio and numpy.
    packages_to_install=['minio', 'numpy']
)
def get_data_batch() -> NamedTuple('Outputs', [('datapoints_training', float),('datapoints_test', float),('dataset_version', str)]):
    """
    Download the four MNIST arrays from MinIO and report their sizes.

    ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and ``y_test.npy`` are
    fetched from the ``mlpipeline`` bucket into ``/tmp``, loaded with NumPy,
    and their shapes are printed.

    Returns:
        A named tuple ``Outputs`` with ``datapoints_training`` and
        ``datapoints_test`` (sample counts converted to float) and
        ``dataset_version`` (the hard-coded string ``"1.0"``).
    """
    print("getting data")
    # Imports are kept local to the component function.
    from collections import namedtuple

    from minio import Minio
    import numpy as np

    minio_client = Minio(
        "10.80.0.197:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False
    )
    minio_bucket = "mlpipeline"

    files = ["x_train.npy", "y_train.npy", "x_test.npy", "y_test.npy"]
    for file in files:
        minio_client.fget_object(minio_bucket, file, f"/tmp/{file}")

    x_train = np.load("/tmp/x_train.npy")
    y_train = np.load("/tmp/y_train.npy")
    x_test = np.load("/tmp/x_test.npy")
    y_test = np.load("/tmp/y_test.npy")

    dataset_version = "1.0"

    print(f"x_train shape: {x_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    print(f"x_test shape: {x_test.shape}")
    print(f"y_test shape: {y_test.shape}")

    outputs_tuple = namedtuple('Outputs', ['datapoints_training', 'datapoints_test', 'dataset_version'])
    return outputs_tuple(float(x_train.shape[0]), float(x_test.shape[0]), dataset_version)
    
@dsl.component(base_image='python:3.10')
def get_latest_data():
    """
    Placeholder step used for demonstration.

    It does no data processing; it only writes one line to stdout.
    """
    status_message = "Adding latest data"
    print(status_message)
    

@dsl.component(
    base_image='python:3.10',
    packages_to_install=['minio', 'numpy'],
)
def reshape_data():
    """
    Prepare the image arrays for the model.

    Downloads ``x_train.npy`` and ``x_test.npy`` from the ``mlpipeline``
    bucket, reshapes each to ``(-1, 28, 28, 1)``, divides the values by 255,
    and uploads the results as ``x_train_reshaped.npy`` and
    ``x_test_reshaped.npy``.
    """
    print("reshaping data")

    from minio import Minio
    import numpy as np

    bucket_name = "mlpipeline"
    storage = Minio(
        "10.80.0.197:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False,
    )

    splits = ("x_train", "x_test")

    # Phase 1: fetch and load both image arrays.
    images = {}
    for split in splits:
        downloaded_path = f"/tmp/{split}.npy"
        storage.fget_object(bucket_name, f"{split}.npy", downloaded_path)
        images[split] = np.load(downloaded_path)

    # Phase 2: add a trailing channel axis and scale the values by 1/255.
    for split in splits:
        images[split] = images[split].reshape(-1, 28, 28, 1) / 255

    # Phase 3: write each prepared array to /tmp and upload it.
    for split in splits:
        object_name = f"{split}_reshaped.npy"
        prepared_path = f"/tmp/{object_name}"
        np.save(prepared_path, images[split])
        storage.fput_object(bucket_name, object_name, prepared_path)

    print("Datos remodelados y guardados como x_train_reshaped.npy y x_test_reshaped.npy")

@dsl.component(
    base_image='python:3.9',
    packages_to_install=['minio', 'numpy', 'tensorflow==2.13.0'],
)
def model_building(
    no_epochs: int,
    optimizer: str,
    metrics: Output[Metrics],
    classification_metrics: Output[ClassificationMetrics]
):
    """
    Build, train and evaluate the digit classifier, log metrics, and store the model.

    The prepared images and the labels are downloaded from the ``mlpipeline``
    bucket. A small convolutional network is trained on them and evaluated on
    the test split. The saved model directory is then uploaded under
    ``models/detect-digits/1/``.

    Args:
        no_epochs: Number of training epochs passed to ``model.fit``.
        optimizer: Optimizer identifier passed to ``model.compile``.
        metrics: Output artifact that receives ``accuracy`` and ``loss``.
        classification_metrics: Output artifact that receives the confusion
            matrix for the categories ``"0"`` to ``"9"``.
    """
    import os

    import numpy as np
    import tensorflow as tf
    from minio import Minio
    from tensorflow import keras

    print("Iniciando step: model_building")
    bucket_name = "mlpipeline"
    storage = Minio(
        "10.80.0.197:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False,
    )

    # (object in the bucket, local file), in download order. The reshaped
    # image arrays are stored locally under the plain x_*.npy names.
    downloads = (
        ("x_train_reshaped.npy", "/tmp/x_train.npy"),
        ("y_train.npy", "/tmp/y_train.npy"),
        ("x_test_reshaped.npy", "/tmp/x_test.npy"),
        ("y_test.npy", "/tmp/y_test.npy"),
    )
    loaded = []
    for object_name, local_file in downloads:
        storage.fget_object(bucket_name, object_name, local_file)
        loaded.append(np.load(local_file))
    train_images, train_labels, test_images, test_labels = loaded
    print("Datos de entrenamiento descargados.")

    # One convolution + pooling stage followed by two dense layers; the last
    # layer has one softmax unit per digit.
    layer_stack = [
        keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28,28,1)),
        keras.layers.MaxPool2D(2, 2),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(10, activation='softmax'),
    ]
    network = keras.models.Sequential(layer_stack)
    network.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy'],
    )
    network.summary()

    print(f"Iniciando entrenamiento por {no_epochs} épocas...")
    network.fit(train_images, train_labels, epochs=no_epochs, batch_size=32, verbose=1)

    # evaluate() returns the loss first, then the compiled 'accuracy' metric.
    evaluation = network.evaluate(test_images, test_labels)
    loss_value, accuracy_value = evaluation
    metrics.log_metric("accuracy", float(accuracy_value))
    metrics.log_metric("loss", float(loss_value))

    # Turn the per-class scores into predicted digits for the confusion matrix.
    class_scores = network.predict(test_images)
    predicted_digits = np.argmax(class_scores, axis=1)
    confusion = tf.math.confusion_matrix(
        labels=test_labels,
        predictions=predicted_digits,
    ).numpy()
    digit_names = list(map(str, range(10)))
    classification_metrics.log_confusion_matrix(digit_names, confusion.tolist())
    print("Métricas registradas en la UI de Kubeflow.")

    export_dir = "/tmp/detect-digits"
    network.save(export_dir)

    # Mirror the exported directory tree into the bucket, keeping relative
    # paths and using "/" as the separator in object names.
    remote_prefix = "models/detect-digits/1/"
    for current_dir, _, file_names in os.walk(export_dir):
        for file_name in file_names:
            local_file = os.path.join(current_dir, file_name)
            relative_path = os.path.relpath(local_file, export_dir)
            object_name = os.path.join(remote_prefix, relative_path).replace(os.sep, "/")
            storage.fput_object(bucket_name, object_name, local_file)
    print("Modelo guardado en MinIO.")


@dsl.pipeline(
    name='digits-recognizer-pipeline-sequential',
    description='Detect digits',
)
def output_test(no_epochs: int, optimizer: str):
    """
    Chain the five components into a strictly sequential pipeline.

    No component consumes another's outputs here, so the execution order is
    imposed explicitly with ``.after()``: upload, fetch, latest-data
    placeholder, reshape, then model building.

    Args:
        no_epochs: Forwarded to ``model_building``.
        optimizer: Forwarded to ``model_building``.
    """
    upload_task = upload_data_batch()
    fetch_task = get_data_batch()
    latest_task = get_latest_data()
    reshape_task = reshape_data()
    training_task = model_building(no_epochs=no_epochs, optimizer=optimizer)

    # Make every task wait for the one listed just before it.
    ordered_tasks = [upload_task, fetch_task, latest_task, reshape_task, training_task]
    for previous_task, next_task in zip(ordered_tasks, ordered_tasks[1:]):
        next_task.after(previous_task)


if __name__ == "__main__":
    # Create a Kubeflow Pipelines client with default settings.
    # NOTE(review): `client` is not used in the visible code; presumably a
    # later step or notebook cell submits a run with it -- confirm.
    client = kfp.Client()

    # Example values for the two pipeline parameters.
    # NOTE(review): `arguments` is not passed to anything in the visible code;
    # presumably intended for a run submission -- confirm.
    arguments = {
        "no_epochs": 1,
        "optimizer": "adam"
    }
    # Compile the pipeline function into a YAML package at the given relative path.
    kfp.compiler.Compiler().compile(pipeline_func=output_test,package_path='pipeline_compile.yaml')