### importar librerías

In [1]:
import os
import random
from calendar import EPOCH

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

from alembic.command import history
from keras.src.metrics.metrics_utils import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, log_loss, confusion_matrix, roc_curve, ConfusionMatrixDisplay, RocCurveDisplay

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.keras.saving.saved_model.load import training_lib

IMAGES_DIR = "../data/papila-db/FundusImages/"

### obtener datos

In [2]:
import openpyxl
df_od = pd.read_excel("../data/papila-db/ClinicalData/patient_data_od.xlsx", header=1, skiprows=[2])
df_os = pd.read_excel("../data/papila-db/ClinicalData/patient_data_os.xlsx", header=1, skiprows=[2])

In [3]:
df_od

Unnamed: 0.1,Unnamed: 0,Age,Gender,Diagnosis,dioptre_1,dioptre_2,astigmatism,Phakic/Pseudophakic,Pneumatic,Perkins,Pachymetry,Axial_Length,VF_MD
0,#002,47,0,2,0.75,-1.75,90.0,0.0,21.0,,586.0,23.64,-0.07
1,#004,58,1,1,1.50,-1.75,85.0,0.0,,19.0,501.0,23.06,-3.26
2,#005,89,1,1,-0.75,-1.25,101.0,1.0,13.0,14.0,565.0,23.81,-14.98
3,#006,69,0,2,1.00,-1.50,95.0,0.0,22.0,,612.0,26.25,-2.07
4,#007,22,1,2,-0.25,0.00,0.0,0.0,14.0,,,23.39,-2.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,#289,64,0,0,0.50,-1.00,120.0,1.0,12.0,,529.0,22.27,
240,#290,75,1,0,0.25,-0.25,5.0,1.0,14.0,,577.0,22.00,
241,#291,55,0,0,1.25,-1.25,92.0,0.0,12.0,,452.0,23.53,
242,#292,56,1,0,1.75,-1.50,73.0,0.0,10.0,,499.0,23.68,


In [4]:
df_os

Unnamed: 0.1,Unnamed: 0,Age,Gender,Diagnosis,dioptre_1,dioptre_2,astigmatism,Phakic/Pseudophakic,Pneumatic,Perkins,Pachymetry,Axial_Length,VF_MD
0,#002,47,0,2,-0.50,-1.50,88.0,0.0,20.0,,603.0,23.77,0.17
1,#004,58,1,1,1.50,-2.50,85.0,1.0,,19.0,511.0,22.96,-6.77
2,#005,89,1,1,-0.50,-2.00,100.0,1.0,24.0,10.0,575.0,24.33,-7.44
3,#006,69,0,2,1.00,-1.50,85.0,0.0,22.0,,593.0,26.21,-3.31
4,#007,22,1,2,-0.25,-0.50,0.0,0.0,13.0,,,23.35,-2.61
...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,#289,64,0,0,0.75,-1.50,93.0,1.0,10.0,,531.0,22.31,
240,#290,75,1,0,0.25,-0.25,160.0,1.0,19.0,,573.0,22.01,
241,#291,55,0,0,1.50,-1.25,76.0,0.0,14.0,,443.0,23.51,
242,#292,56,1,0,1.25,-0.75,79.0,0.0,9.0,,479.0,23.84,


In [5]:
X_od = "RET" + df_od["Unnamed: 0"].astype(str).str[1:] + "OD.jpg"
y_od = df_od["Diagnosis"]

X_od, y_od

(0      RET002OD.jpg
 1      RET004OD.jpg
 2      RET005OD.jpg
 3      RET006OD.jpg
 4      RET007OD.jpg
            ...     
 239    RET289OD.jpg
 240    RET290OD.jpg
 241    RET291OD.jpg
 242    RET292OD.jpg
 243    RET293OD.jpg
 Name: Unnamed: 0, Length: 244, dtype: object,
 0      2
 1      1
 2      1
 3      2
 4      2
       ..
 239    0
 240    0
 241    0
 242    0
 243    0
 Name: Diagnosis, Length: 244, dtype: int64)

### cargar imagenes

In [6]:
import cv2

def load_image(file_name):
    try:
        # Construye la ruta de forma segura
        image_path = os.path.join(IMAGES_DIR, file_name)

        img = cv2.imread(image_path)

        # Verifica si la imagen se cargó correctamente ANTES de procesarla
        if img is None:
            print(f"Alerta: No se pudo cargar la imagen. Archivo no encontrado en: {image_path}")
            return None

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (644, 484))
        return img
    except Exception as e:
        # Usa un f-string para imprimir el error correctamente
        print(f"Error procesando la imagen '{file_name}': {e}")
        return None

In [7]:
# Código Corregido
# 1. Cargamos las imágenes en una lista temporal
images_list = [load_image(file_name) for file_name in X_od]

# 2. Filtramos los valores 'None' que resultaron de errores de carga
X_od_filtered = [img for img in images_list if img is not None]

# 3. Solo ahora creamos el array de NumPy con la lista limpia
X_od = np.array(X_od_filtered)
y_od = df_od["Diagnosis"].values

In [8]:
print(f"Dimensión de imágenes: {X_od.shape}")
print(f"Total categorías: {len(np.unique(y_od))}")

Dimensión de imágenes: (244, 484, 644, 3)
Total categorías: 3


### dividir en train y test

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_od, y_od, test_size=0.1, random_state=42)

In [10]:
print(f"Tamaño del conjunto de entrenamiento: {len(X_train)}")
print(f"Tamaño del conjunto de prueba: {len(X_test)}")

Tamaño del conjunto de entrenamiento: 219
Tamaño del conjunto de prueba: 25


### Normalizar y Cargado en Bloque

### Modelación

In [13]:
def normalize(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255
    return image, label

def prepare_subset(images, labels, batch_size):
    subset = tf.data.Dataset.from_tensor_slices((images, labels))
    batches = subset.cache()
    batches = batches.shuffle(len(images) // 4)
    batches = batches.batch(batch_size).map(normalize).prefetch(1)
    return batches

In [11]:
import mlflow
import mlflow.tensorflow

print(f"MLFlow Version: {mlflow.__version__}")

MLFlow Version: 3.3.2


In [14]:
# =======================================================================
# CELDA A: CONFIGURACIÓN FINAL Y SIMPLIFICADA
# =======================================================================
import mlflow
import mlflow.tensorflow
import tensorflow as tf
from tensorflow.keras.layers import Flatten, Dense
import os

print("--- Celda de Configuración Ejecutada ---")

def log_experiment_basic(experiment_name, run_name, params, model, history):
    """
    Función simplificada que registra parámetros, métricas y el modelo
    directamente desde la memoria, evitando las operaciones de archivo conflictivas.
    """
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run(run_name=run_name) as run:
        # Registrar parámetros y métricas
        mlflow.log_params(params)
        mlflow.log_metric("final_accuracy", history.history['accuracy'][-1])
        mlflow.log_metric("final_loss", history.history['loss'][-1])

        # Registrar el modelo directamente.
        # Volveremos a ver la advertencia de la firma, pero la ejecución no fallará.
        mlflow.tensorflow.log_model(
            model=model,
            artifact_path="model",
            registered_model_name=experiment_name
        )
        print(f'--- Run "{run_name}" registrado con éxito. ---')
        return run.info.run_id

# Parámetros comunes
EPOCHS = 10
BATCH_SIZE = 4 # Usamos un batch size pequeño para la memoria
EXPERIMENT_NAME = "papila-db"
IMAGE_SHAPE = (484, 644, 3)

# Preparar lotes y datos de ejemplo
training_batches = prepare_subset(X_train, y_train, BATCH_SIZE)

print("--- Configuración lista. ---")

--- Celda de Configuración Ejecutada ---
--- Configuración lista. ---


In [15]:
# =======================================================================
# CELDA B: ENTRENAMIENTO del Modelo Denso
# =======================================================================
print("\n--- Entrenando Modelo 1: Red Densa ---")
tf.keras.backend.clear_session()

model_dense = tf.keras.models.Sequential([
    Flatten(input_shape=IMAGE_SHAPE),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')
])
model_dense.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Parámetros para esta ejecución
params_dense = { "model_architecture": "Dense_NN", "epochs": EPOCHS, "batch_size": BATCH_SIZE }

# Iniciar el entrenamiento
history_dense = model_dense.fit(training_batches, epochs=EPOCHS, verbose=1)

print("\n--- Entrenamiento del Modelo Denso FINALIZADO. ---")


--- Entrenando Modelo 1: Red Densa ---



  super().__init__(**kwargs)


Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 1s/step - accuracy: 0.5388 - loss: 82.2340
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 1s/step - accuracy: 0.4840 - loss: 59.1169
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 1s/step - accuracy: 0.5160 - loss: 25.1589
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 1s/step - accuracy: 0.5799 - loss: 10.0365
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 1s/step - accuracy: 0.5479 - loss: 4.4379
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.6119 - loss: 1.9524
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.5799 - loss: 2.6742
Epoch 8/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.5388 - loss: 3.4239
Epoch 9/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [16]:
# =======================================================================
# CELDA C: REGISTRO del Modelo Denso
# =======================================================================
print("\n--- Registrando el Modelo Denso en MLFlow ---")

# Esta llamada usa las variables 'model_dense' y 'history_dense' de la celda anterior
log_experiment_basic(
    experiment_name=EXPERIMENT_NAME,
    run_name="dense_model_final_run",
    params=params_dense,
    model=model_dense,
    history=history_dense
)

print("\n--- Registro del Modelo Denso COMPLETADO. ---")


--- Registrando el Modelo Denso en MLFlow ---


Registered model 'papila-db' already exists. Creating a new version of this model...
Created version '4' of model 'papila-db'.


--- Run "dense_model_final_run" registrado con éxito. ---

--- Registro del Modelo Denso COMPLETADO. ---
