In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import clone_model
import pandas as pd
from PIL import Image

2024-08-19 00:27:40.245641: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Directories holding the resized images for each dataset split.
train_dir, valid_dir, test_dir = (
    '../data/train',
    '../data/valid',
    '../data/test',
)

In [3]:
# Data generators: pixel rescaling everywhere, random augmentation for training only
# (augmentation is meant to help the model generalize).
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation/test images are only rescaled, never augmented.
valid_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by every directory iterator created below.
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Build the directory iterators.
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# shuffle=False keeps the test samples in a fixed order across passes.
test_generator = valid_test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options,
)

Found 10682 images belonging to 2 classes.
Found 3561 images belonging to 2 classes.


In [4]:
# Select and load the pretrained backbone.
from tensorflow.keras.applications import DenseNet121

# Swap the constructor below to try a different pretrained architecture.
backbone_options = dict(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model = DenseNet121(**backbone_options)

In [7]:
# Accumulate every batch produced by one full pass over the training generator.
# NOTE(review): train_generator applies random augmentation, so the arrays built
# here hold a single augmented snapshot of the training images — confirm intended.
image_batches, label_batches = [], []

n_batches = len(train_generator)
for _ in range(n_batches):
    batch_images, batch_labels = next(train_generator)
    image_batches.append(batch_images)
    label_batches.append(batch_labels)

# Stack the per-batch arrays along the sample axis into single numpy arrays.
X = np.concatenate(image_batches, axis=0)
y = np.concatenate(label_batches, axis=0)

# Drop the per-batch lists so only the stacked arrays stay in memory.
del image_batches, label_batches

In [9]:
# Number of folds used for cross-validation.
n_splits = 5

# Stratified splitter with a fixed seed so fold membership is repeatable.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Validation accuracy recorded for each fold.
accuracy_scores = []

# argmax over axis 1 turns the label matrix into the integer class ids that
# StratifiedKFold stratifies on. The fold number comes from enumerate so the
# checkpoint name does not depend on accuracy_scores being appended in this loop.
for fold, (train_index, val_index) in enumerate(skf.split(X, np.argmax(y, axis=1)), start=1):
    X_train_fold, X_val_fold = X[train_index], X[val_index]
    y_train_fold, y_val_fold = y[train_index], y[val_index]

    # clone_model rebuilds the architecture with newly initialized weights; it does
    # NOT carry over the pretrained ones. Copy them explicitly (before freezing, so
    # both models list their weights in the same state), otherwise the frozen
    # backbone would be a random, untrainable feature extractor.
    model = clone_model(base_model)
    model.set_weights(base_model.get_weights())
    model.trainable = False

    # Custom classification head on top of the frozen backbone.
    x = model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(2, activation='softmax')(x)  # Assumes 2 classes: Melanoma and NotMelanoma

    model = models.Model(inputs=model.input, outputs=outputs)

    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Callbacks, all driven by validation loss:
    # - checkpoint: keep only the best weights of this fold on disk
    # - early_stopping: stop after 5 epochs without improvement and restore the best weights
    # - reduce_lr: divide the learning rate by 10 after 3 stagnant epochs, down to 1e-7
    checkpoint = ModelCheckpoint(f'best_model_fold_{fold}.keras', monitor='val_loss', save_best_only=True)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-7)

Epoch 1/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1352s[0m 5s/step - accuracy: 0.5044 - loss: 1.2191 - val_accuracy: 0.5512 - val_loss: 0.9009 - learning_rate: 1.0000e-04
Epoch 2/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1432s[0m 5s/step - accuracy: 0.5698 - loss: 0.8519 - val_accuracy: 0.6430 - val_loss: 0.7551 - learning_rate: 1.0000e-04
Epoch 3/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1307s[0m 5s/step - accuracy: 0.6193 - loss: 0.7410 - val_accuracy: 0.6439 - val_loss: 0.7111 - learning_rate: 1.0000e-04
Epoch 4/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1184s[0m 4s/step - accuracy: 0.6351 - loss: 0.7069 - val_accuracy: 0.6378 - val_loss: 0.6943 - learning_rate: 1.0000e-04
Epoch 5/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1155s[0m 4s/step - accuracy: 0.6378 - loss: 0.6920 - val_accuracy: 0.6397 - val_loss: 0.6840 - learning_rate: 1.0000e-04
Epoch 6/10
[1m268/268[0m [32m━━━━━━━━

In [None]:
# Train the model on the current fold.
# NOTE(review): as written, this cell runs after the fold loop has finished, so it
# trains and scores only the model/arrays left over from the last iteration —
# confirm whether it was meant to live inside the loop.
fit_callbacks = [checkpoint, early_stopping, reduce_lr]
fold_validation_data = (X_val_fold, y_val_fold)

history = model.fit(
    X_train_fold,
    y_train_fold,
    validation_data=fold_validation_data,
    epochs=10,  # Adjust as needed
    callbacks=fit_callbacks,
)

# Score the model on the fold's validation split and record its accuracy.
val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold)
accuracy_scores.append(val_accuracy)

In [None]:
# Average the recorded per-fold validation accuracies and print the result.
mean_cv_accuracy = np.mean(accuracy_scores)
print(f"Accuracy promedio en validación cruzada: {mean_cv_accuracy}")

In [None]:
# Evaluate the model on the test set.
# Round the step count UP so the final partial batch is included: floor division
# would silently leave the last `samples % batch_size` test images out of the
# reported loss/accuracy, while the prediction cell below covers every image.
test_steps = int(np.ceil(test_generator.samples / test_generator.batch_size))
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_steps)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

In [None]:
# Number of batches needed to cover every test sample (ceiling division, so the
# final partial batch is included).
n_test_samples = test_generator.samples
test_batch_size = test_generator.batch_size
steps = (n_test_samples + test_batch_size - 1) // test_batch_size

# Predict over exactly that many batches.
predictions = model.predict(test_generator, steps=steps, verbose=1)

# Index of the highest-scoring output per sample = predicted class id.
predicted_classes = predictions.argmax(axis=1)

In [None]:
# Print the sizes of the prediction outputs and of the ground-truth labels.
print(f"Predictions shape: {predictions.shape}")
print(f"Predicted classes length: {len(predicted_classes)}")
true_classes = test_generator.classes
print(f"True classes length: {len(true_classes)}")

# A report can only be built when each true label has a matching prediction.
if len(true_classes) != len(predicted_classes):
    print("Las longitudes de true_classes y predicted_classes no coinciden. No se puede generar el reporte.")
else:
    # NOTE(review): target_names assumes class id 0 is Melanoma and 1 is
    # NotMelanoma — confirm against test_generator.class_indices.
    report = classification_report(
        true_classes,
        predicted_classes,
        target_names=['Melanoma', 'NotMelanoma'],
    )
    print(report)

In [None]:
# Build the confusion matrix (true labels first, predicted labels second) and show it under a header line.
conf_matrix = confusion_matrix(y_true=true_classes, y_pred=predicted_classes)
print("Confusion Matrix", conf_matrix, sep="\n")