In [None]:
import os

# Working-directory setup: stay in place when the current path already
# contains the project folder name, otherwise aim for the ./CNN-Cards folder.
if 'cnn-cards' in os.getcwd().lower():
    target_dir = os.getcwd()
else:
    target_dir = './CNN-Cards'

# Only switch when that folder exists; otherwise the current directory is kept.
if os.path.isdir(target_dir):
    os.chdir(target_dir)
print(f'Directorio actual: {os.getcwd()}')

# Root folder of the image dataset, relative to the working directory.
DATA_PATH = './Datasets/Cards/'

In [None]:
# Lower the verbosity of TensorFlow's native logging. The variable is set
# ahead of the `import tensorflow` below so it is already in the environment
# when the library is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow.keras import regularizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Global configuration shared by the cells below.
SIZE = 224        # images are resized to SIZE x SIZE before entering the model
CLASSES = 53      # width of the final softmax layer
EPOCHS = 20
# Early-stopping patience has to stay below EPOCHS: the previous value (25)
# exceeded the 20-epoch budget, so EarlyStopping could never trigger.
PATIENCE_ES = 10
BATCH_SIZE = 32
SEED = 42

# Seed Python, NumPy and TensorFlow so that shuffling, augmentation and weight
# initialisation are repeatable after "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(SEED)

path_models = 'Models'
path_results = 'Results'

# Checkpoints, CSV histories and figures are written into these folders later;
# make sure they exist on a fresh checkout.
for _output_dir in (path_models, path_results):
    os.makedirs(_output_dir, exist_ok=True)

print(f'TensorFlow version: {tf.__version__}')
print(f'GPU disponible: {tf.config.list_physical_devices("GPU")}')

## Cargar datos con Data Augmentation

In [None]:
# Augmentation pipeline, applied to the training images only.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    # geometric transforms
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='reflect',
    # photometric transform
    brightness_range=(0.9, 1.1)
)

# Validation and test images are only rescaled, never augmented.
valid_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255)

train_path = DATA_PATH + 'train'
valid_path = DATA_PATH + 'valid'
test_path = DATA_PATH + 'test'

# Options common to the three directory iterators.
_flow_options = dict(
    target_size=(SIZE, SIZE),
    class_mode='categorical',
    batch_size=BATCH_SIZE,
)

# Only the training iterator shuffles; the evaluation iterators keep file
# order so that predictions can be compared with their `labels` attribute.
train_dataset = train_generator.flow_from_directory(train_path, shuffle=True, **_flow_options)

valid_dataset = valid_generator.flow_from_directory(valid_path, shuffle=False, **_flow_options)

test_dataset = test_generator.flow_from_directory(test_path, shuffle=False, **_flow_options)

## Cargar modelo original y evaluar baseline

In [None]:
# Load the previously trained baseline and measure its test accuracy, which
# the final comparison cell uses as the reference value.
# The file is resolved through `path_models`, like every other model path in
# this notebook, instead of a separately hard-coded 'Models/...' literal.
mn_pretrained = tf.keras.models.load_model(os.path.join(path_models, 'MobileNet_2.h5'))
print('Modelo cargado. Accuracy original en test:')
# NOTE(review): the two-value unpacking assumes the saved model was compiled
# with exactly one metric (accuracy) besides the loss - confirm.
_, acc_original = mn_pretrained.evaluate(test_dataset, verbose=0)
print(f'Accuracy original: {acc_original:.4f}')

In [None]:
# Print the layer-by-layer summary of the loaded baseline model.
mn_pretrained.summary()

## Reconstruir modelo con mejoras

In [None]:
# ImageNet-initialised MobileNetV2 backbone without its classification head.
base_mobilenet = tf.keras.applications.MobileNetV2(
    input_shape=(SIZE, SIZE, 3),
    include_top=False,
    weights='imagenet'
)

print(f'Total capas en MobileNetV2: {len(base_mobilenet.layers)}')

# Layers with index below 100 stay frozen; index 100 and above are fine-tuned.
for position, layer in enumerate(base_mobilenet.layers):
    layer.trainable = position >= 100

trainable_count = len([layer for layer in base_mobilenet.layers if layer.trainable])
print(f'Capas entrenables: {trainable_count}')

In [None]:
# Improved classifier: partially unfrozen backbone plus a regularised head.
inputs = tf.keras.layers.Input(shape=(SIZE, SIZE, 3))
# The data generators rescale pixels to [0, 1], but the ImageNet MobileNetV2
# weights expect inputs in [-1, 1] (the range produced by
# mobilenet_v2.preprocess_input). Map the range inside the model so the frozen
# layers see the distribution they were trained on, and so the saved model
# carries its own preprocessing.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
x = base_mobilenet(x)
# Global average pooling collapses each feature map to one value (far fewer
# parameters in the head than flattening).
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.BatchNormalization()(x)
# Two L2-regularised dense layers, each followed by dropout.
x = tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = tf.keras.layers.Dropout(0.4)(x)
x = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = tf.keras.layers.Dropout(0.3)(x)
outputs = tf.keras.layers.Dense(CLASSES, activation='softmax')(x)

mn_improved = tf.keras.Model(inputs=inputs, outputs=outputs)
mn_improved.summary()

In [None]:
# Count scalar weights, split by whether training updates them.
trainable_shapes = [weight.shape for weight in mn_improved.trainable_variables]
frozen_shapes = [weight.shape for weight in mn_improved.non_trainable_variables]

trainable_params = np.sum([np.prod(shape) for shape in trainable_shapes])
non_trainable_params = np.sum([np.prod(shape) for shape in frozen_shapes])

print(f'Parametros entrenables: {trainable_params:,}')
print(f'Parametros no entrenables: {non_trainable_params:,}')

## Entrenar con AdamW (weight decay)

In [None]:
name = 'MobileNet_3'

# --- Training callbacks -------------------------------------------------

# Write the full model to disk whenever validation accuracy improves.
checkpoint_path = os.path.join(path_models, name + '.h5')
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1
)

# Stop once validation accuracy has not improved by at least `min_delta` for
# PATIENCE_ES epochs, restoring the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0.001,
    patience=PATIENCE_ES,
    restore_best_weights=True,
    verbose=1
)

# Halve the learning rate after 5 epochs without validation-loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1
)

# TensorBoard logs for this run, with histograms every epoch.
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=f'logs/{name}',
    histogram_freq=1
)

callbacks_list = [checkpoint, early_stop, reduce_lr, tensorboard]

In [None]:
# AdamW optimizer: Adam with a weight-decay term.
optimizer = tf.keras.optimizers.AdamW(weight_decay=1e-5, learning_rate=1e-4)

# Categorical cross-entropy with label smoothing, intended to improve
# generalisation.
loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

mn_improved.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy']
)

In [None]:
# Fine-tune the model; the returned history object is kept in `history_adamw`
# for the plotting cell.
history_adamw = mn_improved.fit(
    x=train_dataset,
    epochs=EPOCHS,
    validation_data=valid_dataset,
    callbacks=callbacks_list
)

## Visualizar resultados

In [None]:
def plot_and_save(h, dir, name):
    """Save a training history as CSV and as a loss/accuracy figure.

    Args:
        h: object whose ``history`` attribute maps metric names to per-epoch
            values; it must contain ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
        dir: output directory; it is created when it does not exist yet.
        name: file stem; ``<name>.csv`` and ``<name>_curves.png`` are written
            inside ``dir``.
    """
    # On a fresh checkout the output folder is missing, which used to make
    # to_csv()/savefig() fail. An empty string means "current directory".
    if dir:
        os.makedirs(dir, exist_ok=True)

    history_df = pd.DataFrame(h.history)
    history_df['epoch'] = list(range(len(history_df)))
    history_df.to_csv(os.path.join(dir, name + '.csv'), header=True, index=False)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # One panel per metric: training curve and validation curve together.
    for ax, metric, title in zip(axes, ('loss', 'accuracy'), ('Loss', 'Accuracy')):
        ax.plot(history_df['epoch'], history_df[metric], label=f'Train {title}')
        ax.plot(history_df['epoch'], history_df['val_' + metric], label=f'Val {title}')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(dir, name + '_curves.png'), dpi=150)
    plt.show()

# Save the AdamW run's history as CSV and figure under `path_results`.
plot_and_save(history_adamw, path_results, name)

## Experimento alternativo: SGD con momentum

In [None]:
# Independent copy of the architecture for the SGD experiment, built on its
# own freshly loaded backbone.
base_mobilenet_sgd = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=(SIZE, SIZE, 3)
)

# Same fine-tuning split: layers below index 100 frozen, the rest trainable.
for layer in base_mobilenet_sgd.layers[:100]:
    layer.trainable = False
for layer in base_mobilenet_sgd.layers[100:]:
    layer.trainable = True

inputs_sgd = tf.keras.layers.Input(shape=(SIZE, SIZE, 3))
# The data generators rescale pixels to [0, 1], but the ImageNet MobileNetV2
# weights expect inputs in [-1, 1] (the range produced by
# mobilenet_v2.preprocess_input). Map the range inside the model so the frozen
# layers see the distribution they were trained on.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs_sgd)
x = base_mobilenet_sgd(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = tf.keras.layers.Dropout(0.4)(x)
x = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
x = tf.keras.layers.Dropout(0.3)(x)
outputs_sgd = tf.keras.layers.Dense(CLASSES, activation='softmax')(x)

mn_sgd = tf.keras.Model(inputs=inputs_sgd, outputs=outputs_sgd)

In [None]:
name_sgd = 'MobileNet_3_SGD'

# Dedicated checkpoint file for the SGD run (best validation accuracy only).
checkpoint_sgd_path = os.path.join(path_models, name_sgd + '.h5')
checkpoint_sgd = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_sgd_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

# The early-stopping and LR-schedule callback objects defined for the AdamW
# run are reused here; no TensorBoard callback is attached.
callbacks_sgd = [checkpoint_sgd, early_stop, reduce_lr]

# SGD with Nesterov momentum.
optimizer_sgd = tf.keras.optimizers.SGD(
    learning_rate=1e-3,
    nesterov=True,
    momentum=0.9
)

mn_sgd.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    optimizer=optimizer_sgd,
    metrics=['accuracy']
)

In [None]:
# Entrenar con SGD (opcional - descomentar para probar)
# history_sgd = mn_sgd.fit(
#     train_dataset,
#     validation_data=valid_dataset,
#     epochs=EPOCHS,
#     callbacks=callbacks_sgd
# )
# plot_and_save(history_sgd, path_results, name_sgd)

## Evaluacion en Test Set

In [None]:
# Reload the checkpoint file written by the ModelCheckpoint callback.
best_model_path = os.path.join(path_models, name + '.h5')
best_model = tf.keras.models.load_model(best_model_path)

# evaluate() output is unpacked as (loss, accuracy); only accuracy is kept.
_, acc_improved = best_model.evaluate(test_dataset)

# Side-by-side comparison with the baseline accuracy measured earlier.
print(f'\n=== Comparacion de resultados ===')
print(f'Accuracy original (MobileNet_2): {acc_original:.4f}')
print(f'Accuracy mejorado (MobileNet_3): {acc_improved:.4f}')
print(f'Mejora: {(acc_improved - acc_original)*100:.2f}%')

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

def show_report(model, dataframe):
    """Print a per-class classification report for ``model`` on ``dataframe``.

    Args:
        model: object exposing ``predict(data, verbose=0)`` that returns one
            row of class scores per sample.
        dataframe: despite its name, a data iterator exposing
            ``class_indices`` (class name -> integer index) and ``labels``
            (true class index per sample), such as the objects returned by
            ``flow_from_directory`` in this notebook.

    Raises:
        ValueError: if the iterator reports ``shuffle=True``; predictions
            would then not line up with ``dataframe.labels`` and the report
            would be silently wrong.
    """
    if getattr(dataframe, 'shuffle', False):
        raise ValueError(
            'show_report needs an iterator created with shuffle=False so that '
            'predictions align with its labels.'
        )

    class_indices = dataframe.class_indices
    # Order the class names by their index instead of trusting dict order.
    class_names = np.array(sorted(class_indices, key=class_indices.get))

    true_text = class_names[np.asarray(dataframe.labels, dtype=int)]
    pred_text = class_names[model.predict(dataframe, verbose=0).argmax(axis=1)]
    print(classification_report(true_text, pred_text))

def show_matrix(model, dataframe, title=None):
    """Plot the confusion matrix of ``model`` on ``dataframe`` as a heatmap.

    Args:
        model: object exposing ``predict(data, verbose=0)`` that returns one
            row of class scores per sample.
        dataframe: despite its name, a data iterator exposing
            ``class_indices`` (class name -> integer index) and ``labels``
            (true class index per sample).
        title: figure title. When omitted, the previous behaviour is kept and
            the title is built from the notebook-level variable ``name``.

    Raises:
        ValueError: if the iterator reports ``shuffle=True``; predictions
            would then not line up with ``dataframe.labels``.
    """
    if getattr(dataframe, 'shuffle', False):
        raise ValueError(
            'show_matrix needs an iterator created with shuffle=False so that '
            'predictions align with its labels.'
        )

    class_indices = dataframe.class_indices
    # Order the class names by their index instead of trusting dict order.
    class_names = np.array(sorted(class_indices, key=class_indices.get))

    true_text = class_names[np.asarray(dataframe.labels, dtype=int)]
    pred_text = class_names[model.predict(dataframe, verbose=0).argmax(axis=1)]
    # Passing `labels` fixes the row/column order and keeps absent classes.
    cf = confusion_matrix(true_text, pred_text, labels=class_names)

    if title is None:
        # Backward-compatible default that reads the global run name.
        title = f'Confusion Matrix - {name}'

    fig, ax = plt.subplots(figsize=(14, 14))
    sns.heatmap(cf, annot=False, square=True, cbar=True,
                cmap=plt.cm.Blues, xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    ax.set_title(title)
    # Small rotated tick labels so all class names stay legible.
    plt.xticks(rotation=90, fontsize=6)
    plt.yticks(fontsize=6)
    plt.tight_layout()
    plt.show()

# Per-class metrics of the reloaded best model on the test iterator.
show_report(best_model, test_dataset)

In [None]:
# Confusion-matrix heatmap of the reloaded best model on the test iterator.
show_matrix(best_model, test_dataset)