# Práctica 1: Comparativa de Optimizadores en MNIST

Comparación de diferentes algoritmos de optimización en clasificación de dígitos MNIST.

**Optimizadores a comparar:**
- SGD con momentum de Nesterov
- Adam
- RMSprop
- AdamW
- Nadam
- Adagrad

In [None]:
# Configuración inicial
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import pandas as pd

# Seed Python's `random`, NumPy and TensorFlow with a single call; the
# separate tf/np seed calls left Python's `random` module unseeded.
tf.keras.utils.set_random_seed(42)
plt.style.use('seaborn-v0_8')

print("Configuración completada")

In [None]:
# Load MNIST and prepare it for the CNN
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Convert to float32, divide by 255 and add a trailing channel axis
x_train, x_test = (
    (images.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
    for images in (x_train, x_test)
)

# Hold out 10% of the training data, stratified by label, for validation
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
    stratify=y_train,
)

for split_name, split_images in (
    ("Train", x_train),
    ("Validación", x_val),
    ("Test", x_test),
):
    print(f"{split_name}: {split_images.shape[0]} muestras")

In [None]:
# CNN model
def create_model(input_shape=(28, 28, 1), num_classes=10):
    """Build an uncompiled CNN classifier.

    The network stacks three Conv2D + BatchNormalization stages (32, 64 and
    128 filters, 3x3 kernels, ReLU), with 2x2 max pooling after the first two
    and dropout (0.3) after the third, followed by a 256-unit dense layer,
    dropout (0.5) and a softmax output.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A `tf.keras.models.Sequential` model; the caller must compile it.
    """
    layers = tf.keras.layers
    return tf.keras.models.Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to
        # the first layer, which current Keras versions warn against.
        tf.keras.Input(shape=input_shape),

        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2, 2),

        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2, 2),

        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),

        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

# Optimizer configuration: one instance per algorithm, keyed by display name
keras_optimizers = tf.keras.optimizers
optimizers_config = dict(
    SGD=keras_optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    Adam=keras_optimizers.Adam(learning_rate=0.001),
    RMSprop=keras_optimizers.RMSprop(learning_rate=0.001),
    AdamW=keras_optimizers.AdamW(learning_rate=0.001, weight_decay=1e-4),
    Nadam=keras_optimizers.Nadam(learning_rate=0.002),
    Adagrad=keras_optimizers.Adagrad(learning_rate=0.01),
)

# Training hyperparameters shared by every run
EPOCHS, BATCH_SIZE = 10, 128

print(f"Modelo CNN definido con {len(optimizers_config)} optimizadores")
print(f"Épocas: {EPOCHS}, Batch size: {BATCH_SIZE}")

In [None]:
# Train a fresh model with every configured optimizer and collect metrics.
# `results` maps optimizer name -> trained model, timings, accuracies/losses
# and test confusion matrix; `training_histories` maps optimizer name -> the
# per-epoch Keras history dict.
results = {}
training_histories = {}

print("Iniciando entrenamiento comparativo...")

for opt_name, optimizer in optimizers_config.items():
    print(f"\nEntrenando con {opt_name}...")

    # Re-seed Python, NumPy and TensorFlow before each run so every optimizer
    # starts from the same seeded state. With only one seed call at the top
    # of the notebook, each run would consume a different part of the random
    # stream and seed noise would be mixed into the comparison.
    tf.keras.utils.set_random_seed(42)

    # Fresh model, plus a fresh optimizer rebuilt from the configured one's
    # hyperparameters, so no optimizer state carries over from a previous
    # model (e.g. when this cell is executed more than once).
    model = create_model()
    model.compile(
        optimizer=type(optimizer).from_config(optimizer.get_config()),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Stop when validation accuracy has not improved for 3 epochs and roll
    # back to the best weights seen.
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy', patience=3, restore_best_weights=True
    )

    # Train, timing only the fit call
    start_time = time.time()
    history = model.fit(
        x_train, y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(x_val, y_val),
        callbacks=[early_stop],
        verbose=0
    )
    training_time = time.time() - start_time

    # Evaluate on all three splits
    train_loss, train_acc = model.evaluate(x_train, y_train, verbose=0)
    val_loss, val_acc = model.evaluate(x_val, y_val, verbose=0)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    # Predicted class per test sample and the resulting confusion matrix
    y_pred_test = np.argmax(model.predict(x_test, verbose=0), axis=1)
    cm_test = confusion_matrix(y_test, y_pred_test)

    # Store everything the later cells need
    results[opt_name] = {
        'model': model,
        'training_time': training_time,
        'train_acc': train_acc,
        'val_acc': val_acc,
        'test_acc': test_acc,
        'train_loss': train_loss,
        'val_loss': val_loss,
        'test_loss': test_loss,
        # May be fewer than EPOCHS when early stopping triggers
        'epochs_trained': len(history.history['loss']),
        'cm_test': cm_test
    }

    training_histories[opt_name] = history.history

    print(f"  Tiempo: {training_time:.2f}s | Test Acc: {test_acc:.4f}")

print("\nEntrenamiento completado")

In [None]:
# Comparison table, ranked by test accuracy (best first).
# Rank on the raw float accuracies: sorting the formatted 'Test Acc' strings
# compares text, and values that round to the same 4 decimals would be
# treated as ties and ordered arbitrarily when picking the best optimizer.
ranked_names = sorted(
    optimizers_config,
    key=lambda name: results[name]['test_acc'],
    reverse=True,
)

comparison_data = []
for opt_name in ranked_names:
    r = results[opt_name]
    comparison_data.append({
        'Optimizador': opt_name,
        'Test Acc': f"{r['test_acc']:.4f}",
        'Test Loss': f"{r['test_loss']:.4f}",
        'Tiempo (s)': f"{r['training_time']:.2f}",
        'Épocas': r['epochs_trained']
    })

# Rows are already in ranking order, so no further sort is needed
df_comparison = pd.DataFrame(comparison_data)

print("Tabla comparativa:")
print(df_comparison.to_string(index=False))

# The first row is the optimizer with the highest test accuracy
best_optimizer = df_comparison.iloc[0]['Optimizador']
print(f"\nMejor optimizador: {best_optimizer}")

In [None]:
# Results visualization: 2x2 grid with test accuracy and training time per
# optimizer (top row) and validation accuracy/loss curves of the three
# best-ranked optimizers (bottom row).
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Test accuracy per optimizer
ax = axes[0, 0]
opt_names = list(optimizers_config.keys())
test_accs = [results[name]['test_acc'] for name in opt_names]
bars = ax.bar(opt_names, test_accs, alpha=0.7)
ax.set_title('Test Accuracy por Optimizador')
ax.set_ylabel('Accuracy')
# Rotate the existing tick labels instead of calling set_xticklabels, which
# warns when the tick positions have not been fixed first.
ax.tick_params(axis='x', labelrotation=45)
# Print each accuracy just above its bar
for bar, acc in zip(bars, test_accs):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001,
            f'{acc:.3f}', ha='center', va='bottom')

# Training time per optimizer
ax = axes[0, 1]
times = [results[name]['training_time'] for name in opt_names]
bars = ax.bar(opt_names, times, color='coral', alpha=0.7)
ax.set_title('Tiempo de Entrenamiento')
ax.set_ylabel('Tiempo (s)')
ax.tick_params(axis='x', labelrotation=45)

# Validation accuracy curves for the first three rows of the comparison table
ax = axes[1, 0]
top_3 = df_comparison['Optimizador'].head(3).values
for opt_name in top_3:
    history = training_histories[opt_name]
    ax.plot(history['val_accuracy'], label=f'{opt_name}', linewidth=2)
ax.set_title('Curvas de Validación (Top 3)')
ax.set_xlabel('Época')
ax.set_ylabel('Accuracy')
ax.legend()
ax.grid(True, alpha=0.3)

# Validation loss curves for the same three optimizers
ax = axes[1, 1]
for opt_name in top_3:
    history = training_histories[opt_name]
    ax.plot(history['val_loss'], label=f'{opt_name}', linewidth=2)
ax.set_title('Curvas de Loss (Top 3)')
ax.set_xlabel('Época')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Confusion matrix of the best-ranked model
best_entry = results[best_optimizer]
best_model, cm = best_entry['model'], best_entry['cm_test']

plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=range(10),
    yticklabels=range(10),
)
heatmap_ax.set_title(f'Matriz de Confusión - {best_optimizer}')
heatmap_ax.set_xlabel('Predicción')
heatmap_ax.set_ylabel('Etiqueta Real')
plt.show()

# Per-class classification report on the test set
test_probabilities = best_model.predict(x_test, verbose=0)
y_pred_best = np.argmax(test_probabilities, axis=1)
print(f"\nReporte de clasificación - {best_optimizer}:")
print(classification_report(y_test, y_pred_best, digits=4))

In [None]:
# Prediction examples: 12 random test images with predicted label, true
# label and the predicted class's probability as a percentage.
sample_indices = np.random.choice(len(x_test), 12, replace=False)
sample_images = x_test[sample_indices]
sample_labels = y_test[sample_indices]
sample_predictions = best_model.predict(sample_images, verbose=0)
sample_pred_labels = np.argmax(sample_predictions, axis=1)

fig, axes = plt.subplots(3, 4, figsize=(12, 9))
fig.suptitle(f'Ejemplos de Predicciones - {best_optimizer}', fontsize=14, fontweight='bold')

# axes.flat walks the grid row by row, matching the sample order
panels = zip(axes.flat, sample_images, sample_labels,
             sample_predictions, sample_pred_labels)
for panel, image, true_label, probabilities, pred_label in panels:
    panel.imshow(image.squeeze(), cmap='gray')
    panel.axis('off')

    # Green title for a correct prediction, red for a wrong one
    if pred_label == true_label:
        color = 'green'
    else:
        color = 'red'
    confidence = probabilities[pred_label] * 100

    panel.set_title(
        f"Pred: {pred_label}\nReal: {true_label}\n{confidence:.1f}%",
        color=color,
        fontweight='bold',
    )

plt.tight_layout()
plt.show()