# Práctica 2: Estudio de Hiperparámetros y Clasificación Par/Impar

**Objetivos:**
1. Estudiar comportamiento de hiperparámetros de entrenamiento
2. Analizar impacto de: neuronas ocultas, épocas, función de pérdida, batch size, learning rate, % validación
3. Modificar modelo para clasificación par/impar
4. Encontrar configuración óptima

In [None]:
# Configuración inicial
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import time

# Fix the TensorFlow and NumPy seeds so a fresh run of the notebook starts from
# the same random state.
tf.random.set_seed(42)
np.random.seed(42)

# The 'seaborn-v0_8' style name was introduced in Matplotlib 3.6; earlier
# releases ship the same style sheet under the name 'seaborn'. Fall back rather
# than abort the whole notebook over a cosmetic setting.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')

# Report the runtime environment for reproducibility of the timings below.
print(f"TensorFlow: {tf.__version__}")
print(f"GPU disponible: {bool(tf.config.list_physical_devices('GPU'))}")

In [None]:
# Load the MNIST images and integer digit labels (train and test splits).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast to float32 and divide by 255 to normalise the pixel values.
x_train, x_test = (images.astype('float32') / 255.0 for images in (x_train, x_test))

# Flatten every 28x28 image into a 784-long vector, the input the MLP expects.
x_train_flat = x_train.reshape(len(x_train), 28 * 28)
x_test_flat = x_test.reshape(len(x_test), 28 * 28)

# One-hot encode the digit labels over the 10 classes.
y_train_cat, y_test_cat = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

print(f"Datos preparados: {x_train_flat.shape}")

In [None]:
# Factory for the single-hidden-layer MLPs used by the hyperparameter studies.
def create_mlp_model(hidden_neurons=128, learning_rate=0.001, loss='categorical_crossentropy',
                     output_classes=10, dropout_rate=0.3, input_dim=784):
    """Build and compile a one-hidden-layer MLP classifier.

    Args:
        hidden_neurons: Number of units in the ReLU hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        loss: Loss passed unchanged to ``model.compile``.
        output_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate applied after the hidden layer.
        input_dim: Length of each flattened input vector.

    Returns:
        The compiled ``tf.keras.Sequential`` model, tracking the 'accuracy' metric.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input layer: passing input_shape to
        # the first Dense layer triggers a warning under Keras 3, while this
        # form is accepted by both old and new Keras.
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(hidden_neurons, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(output_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=['accuracy'],
    )

    return model

print("Función de creación de modelos definida")

In [None]:
# STUDY 1: number of hidden neurons.
# Trains one MLP per hidden-layer width and records test metrics, the final
# validation accuracy, wall-clock training time and parameter count.
print("=== ESTUDIO 1: NEURONAS OCULTAS ===")

hidden_neurons_values = [32, 64, 128, 256, 512]
neuron_results = {}

for neurons in hidden_neurons_values:
    print(f"Probando {neurons} neuronas...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate across iterations and every width is
    # trained from the same seed (re-running the cell reproduces the numbers).
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model(hidden_neurons=neurons)

    start_time = time.time()
    history = model.fit(
        x_train_flat, y_train_cat,
        epochs=10,
        batch_size=128,
        validation_split=0.1,
        verbose=0
    )
    training_time = time.time() - start_time

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_cat, verbose=0)
    param_count = model.count_params()

    neuron_results[neurons] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        # Accuracy on the validation split after the last epoch.
        'val_accuracy': history.history['val_accuracy'][-1],
        'training_time': training_time,
        'params': param_count
    }

    print(f"  Accuracy: {test_acc:.4f}, Params: {param_count:,}")

print("Estudio de neuronas completado")

In [None]:
# STUDY 2: number of training epochs.
# Trains the default MLP from scratch for each epoch budget and records test
# metrics, the final validation accuracy and wall-clock training time.
print("\n=== ESTUDIO 2: NÚMERO DE ÉPOCAS ===")

epochs_values = [5, 10, 15, 20, 30]
epochs_results = {}

for epochs in epochs_values:
    print(f"Probando {epochs} épocas...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate and the epoch count is the only thing that
    # differs between runs.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model()

    start_time = time.time()
    history = model.fit(
        x_train_flat, y_train_cat,
        epochs=epochs,
        batch_size=128,
        validation_split=0.1,
        verbose=0
    )
    training_time = time.time() - start_time

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_cat, verbose=0)

    epochs_results[epochs] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        # Accuracy on the validation split after the last epoch.
        'val_accuracy': history.history['val_accuracy'][-1],
        'training_time': training_time
    }

    print(f"  Accuracy: {test_acc:.4f}, Tiempo: {training_time:.2f}s")

print("Estudio de épocas completado")

In [None]:
# STUDY 3: batch size.
# Trains the default MLP once per batch size and records test metrics, the
# final validation accuracy and wall-clock training time.
print("\n=== ESTUDIO 3: BATCH SIZE ===")

batch_sizes = [32, 64, 128, 256, 512]
batch_results = {}

for batch_size in batch_sizes:
    print(f"Probando batch size {batch_size}...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate and every batch size starts from the same
    # seed.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model()

    start_time = time.time()
    history = model.fit(
        x_train_flat, y_train_cat,
        epochs=10,
        batch_size=batch_size,
        validation_split=0.1,
        verbose=0
    )
    training_time = time.time() - start_time

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_cat, verbose=0)

    batch_results[batch_size] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        # Accuracy on the validation split after the last epoch.
        'val_accuracy': history.history['val_accuracy'][-1],
        'training_time': training_time
    }

    print(f"  Accuracy: {test_acc:.4f}, Tiempo: {training_time:.2f}s")

print("Estudio de batch size completado")

In [None]:
# STUDY 4: learning rate.
# Trains the default MLP once per Adam learning rate and records test metrics,
# the final validation accuracy and wall-clock training time.
print("\n=== ESTUDIO 4: LEARNING RATE ===")

learning_rates = [0.0001, 0.001, 0.01, 0.1]
lr_results = {}

for lr in learning_rates:
    print(f"Probando learning rate {lr}...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate and the learning rate is the only thing
    # that differs between runs.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model(learning_rate=lr)

    start_time = time.time()
    history = model.fit(
        x_train_flat, y_train_cat,
        epochs=10,
        batch_size=128,
        validation_split=0.1,
        verbose=0
    )
    training_time = time.time() - start_time

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_cat, verbose=0)

    lr_results[lr] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        # Accuracy on the validation split after the last epoch.
        'val_accuracy': history.history['val_accuracy'][-1],
        'training_time': training_time
    }

    print(f"  Accuracy: {test_acc:.4f}")

print("Estudio de learning rate completado")

In [None]:
# STUDY 5: validation percentage.
# Trains the default MLP once per validation_split fraction and records the
# test accuracy next to the final validation accuracy.
print("\n=== ESTUDIO 5: % VALIDACIÓN ===")

validation_splits = [0.05, 0.1, 0.15, 0.2, 0.3]
val_results = {}

for val_split in validation_splits:
    print(f"Probando validación {val_split*100:.0f}%...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate and every split size starts from the same
    # seed.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model()

    history = model.fit(
        x_train_flat, y_train_cat,
        epochs=10,
        batch_size=128,
        validation_split=val_split,
        verbose=0
    )

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_cat, verbose=0)
    # Accuracy on the validation split after the last epoch.
    final_val_acc = history.history['val_accuracy'][-1]

    val_results[val_split] = {
        'test_accuracy': test_acc,
        'val_accuracy': final_val_acc
    }

    print(f"  Test Acc: {test_acc:.4f}, Val Acc: {final_val_acc:.4f}")

print("Estudio de % validación completado")

In [None]:
# STUDY 6: loss functions.
# Trains the default MLP once per loss and records test metrics plus the final
# validation accuracy.
print("\n=== ESTUDIO 6: FUNCIONES DE PÉRDIDA ===")

loss_functions = {
    'categorical_crossentropy': 'categorical_crossentropy',
    'sparse_categorical_crossentropy': 'sparse_categorical_crossentropy'
}

loss_results = {}

for loss_name, loss_func in loss_functions.items():
    print(f"Probando {loss_name}...")

    # Release the Keras state left by the previous model and restart both RNGs,
    # so memory does not accumulate and the loss is the only thing that differs
    # between runs.
    tf.keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_mlp_model(loss=loss_func)

    # The sparse loss takes the integer labels; the other takes one-hot targets.
    if loss_func == 'sparse_categorical_crossentropy':
        y_train_use, y_test_use = y_train, y_test
    else:
        y_train_use, y_test_use = y_train_cat, y_test_cat

    history = model.fit(
        x_train_flat, y_train_use,
        epochs=10,
        batch_size=128,
        validation_split=0.1,
        verbose=0
    )

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_use, verbose=0)

    loss_results[loss_name] = {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        # Accuracy on the validation split after the last epoch.
        'val_accuracy': history.history['val_accuracy'][-1]
    }

    print(f"  Accuracy: {test_acc:.4f}")

print("Estudio de funciones de pérdida completado")

In [None]:
# Overview figure: one panel per hyperparameter study, all reporting accuracy.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Estudio de Hiperparámetros - MNIST', fontsize=16)


def _plot_accuracy_curve(ax, results, fmt, xlabel, title, log_base=None):
    """Plot the 'test_accuracy' of every entry in *results* against its key.

    Args:
        ax: Matplotlib axes to draw on.
        results: Mapping from hyperparameter value to a dict holding 'test_accuracy'.
        fmt: Matplotlib format string for the line and markers.
        xlabel: Label of the x axis.
        title: Panel title.
        log_base: When given, the x axis uses a logarithmic scale with this base.
    """
    x_values = list(results)
    y_values = [results[x]['test_accuracy'] for x in x_values]
    ax.plot(x_values, y_values, fmt, linewidth=2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Test Accuracy')
    ax.set_title(title)
    if log_base is not None:
        ax.set_xscale('log', base=log_base)
    ax.grid(True, alpha=0.3)


# 1-4. Single-curve panels. Going through the helper also avoids rebinding the
# study cells' loop variables (neurons, epochs, lr) to unrelated values.
_plot_accuracy_curve(axes[0, 0], neuron_results, 'bo-', 'Neuronas Ocultas', 'Neuronas vs Accuracy')
_plot_accuracy_curve(axes[0, 1], epochs_results, 'ro-', 'Épocas', 'Épocas vs Accuracy')
_plot_accuracy_curve(axes[0, 2], batch_results, 'go-', 'Batch Size', 'Batch Size vs Accuracy', log_base=2)
_plot_accuracy_curve(axes[1, 0], lr_results, 'mo-', 'Learning Rate', 'Learning Rate vs Accuracy', log_base=10)

# 5. Validation split: test and validation accuracy on the same axes.
ax = axes[1, 1]
split_fractions = list(val_results)
vals = [fraction * 100 for fraction in split_fractions]
test_accs = [val_results[fraction]['test_accuracy'] for fraction in split_fractions]
val_accs = [val_results[fraction]['val_accuracy'] for fraction in split_fractions]
ax.plot(vals, test_accs, 'co-', linewidth=2, label='Test')
ax.plot(vals, val_accs, 'yo-', linewidth=2, label='Validación')
ax.set_xlabel('% Validación')
ax.set_ylabel('Accuracy')
ax.set_title('% Validación vs Accuracy')
ax.legend()
ax.grid(True, alpha=0.3)

# 6. Loss functions: one bar per loss that was actually evaluated.
ax = axes[1, 2]
loss_names = list(loss_results)
accs = [loss_results[name]['test_accuracy'] for name in loss_names]
bars = ax.bar(range(len(loss_names)), accs, color=['skyblue', 'lightcoral'], alpha=0.8)
ax.set_xlabel('Función de Pérdida')
ax.set_ylabel('Test Accuracy')
ax.set_title('Loss Function vs Accuracy')
ax.set_xticks(range(len(loss_names)))
# Derive the tick labels from the plotted keys so they can never get out of
# step with the bars; unknown losses fall back to their full name.
short_loss_labels = {
    'categorical_crossentropy': 'Categorical',
    'sparse_categorical_crossentropy': 'Sparse',
}
ax.set_xticklabels([short_loss_labels.get(name, name) for name in loss_names], rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# EVEN/ODD CLASSIFICATION
print("\n=== CLASIFICACIÓN PAR/IMPAR ===")

# Parity targets: digit modulo 2, so 0 marks an even digit and 1 an odd one.
y_train_par_impar, y_test_par_impar = (
    (digits % 2).astype('int32') for digits in (y_train, y_test)
)

# One-hot versions of the parity targets over the 2 classes.
y_train_par_impar_cat, y_test_par_impar_cat = (
    tf.keras.utils.to_categorical(parity, num_classes=2)
    for parity in (y_train_par_impar, y_test_par_impar)
)

print("Labels transformadas: 0=par, 1=impar")
print(
    "Distribución: "
    f"Pares={np.sum(y_train_par_impar == 0)}, "
    f"Impares={np.sum(y_train_par_impar == 1)}"
)

# Factory for the two-hidden-layer MLP used for even/odd classification.
def create_par_impar_model(hidden_neurons=128, learning_rate=0.001, dropout_rate=0.3, input_dim=784):
    """Build and compile a two-hidden-layer MLP with a 2-unit softmax output.

    Args:
        hidden_neurons: Number of units in the first ReLU hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        dropout_rate: Dropout rate after the first hidden layer; the second
            hidden layer uses half of it.
        input_dim: Length of each flattened input vector.

    Returns:
        The compiled ``tf.keras.Sequential`` model, trained with categorical
        cross-entropy and tracking the 'accuracy' metric.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input layer: passing input_shape to
        # the first Dense layer triggers a warning under Keras 3, while this
        # form is accepted by both old and new Keras.
        tf.keras.Input(shape=(input_dim,)),
        tf.keras.layers.Dense(hidden_neurons, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate / 2),
        tf.keras.layers.Dense(2, activation='softmax'),  # 2 classes
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

print("Modelo par/impar definido")

In [None]:
# Train several even/odd configurations and keep the one with the best test accuracy.
par_impar_configs = [
    {'neurons': 256, 'lr': 0.001, 'dropout': 0.2, 'epochs': 15},
    {'neurons': 512, 'lr': 0.0005, 'dropout': 0.3, 'epochs': 20},
    {'neurons': 128, 'lr': 0.002, 'dropout': 0.25, 'epochs': 12},
    {'neurons': 384, 'lr': 0.0008, 'dropout': 0.35, 'epochs': 18}
]

par_impar_results = {}

print("Entrenando configuraciones para par/impar:")

for i, config in enumerate(par_impar_configs):
    config_name = f"ParImpar_{i+1}"
    print(f"\n{config_name}: {config}")

    # Restart both RNGs so every configuration is trained from the same seed
    # and re-running the cell reproduces the numbers. The Keras session is not
    # cleared here because the trained models are kept in par_impar_results.
    tf.random.set_seed(42)
    np.random.seed(42)

    model = create_par_impar_model(
        hidden_neurons=config['neurons'],
        learning_rate=config['lr'],
        dropout_rate=config['dropout']
    )

    # Callbacks carry state between epochs, so build fresh ones for every model.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
    ]

    start_time = time.time()
    history = model.fit(
        x_train_flat, y_train_par_impar_cat,
        epochs=config['epochs'],
        batch_size=128,
        validation_split=0.15,
        callbacks=callbacks,
        verbose=0
    )
    training_time = time.time() - start_time

    test_loss, test_acc = model.evaluate(x_test_flat, y_test_par_impar_cat, verbose=0)

    # Early stopping may end training before config['epochs'] is reached, so
    # record how many epochs were actually run.
    epochs_run = len(history.history['loss'])

    par_impar_results[config_name] = {
        'config': config,
        'model': model,
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        'training_time': training_time,
        'epochs_run': epochs_run,
        'best_val_accuracy': max(history.history['val_accuracy']),
        'history': history.history
    }

    print(f"  Accuracy: {test_acc*100:.3f}%, Tiempo: {training_time:.2f}s, "
          f"Épocas: {epochs_run}/{config['epochs']}")

# Best even/odd model, ranked by test accuracy.
best_config_name = max(par_impar_results, key=lambda name: par_impar_results[name]['test_accuracy'])
best_par_impar_model = par_impar_results[best_config_name]['model']
best_par_impar_acc = par_impar_results[best_config_name]['test_accuracy']

print(f"\nMejor modelo par/impar: {best_config_name}")
print(f"Accuracy: {best_par_impar_acc*100:.3f}%")

In [None]:
# Detailed evaluation of the best even/odd model on the test set.
y_pred_par_impar = best_par_impar_model.predict(x_test_flat, verbose=0)
# Predicted class = index of the larger of the two softmax outputs.
y_pred_par_impar_classes = y_pred_par_impar.argmax(axis=1)

# Confusion matrix of true parity against predicted parity.
class_names = ['PAR', 'IMPAR']
cm_par_impar = confusion_matrix(y_test_par_impar, y_pred_par_impar_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm_par_impar,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title(f'Matriz de Confusión - Par/Impar\nAccuracy: {best_par_impar_acc*100:.2f}%')
plt.ylabel('Clase Real')
plt.xlabel('Predicción')
plt.show()

# Per-class precision / recall / F1 report.
print("\nReporte de clasificación par/impar:")
report = classification_report(
    y_test_par_impar,
    y_pred_par_impar_classes,
    target_names=class_names,
)
print(report)

# Parity accuracy broken down by the original digit.
print("\nAnálisis por dígito original:")
is_correct = y_pred_par_impar_classes == y_test_par_impar
for digit in range(10):
    accuracy = is_correct[y_test == digit].mean()
    if digit % 2 == 0:
        expected = 'PAR'
    else:
        expected = 'IMPAR'
    print(f"  Dígito {digit} ({expected}): {accuracy*100:.2f}% accuracy")

In [None]:
# Final comparison table.
print("\nTABLA COMPARATIVA FINAL:")
print("-" * 70)


def _best_by_test_accuracy(results):
    """Return the key of *results* whose entry has the highest 'test_accuracy'."""
    return max(results, key=lambda key: results[key]['test_accuracy'])


# Best value of each hyperparameter, taken from its own study.
# NOTE(review): each hyperparameter was tuned on its own, and the ranking uses
# test accuracy, so the final score printed below is optimistic. Ranking on
# validation accuracy would avoid tuning on the test set.
best_neurons = _best_by_test_accuracy(neuron_results)
best_epochs = _best_by_test_accuracy(epochs_results)
best_batch = _best_by_test_accuracy(batch_results)
best_lr = _best_by_test_accuracy(lr_results)
best_val = _best_by_test_accuracy(val_results)

# Show the selected configuration: this is the table the header announces.
selected_hyperparameters = [
    ('Neuronas ocultas', best_neurons),
    ('Épocas', best_epochs),
    ('Batch size', best_batch),
    ('Learning rate', best_lr),
    ('% Validación', f"{best_val*100:.0f}%"),
]
for label, value in selected_hyperparameters:
    print(f"{label:<20}{value}")
print("-" * 70)

# Restart both RNGs so the final model is reproducible when the cell is re-run.
tf.random.set_seed(42)
np.random.seed(42)

# Build the final 10-digit model from the selected hyperparameters.
model_final_10 = create_mlp_model(
    hidden_neurons=best_neurons,
    learning_rate=best_lr
)

history_final = model_final_10.fit(
    x_train_flat, y_train_cat,
    epochs=best_epochs,
    batch_size=best_batch,
    validation_split=best_val,
    verbose=0
)

test_loss_final_10, test_acc_final_10 = model_final_10.evaluate(x_test_flat, y_test_cat, verbose=0)

print(f"Modelo optimizado 10-dígitos: {test_acc_final_10*100:.3f}%")
print(f"Modelo optimizado par/impar: {best_par_impar_acc*100:.3f}%")

# The two models solve different tasks (10 classes vs. 2), so this gap is
# descriptive, not a like-for-like improvement.
improvement = (best_par_impar_acc - test_acc_final_10) * 100
print(f"\nDiferencia: {improvement:+.2f} puntos porcentuales")