
# IFCD104 – Tema 3: Overfitting y Regularización (Dropout, L2, Data Augmentation)

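**Objetivo:** mostrar el **sobreajuste** de una CNN pequeña entrenada con un subconjunto reducido de MNIST y cómo mitigarlo con **Dropout**, **regularización L2**, **BatchNorm** y **aumento de datos**, comparando la *accuracy* de validación y de test de cada variante.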


In [1]:

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers


## 1) Datos y subconjunto para inducir overfitting

In [2]:

# Seed the Python, NumPy and TensorFlow RNGs in one call so that a fresh
# "Restart & Run All" repeats the same weight initialisation, batch shuffling,
# dropout masks and random augmentations.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 42
keras.utils.set_random_seed(SEED)

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# A deliberately small training subset makes it easy for the CNN to overfit.
n_small = 5000
# The validation split is the next n_val samples, so it never overlaps the
# training subset.
n_val = 5000
assert len(x_train) >= n_small + n_val, "not enough samples for train + val split"


def _to_model_input(images):
    """Cast images to float32, scale them by 1/255 and append a channel axis."""
    return np.expand_dims(images.astype("float32") / 255.0, -1)


x_train_small = _to_model_input(x_train[:n_small])
y_train_small = y_train[:n_small]
x_val = _to_model_input(x_train[n_small:n_small + n_val])
y_val = y_train[n_small:n_small + n_val]

x_test = _to_model_input(x_test)

print("train_small:", x_train_small.shape, " val:", x_val.shape, " test:", x_test.shape)


train_small: (5000, 28, 28, 1)  val: (5000, 28, 28, 1)  test: (10000, 28, 28, 1)


## 2) Modelo base (tendrá tendencia a sobreajustar)

In [6]:

def make_base():
    """Build and compile the baseline CNN, which has no regularisation.

    Architecture: two (3x3 'same' conv + ReLU -> max-pool) stages with 32 and
    64 filters, then Flatten -> Dense(128, ReLU) -> Dense(10, softmax).

    Returns:
        A compiled ``keras.Model`` named "cnn_base" that takes (28, 28, 1)
        inputs, uses Adam with sparse categorical cross-entropy and tracks
        accuracy.
    """
    inputs = keras.Input(shape=(28, 28, 1))

    # Convolutional feature extractor: one conv + pool stage per filter count.
    features = inputs
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu', padding='same')(features)
        features = layers.MaxPooling2D()(features)

    # Classification head.
    flat = layers.Flatten()(features)
    hidden = layers.Dense(128, activation='relu')(flat)
    outputs = layers.Dense(10, activation='softmax')(hidden)

    model = keras.Model(inputs, outputs, name="cnn_base")
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Train the baseline on the small subset; validation metrics are recorded
# after every epoch in the returned History object.
base = make_base()
h_base = base.fit(
    x_train_small,
    y_train_small,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=0,
)

# Report the validation accuracy reached after the last epoch.
base_val_curve = h_base.history['val_accuracy']
print("Base -> val_acc final:", base_val_curve[-1])


Base -> val_acc final: 0.9603999853134155


## 3) Regularización: Dropout + L2 + BatchNorm

In [7]:

def make_regularized():
    """Build and compile the CNN variant with Dropout, L2 and BatchNorm.

    Same layout as the baseline (conv stages with 32 and 64 filters, then a
    Dense(128) hidden layer and a 10-way softmax), with these additions:

    * an L2 kernel penalty of 1e-4 on both conv layers and the hidden dense
      layer (the output layer is not regularised);
    * BatchNormalization applied to each conv layer's ReLU output, before
      pooling;
    * Dropout of 0.2 and 0.3 after the two pooling layers and 0.4 after the
      hidden dense layer.

    Returns:
        A compiled ``keras.Model`` named "cnn_regularized" (Adam, sparse
        categorical cross-entropy, accuracy metric).
    """
    weight_decay = 1e-4
    inputs = keras.Input(shape=(28, 28, 1))

    # Each stage: conv (+ReLU, L2) -> batch norm -> max-pool -> dropout.
    features = inputs
    for n_filters, drop_rate in ((32, 0.2), (64, 0.3)):
        features = layers.Conv2D(
            n_filters, 3, activation='relu', padding='same',
            kernel_regularizer=regularizers.l2(weight_decay),
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D()(features)
        features = layers.Dropout(drop_rate)(features)

    # Classification head with the strongest dropout before the output layer.
    flat = layers.Flatten()(features)
    hidden = layers.Dense(
        128, activation='relu',
        kernel_regularizer=regularizers.l2(weight_decay),
    )(flat)
    hidden = layers.Dropout(0.4)(hidden)
    outputs = layers.Dense(10, activation='softmax')(hidden)

    model = keras.Model(inputs, outputs, name="cnn_regularized")
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Train the regularised model with the same data and schedule as the
# baseline so that the validation curves are directly comparable.
reg = make_regularized()
h_reg = reg.fit(
    x_train_small,
    y_train_small,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=0,
)

# Report the validation accuracy reached after the last epoch.
reg_val_curve = h_reg.history['val_accuracy']
print("Regularized -> val_acc final:", reg_val_curve[-1])


Regularized -> val_acc final: 0.8019999861717224


## 4) Aumento de datos con capas de preprocesado

In [None]:

# Random geometric perturbations, applied in this order: rotation, zoom, shift.
# Every factor is 0.1; the translation takes one factor per axis.
# NOTE(review): per the Keras docs these layers only perturb inputs in training
# mode and pass them through unchanged at inference - confirm for the installed
# Keras version.
augmentation_steps = [
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.1, 0.1),
]
data_augmentation = keras.Sequential(augmentation_steps)

def make_augmented():
    """Build and compile the baseline CNN preceded by the augmentation pipeline.

    The inputs first go through the module-level ``data_augmentation`` model;
    the rest of the network is the unregularised layout: two (3x3 'same'
    conv + ReLU -> max-pool) stages with 32 and 64 filters, then
    Flatten -> Dense(128, ReLU) -> Dense(10, softmax).

    Returns:
        A compiled ``keras.Model`` named "cnn_augmented" (Adam, sparse
        categorical cross-entropy, accuracy metric).
    """
    inputs = keras.Input(shape=(28, 28, 1))

    # Augmentation is part of the model graph, so it runs on every batch.
    features = data_augmentation(inputs)

    # Convolutional feature extractor: one conv + pool stage per filter count.
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation='relu', padding='same')(features)
        features = layers.MaxPooling2D()(features)

    # Classification head.
    flat = layers.Flatten()(features)
    hidden = layers.Dense(128, activation='relu')(flat)
    outputs = layers.Dense(10, activation='softmax')(hidden)

    model = keras.Model(inputs, outputs, name="cnn_augmented")
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Train the augmented model with the same data and schedule as the other
# two variants.
aug = make_augmented()
h_aug = aug.fit(
    x_train_small,
    y_train_small,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=0,
)

# Report the validation accuracy reached after the last epoch.
aug_val_curve = h_aug.history['val_accuracy']
print("Augmented -> val_acc final:", aug_val_curve[-1])


## 5) Curvas de validación: comparación

In [None]:

# One validation-accuracy curve per trained variant, drawn on a shared axis.
fig, ax = plt.subplots()
for history, label in ((h_base, 'Base'), (h_reg, 'Regularized'), (h_aug, 'Augmented')):
    ax.plot(history.history['val_accuracy'], label=label)
ax.set_xlabel("Época")
ax.set_ylabel("Accuracy validación")
ax.set_title("Comparación de validación")
ax.legend()
plt.show()


## 6) Evaluación en test

In [None]:

# Score every trained variant on the held-out test set. Each model was compiled
# with a single 'accuracy' metric, so evaluate() yields the loss and accuracy.
trained_models = (
    ('Base', base),
    ('Regularized', reg),
    ('Augmented', aug),
)
for name, model in trained_models:
    loss, acc = model.evaluate(x_test, y_test, verbose=0)
    print(f"{name:11s} -> test_acc={acc:.4f} | test_loss={loss:.4f}")
