# Práctica 4: Transfer Learning para MNIST

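Uso de modelos preentrenados en ImageNet (`tf.keras.applications`: VGG16, ResNet50 y MobileNetV2) como extractores de características congelados para clasificar MNIST, y comparación con una CNN entrenada desde cero como baseline.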

In [None]:
# Imports y configuración
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import VGG16, ResNet50, MobileNetV2
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_pre
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_pre
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_pre
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
import time
# Seed the NumPy and TensorFlow global random generators with the same value.
np.random.seed(42)
tf.random.set_seed(42)

# Report the TensorFlow version and the GPU devices visible to this kernel.
print(f'TF: {tf.__version__} | GPU: {tf.config.list_physical_devices("GPU")}')

In [None]:
# Load MNIST, scale pixel values by 1/255, replicate the single grayscale
# channel three times to get RGB-shaped arrays, and one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Stacking the same array three times along a new trailing axis yields
# identical R, G and B channels.
x_train_rgb = np.stack((x_train,) * 3, axis=-1)
x_test_rgb = np.stack((x_test,) * 3, axis=-1)

# Ten classes: digits 0-9.
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

print(x_train_rgb.shape, y_train_cat.shape)

In [None]:
# Generadores de datasets por tamaño y preprocess
def make_dataset(x, size, pre_fn, input_scale=255.0, chunk_size=2048):
    """Resize images and run them through a Keras ``preprocess_input`` function.

    The Keras application ``preprocess_input`` helpers expect pixel values in
    the range [0, 255]. The images in this notebook are divided by 255 when
    they are loaded, so they are multiplied by ``input_scale`` again before
    ``pre_fn`` is applied; otherwise the mean subtraction / [-1, 1] scaling
    performed by ``pre_fn`` would be applied to values in [0, 1].

    Args:
        x: Array of images, normally 4-D ``(n, height, width, channels)``
            with values in [0, 1].
        size: ``(new_height, new_width)`` passed to ``tf.image.resize``.
        pre_fn: Callable applied to the resized, rescaled float array
            (e.g. ``vgg_pre`` or ``mobilenet_pre``).
        input_scale: Factor that maps the values of ``x`` onto the range
            ``pre_fn`` expects. Use ``1.0`` if ``x`` is already in [0, 255].
        chunk_size: Maximum number of images resized per ``tf.image.resize``
            call, so that only one chunk of resized images is held as a tensor
            at a time. ``None`` or a value < 1 disables chunking.

    Returns:
        NumPy array with the resized and preprocessed images.

    Note:
        The returned array still holds every image in host memory
        (224 * 224 * 3 float32 values are about 0.6 MB per image).
    """
    x = np.asarray(x)

    def _convert(batch):
        # Bilinear resizing is linear, so rescaling after the resize gives the
        # same values as rescaling before it, on a freshly allocated array.
        resized = tf.image.resize(batch, size).numpy()
        return pre_fn(resized * input_scale)

    # Single images (3-D input), small inputs, or disabled chunking: one shot.
    if x.ndim != 4 or not chunk_size or chunk_size < 1 or len(x) <= chunk_size:
        return _convert(x)

    # Convert the first chunk to learn the output shape/dtype, then fill a
    # preallocated array chunk by chunk.
    first = _convert(x[:chunk_size])
    out = np.empty((len(x),) + tuple(first.shape[1:]), dtype=first.dtype)
    out[:chunk_size] = first
    for start in range(chunk_size, len(x), chunk_size):
        stop = start + chunk_size
        out[start:stop] = _convert(x[start:stop])
    return out

# Materialise one train/test pair of image arrays per input format.
# Each pair is produced in train-then-test order.

# 224x224 arrays processed with vgg_pre (the variable names indicate they are
# shared by the VGG16 and ResNet50 models).
x_vgg_resnet_train, x_vgg_resnet_test = (
    make_dataset(images, (224, 224), vgg_pre)
    for images in (x_train_rgb, x_test_rgb)
)

# 96x96 arrays processed with mobilenet_pre.
x_mnv2_train, x_mnv2_test = (
    make_dataset(images, (96, 96), mobilenet_pre)
    for images in (x_train_rgb, x_test_rgb)
)

# 32x32 arrays for the baseline CNN: resized only, no preprocess function.
x_cnn_train, x_cnn_test = (
    tf.image.resize(images, (32, 32)).numpy()
    for images in (x_train_rgb, x_test_rgb)
)

print('Datasets preparados')

In [None]:
# Modelos
def model_vgg16():
    """Build and compile a 10-class classifier on a frozen ImageNet VGG16.

    Returns:
        Compiled ``Sequential`` model: VGG16 without its top (224x224x3 input,
        not trainable) -> global average pooling -> Dense(256, relu) ->
        Dropout(0.3) -> Dense(10, softmax). Compiled with Adam (learning rate
        1e-4), categorical cross-entropy loss and the accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # freeze the backbone; only the new head trains

    stack = [
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ]
    m = models.Sequential(stack)
    m.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return m

def model_resnet50():
    """Build and compile a 10-class classifier on a frozen ImageNet ResNet50.

    Returns:
        Compiled ``Sequential`` model: ResNet50 without its top (224x224x3
        input, not trainable) -> global average pooling -> Dense(256, relu) ->
        BatchNormalization -> Dropout(0.3) -> Dense(10, softmax). Compiled
        with Adam (learning rate 1e-4), categorical cross-entropy loss and the
        accuracy metric.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    backbone.trainable = False  # freeze the backbone; only the new head trains

    stack = [
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Dense(10, activation='softmax'),
    ]
    m = models.Sequential(stack)
    m.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return m

def model_mobilenetv2():
    """Build and compile a 10-class classifier on a frozen ImageNet MobileNetV2.

    Returns:
        Compiled ``Sequential`` model: MobileNetV2 without its top (96x96x3
        input, not trainable) -> global average pooling -> Dense(128, relu) ->
        Dropout(0.2) -> Dense(10, softmax). Compiled with Adam (learning rate
        1e-4), categorical cross-entropy loss and the accuracy metric.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=(96, 96, 3))
    backbone.trainable = False  # freeze the backbone; only the new head trains

    stack = [
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(10, activation='softmax'),
    ]
    m = models.Sequential(stack)
    m.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return m

def model_cnn_baseline():
    """Build and compile the small CNN trained from scratch as a baseline.

    Returns:
        Compiled ``Sequential`` model for 32x32x3 inputs: three 3x3 'same'
        convolutions (32, 64, 128 filters, relu) with max pooling after the
        first two -> Flatten -> Dense(256, relu) -> Dropout(0.5) ->
        Dense(10, softmax). Compiled with Adam (learning rate 1e-3),
        categorical cross-entropy loss and the accuracy metric.
    """
    stack = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3), padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D(),
        # No pooling after the last convolution; its output is flattened directly.
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax'),
    ]
    m = models.Sequential(stack)
    m.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return m

# Confirmation message printed once the model builder functions above are defined.
print('Modelos definidos')

In [None]:
# Entrenamiento
# Training hyperparameters shared by every model.
EPOCHS = 8
BATCH_SIZE = 128
VAL_SPLIT = 0.1  # fraction of the training data passed as validation_split

# One entry per experiment:
# (display name, model builder, training images, test images).
models_cfg = [
    ('VGG16_Transfer', model_vgg16, x_vgg_resnet_train, x_vgg_resnet_test),
    ('ResNet50_Transfer', model_resnet50, x_vgg_resnet_train, x_vgg_resnet_test),
    ('MobileNetV2_Transfer', model_mobilenetv2, x_mnv2_train, x_mnv2_test),
    ('CNN_Baseline', model_cnn_baseline, x_cnn_train, x_cnn_test),
]

results = {}    # name -> trained model plus test metrics and parameter count
histories = {}  # name -> per-epoch metrics recorded by fit()

for name, fn, Xtr, Xte in models_cfg:
    print(f'\nEntrenando {name}...')
    m = fn()
    h = m.fit(
        Xtr,
        y_train_cat,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VAL_SPLIT,
        verbose=1,
    )
    # Every model is compiled with a single metric (accuracy), so evaluate()
    # returns the loss followed by the accuracy.
    tl, ta = m.evaluate(Xte, y_test_cat, verbose=0)
    results[name] = {
        'model': m,
        'test_acc': ta,
        'test_loss': tl,
        'params': m.count_params(),
    }
    histories[name] = h.history
    print(f'  Test Acc: {ta:.4f} | Loss: {tl:.4f} | Params: {m.count_params():,}')

In [None]:
# Tabla y visualización básica
# Summary table: one row per trained model, best test accuracy first.
# Rows are ranked on the raw float accuracy with a stable sort and only then
# formatted as strings, so the order never depends on comparing rounded
# 4-decimal strings (which ties models whose accuracies differ further out).
df = (
    pd.DataFrame(
        [
            {
                'Modelo': k,
                'Test Acc': v['test_acc'],
                'Test Loss': v['test_loss'],
                'Parámetros': v['params'],
            }
            for k, v in results.items()
        ],
        # Explicit columns keep the frame well-formed even if `results` is empty.
        columns=['Modelo', 'Test Acc', 'Test Loss', 'Parámetros'],
    )
    .sort_values('Test Acc', ascending=False, kind='stable')
    .assign(**{
        'Test Acc': lambda d: d['Test Acc'].map('{:.4f}'.format),
        'Test Loss': lambda d: d['Test Loss'].map('{:.4f}'.format),
        'Parámetros': lambda d: d['Parámetros'].map('{:,}'.format),
    })
)
print(df.to_string(index=False))

# Validation accuracy per epoch, one line per model.
fig, ax = plt.subplots(figsize=(10, 5))
for name, h in histories.items():
    ax.plot(h['val_accuracy'], label=name)
ax.set_title('Val Accuracy')
ax.set_xlabel('Época')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()