In [None]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import LinearLR
import kagglehub

# --- Hyperparameters --------------------------------------------------------
SEED = 42
EPOCHS = 20
BATCH_SIZE = 64
LEARNING_RATE = 0.0004
IMG_SIZE = (48, 48)

# --- Dataset location -------------------------------------------------------
# kagglehub downloads the dataset and hands back the local directory it lives in.
DATA_DIR = kagglehub.dataset_download("pankaj4321/fer-2013-facial-expression-dataset")
print("Usando dataset en:", DATA_DIR)

# --- Reproducibility and hardware -------------------------------------------
torch.manual_seed(SEED)  # seed torch's global RNG
# Use the GPU whenever one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Image preprocessing ----------------------------------------------------
# Replicate the grayscale image into 3 channels (what ResNet expects), resize,
# convert to a tensor and shift/scale every channel with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])


def _split_dataset(split_name):
    """Return an ImageFolder over DATA_DIR/<split_name> using the shared transform."""
    return datasets.ImageFolder(root=os.path.join(DATA_DIR, split_name), transform=transform)


# --- Train / validation / test datasets -------------------------------------
train_dataset = _split_dataset('train')
val_dataset = _split_dataset('val')
test_dataset = _split_dataset('test')

print("Clases detectadas en train_dataset:", train_dataset.classes)
print("Número de clases:", len(train_dataset.classes))


# --- Batched loaders --------------------------------------------------------
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=False)
    for split in (val_dataset, test_dataset)
)

# Transfer learning with ResNet-18: load the ImageNet checkpoint and replace the
# final fully connected layer with one sized to this dataset's classes.
# `weights=` supersedes the deprecated `pretrained=True` flag; IMAGENET1K_V1 is
# the checkpoint that `pretrained=True` used to load.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.999), eps=1e-08)

# Linear warm-up (not a decay): the learning rate starts at 0.1 * LEARNING_RATE
# and rises linearly to LEARNING_RATE over the first 10% of the epochs, then
# stays there. max(1, ...) guards against a zero-length warm-up when EPOCHS < 10,
# which would leave the learning rate pinned at the start factor.
WARMUP_EPOCHS = max(1, int(EPOCHS * 0.1))
scheduler = LinearLR(optimizer, start_factor=0.1, total_iters=WARMUP_EPOCHS)

# Per-epoch metric history
train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []

# Model evaluation helper
def evaluate(model, loader, criterion, device=None):
    """Run `model` over `loader` without gradients and summarise its performance.

    Args:
        model: Network to evaluate. It is switched to eval mode and left there,
            so callers that keep training must call `model.train()` again.
        loader: Iterable yielding `(images, labels)` batches.
        criterion: Loss callable applied to `(outputs, labels)`. It is assumed
            to return the mean loss over the batch (the default reduction of
            `nn.CrossEntropyLoss`); each batch is weighted by its size.
        device: Device the batches are moved to before the forward pass.
            Defaults to the device of the model's first parameter (CPU for a
            model without parameters).

    Returns:
        Tuple `(loss, accuracy, all_labels, all_preds)` where `loss` is the mean
        per-sample loss, `accuracy` is a percentage in [0, 100], and the two
        lists hold the true and predicted class indices in loader order.

    Raises:
        ValueError: If `loader` yields no samples.
    """
    if device is None:
        # Inputs must live where the model lives; infer it instead of relying
        # on a module-level variable.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_samples = labels.size(0)
            # Weight the batch mean by the batch size so that a smaller final
            # batch does not skew the dataset-level average.
            running_loss += loss.item() * batch_samples
            _, predicted = torch.max(outputs, 1)
            total += batch_samples
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    if total == 0:
        raise ValueError("evaluate() received a loader with no samples")

    accuracy = 100 * correct / total
    loss = running_loss / total
    return loss, accuracy, all_labels, all_preds

# Training loop: one optimisation pass over train_loader per epoch, followed by
# a validation pass; metrics are appended to the history lists.
for epoch in range(EPOCHS):
    model.train()  # evaluate() leaves the model in eval mode, so re-enable training mode
    running_loss = 0.0
    correct = 0
    total = 0
    epoch_start_time = time.time()  # wall-clock timer for this epoch

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_samples = labels.size(0)
        # criterion returns the batch mean; weight it by the batch size so the
        # (smaller) last batch does not skew the epoch average.
        running_loss += loss.item() * batch_samples
        _, predicted = torch.max(outputs, 1)
        total += batch_samples
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    epoch_loss = running_loss / total  # mean loss per training sample
    val_loss, val_accuracy, _, _ = evaluate(model, val_loader, criterion)
    epoch_time = time.time() - epoch_start_time  # includes the validation pass

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

    scheduler.step()  # advance the learning-rate schedule once per epoch

    # Per-epoch statistics
    print(f"Epoch [{epoch + 1}/{EPOCHS}], Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%, "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, Time: {epoch_time:.2f}s")

# Final evaluation on the held-out test set
test_loss, test_accuracy, test_labels, test_preds = evaluate(model, test_loader, criterion)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")


  from .autonotebook import tqdm as notebook_tqdm


Usando dataset en: C:\Users\laura\.cache\kagglehub\datasets\pankaj4321\fer-2013-facial-expression-dataset\versions\1
Clases detectadas en train_dataset: ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']
Número de clases: 7




Epoch [1/20], Train Loss: 1.5524, Train Accuracy: 41.14%, Val Loss: 1.3102, Val Accuracy: 50.71%, Time: 111.52s
Epoch [2/20], Train Loss: 1.2222, Train Accuracy: 53.92%, Val Loss: 1.1224, Val Accuracy: 58.40%, Time: 105.69s
Epoch [3/20], Train Loss: 1.0436, Train Accuracy: 60.96%, Val Loss: 1.0891, Val Accuracy: 59.35%, Time: 176.49s
Epoch [4/20], Train Loss: 0.8612, Train Accuracy: 67.95%, Val Loss: 1.0988, Val Accuracy: 60.24%, Time: 140.63s
Epoch [5/20], Train Loss: 0.7152, Train Accuracy: 73.54%, Val Loss: 1.1456, Val Accuracy: 60.99%, Time: 111.36s
Epoch [6/20], Train Loss: 0.5880, Train Accuracy: 78.49%, Val Loss: 1.2017, Val Accuracy: 61.80%, Time: 101.73s
Epoch [7/20], Train Loss: 0.4576, Train Accuracy: 83.57%, Val Loss: 1.3689, Val Accuracy: 58.54%, Time: 100.17s
Epoch [8/20], Train Loss: 0.3703, Train Accuracy: 86.71%, Val Loss: 1.4569, Val Accuracy: 62.22%, Time: 90.78s
Epoch [9/20], Train Loss: 0.2929, Train Accuracy: 89.64%, Val Loss: 1.5751, Val Accuracy: 60.35%, Time: 9

In [None]:
import os
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
import kagglehub

# ==========================================
# 1. INITIAL CONFIGURATION
# ==========================================

# Hyperparameters
SEED = 42
BATCH_SIZE = 64
IMG_SIZE = (48, 48)
EPOCHS = 20
LEARNING_RATE = 0.0004

# Seed NumPy and TensorFlow for reproducibility
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Download the dataset (kagglehub hands back the local directory)
DATA_DIR = kagglehub.dataset_download("pankaj4321/fer-2013-facial-expression-dataset")
print("Usando dataset en:", DATA_DIR)

# Hardware report: Keras picks the device on its own, this is informational only
print("Dispositivos disponibles:", tf.config.list_physical_devices())

# ==========================================
# 2. CARGA DE DATOS (Data Pipeline)
# ==========================================

# Helper that loads one split of the dataset
def load_dataset(directory, subset_name):
    """Build a batched `tf.data` image dataset for one split.

    Args:
        directory: Sub-folder of DATA_DIR that holds the split.
        subset_name: Logical split name ('train', 'val' or 'test'). Only
            'train' is shuffled (seeded with SEED); 'val' selects the
            fallback folder used when `directory` is missing.

    Returns:
        The dataset produced by `image_dataset_from_directory`, with one-hot
        labels, 3-channel images of size IMG_SIZE and batches of BATCH_SIZE.
    """
    is_train = subset_name == 'train'
    full_path = os.path.join(DATA_DIR, directory)

    if not os.path.exists(full_path):
        print(f"Advertencia: No se encontró {full_path}, buscando rutas alternativas...")
        # Fallback for a different folder layout: validation borrows the
        # 'test' folder, every other split falls back to the dataset root.
        # NOTE(review): loading from the dataset root would infer the split
        # folders themselves as classes if they exist there — confirm intent.
        if subset_name == 'val':
            full_path = os.path.join(DATA_DIR, 'test')
        else:
            full_path = DATA_DIR

    loader_options = dict(
        labels='inferred',
        label_mode='categorical',  # one-hot labels, as categorical cross-entropy expects
        class_names=None,
        color_mode='rgb',          # 3-channel images
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE,
        shuffle=is_train,
        seed=SEED if is_train else None,
    )
    return tf.keras.utils.image_dataset_from_directory(full_path, **loader_options)

print("Cargando datos...")
train_ds = load_dataset('train', 'train')
val_ds  = load_dataset('val', 'val')  # Note: some FER2013 uploads on Kaggle name the validation split 'test'
test_ds = load_dataset('test', 'test')

# class_names must be read before cache()/prefetch(), which return plain
# tf.data datasets without that attribute.
class_names = train_ds.class_names
print("Clases detectadas:", class_names)
print("Número de clases:", len(class_names))

# Input pipeline tuning: cache decoded batches and overlap loading with training.
AUTOTUNE = tf.data.AUTOTUNE

# cache() replays elements in the order they were first produced, so the
# shuffle done inside load_dataset() (upstream of the cache) would only take
# effect in the first epoch and every later epoch would see identical batches
# in an identical order. Re-shuffle downstream of the cache so the order
# changes each epoch. The dataset is already batched, so this permutes batches.
SHUFFLE_BUFFER_BATCHES = 1000
train_ds = (
    train_ds
    .cache()
    .shuffle(SHUFFLE_BUFFER_BATCHES, seed=SEED, reshuffle_each_iteration=True)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# ==========================================
# 3. DEFINICIÓN DEL MODELO (Transfer Learning)
# ==========================================

def build_model(num_classes, input_shape=None):
    """Build a fully trainable ResNet50V2 classifier with a softmax head.

    Args:
        num_classes: Number of output classes (units of the final Dense layer).
        input_shape: `(height, width, channels)` of the input images. Defaults
            to `(*IMG_SIZE, 3)` so the model always matches the size the data
            pipeline resizes images to.

    Returns:
        An uncompiled `tf.keras` functional model named "ResNet_Transfer".
    """
    if input_shape is None:
        # Derive the shape from the pipeline setting instead of hard-coding it,
        # so changing IMG_SIZE cannot desynchronise data and model.
        input_shape = (*IMG_SIZE, 3)

    inputs = tf.keras.Input(shape=input_shape)

    # 1. Normalisation matching the PyTorch Normalize(mean=0.5, std=0.5):
    #    pixel values in [0, 255] are mapped to [-1, 1].
    x = layers.Rescaling(scale=1./127.5, offset=-1)(inputs)

    # 2. Pre-trained backbone (ResNet50V2 is used here as the Keras stand-in
    #    for ResNet18). include_top=False drops the ImageNet classifier head.
    base_model = tf.keras.applications.ResNet50V2(
        include_top=False,
        weights='imagenet',
        input_tensor=x
    )

    # Fine-tune every backbone layer, as the PyTorch version does.
    base_model.trainable = True

    # 3. Classification head
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)  # collapse the spatial feature map to a vector
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs, name="ResNet_Transfer")
    return model

# One output unit per class detected in the training folder
model = build_model(num_classes=len(class_names))

# ==========================================
# 4. COMPILATION AND SCHEDULER
# ==========================================

# Loss: categorical cross-entropy over the one-hot labels
loss_fn = 'categorical_crossentropy'

# Adam with the same hyperparameters as the PyTorch optimizer
adam_settings = {
    'learning_rate': LEARNING_RATE,
    'beta_1': 0.9,
    'beta_2': 0.999,
    'epsilon': 1e-08,
}
optimizer = optimizers.Adam(**adam_settings)

model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

# Scheduler: Keras counterpart of the PyTorch LinearLR warm-up used in the
# first cell (start_factor=0.1, total_iters = 10% of the epochs). The learning
# rate starts at 10% of its target value, rises linearly during the warm-up
# epochs and then stays at the target value.
def lr_scheduler(epoch, lr, base_lr=None, warmup_epochs=None, start_factor=0.1):
    """Return the learning rate for `epoch` under a linear warm-up.

    Args:
        epoch: Zero-based epoch index supplied by `LearningRateScheduler`.
        lr: Current optimizer learning rate supplied by Keras. It is ignored:
            the schedule is computed from `base_lr`, so it does not drift with
            whatever value the optimizer currently holds.
        base_lr: Target learning rate reached after the warm-up. Defaults to
            LEARNING_RATE.
        warmup_epochs: Number of warm-up epochs. Defaults to
            `int(EPOCHS * 0.1)`.
        start_factor: Fraction of `base_lr` used at epoch 0.

    Returns:
        The learning rate as a Python float.
    """
    if base_lr is None:
        base_lr = LEARNING_RATE
    if warmup_epochs is None:
        warmup_epochs = int(EPOCHS * 0.1)

    # No warm-up configured, or warm-up already finished: use the target rate.
    if warmup_epochs <= 0 or epoch >= warmup_epochs:
        return float(base_lr)

    # Interpolate the factor linearly from start_factor (epoch 0) to 1.0.
    progress = epoch / warmup_epochs
    return float(base_lr * (start_factor + (1.0 - start_factor) * progress))

# Wrap the schedule function so Keras calls it at the start of every epoch
scheduler_cb = callbacks.LearningRateScheduler(lr_scheduler)

# ==========================================
# 5. TRAINING
# ==========================================

# Time the whole fit() call (training plus per-epoch validation)
start_time = time.time()
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS,
                    callbacks=[scheduler_cb], verbose=1)
total_time = time.time() - start_time

print(f"Tiempo total de entrenamiento: {total_time:.2f}s")

# ==========================================
# 6. EVALUATION
# ==========================================

print("\nEvaluando en conjunto de prueba...")
# evaluate() yields the loss followed by the compiled metrics (accuracy only)
test_metrics = model.evaluate(test_ds)
test_loss, test_acc = test_metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc*100:.2f}%")

# To recover predictions and labels (as the PyTorch evaluate function returns them):
# y_pred = model.predict(test_ds)
# y_pred_classes = np.argmax(y_pred, axis=1)
# y_true = np.concatenate([y for x, y in test_ds], axis=0)
# y_true_classes = np.argmax(y_true, axis=1)