# Laboratorio 9

andre marroquin 22266
sergio orellana 221122
nelson garcia 22434
joaquin puente 22296

## Arquitectura AlexNet para CIFAR-10

### Imports y configuración inicial

In [None]:
# imports principales
import os
import math
import time
from typing import Dict, Tuple

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# seed every random number generator the notebook relies on
def seed_everything(seed: int = 42, deterministic: bool = False) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Value handed to every generator.
        deterministic: When True, request deterministic cuDNN kernels and
            switch off the cuDNN autotuner. The default (False) keeps the
            speed-oriented settings this notebook has always used
            (autotuner on, non-deterministic kernels allowed), so GPU runs
            may still differ slightly between executions with the same seed.
    """
    import random
    import numpy as np

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # the two cuDNN switches are always set as opposites: the autotuner
    # (benchmark) is only enabled when determinism was not requested
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic


seed_everything(42)

# pick the compute device
def get_device() -> torch.device:
    """Return the preferred torch device: CUDA first, then MPS, then CPU."""
    # older torch builds have no `torch.backends.mps`, hence the getattr
    mps_backend = getattr(torch.backends, "mps", None)

    if torch.cuda.is_available():
        kind = "cuda"
    elif mps_backend and mps_backend.is_available():
        kind = "mps"
    else:
        kind = "cpu"
    return torch.device(kind)


device = get_device()
device

device(type='cpu')

### Funciones de entrenamiento y evaluación

In [None]:
# compute accuracy
def accuracy_from_logits(logits: torch.Tensor, targets: torch.Tensor) -> float:
    """Return the fraction of samples whose argmax over dim 1 matches the target.

    Args:
        logits: Scores with the class axis at dim 1.
        targets: Class indices compared element-wise against the argmax.

    Returns:
        Number of matches divided by ``targets.numel()``.
    """
    hits = logits.argmax(dim=1).eq(targets)
    return hits.sum().item() / targets.numel()

# build the confusion matrix
def confusion_matrix(num_classes: int, preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Count (target, prediction) pairs into a square matrix.

    Entry ``[t, p]`` is the number of elements whose target is ``t`` and
    whose prediction is ``p``. Both inputs are flattened first.

    Args:
        num_classes: Number of rows and columns of the result.
        preds: Predicted class indices, any shape.
        targets: True class indices, same number of elements as ``preds``.

    Returns:
        A CPU tensor of shape ``(num_classes, num_classes)`` and dtype
        ``torch.long``.

    Raises:
        ValueError: If ``preds`` and ``targets`` hold a different number of
            elements (previously the longer one was silently truncated).
        IndexError: If any label lies outside ``[0, num_classes)``
            (negative labels previously wrapped around silently).
    """
    flat_targets = targets.reshape(-1).to(device="cpu", dtype=torch.long)
    flat_preds = preds.reshape(-1).to(device="cpu", dtype=torch.long)

    if flat_targets.numel() != flat_preds.numel():
        raise ValueError(
            f"preds and targets must have the same number of elements, "
            f"got {flat_preds.numel()} and {flat_targets.numel()}"
        )
    if flat_targets.numel() == 0:
        return torch.zeros((num_classes, num_classes), dtype=torch.long)

    lowest = min(int(flat_targets.min()), int(flat_preds.min()))
    highest = max(int(flat_targets.max()), int(flat_preds.max()))
    if lowest < 0 or highest >= num_classes:
        raise IndexError(
            f"labels must lie in [0, {num_classes}), got range [{lowest}, {highest}]"
        )

    # encode each (target, pred) pair as one cell index of the flattened
    # matrix, then count all cells in a single vectorised pass
    cell_index = flat_targets * num_classes + flat_preds
    counts = torch.bincount(cell_index, minlength=num_classes * num_classes)
    return counts.reshape(num_classes, num_classes)

# macro-F1 computed from a confusion matrix
def macro_f1_from_confusion(cm: torch.Tensor) -> float:
    """Return the unweighted mean of the per-class F1 scores.

    The diagonal of ``cm`` is taken as true positives, column sums minus
    the diagonal as false positives and row sums minus the diagonal as
    false negatives. Denominators are clamped so empty classes yield an
    F1 of 0 instead of a division by zero.
    """
    counts = cm.to(torch.float32)
    true_pos = torch.diag(counts)
    false_pos = counts.sum(dim=0).sub(true_pos)
    false_neg = counts.sum(dim=1).sub(true_pos)

    precision = true_pos.div((true_pos + false_pos).clamp(min=1.0))
    recall = true_pos.div((true_pos + false_neg).clamp(min=1.0))

    denominator = (precision + recall).clamp(min=1e-12)
    per_class_f1 = 2.0 * precision * recall / denominator
    return per_class_f1.mean().item()

# training loop for a single epoch
def train_one_epoch(model: nn.Module,
                    loader: DataLoader,
                    criterion: nn.Module,
                    optimizer: optim.Optimizer,
                    device: torch.device,
                    log_every: int = 100) -> Dict[str, float]:
    """Run one optimisation pass over ``loader`` and return epoch metrics.

    Loss and accuracy are averaged over samples rather than over batches,
    so a shorter final batch does not carry the same weight as a full one.

    Args:
        model: Network to train; switched to train mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(logits, targets)``. Its
            ``reduction`` attribute (default assumed ``"mean"``) decides
            how the batch loss is converted into a per-sample total.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        log_every: Print running metrics every this many steps.

    Returns:
        Dict with ``train_loss`` and ``train_acc`` (per-sample means) and
        ``train_time_s`` (wall-clock seconds spent in the loop).
    """
    model.train()
    # a criterion built with reduction="sum" already returns a batch total
    loss_is_sum = getattr(criterion, "reduction", "mean") == "sum"
    loss_sum = 0.0
    correct = 0
    seen = 0

    t0 = time.time()
    for step, (x, y) in enumerate(loader, 1):
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        batch_size = y.numel()
        batch_loss = loss.item()
        # turn a batch-mean loss back into a batch total before accumulating
        loss_sum += batch_loss if loss_is_sum else batch_loss * batch_size
        correct += (logits.argmax(dim=1) == y).sum().item()
        seen += batch_size

        if step % log_every == 0:
            print(f"step {step:04d} | loss {loss_sum / max(seen, 1):.4f} | acc {correct / max(seen, 1):.4f}")

    dt = time.time() - t0
    return {
        "train_loss": loss_sum / max(seen, 1),
        "train_acc":  correct / max(seen, 1),
        "train_time_s": dt
    }

# full evaluation pass
@torch.no_grad()
def evaluate(model: nn.Module,
             loader: DataLoader,
             criterion: nn.Module,
             device: torch.device,
             num_classes: int) -> Dict[str, float]:
    """Evaluate ``model`` on every batch of ``loader`` without gradients.

    Loss and accuracy are averaged over samples rather than over batches,
    so a shorter final batch does not carry the same weight as a full one.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(logits, targets)``. Its
            ``reduction`` attribute (default assumed ``"mean"``) decides
            how the batch loss is converted into a per-sample total.
        device: Device the batches are moved to.
        num_classes: Size of the confusion matrix used for macro-F1.

    Returns:
        Dict with ``val_loss`` and ``val_acc`` (per-sample means) and
        ``val_macro_f1``.
    """
    model.eval()
    # a criterion built with reduction="sum" already returns a batch total
    loss_is_sum = getattr(criterion, "reduction", "mean") == "sum"
    loss_sum = 0.0
    correct = 0
    seen = 0
    cm = torch.zeros((num_classes, num_classes), dtype=torch.long)

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criterion(logits, y)
        preds = logits.argmax(dim=1)

        batch_size = y.numel()
        batch_loss = loss.item()
        # turn a batch-mean loss back into a batch total before accumulating
        loss_sum += batch_loss if loss_is_sum else batch_loss * batch_size
        correct += (preds == y).sum().item()
        seen += batch_size

        # the confusion matrix is accumulated on the CPU
        cm += confusion_matrix(num_classes, preds.cpu(), y.cpu())

    macro_f1 = macro_f1_from_confusion(cm)
    return {
        "val_loss": loss_sum / max(seen, 1),
        "val_acc":  correct / max(seen, 1),
        "val_macro_f1": macro_f1
    }

# multi-epoch training loop that keeps the model with the best validation accuracy
def fit(model: nn.Module,
        train_loader: DataLoader,
        val_loader: DataLoader,
        criterion: nn.Module,
        optimizer: optim.Optimizer,
        scheduler,
        device: torch.device,
        num_classes: int,
        epochs: int = 5,
        name: str = "model") -> Tuple[nn.Module, Dict[int, Dict[str, float]]]:
    """Train for ``epochs`` epochs and restore the best-scoring weights.

    Each epoch runs ``train_one_epoch`` followed by ``evaluate``, then steps
    ``scheduler`` (when it is not None) and prints both sets of metrics.

    Args:
        model: Network to train; its weights are modified in place.
        train_loader: Batches used for optimisation.
        val_loader: Batches used for the per-epoch evaluation.
        criterion: Loss passed to both training and evaluation.
        optimizer: Optimizer passed to ``train_one_epoch``.
        scheduler: Object with a no-argument ``step()``; may be None.
        device: Device passed to training and evaluation.
        num_classes: Passed to ``evaluate``.
        epochs: Number of epochs to run.
        name: Label used in the progress output.

    Returns:
        The model loaded with the weights of the epoch that reached the
        highest ``val_acc``, and a history dict mapping the 1-based epoch
        number to the merged training and validation metrics.
    """
    best_state = None
    best_acc = -1.0
    history: Dict[int, Dict[str, float]] = {}

    for epoch in range(1, epochs + 1):
        print(f"\n==> {name} | epoch {epoch}/{epochs}")
        train_stats = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_stats = evaluate(model, val_loader, criterion, device, num_classes)

        # the scheduler advances once per epoch, after validation
        if scheduler is not None:
            scheduler.step()

        print(f"train  | loss {train_stats['train_loss']:.4f} acc {train_stats['train_acc']:.4f} time {train_stats['train_time_s']:.1f}s")
        print(f"valid  | loss {val_stats['val_loss']:.4f} acc {val_stats['val_acc']:.4f} macro_f1 {val_stats['val_macro_f1']:.4f}")

        # update the best snapshot (strict improvement only); tensors are
        # cloned on the CPU so later training steps cannot overwrite them
        if val_stats["val_acc"] > best_acc:
            best_acc = val_stats["val_acc"]
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}

        history[epoch] = {**train_stats, **val_stats}

    # load the best state before returning
    if best_state is not None:
        model.load_state_dict(best_state)

    print(f"\n>> best val acc: {best_acc:.4f}")
    return model, history

### Preparación de datos CIFAR-10

In [None]:
# transforms for CIFAR-10
# per-channel mean and standard deviation used to normalise CIFAR-10
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2470, 0.2435, 0.2616)

# data augmentation for training: padded random crop plus horizontal flip
train_tf_cifar10 = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# transforms for validation/test: no augmentation, only normalisation
test_tf_cifar10 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# load the CIFAR-10 datasets (downloaded into data_root when missing)
data_root = "./data"

train_cifar10 = datasets.CIFAR10(root=data_root, train=True, download=True, transform=train_tf_cifar10)
test_cifar10 = datasets.CIFAR10(root=data_root, train=False, download=True, transform=test_tf_cifar10)

batch_size_cifar10 = 128
num_workers = min(4, os.cpu_count() or 0)
# pinned host memory is only requested when batches will be copied to a
# CUDA device; on other devices it brings no benefit
pin_memory_cifar10 = device.type == "cuda"

train_loader_cifar10 = DataLoader(train_cifar10, batch_size=batch_size_cifar10, shuffle=True, num_workers=num_workers, pin_memory=pin_memory_cifar10)
# NOTE: the "validation" loader wraps the test split (train=False)
val_loader_cifar10 = DataLoader(test_cifar10, batch_size=batch_size_cifar10, shuffle=False, num_workers=num_workers, pin_memory=pin_memory_cifar10)

print(f"Train samples: {len(train_cifar10)}, Test samples: {len(test_cifar10)}")
print(f"Classes: {train_cifar10.classes}")

Train samples: 50000, Test samples: 10000
Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


### Implementación de AlexNet

In [None]:
# AlexNet implementation adapted to CIFAR-10 (32x32 images).
# The original architecture was designed for ImageNet (224x224);
# this version uses 3x3 stride-1 convolutions and 2x2 max-pooling instead.

class AlexNet(nn.Module):
    """Five-convolution AlexNet-style classifier for small RGB images.

    The shapes quoted in the comments below assume a 3x32x32 input.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()

        def conv3x3(c_in: int, c_out: int) -> nn.Conv2d:
            # padding=1 with a 3x3 kernel keeps the spatial size unchanged
            return nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1)

        def halve() -> nn.MaxPool2d:
            # 2x2 max-pool with stride 2 halves height and width
            return nn.MaxPool2d(kernel_size=2, stride=2)

        # stage 1: 3x32x32 -> 64x16x16 (after pooling)
        self.conv1 = conv3x3(3, 64)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = halve()

        # stage 2: 64x16x16 -> 192x8x8 (after pooling)
        self.conv2 = conv3x3(64, 192)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = halve()

        # stage 3: 192x8x8 -> 384x8x8
        self.conv3 = conv3x3(192, 384)
        self.relu3 = nn.ReLU(inplace=True)

        # stage 4: 384x8x8 -> 256x8x8
        self.conv4 = conv3x3(384, 256)
        self.relu4 = nn.ReLU(inplace=True)

        # stage 5: 256x8x8 -> 256x4x4 (after pooling)
        self.conv5 = conv3x3(256, 256)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = halve()

        # adaptive pooling guarantees the 4x4 map the classifier expects
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))

        # classifier head (fully connected layers)
        flat_features = 256 * 4 * 4
        hidden = 2048
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(flat_features, hidden),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(batch, num_classes)``."""
        # feature extraction: layers applied in registration order
        feature_layers = (
            self.conv1, self.relu1, self.pool1,
            self.conv2, self.relu2, self.pool2,
            self.conv3, self.relu3,
            self.conv4, self.relu4,
            self.conv5, self.relu5, self.pool5,
            self.avgpool,
        )
        for layer in feature_layers:
            x = layer(x)

        # flatten everything but the batch dimension
        batch = x.size(0)
        flat = x.view(batch, -1)

        # classification
        return self.classifier(flat)

# instantiate AlexNet with 10 output classes and move it to the selected device
alexnet = AlexNet(num_classes=10).to(device)
# bare expression: shows the module summary as the notebook cell output
alexnet

AlexNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU(inplace=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU(inplace=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu3): ReLU(inplace=True)
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu4): ReLU(inplace=True)
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu5): ReLU(inplace=True)
  (pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avgpool): AdaptiveAvgPool2d(output_size=(4, 4))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=4096, out_features=2048, bias=True)
    (2): R

### Configuración del optimizador y scheduler

In [None]:
# training configuration for AlexNet
criterion_cifar10 = nn.CrossEntropyLoss()
optimizer_cifar10 = optim.SGD(
    alexnet.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)
scheduler_cifar10 = optim.lr_scheduler.StepLR(
    optimizer_cifar10,
    step_size=10,
    gamma=0.5,
)

# the summary is hard-coded text: keep it in sync with the values above
print(
    "Configuración de entrenamiento:",
    "Criterio: CrossEntropyLoss",
    "Optimizador: SGD (lr=0.01, momentum=0.9, weight_decay=5e-4)",
    "Scheduler: StepLR (step_size=10, gamma=0.5)",
    sep="\n",
)

Configuración de entrenamiento:
Criterio: CrossEntropyLoss
Optimizador: SGD (lr=0.01, momentum=0.9, weight_decay=5e-4)
Scheduler: StepLR (step_size=10, gamma=0.5)


### Entrenamiento de AlexNet en CIFAR-10

In [None]:
# train AlexNet on CIFAR-10; fit() returns the model loaded with the
# best-validation-accuracy weights plus the per-epoch metric history
epochs_cifar10 = 20

alexnet, history_cifar10 = fit(
    model=alexnet,
    train_loader=train_loader_cifar10,
    val_loader=val_loader_cifar10,
    criterion=criterion_cifar10,
    optimizer=optimizer_cifar10,
    scheduler=scheduler_cifar10,
    device=device,
    num_classes=10,
    epochs=epochs_cifar10,
    name="alexnet-cifar10"
)


==> alexnet-cifar10 | epoch 1/20




step 0100 | loss 2.3024 | acc 0.1035
step 0200 | loss 2.3013 | acc 0.1082
step 0300 | loss 2.2707 | acc 0.1350
train  | loss 2.2060 acc 0.1606 time 270.7s
valid  | loss 1.8994 acc 0.2955 macro_f1 0.2446

==> alexnet-cifar10 | epoch 2/20
step 0100 | loss 1.8874 | acc 0.2709
step 0200 | loss 1.8454 | acc 0.2919
step 0300 | loss 1.8092 | acc 0.3096
train  | loss 1.7721 acc 0.3248 time 294.3s
valid  | loss 1.5812 acc 0.4044 macro_f1 0.3747

==> alexnet-cifar10 | epoch 3/20
step 0100 | loss 1.5969 | acc 0.3925
step 0200 | loss 1.5710 | acc 0.4049
step 0300 | loss 1.5461 | acc 0.4173
train  | loss 1.5246 acc 0.4266 time 281.8s
valid  | loss 1.3690 acc 0.4812 macro_f1 0.4483

==> alexnet-cifar10 | epoch 4/20
step 0100 | loss 1.4078 | acc 0.4814
step 0200 | loss 1.3772 | acc 0.4886
step 0300 | loss 1.3583 | acc 0.4976
train  | loss 1.3427 acc 0.5048 time 238.2s
valid  | loss 1.1928 acc 0.5558 macro_f1 0.5450

==> alexnet-cifar10 | epoch 5/20
step 0100 | loss 1.2291 | acc 0.5494
step 0200 | los

### Evaluación final del modelo

In [None]:
# final evaluation on the test set
# NOTE: val_loader_cifar10 wraps the CIFAR-10 test split, the same data fit()
# used to pick the best epoch, so these numbers are not from unseen data
final_stats_cifar10 = evaluate(alexnet, val_loader_cifar10, criterion_cifar10, device, num_classes=10)

print("\n== Resultados finales AlexNet en CIFAR-10 ==")
print(f"Test Loss: {final_stats_cifar10['val_loss']:.4f}")
print(f"Test Accuracy: {final_stats_cifar10['val_acc']:.4f}")
print(f"Test Macro-F1: {final_stats_cifar10['val_macro_f1']:.4f}")

# bare expression: shows the raw metrics dict as the notebook cell output
final_stats_cifar10


== Resultados finales AlexNet en CIFAR-10 ==
Test Loss: 0.4930
Test Accuracy: 0.8287
Test Macro-F1: 0.8306


{'val_loss': 0.49304004202160656,
 'val_acc': 0.8287183544303798,
 'val_macro_f1': 0.8305532336235046}

## Métricas de desempeño

Las métricas utilizadas para evaluar el rendimiento del modelo son la exactitud (accuracy), la matriz de confusión y el Macro-F1. La exactitud mide la proporción de predicciones correctas sobre el total de ejemplos; es una métrica básica pero importante para evaluar el rendimiento global del modelo. La matriz de confusión permite analizar el desempeño en términos de verdaderos positivos, falsos positivos, verdaderos negativos y falsos negativos, proporcionando una visión detallada de cómo el modelo clasifica cada clase. Finalmente, el Macro-F1 es crucial en este contexto porque calcula el F1 de cada clase —la media armónica entre su precisión (precision) y su recall— y luego promedia esos valores dando el mismo peso a todas las clases, lo que lo hace especialmente útil cuando se desea una evaluación equilibrada de todas ellas, incluso en conjuntos de datos con clases balanceadas, como es el caso de CIFAR-10. Estas métricas, en conjunto, permiten una evaluación más completa del modelo, ayudando a identificar tanto la exactitud general como el rendimiento por clase.