In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [2]:
# Modelo LeNet5
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, padding=2)
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)

        # Fully connected layers
        self.fc1 = nn.Linear(in_features=480, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        conv1_out = torch.tanh(self.conv1(x))
        pool1_out = self.pool1(conv1_out)
        conv2_out = torch.tanh(self.conv2(pool1_out))
        pool2_out = self.pool2(conv2_out)
        conv3_out = torch.tanh(self.conv3(pool2_out))
        
        # Flatten
        x_flatten = conv3_out.view(conv3_out.size(0), -1)
        
        # Ajustar dinámicamente el tamaño de fc1 si es necesario
        if x_flatten.size(1) != self.fc1.in_features:
            self.fc1 = nn.Linear(x_flatten.size(1), 84)
        
        fc1_out = torch.tanh(self.fc1(x_flatten))
        output = torch.softmax(self.fc2(fc1_out), dim=1)

        return conv1_out, pool1_out, conv2_out, pool2_out, conv3_out, fc1_out, output

# Transformaciones
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])

# Dataset y DataLoader
train_ds = CIFAR100(root='./data', train=True, download=True, transform=transform)
valid_ds = CIFAR100(root='./data', train=False, download=True, transform=transform)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=64, shuffle=False)

# Configuración del modelo, pérdida y optimizador
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Entrenamiento
num_epochs = 10
train_loss_list = []
test_loss_list = []
train_accuracy_list = []
test_accuracy_list = []

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    correct_train = 0
    total_train = 0

    # Entrenamiento
    for inputs, labels in train_dl:
        inputs, labels = inputs.to(device), labels.to(device)

        # Asegurar que las etiquetas sean tensores válidos
        if labels.ndim == 2 and labels.shape[1] > 1:
            labels = torch.argmax(labels, dim=1)

        # Forward
        _, _, _, _, _, _, fc2_out = model(inputs)
        loss = criterion(fc2_out, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = torch.max(fc2_out.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # Guardar métricas de entrenamiento
    train_loss_list.append(train_loss / len(train_dl))
    train_accuracy_list.append(100 * correct_train / total_train)

    # Evaluación
    model.eval()
    test_loss = 0
    correct_test = 0
    total_test = 0

    with torch.no_grad():
        for inputs, labels in valid_dl:
            inputs, labels = inputs.to(device), labels.to(device)
            if labels.ndim == 2 and labels.shape[1] > 1:
                labels = torch.argmax(labels, dim=1)
                
            _, _, _, _, _, _, fc2_out = model(inputs)
            loss = criterion(fc2_out, labels)
            
            test_loss += loss.item()
            _, predicted = torch.max(fc2_out.data, 1)
            total_test += labels.size(0)
            correct_test += (predicted == labels).sum().item()

    # Guardar métricas de validación
    test_loss_list.append(test_loss / len(valid_dl))
    test_accuracy_list.append(100 * correct_test / total_test)

    # Imprimir resultados por época
    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Train Loss: {train_loss_list[-1]:.4f}, '
          f'Train Accuracy: {train_accuracy_list[-1]:.2f}%, '
          f'Test Loss: {test_loss_list[-1]:.4f}, '
          f'Test Accuracy: {test_accuracy_list[-1]:.2f}%')

# Visualización de las métricas
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(train_loss_list, label='Train Loss')
plt.plot(test_loss_list, label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Loss vs Epochs')

plt.subplot(1, 2, 2)
plt.plot(train_accuracy_list, label='Train Accuracy')
plt.plot(test_accuracy_list, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.title('Accuracy vs Epochs')

plt.tight_layout()
plt.show()


Files already downloaded and verified
Files already downloaded and verified


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
