In [1]:
import torch
import torchvision
import math
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
def make_layers(cfg, in_channels=3):
    """Build a VGG-style convolutional feature extractor.

    Args:
        cfg: Iterable describing the stack. The string ``'M'`` adds a
            2x2 max-pooling layer with stride 2; any other entry is used as
            the number of output channels of a 3x3 convolution (padding 1)
            followed by an in-place ReLU.
        in_channels: Number of channels of the input images. Defaults to 3,
            which is the value that was previously hard-coded.

    Returns:
        An ``nn.Sequential`` holding the layers in the order given by ``cfg``.
    """
    layers = []
    channels = in_channels
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            # The next convolution consumes what this one produces.
            channels = v
    return nn.Sequential(*layers)

class MLP(nn.Module):
    """Fully connected classifier: 3*32*32 inputs -> 480 -> 240 -> 120 -> 10 outputs."""

    def __init__(self):
        super().__init__()
        flat_features = 3 * 32 * 32
        self.fc1 = nn.Linear(in_features=flat_features, out_features=480)
        self.fc2 = nn.Linear(in_features=480, out_features=240)
        self.fc3 = nn.Linear(in_features=240, out_features=120)
        self.fc4 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        """Flatten everything but the batch dimension and return the raw class scores."""
        hidden = x.flatten(start_dim=1)
        # ReLU follows every layer except the last one, which emits the logits.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return self.fc4(hidden)


class SimpleCNN(nn.Module):
    """Two 5x5 convolution + pooling stages followed by three linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fc1 expects 16 channels of 5x5 features after the two conv/pool stages.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the conv stages, flatten per sample and return the raw class scores."""
        features = x
        # Both stages share the same pooling layer: conv -> ReLU -> max-pool.
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))
        hidden = features.flatten(start_dim=1)
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)


class Vgg(nn.Module):
    """VGG-style network: a configurable convolutional stack plus a 3-layer classifier.

    Args:
        cfg: Layer configuration forwarded to ``make_layers``.

    The classifier's first layer takes 512 features, so the flattened output
    of the convolutional stack must have exactly 512 values per sample.
    """

    def __init__(self, cfg):
        super().__init__()

        self.features = make_layers(cfg)
        self.classifier = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, 10),
        )

        # Re-initialise every convolution: zero-mean normal weights with
        # std = sqrt(2 / (kernel_h * kernel_w * out_channels)) and zero bias.
        # nn.init runs under no_grad, so there is no need to go through `.data`.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2. / n))
                # Convolutions created with bias=False have no bias tensor.
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return the raw class scores (10 per sample) for a batch of images."""
        x = self.features(x)
        # Collapse everything except the batch dimension, as MLP and SimpleCNN do.
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [3]:
def LoadCIFAR10(batch_size, use_data_augmentation, donwload=True):
    """Create the CIFAR-10 train and test data loaders.

    Args:
        batch_size: Batch size used by both loaders.
        use_data_augmentation: If true, training images get a random
            horizontal flip and a random 32x32 crop with padding 4, and both
            splits are normalized with mean (0.485, 0.456, 0.406) and std
            (0.229, 0.224, 0.225). Otherwise both splits share one transform
            that normalizes every channel with mean 0.5 and std 0.5.
        donwload: Forwarded as ``download`` to ``torchvision.datasets.CIFAR10``
            (the spelling is kept because callers pass it by keyword).

    Returns:
        ``(trainloader, testloader)``; only the train loader shuffles.
    """
    if not use_data_augmentation:
        plain_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        train_transform = test_transform = plain_transform
    else:
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            normalize,
        ])
        # The test split is never augmented, only normalized.
        test_transform = transforms.Compose([transforms.ToTensor(), normalize])

    loaders = []
    # Train split first, then test split; shuffling is enabled for training only.
    for is_train, split_transform in ((True, train_transform), (False, test_transform)):
        dataset = torchvision.datasets.CIFAR10(
            root='./data', train=is_train, download=donwload, transform=split_transform)
        loaders.append(torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=is_train, num_workers=2, pin_memory=True))

    trainloader, testloader = loaders
    return trainloader, testloader


def GetPrecision(net, dataloader, device, criterion):
    """Evaluate ``net`` on every batch of ``dataloader``.

    The network is switched to evaluation mode for the duration of the call
    (so layers such as Dropout or BatchNorm behave deterministically) and the
    previous training/eval mode of every sub-module is restored afterwards.

    Args:
        net: Model to evaluate; called as ``net(images)``.
        dataloader: Iterable of batches where item 0 holds the inputs and
            item 1 the integer class labels.
        device: Device the batches are moved to before the forward pass.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sum of the per-batch
        criterion values (not divided by the number of batches) and
        ``accuracy`` is the percentage of correct predictions. An empty
        ``dataloader`` yields ``(0.0, 0.0)`` instead of a division by zero.
    """
    correct = 0
    total = 0
    loss = 0.0

    # Remember each sub-module's mode so it can be restored exactly, even if
    # parts of the model were deliberately left in eval mode by the caller.
    is_module = isinstance(net, torch.nn.Module)
    saved_modes = [(m, m.training) for m in net.modules()] if is_module else []
    if is_module:
        net.eval()

    try:
        with torch.no_grad():
            for data in dataloader:
                images, labels = data[0].to(device), data[1].to(device)
                outputs = net(images)
                # The index of the highest score is the predicted class.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                loss += criterion(outputs, labels)
    finally:
        for module, mode in saved_modes:
            module.training = mode

    accuracy = 100 * correct / total if total else 0.0
    return loss, accuracy

In [4]:
# Hyperparameters
batch_size = 64
learning_rate = 1e-4
weight_decay = 1e-4            # L2 regularization factor
use_data_augmentation = True

In [14]:
# Uncomment the network you want to use (exactly one line should be active)
# net = MLP()
# net = SimpleCNN()
net = Vgg([64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']) # VGG-11
# net = Vgg([64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']) # VGG-16

In [15]:
# Load the training and test sets.
# donwload=False is forwarded as download=False to the CIFAR10 datasets.
trainloader, testloader = LoadCIFAR10(batch_size, use_data_augmentation, donwload=False)

In [20]:
# Pick the best available accelerator instead of hard-coding Apple's MPS
# backend, so the notebook also runs on CUDA or CPU-only machines.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)
net.to(device)

mps


Vgg(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [21]:
# Choose the loss function and the optimizer.
# Adam applies an L2 regularizer internally; 'weight_decay' is its factor.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)

In [25]:
# Start the training loop (10 passes over the training set)
for epoch in range(10): 

    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        
        # PyTorch requires us to explicitly clear the gradients stored from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    current_epoch = epoch + 1

    # With a modulus of 1 this condition is always true, so statistics are
    # reported after every epoch; raise the modulus to report less often.
    if (current_epoch % 1) == 0:
        # Show training statistics (including performance on train and test)

        train_loss, train_performance = GetPrecision(net, trainloader, device, criterion)
        test_loss, test_performance = GetPrecision(net, testloader, device, criterion)
        
        # Rescale the losses so that train and test are comparable.
        # GetPrecision accumulates one criterion value per batch, so both are scaled by batch_size;
        # the train set is 5 times larger than the test set, hence the extra factor of 5 on the test loss.
        # NOTE(review): the factor 5 is hard-coded for this train/test split - revisit if the datasets change.
        train_loss *= batch_size
        test_loss *= batch_size * 5

        print(f'Ep. {current_epoch}. Train loss: {train_loss :.3f}. Test loss: {test_loss :.3f}. Train acc: {train_performance :.2f}. Test acc: {test_performance :.2f}')


print('Finished Training')

Ep. 1. Train loss: 27470.992. Test loss: 32133.564. Train acc: 80.75. Test acc: 78.33
Ep. 2. Train loss: 24386.395. Test loss: 29366.170. Train acc: 83.04. Test acc: 79.86
Ep. 3. Train loss: 23755.852. Test loss: 29054.002. Train acc: 83.35. Test acc: 80.37
Ep. 4. Train loss: 21992.410. Test loss: 27676.531. Train acc: 84.81. Test acc: 81.48
Ep. 5. Train loss: 21184.656. Test loss: 28039.496. Train acc: 85.27. Test acc: 80.56
Ep. 6. Train loss: 18032.859. Test loss: 25399.078. Train acc: 87.65. Test acc: 82.74
Ep. 7. Train loss: 17733.602. Test loss: 26362.535. Train acc: 87.59. Test acc: 82.58
Ep. 8. Train loss: 17074.381. Test loss: 26845.926. Train acc: 88.17. Test acc: 82.45
Ep. 9. Train loss: 15911.893. Test loss: 25726.545. Train acc: 89.11. Test acc: 83.04
Ep. 10. Train loss: 14752.659. Test loss: 25511.240. Train acc: 89.81. Test acc: 84.09
Finished Training
