In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
import numpy as np

# Seed PyTorch's global random number generator so that runs are repeatable.
torch.manual_seed(123)
# Make float64 the default floating-point dtype for tensors created from here on.
torch.set_default_dtype(torch.double)

In [2]:
def load_cifar(train_val_split=0.9, data_path='../data/', preprocessor=None):
    """Download CIFAR-10 and reduce it to a two-class (airplane vs. bird) problem.

    Args:
        train_val_split: Fraction of the official training set kept for
            training; the remainder becomes the validation set.
        data_path: Directory where CIFAR-10 is stored (downloaded if missing).
        preprocessor: Transform applied to every image. When None, images are
            converted to tensors and normalized channel-wise with fixed
            mean / standard-deviation constants.

    Returns:
        A tuple ``(cifar2_train, cifar2_val, cifar2_test)`` of lists of
        ``(image, label)`` pairs, where original label 0 is mapped to 0 and
        original label 2 is mapped to 1. All other classes are dropped.
    """
    # Fall back to the default pipeline when the caller did not supply one
    if preprocessor is None:
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(
            (0.4915, 0.4823, 0.4468),
            (0.2470, 0.2435, 0.2616))
        preprocessor = transforms.Compose([to_tensor, normalize])

    def _fetch(is_train):
        # Both official splits share the storage location and the transform
        return datasets.CIFAR10(
            data_path,
            train=is_train,
            download=True,
            transform=preprocessor)

    full_train = _fetch(True)
    data_test = _fetch(False)

    # Carve the validation set out of the official training set
    total = len(full_train)
    n_train = int(total * train_val_split)
    split_sizes = [n_train, total - n_train]

    # A dedicated, fixed-seed generator makes the split identical on every call
    split_rng = torch.Generator().manual_seed(123)
    data_train, data_val = random_split(full_train, split_sizes, generator=split_rng)

    # Original CIFAR-10 label -> label in the lighter two-class dataset (cifar2)
    label_map = {0: 0, 2: 1}

    def _two_class(dataset):
        # Keep only airplanes and birds, relabelled to 0 and 1
        kept = []
        for img, label in dataset:
            if label in label_map:
                kept.append((img, label_map[label]))
        return kept

    cifar2_train = _two_class(data_train)
    cifar2_val = _two_class(data_val)
    cifar2_test = _two_class(data_test)

    print('Size of the training dataset: ', len(cifar2_train))
    print('Size of the validation dataset: ', len(cifar2_val))
    print('Size of the test dataset: ', len(cifar2_test))

    return (cifar2_train, cifar2_val, cifar2_test)

# Build the three two-class subsets once, using the default split ratio, data path and preprocessing.
data_train, data_val, data_test = load_cifar()

def train(n_epochs, optimizer, model, loss_fn, train_loader, device=None):
    """Train ``model`` with a PyTorch optimizer and return the per-epoch losses.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer already bound to the parameters of ``model``.
        model: Network to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Iterable of ``(imgs, labels)`` batches that supports ``len()``.
        device: Device the batches are moved to. When None, the device of the
            model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        A list with one entry per epoch: the mean batch loss of that epoch.
    """
    if device is None:
        # Feed the data wherever the model lives instead of reading a global
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    n_batch = len(train_loader)
    losses_train = []
    model.train()
    # Start from a clean slate in case gradients were left over by the caller
    optimizer.zero_grad(set_to_none=True)

    for epoch in range(1, n_epochs + 1):

        loss_train = 0.0
        for imgs, labels in train_loader:

            # Inputs are cast to float64 before being fed to the model
            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device)

            outputs = model(imgs)

            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_train += loss.item()

        mean_loss = loss_train / n_batch
        losses_train.append(mean_loss)

        # Log the first epoch and then every fifth one
        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.5f}'.format(
                datetime.now().time(), epoch, mean_loss))
    return losses_train

def compute_accuracy(model, loader, device=None):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    The model is switched to evaluation mode and is left in that mode.

    Args:
        model: Network producing one score per class for each input.
        loader: Iterable of ``(imgs, labels)`` batches.
        device: Device the batches are moved to. When None, the device of the
            model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        The fraction of samples whose highest-scoring class equals the label.
    """
    if device is None:
        # Feed the data wherever the model lives instead of reading a global
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct = 0
    total = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for imgs, labels in loader:
            # Inputs are cast to float64 before being fed to the model
            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device)

            outputs = model(imgs)
            # Predicted class = index of the largest score along the class axis
            predicted = outputs.argmax(dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    acc = correct / total
    print("Accuracy: {:.2f}".format(acc))
    return acc

Files already downloaded and verified
Files already downloaded and verified
Size of the training dataset:  9017
Size of the validation dataset:  983
Size of the test dataset:  2000


In [3]:
class MyNet(nn.Module):
    """Fully connected classifier with layer widths 3072 -> 512 -> 128 -> 32 -> 2.

    The input is flattened from its second dimension onward, so everything
    after the batch dimension must amount to 32*32*3 values per sample.
    """

    def __init__(self):
        super().__init__()
        # Only layers holding parameters are registered here; the activations
        # are stateless and applied through the functional API in forward()
        self.fc1 = nn.Linear(3 * 32 * 32, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the raw (unnormalized) class scores for a batch ``x``."""
        hidden = x.flatten(start_dim=1)
        # Every hidden layer is followed by a ReLU
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        # No softmax on the output layer: nn.CrossEntropyLoss expects raw scores
        return self.fc4(hidden)

In [4]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=1e-2, momentum_coeff=0., weight_decay=0., device=None):
    """Train ``model`` with a hand-written SGD update (optional momentum and L2 decay).

    For every parameter with a gradient, each step computes::

        grad = p.grad + weight_decay * p              (if weight_decay)
        grad = grad + momentum_coeff * previous_grad  (if momentum_coeff and a
                                                       previous step exists)
        p    = p - lr * grad

    where ``previous_grad`` is the ``grad`` value used in the previous step.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        model: Network to train; it is switched to training mode.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Iterable of ``(imgs, labels)`` batches that supports ``len()``.
        lr: Learning rate.
        momentum_coeff: Momentum coefficient; a falsy value disables momentum.
        weight_decay: L2 regularization coefficient; a falsy value disables it.
        device: Device the batches are moved to. When None, the device of the
            model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        A list with one entry per epoch: the mean batch loss of that epoch.
    """
    if device is None:
        # Feed the data wherever the model lives instead of reading a global
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    n_batch = len(train_loader)
    losses_train = []

    # Momentum buffers: parameter name -> gradient used in the previous step
    dict_momentum = {}
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:

            # Inputs are cast to float64 before being fed to the model
            imgs = imgs.to(device=device, dtype=torch.double)
            labels = labels.to(device=device)

            outputs = model(imgs)

            loss = loss_fn(outputs, labels)
            loss.backward()

            # The update itself must not be tracked by autograd
            with torch.no_grad():
                for name, p in model.named_parameters():
                    if p.grad is None:
                        continue

                    grad = p.grad

                    # L2 regularization
                    if weight_decay:
                        grad = grad + weight_decay * p

                    # Momentum version
                    if momentum_coeff:
                        # If previous gradients are available
                        # then grad = grad + previous_grad * momentum_coeff
                        if name in dict_momentum:
                            grad = grad + dict_momentum[name] * momentum_coeff
                        dict_momentum[name] = grad

                    # Weight update, applied in place
                    p.sub_(lr * grad)

                    # Reset the gradient by rebinding it to a fresh tensor rather
                    # than zeroing in place: on the first momentum step without
                    # weight decay the buffer stored above IS p.grad, and an
                    # in-place zero would wipe it.
                    p.grad = torch.zeros_like(p.grad)

            loss_train += loss.item()

        mean_loss = loss_train / n_batch
        losses_train.append(mean_loss)

        # Log the first epoch and then every fifth one
        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.5f}'.format(
                datetime.now().time(), epoch,
                mean_loss))
    return losses_train

In [5]:
# Select the compute device: GPU when CUDA is available, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")

# Settings shared by every run
n_epochs = 30
batch_size = 256
seed = 265

# Neither loader shuffles, so every run sees the batches in the same order
train_loader = torch.utils.data.DataLoader(data_train, shuffle=False, batch_size=batch_size)
val_loader = torch.utils.data.DataLoader(data_val, shuffle=False, batch_size=batch_size)

loss_fn = nn.CrossEntropyLoss()

# One (learning rate, momentum, weight decay) triple per experiment, read column-wise
list_lr = [0.01]*6
list_momentum = [0, 0, 0.9, 0.9, 0.9, 0.8]
list_decay = [0, 0.01, 0, 0.01, 0.001, 0.01]

params = [
    {"lr": lr, 'mom': mom, 'decay': decay}
    for lr, mom, decay in zip(list_lr, list_momentum, list_decay)
]

print("\n   Global parameters:")
print("batch_size = ", batch_size)
print("n_epoch = ", n_epochs)
print("loss_fn = ", nn.CrossEntropyLoss())
print("seed = ", seed)

# Validation accuracy and trained network of each manually-updated model
accuracies = []
models = []


def _report_accuracies(net):
    """Print training and validation accuracy of ``net``; return the validation accuracy."""
    print("\n --- Accuracies --- ")
    print("Training")
    compute_accuracy(net, train_loader)
    print("Validation")
    return compute_accuracy(net, val_loader)


for i, config in enumerate(params):

    print("\n ========================================================= ")

    print("   Current parameters: ")
    print("".join('%s = %s\n' % item for item in config.items()))

    print(" --------- Using Pytorch's SGD --------- ")

    # Re-seeding before each model gives both runs the same initial weights
    torch.manual_seed(seed)
    model = MyNet().to(device=device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=config["lr"],
        momentum=config["mom"],
        weight_decay=config["decay"],
    )

    loss_train = train(n_epochs, optimizer, model, loss_fn, train_loader)

    acc = _report_accuracies(model)

    print("\n --------- Using manual update ---------- ")
    torch.manual_seed(seed)
    model = MyNet().to(device=device)

    loss_train = train_manual_update(
        n_epochs,
        model,
        loss_fn,
        train_loader,
        lr=config["lr"],
        momentum_coeff=config["mom"],
        weight_decay=config["decay"],
    )

    acc = _report_accuracies(model)

    # For model selection (only the manually-updated model is recorded)
    accuracies.append(acc)
    models.append(model)

  return torch._C._cuda_getDeviceCount() > 0


Training on device cpu.

   Global parameters:
batch_size =  256
n_epoch =  30
loss_fn =  CrossEntropyLoss()
seed =  265

   Current parameters: 
lr = 0.01
mom = 0
decay = 0

 --------- Using Pytorch's SGD --------- 
15:27:35.605270  |  Epoch 1  |  Training loss 0.68093
15:27:40.688064  |  Epoch 5  |  Training loss 0.54943
15:27:47.054778  |  Epoch 10  |  Training loss 0.46524
15:27:53.257568  |  Epoch 15  |  Training loss 0.42144
15:27:59.615822  |  Epoch 20  |  Training loss 0.38399
15:28:04.971721  |  Epoch 25  |  Training loss 0.35094
15:28:10.766444  |  Epoch 30  |  Training loss 0.31793

 --- Accuracies --- 
Training
Accuracy: 0.88
Validation
Accuracy: 0.83

 --------- Using manual update ---------- 
15:28:13.014504  |  Epoch 1  |  Training loss 0.68093
15:28:19.681324  |  Epoch 5  |  Training loss 0.54943
15:28:26.238622  |  Epoch 10  |  Training loss 0.46524
15:28:32.575133  |  Epoch 15  |  Training loss 0.42144
15:28:39.409641  |  Epoch 20  |  Training loss 0.38399
15:28:47.86

In [6]:
# Pick the run with the highest recorded validation accuracy
# (np.argmax returns the first such index when several runs tie)
i_best_model = np.argmax(accuracies)
best_model, params_best_model = models[i_best_model], params[i_best_model]

best_summary = "".join('\n    %s = %s' % item for item in params_best_model.items())
print("\nThe best model was trained with", best_summary)

print("Training accuracy of the best model: ")
compute_accuracy(best_model, train_loader)
print("Validation accuracy of the best model: ")
compute_accuracy(best_model, val_loader)


The best model was trained with 
    lr = 0.01
    mom = 0.9
    decay = 0.01
Training accuracy of the best model: 
Accuracy: 0.96
Validation accuracy of the best model: 
Accuracy: 0.84


0.8392675483214649

In [7]:
# Wrap the test subset in a loader; the selected model is evaluated on it once
test_loader = torch.utils.data.DataLoader(
    data_test,
    shuffle=False,
    batch_size=batch_size,
)

print("Test accuracy of the best model: ")
compute_accuracy(best_model, test_loader)

Test accuracy of the best model: 
Accuracy: 0.84


0.8435