In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import random_split
from datetime import datetime
from collections import Counter


torch.manual_seed(265)
torch.set_default_dtype(torch.double)

device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))


# Task 3.1.1

In [67]:
def load_cifar(train_val_split=0.9, data_path='../data/', preprocessor=None):
    
    # Define preprocessor if not already given
    if preprocessor is None:
        preprocessor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4915, 0.4823, 0.4468),
                                (0.2470, 0.2435, 0.2616))
        ])
    
    # load datasets
    data_train_val = datasets.CIFAR10(
        data_path,       
        train=True,      
        download=True,
        transform=preprocessor)

    data_test = datasets.CIFAR10(
        data_path, 
        train=False,
        download=True,
        transform=preprocessor)

    # train/validation split
    n_train = int(len(data_train_val)*train_val_split)
    n_val =  len(data_train_val) - n_train

    data_train, data_val = random_split(
        data_train_val, 
        [n_train, n_val],
        generator=torch.Generator().manual_seed(123)
    )
    
    return (data_train, data_val, data_test)

# Loading entire dataset 
cifar10_train, cifar10_val, cifar10_test = load_cifar()

# Defining the two labels: plane, bird:
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# For each dataset, keep only plane and birds
cifar2_train = [(img, label_map[label]) for img, label in cifar10_train if label in [0, 2]]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in [0, 2]]
cifar2_test = [(img, label_map[label]) for img, label in cifar10_test if label in [0, 2]]




Files already downloaded and verified
Files already downloaded and verified


In [68]:
Counter([label for _, label in cifar2_train])

Counter({1: 4504, 0: 4513})

# Task 3.1.2

In [69]:
class MyMLP(nn.Module): 

    def __init__(self, in_dim, out_dim) -> None:
        super().__init__()
        self.dimension = in_dim
        self.fc1 = nn.Linear(in_dim, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.af = nn.ReLU()

    def forward(self, x): 
        out = torch.flatten(x, 1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = F.relu(self.fc3(out))
        return out


# Task 3.1.3

In [70]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    
    n_batch = len(train_loader)
    losses_train = []
    model.train()
    optimizer.zero_grad(set_to_none=True)
    
    for epoch in range(1, n_epochs + 1):
        
        loss_train = 0.0
        for imgs, labels in train_loader:

            imgs = imgs.to(device=device, dtype=torch.double) 
            labels = labels.to(device=device)

            outputs = model(imgs)
            
            loss = loss_fn(outputs, labels)
            loss.backward()
            
            optimizer.step()
            optimizer.zero_grad()

            loss_train += loss.item()
            
        losses_train.append(loss_train / n_batch)

        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, loss_train / n_batch))
    return losses_train


# Task 3.1.4, 3.1.6, 3.1.7 

In [71]:
def train_manual_update(n_epochs, lr, model, loss_fn, train_loader, weight_decay = 0, momentum = 0):
    
    n_batch = len(train_loader)
    losses_train = []
    model.train()
    
    t = 0
    b = []

    for epoch in range(1, n_epochs + 1):
        
        loss_train = 0.0
        for imgs, labels in train_loader:

            imgs = imgs.to(device=device, dtype=torch.double) 
            labels = labels.to(device=device)

            outputs = model(imgs)
            
            loss = loss_fn(outputs, labels)
            loss.backward()
 
            index = 0

            for p in model.parameters(): 

                g = p.grad 

                # L2 regularization 0 < weight_decay < 1, 
                # 0 if not  L2 regularization
                g += weight_decay*p.data

                # Momentum          0 < momentum < 1, 
                # 0 if not momentum

                if t == 0: 
                    b.append(g)
                else: 
                    b[index] = momentum*b[index] + g

                g = b[index]
                p.data = p.data - lr*g             #Formel 3 
                p.grad = torch.zeros_like(p.grad)
                index += 1
                
            t += 1

            
            
            loss_train += loss.item()
            
        losses_train.append(loss_train / n_batch)

        if epoch == 1 or epoch % 5 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, loss_train / n_batch))
    return losses_train


# Task 3.1.5

In [72]:
# cifar2_train 
# cifar2_val 
# cifar2_test 

train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=64, shuffle=False) 

# Instantiate the loss function (here we use cross entropy)
loss_fn = nn.CrossEntropyLoss()

torch.manual_seed(123)
model_train = MyMLP(3072, 2)
optimizer = optim.SGD(model_train.parameters(), lr=1e-2, weight_decay=0.5, momentum=0.5)


# Now all we have to do is calling the training loop
# WARNING THIS MIGHT BE EXTREMELY SLOW. STOP YOUR KERNEL TO STOP THE TRAINING
train(
    n_epochs = 21,
    optimizer = optimizer,
    model = model_train,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
print(' ')

torch.manual_seed(123)
model_train_manual_update = MyMLP(3072, 2)

train_manual_update(
    n_epochs = 21,
    lr = 1e-2,
    model = model_train_manual_update,
    loss_fn = loss_fn,
    train_loader = train_loader,
    weight_decay=0.5,
    momentum=0.5
)



14:37:30.143786  |  Epoch 1  |  Training loss 1.117
14:37:37.456390  |  Epoch 5  |  Training loss 0.748
14:37:47.138513  |  Epoch 10  |  Training loss 0.746
14:37:56.018489  |  Epoch 15  |  Training loss 0.745
14:38:05.297155  |  Epoch 20  |  Training loss 0.745
 
14:38:09.592313  |  Epoch 1  |  Training loss 1.117
14:38:20.093011  |  Epoch 5  |  Training loss 0.748
14:38:33.653460  |  Epoch 10  |  Training loss 0.746
14:38:46.762490  |  Epoch 15  |  Training loss 0.745
14:38:59.057699  |  Epoch 20  |  Training loss 0.745


[1.1167964031696134,
 0.7556445065022518,
 0.7511422467673443,
 0.7490721194427126,
 0.747860450199053,
 0.7470661515259628,
 0.7465170398577246,
 0.7461190914341219,
 0.7458358342895215,
 0.7456563293877976,
 0.7454918606574988,
 0.7454121516793184,
 0.7453679508661936,
 0.745338394823468,
 0.7453206327056063,
 0.7452971163841055,
 0.7452781646487129,
 0.7452469508327764,
 0.74522061382151,
 0.745207550636384,
 0.7452008092028434]

# 3.1.8 

In [73]:
def compute_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)

            outputs = model(imgs)
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    acc =  correct / total
    print("Accuracy: {:.2f}".format(acc))
    return acc

In [74]:
import pandas as pd

best_model = None 
best_acc = 0
best_lr = None 
best_weight_decay = None 
best_momentum = None 


torch.manual_seed(123)
train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=64, shuffle=False) 
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)


model1 = MyMLP(3072, 2)
model2 = MyMLP(3072, 2)
model3 = MyMLP(3072, 2)
model4 = MyMLP(3072, 2)
model5 = MyMLP(3072, 2)


train_manual_update(n_epochs = 21, lr = 0.001, model = model1, loss_fn = loss_fn, train_loader = train_loader, weight_decay=0.1, momentum=0.9)
print(f'MODEL1')
print("Training accuracy:")
train_acc_1 = compute_accuracy(model1, train_loader)
print("Validation accuracy:")
val_acc_1 = compute_accuracy(model1, val_loader)

if best_acc < val_acc_1: 
    best_model = model1 
    best_acc = val_acc_1
    best_lr =  0.0010
    best_weight_decay =  0.1
    best_momentum =  0.9



train_manual_update(n_epochs = 21, lr = 0.01, model = model2, loss_fn = loss_fn, train_loader = train_loader, weight_decay=0.1, momentum=0.25)
print(f'MODEL2')
print("Training accuracy:")
train_acc_2 = compute_accuracy(model2, train_loader)
print("Validation accuracy:")
val_acc_2 = compute_accuracy(model2, val_loader)

if best_acc < val_acc_2: 
    best_model = model2
    best_acc = val_acc_2
    best_lr = 0.01 
    best_weight_decay = 0.1
    best_momentum = 0.25 



train_manual_update(n_epochs = 21, lr = 0.01, model = model3, loss_fn = loss_fn, train_loader = train_loader, weight_decay=0.1, momentum=0.5)
print(f'MODEL3')
print("Training accuracy:")
train_acc_3 = compute_accuracy(model3, train_loader)
print("Validation accuracy:")
val_acc_3 = compute_accuracy(model3, val_loader)

if best_acc < val_acc_3: 
    best_model = model3 
    best_acc = val_acc_3
    best_lr = 0.01 
    best_weight_decay = 0.1
    best_momentum = 0.5 



train_manual_update(n_epochs = 21, lr = 0.001, model = model4, loss_fn = loss_fn, train_loader = train_loader, weight_decay=0.1, momentum=0.75)
print(f'MODEL4')
print("Training accuracy:")
train_acc_4 = compute_accuracy(model4, train_loader)
print("Validation accuracy:")
val_acc_4 = compute_accuracy(model4, val_loader)

if best_acc < val_acc_4: 
    best_model = model4
    best_acc = val_acc_4
    best_lr = 0.001 
    best_weight_decay = 0.1 
    best_momentum = 0.75 



train_manual_update(n_epochs = 21, lr = 0.01, model = model5, loss_fn = loss_fn, train_loader = train_loader, weight_decay=0.1, momentum=0.1)
print(f'MODEL5')
print("Training accuracy:")
train_acc_5 = compute_accuracy(model5, train_loader)
print("Validation accuracy:")
val_acc_5 = compute_accuracy(model5, val_loader)

if best_acc < val_acc_5: 
    best_model = model5 
    best_acc = val_acc_5
    best_lr = 0.01 
    best_weight_decay = 0.1 
    best_momentum = 0.1 



14:39:04.222012  |  Epoch 1  |  Training loss 1.416
14:39:14.009795  |  Epoch 5  |  Training loss 0.486
14:39:26.177106  |  Epoch 10  |  Training loss 0.449
14:39:39.580322  |  Epoch 15  |  Training loss 0.430
14:39:53.298103  |  Epoch 20  |  Training loss 0.418
MODEL1
Training accuracy:
Accuracy: 0.85
Validation accuracy:
Accuracy: 0.83
14:39:59.359519  |  Epoch 1  |  Training loss 1.093
14:40:10.268266  |  Epoch 5  |  Training loss 0.474
14:40:23.626039  |  Epoch 10  |  Training loss 0.444
14:40:37.722405  |  Epoch 15  |  Training loss 0.429
14:40:51.523850  |  Epoch 20  |  Training loss 0.420
MODEL2
Training accuracy:
Accuracy: 0.85
Validation accuracy:
Accuracy: 0.82
14:40:57.432339  |  Epoch 1  |  Training loss 0.873
14:41:08.919387  |  Epoch 5  |  Training loss 0.465
14:41:22.192279  |  Epoch 10  |  Training loss 0.439
14:41:36.046806  |  Epoch 15  |  Training loss 0.428
14:41:51.277644  |  Epoch 20  |  Training loss 0.422
MODEL3
Training accuracy:
Accuracy: 0.85
Validation accur

In [77]:
print(best_model )
print(best_acc)
print(best_lr )
print(best_weight_decay )
print(best_momentum)

MyMLP(
  (fc1): Linear(in_features=3072, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=32, bias=True)
  (af): ReLU()
)
0.8260427263479145
0.001
0.1
0.9


In [78]:
test_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)
test_acc = compute_accuracy(best_model, test_loader)

print(test_acc)

Accuracy: 0.83
0.8260427263479145


In [None]:
class MyLeNet5(nn.Module):
    def __init__(self, num_classes):
        super().__init__() 
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=2)  
        self.norm1 = nn.BatchNorm2d(6)
        self.pool1 = nn.MaxPool2d(2)

        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.norm2 = nn.BatchNorm2d(16)
        self.pool2 = nn.MaxPool2d(2)
        
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(in_features=9*12*16, out_features=120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        
        out = self.conv1(x)
        out = self.norm1(out)
        out = torch.relu(self.pool1(out))
        
        out = self.conv2(out)
        out = self.norm2(out)
        out = torch.relu(self.pool2(out))
        
        out = self.flat(out)
        out = torch.relu(self.fc1(out))
        out = torch.relu(self.fc2(out))
        out = self.fc3(out)

        return out
    