In [1]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import torch.optim as optim
from torch.utils.data import random_split, DataLoader
from collections import Counter
from classes.MyMLP import MyMLP


In [2]:
SEED = 808
torch.manual_seed(SEED)

DEVICE = (torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
print(f"Using device: {DEVICE}")

torch.set_default_dtype(torch.double)

Using device: cuda


In [3]:
def load_CIFAR2(train_val_split=.9, data_path='data', preprocessor=None):
    if preprocessor is None:
        preprocessor = transforms.Compose([
            transforms.ToTensor(),
        ])

    data_train_val = datasets.CIFAR10(
        data_path,
        train=True,
        download=True,
        transform=preprocessor)
    
    data_test = datasets.CIFAR10(
        data_path,
        train=False,
        download=True,
        transform=preprocessor)

    n_train = int(len(data_train_val)*train_val_split)
    n_val = len(data_train_val) - n_train

    data_train, data_val = random_split(
        data_train_val,
        [n_train, n_val],
        generator=torch.Generator()
    )
    
    label_map = {0: 0, 2: 1}
    class_names = ['airplane', 'bird']
    
    data_train = [(img, label_map[label]) for img, label in data_train if label in [0, 2]]
    data_val = [(img, label_map[label]) for img, label in data_val if label in [0, 2]]
    data_test = [(img, label_map[label]) for img, label in data_test if label in [0, 2]]

    print("Size of training set: ", len(data_train))
    print("Size of validation set: ", len(data_val))
    print("Size of test set: ", len(data_test))

    return (data_train, data_val, data_test)

cifar_train, cifar_val, cifar_test = load_CIFAR2()

Files already downloaded and verified
Files already downloaded and verified
Size of training set:  8956
Size of validation set:  1044
Size of test set:  2000


In [4]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    print(f"Training {model} with optimizer")
    n_batch = len(train_loader)
    losses_train = []
    losses_val = []

    for epoch in range(1, n_epochs+1):
        model.train()
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=DEVICE, dtype=torch.double)
            labels = labels.to(device=DEVICE)

            outputs = model(imgs)

            loss = loss_fn(outputs,labels)
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            loss_train += loss.item()

        losses_train.append(loss_train / n_batch)

        # if epoch == 1 or epoch % 10 == 0:
        print(f"{datetime.now().time()}, {epoch}, train_loss: {loss/n_batch}")

In [5]:
def update_parameters(parameters, gradients, loss, learning_rate):
    return parameters - learning_rate*gradients
    
def train_manual_update(n_epochs, lr, model, loss_fn, train_loader):
    print(f"Training {model} with manual update")
    n_batch = len(train_loader)
    losses_train = []
    losses_val = []

    for epoch in range(1, n_epochs+1):
        model.train()
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=DEVICE, dtype=torch.double)
            labels = labels.to(device=DEVICE)

            outputs = model(imgs)

            loss = loss_fn(outputs,labels)
            loss.backward()

            # Implement gradient descent here:
            with torch.no_grad():
                for p in model.parameters():
                    new_values = update_parameters(p, p.grad, loss, learning_rate)
                    p.copy_(new_values)
                model.zero_grad()

            loss_train += loss.item()

        losses_train.append(loss_train / n_batch)

        # if epoch == 1 or epoch % 10 == 0:
        print(f"{datetime.now().time()}, {epoch}, train_loss: {loss/n_batch}")
    pass

In [6]:
learning_rate = 1e-2
loss_fn = nn.CrossEntropyLoss()
torch.manual_seed(SEED)
model_auto = MyMLP()
model_auto.to(device=DEVICE)
optimizer = optim.SGD(model_auto.parameters(), lr=learning_rate)

torch.manual_seed(SEED)
model_manual = MyMLP()
model_manual.to(device=DEVICE)

MyMLP(
  (input_layer): Linear(in_features=3072, out_features=512, bias=True)
  (h1): Linear(in_features=512, out_features=128, bias=True)
  (h2): Linear(in_features=128, out_features=32, bias=True)
  (output_layer): Linear(in_features=32, out_features=2, bias=True)
)

In [7]:
train_loader = DataLoader(cifar_train, shuffle=False, batch_size=1)
val_loader = DataLoader(cifar_val, shuffle=False, batch_size=1)

In [8]:
n_epochs = 10
train(n_epochs, optimizer, model_auto, loss_fn, train_loader)
train_manual_update(n_epochs, learning_rate, model_manual, loss_fn, train_loader)

Training MyMLP with optimizer
11:05:39.899756, 1, train_loss: 0.00010952912793141069
11:05:45.042024, 2, train_loss: 5.283247569553806e-05
11:05:50.331483, 3, train_loss: 4.344831406477836e-05
11:05:55.856810, 4, train_loss: 2.7311411381614114e-05
11:06:01.160560, 5, train_loss: 2.3984670643872053e-05
11:06:07.202230, 6, train_loss: 2.1332854115226365e-05
11:06:13.713791, 7, train_loss: 2.0628499053169388e-05
11:06:20.255842, 8, train_loss: 2.329091972699752e-05
11:06:26.401080, 9, train_loss: 2.073580252017654e-05
11:06:32.619339, 10, train_loss: 2.7415180050618997e-05
Training MyMLP with manual update
11:06:38.914601, 1, train_loss: 0.00010952912793141069
11:06:45.044169, 2, train_loss: 5.283247569553807e-05
11:06:51.077819, 3, train_loss: 4.3448314064778376e-05
11:06:57.113550, 4, train_loss: 2.7311411381614114e-05
11:07:03.148714, 5, train_loss: 2.3984670643872053e-05
11:07:09.221421, 6, train_loss: 2.1332854115226365e-05
11:07:15.265427, 7, train_loss: 2.0628499053169388e-05
11:07