In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Keep printed tensors short: show only 2 items at each edge of a dimension.
torch.set_printoptions(edgeitems=2)
# Fix the global PyTorch seed so weight initialisation and shuffling repeat across runs.
torch.manual_seed(123)

<torch._C.Generator at 0x7cdcd42897f0>

In [3]:
# Human-readable names for the ten classes, one entry per label.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]


In [4]:
from torchvision import datasets, transforms
# Directory the dataset archive is downloaded to and extracted in.
data_path = '../data-unversioned/p1ch6/'
# Training split (train=True); fetched on first use because download=True.
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        # Convert each image to a tensor, then standardise every channel.
        transforms.ToTensor(),
        # NOTE(review): per-channel mean / std, presumably measured on the
        # training images - confirm where these constants come from.
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 83956816.76it/s]


Extracting ../data-unversioned/p1ch6/cifar-10-python.tar.gz to ../data-unversioned/p1ch6/


In [5]:
# Held-out split (train=False), stored in the same directory as the training data.
cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        # Same normalisation constants as the training split so both splits
        # are preprocessed identically.
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [6]:
# Fully connected baseline: one hidden tanh layer over the flattened image,
# ending in log-probabilities for the ten classes.
first_model = nn.Sequential(
    nn.Linear(3 * 32 * 32, 512),  # 3072 flattened inputs -> 512 hidden units
    nn.Tanh(),
    nn.Linear(512, 10),           # one score per class
    nn.LogSoftmax(dim=1),         # normalise along the class dimension
)

In [7]:
# Size of every parameter tensor in the fully connected baseline; the cell
# displays the grand total next to the per-tensor breakdown.
numel_list = [param.numel() for param in first_model.parameters()]
(sum(numel_list), numel_list)

(1578506, [1572864, 512, 5120, 10])

In [8]:
# Convolutional classifier expressed as a plain nn.Sequential:
# two conv -> tanh -> 2x2 max-pool stages, then a two-layer linear head.
model = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.Tanh(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 8, kernel_size=3, padding=1),
            nn.Tanh(),
            nn.MaxPool2d(2),
            # The pooling stack emits a 4-D (N, 8, 8, 8) tensor for 32x32
            # inputs, but nn.Linear consumes (N, features); without this
            # flatten the forward pass fails with a shape mismatch.
            nn.Flatten(),
            nn.Linear(8 * 8 * 8, 32),
            nn.Tanh(),
            # Ten logits, matching the ten classes every other classifier in
            # this notebook predicts.
            nn.Linear(32, 10))

In [9]:
class Net(nn.Module):
    """Small CNN: two conv/tanh/max-pool stages and a two-layer linear head.

    Activations and pooling layers are registered as submodules here, so they
    show up when the module is printed even though they own no parameters.
    The head produces 10 raw scores per input image.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels; the 2x2 pool halves height and width.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        # Stage 2: 16 -> 8 channels; halves height and width again.
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        # Head: 8 channels * 8 * 8 positions -> 32 hidden units -> 10 scores.
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        # First convolutional stage.
        feats = self.conv1(x)
        feats = self.act1(feats)
        feats = self.pool1(feats)
        # Second convolutional stage.
        feats = self.conv2(feats)
        feats = self.act2(feats)
        feats = self.pool2(feats)
        # Collapse channels and spatial positions into one feature vector per
        # sample before the linear layers.
        flat = feats.view(-1, 8 * 8 * 8)
        hidden = self.act3(self.fc1(flat))
        return self.fc2(hidden)

In [10]:
# Build the convolutional network and repeat the parameter count so it can be
# compared with the fully connected baseline.
model = Net()

numel_list = [param.numel() for param in model.parameters()]
(sum(numel_list), numel_list)

(18354, [432, 16, 1152, 8, 16384, 32, 320, 10])

In [11]:
import torch
# Show the properties of CUDA device 0; this call needs a CUDA GPU to be
# visible to PyTorch.
torch.cuda.get_device_properties("cuda:0")

_CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15101MB, multi_processor_count=40)

In [12]:
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN that keeps only parameterised layers as submodules.

    tanh and 2x2 max-pooling are applied through their functional forms inside
    ``forward``. The head produces 10 raw scores per input image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        feats = x
        # Each stage is convolution -> tanh -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2):
            feats = F.max_pool2d(torch.tanh(conv(feats)), 2)
        # One 512-element feature vector per sample for the linear head.
        flat = feats.view(-1, 8 * 8 * 8)
        hidden = torch.tanh(self.fc1(flat))
        return self.fc2(hidden)

In [13]:
import datetime  # <1>

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Batches are moved to the device that holds the model's parameters, so the
    loop works wherever the caller placed the model instead of requiring
    ``cuda:0``.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer built over ``model.parameters()``.
        model: Module being trained; its output is handed to ``loss_fn``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.

    Prints a timestamp and the mean per-batch loss for epoch 1 and for every
    tenth epoch. Returns ``None``.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            loss = loss_fn(outputs, labels)

            # Clear gradients left over from the previous step before
            # back-propagating this batch.
            optimizer.zero_grad()

            loss.backward()

            optimizer.step()

            # .item() extracts a Python float so the running sum does not
            # hold on to the autograd graph.
            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [31]:
# NOTE(review): batch_size is 1024 * 1000 = 1,024,000. If that exceeds
# len(cifar10), every epoch is a single full-dataset batch (one optimizer step
# per epoch) - confirm this is intended; a later cell uses batch_size=1024.
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=1024 * 1000,
                                           shuffle=True)  # reshuffled every epoch

model = Net()  # fresh, randomly initialised network
model = model.to(torch.device("cuda:0"))
optimizer = optim.SGD(model.parameters(), lr=1e-2)  # plain SGD, fixed learning rate
loss_fn = nn.CrossEntropyLoss()  # consumes the raw scores returned by the model

training_loop(  # progress is printed by the loop itself
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2023-12-12 23:07:49.123229 Epoch 1, Training loss 2.31667160987854


In [13]:
import torch.nn.functional as F

class Net(nn.Module):
    """Wider CNN with three conv/tanh/max-pool stages (128, 64, 32 channels).

    Every stage halves height and width. The linear head is sized for
    32 channels * 4 * 4 positions = 512 features per sample and ends in 10
    raw scores.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 4 * 4, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        stage1 = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        stage2 = F.max_pool2d(torch.tanh(self.conv2(stage1)), 2)
        stage3 = F.max_pool2d(torch.tanh(self.conv3(stage2)), 2)
        # Collapse channels and positions into 512 features per sample.
        flat = stage3.view(-1, 32 * 4 * 4)
        hidden = torch.tanh(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Instantiate the three-convolution network and display its total parameter count.
model = Net()

numel_list = [param.numel() for param in model.parameters()]
sum(numel_list)

In [14]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Batches are moved to the device that holds the model's parameters, so the
    loop works wherever the caller placed the model instead of requiring
    ``cuda:0``.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer built over ``model.parameters()``.
        model: Module being trained; its output is handed to ``loss_fn``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.

    Prints the mean per-batch loss for epoch 1 and for every tenth epoch.
    Returns ``None``.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            loss = loss_fn(outputs, labels)

            # Clear gradients left over from the previous step before
            # back-propagating this batch.
            optimizer.zero_grad()

            loss.backward()

            optimizer.step()

            # .item() extracts a Python float so the running sum does not
            # hold on to the autograd graph.
            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('Epoch {}, Training loss {}'.format(
                epoch,
                loss_train / len(train_loader)))

In [None]:
import gc

# Release the previous model before building a new one: move it to the CPU,
# drop the name, collect garbage, then ask the CUDA caching allocator to free
# its unused blocks. This requires `model` to exist from an earlier cell.
model.cpu()
del model
gc.collect()
torch.cuda.empty_cache()

train_loader = torch.utils.data.DataLoader(cifar10, batch_size=1024,
                                           shuffle=True)  # reshuffled every epoch

model = Net()  # fresh, randomly initialised network
model = model.to(torch.device("cuda:0"))
optimizer = optim.SGD(model.parameters(), lr=1e-2)  # plain SGD, fixed learning rate
loss_fn = nn.CrossEntropyLoss()  # consumes the raw scores returned by the model

training_loop(  # progress is printed by the loop itself
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

Epoch 1, Training loss 2.292744524624883
Epoch 10, Training loss 1.958862489583541
Epoch 20, Training loss 1.7709223129311387
Epoch 30, Training loss 1.621062500136239
Epoch 40, Training loss 1.4997605207015057
Epoch 50, Training loss 1.406019536816344
Epoch 60, Training loss 1.337945006331619
Epoch 70, Training loss 1.2825653066440506
Epoch 80, Training loss 1.233365309481718
Epoch 90, Training loss 1.1903399910245622
Epoch 100, Training loss 1.150615212868671
Epoch 110, Training loss 1.1127490437760645
Epoch 120, Training loss 1.0757551850104818
Epoch 130, Training loss 1.0398193013911345
Epoch 140, Training loss 1.0053152904218556
Epoch 150, Training loss 0.9712200578378172
Epoch 160, Training loss 0.9402082744909792
Epoch 170, Training loss 0.908155155425169
Epoch 180, Training loss 0.8790141052129318
Epoch 190, Training loss 0.8565642712067585
Epoch 200, Training loss 0.8349437494667209
Epoch 210, Training loss 0.8112078272566503
Epoch 220, Training loss 0.7957611789508742
Epoch 2

In [None]:
# Loaders used only for measuring accuracy: no shuffling on either split.
train_loader = torch.utils.data.DataLoader(
    cifar10,
    batch_size=2048,
    shuffle=False,
)
val_loader = torch.utils.data.DataLoader(
    cifar10_val,
    batch_size=2048,
    shuffle=False,
)

def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both loaders.

    The model is switched to evaluation mode while predictions are made, so
    layers such as Dropout and BatchNorm use their inference behaviour, and
    its previous mode is restored afterwards. Batches are moved to the device
    that holds the model's parameters.

    Args:
        model: Module returning one row of class scores per input sample.
        train_loader: Iterable of ``(imgs, labels)`` batches, reported as "train".
        val_loader: Iterable of ``(imgs, labels)`` batches, reported as "val".

    Returns ``None``; one ``Accuracy <name>: <fraction>`` line is printed per
    loader.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    try:
        for name, loader in [("train", train_loader), ("val", val_loader)]:
            correct = 0
            total = 0

            with torch.no_grad():  # no gradients are needed for evaluation
                for imgs, labels in loader:
                    imgs, labels = imgs.to(device), labels.to(device)
                    outputs = model(imgs)
                    # Index of the highest score in each row is the predicted class.
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

            print("Accuracy {}: {:.2f}".format(name , correct / total))
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

# Report accuracy of the current `model` on the two evaluation loaders defined above.
validate(model, train_loader, val_loader)

Accuracy train: 0.77
Accuracy val: 0.73


ResNet-style network (with a skip connection)

In [14]:
class NetRes_10(nn.Module):
    """Three-stage ReLU CNN with a skip connection around the third convolution.

    Args:
        n_chans1: Output channels of the first convolution. The second and
            third convolutions use ``n_chans1 // 2`` channels.

    ``forward`` maps a batch of 3-channel 32x32 images to 10 raw scores per
    image.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        half_chans = n_chans1 // 2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(half_chans, half_chans,
                               kernel_size=3, padding=1)
        # Three 2x2 pools reduce 32x32 to 4x4. The parentheses matter:
        # `4 * 4 * n_chans1 // 2` evaluates as `(16 * n_chans1) // 2`, which
        # disagrees with the real feature count whenever n_chans1 is odd.
        self.n_flat = 4 * 4 * half_chans
        self.fc1 = nn.Linear(self.n_flat, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        # Skip connection: the input of conv3 is added back to its activated
        # output before pooling.
        out1 = out
        out = F.max_pool2d(torch.relu(self.conv3(out)) + out1, 2)
        out = out.view(-1, self.n_flat)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [15]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Batches are moved to the device that holds the model's parameters, so the
    loop works wherever the caller placed the model instead of requiring
    ``cuda:0``.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer built over ``model.parameters()``.
        model: Module being trained; its output is handed to ``loss_fn``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.

    Prints the mean per-batch loss for epoch 1 and for every tenth epoch.
    Returns ``None``.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)

            loss = loss_fn(outputs, labels)

            # Clear gradients left over from the previous step before
            # back-propagating this batch.
            optimizer.zero_grad()

            loss.backward()

            optimizer.step()

            # .item() extracts a Python float so the running sum does not
            # hold on to the autograd graph.
            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('Epoch {}, Training loss {}'.format(
                epoch,
                loss_train / len(train_loader)))

In [16]:
# Loaders for the residual model: the training split is reshuffled every
# epoch, the validation split keeps its order.
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=4096,
                                           shuffle=True)  # reshuffled every epoch
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=4096,
                                         shuffle=False)

# Residual network on the GPU, trained with plain SGD and cross-entropy.
model = NetRes_10(n_chans1=32).to(torch.device("cuda:0"))
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

Epoch 1, Training loss 2.31243450825031
Epoch 10, Training loss 2.2879948982825646
Epoch 20, Training loss 2.214522985311655
Epoch 30, Training loss 2.0922749042510986
Epoch 40, Training loss 1.9942488578649669
Epoch 50, Training loss 1.8939153506205633
Epoch 60, Training loss 1.8272024943278387
Epoch 70, Training loss 1.7720042100319495
Epoch 80, Training loss 1.747085919746986
Epoch 90, Training loss 1.6687112221351037
Epoch 100, Training loss 1.6500557936154878
Epoch 110, Training loss 1.6016983619103065
Epoch 120, Training loss 1.5889384746551514
Epoch 130, Training loss 1.592147552050077
Epoch 140, Training loss 1.5214084203426654
Epoch 150, Training loss 1.5292710249240582
Epoch 160, Training loss 1.5102150256817157
Epoch 170, Training loss 1.4817390441894531
Epoch 180, Training loss 1.472618213066688
Epoch 190, Training loss 1.4305044320913463
Epoch 200, Training loss 1.3904639665897076
Epoch 210, Training loss 1.3782437489582942
Epoch 220, Training loss 1.3870530220178456
Epoch

In [17]:
def validate(model, train_loader, val_loader):
    """Print the classification accuracy of ``model`` on both loaders.

    The model is switched to evaluation mode while predictions are made, so
    layers such as Dropout and BatchNorm use their inference behaviour, and
    its previous mode is restored afterwards. Batches are moved to the device
    that holds the model's parameters.

    Args:
        model: Module returning one row of class scores per input sample.
        train_loader: Iterable of ``(imgs, labels)`` batches, reported as "train".
        val_loader: Iterable of ``(imgs, labels)`` batches, reported as "val".

    Returns ``None``; one ``Accuracy <name>: <fraction>`` line is printed per
    loader.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    try:
        for name, loader in [("train", train_loader), ("val", val_loader)]:
            correct = 0
            total = 0

            with torch.no_grad():  # no gradients are needed for evaluation
                for imgs, labels in loader:
                    imgs, labels = imgs.to(device), labels.to(device)
                    outputs = model(imgs)
                    # Index of the highest score in each row is the predicted class.
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

            print("Accuracy {}: {:.2f}".format(name , correct / total))
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

# Report accuracy of the current `model` on the current train / validation loaders.
validate(model, train_loader, val_loader)

Accuracy train: 0.56
Accuracy val: 0.55


In [21]:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn,
                        train_loader, l2_lambda=0.001):
    """Train ``model`` with an explicit L2 penalty added to the loss.

    The penalty is ``l2_lambda`` times the sum of squares of every model
    parameter (weights and biases alike). Batches are moved to the device
    that holds the model's parameters instead of assuming ``cuda:0``.

    Args:
        n_epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer built over ``model.parameters()``.
        model: Module being trained; its output is handed to ``loss_fn``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Sized iterable yielding ``(imgs, labels)`` batches.
        l2_lambda: Weight of the L2 penalty; defaults to the 0.001 that was
            previously hard-coded inside the batch loop.

    Prints the mean per-batch loss (penalty included) for epoch 1 and for
    every tenth epoch. Returns ``None``.
    """
    # Follow the model's placement; fall back to CPU for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            # Sum of squared entries over all parameter tensors.
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('Epoch {}, Training loss {}'.format(
                epoch,
                loss_train / len(train_loader)))

In [24]:
# NOTE(review): `Net` resolves to whichever Net definition was executed most
# recently, and `train_loader` to the most recently built loader - confirm
# which ones are meant here.
model = Net().to(device="cuda:0")
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

# Same optimisation setup as before, with the explicit L2 penalty added to the loss.
training_loop_l2reg(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

Epoch 1, Training loss 2.324833943293645
Epoch 10, Training loss 2.220768561730018
Epoch 20, Training loss 2.098164906868568
Epoch 30, Training loss 2.0166374536661
Epoch 40, Training loss 1.9657123639033391
Epoch 50, Training loss 1.9271488281396718
Epoch 60, Training loss 1.8946729715053852
Epoch 70, Training loss 1.8618481250909658
Epoch 80, Training loss 1.819174784880418
Epoch 90, Training loss 1.7816865627582257
Epoch 100, Training loss 1.741009391271151
Epoch 110, Training loss 1.712285802914546
Epoch 120, Training loss 1.682334909072289
Epoch 130, Training loss 1.6567858457565308
Epoch 140, Training loss 1.6345323140804584
Epoch 150, Training loss 1.6116433418714082
Epoch 160, Training loss 1.5992481800226064
Epoch 170, Training loss 1.5840789446463952
Epoch 180, Training loss 1.570944254214947
Epoch 190, Training loss 1.5566963507578924
Epoch 200, Training loss 1.5403416248468251
Epoch 210, Training loss 1.5310522409585805
Epoch 220, Training loss 1.5148938802572398
Epoch 230,

In [25]:
# Report accuracy of the L2-regularised `model` trained in the cell above.
validate(model, train_loader, val_loader)

Accuracy train: 0.51
Accuracy val: 0.51


In [None]:
class NetDropout(nn.Module):
    """Two-stage tanh CNN with channel dropout after each pooling step.

    Args:
        n_chans1: Output channels of the first convolution; the second
            convolution uses ``n_chans1 // 2``.
        n_classes: Number of output scores. Defaults to 10 so the model can be
            trained on the 10-class labels used by the rest of this notebook;
            a 2-logit head fails in ``CrossEntropyLoss`` as soon as a label
            greater than 1 appears.

    ``forward`` maps a batch of 3-channel 32x32 images to ``n_classes`` raw
    scores per image. Dropout is only active while the module is in training
    mode.
    """

    def __init__(self, n_chans1=32, n_classes=10):
        super().__init__()
        self.n_chans1 = n_chans1
        half_chans = n_chans1 // 2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3,
                               padding=1)
        self.conv2_dropout = nn.Dropout2d(p=0.3)
        # Two 2x2 pools reduce 32x32 to 8x8. The parentheses matter:
        # `8 * 8 * n_chans1 // 2` evaluates as `(64 * n_chans1) // 2`, which
        # disagrees with the real feature count whenever n_chans1 is odd.
        self.n_flat = 8 * 8 * half_chans
        self.fc1 = nn.Linear(self.n_flat, 32)
        self.fc2 = nn.Linear(32, n_classes)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = self.conv2_dropout(out)
        out = out.view(-1, self.n_flat)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
# Dropout variant on the GPU, trained with plain SGD and cross-entropy.
# It reuses the `train_loader` left by an earlier cell.
model = NetDropout(n_chans1=32).to(device="cuda:0")
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

In [None]:
# Report accuracy of the dropout `model` trained in the cell above.
validate(model, train_loader, val_loader)

In [None]:
class NetBatchNorm(nn.Module):
    """Two-stage tanh CNN with batch normalisation after each convolution.

    Args:
        n_chans1: Output channels of the first convolution; the second
            convolution uses ``n_chans1 // 2``.
        n_classes: Number of output scores. Defaults to 10 so the model can be
            trained on the 10-class labels used by the rest of this notebook;
            a 2-logit head fails in ``CrossEntropyLoss`` as soon as a label
            greater than 1 appears.

    ``forward`` maps a batch of 3-channel 32x32 images to ``n_classes`` raw
    scores per image. Batch normalisation uses batch statistics in training
    mode and its running estimates in evaluation mode.
    """

    def __init__(self, n_chans1=32, n_classes=10):
        super().__init__()
        self.n_chans1 = n_chans1
        half_chans = n_chans1 // 2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.conv2 = nn.Conv2d(n_chans1, half_chans, kernel_size=3,
                               padding=1)
        self.conv2_batchnorm = nn.BatchNorm2d(num_features=half_chans)
        # Two 2x2 pools reduce 32x32 to 8x8. The parentheses matter:
        # `8 * 8 * n_chans1 // 2` evaluates as `(64 * n_chans1) // 2`, which
        # disagrees with the real feature count whenever n_chans1 is odd.
        self.n_flat = 8 * 8 * half_chans
        self.fc1 = nn.Linear(self.n_flat, 32)
        self.fc2 = nn.Linear(32, n_classes)

    def forward(self, x):
        # Normalise the convolution output before the non-linearity.
        out = self.conv1_batchnorm(self.conv1(x))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = self.conv2_batchnorm(self.conv2(out))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = out.view(-1, self.n_flat)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
# Batch-norm variant on the GPU, trained with plain SGD and cross-entropy.
# It reuses the `train_loader` left by an earlier cell.
model = NetBatchNorm(n_chans1=32).to(device="cuda:0")
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

In [None]:
# Report accuracy of the batch-norm `model` trained in the cell above.
validate(model, train_loader, val_loader)