In [1]:
%matplotlib inline
from argparse import Namespace
import datetime
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Abbreviate printed tensors and seed PyTorch's global random generator.
SEED = 123
torch.set_printoptions(edgeitems=2)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7ff009358cb0>

In [2]:
# Human-readable names for the ten CIFAR-10 classes.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

In [3]:
from torchvision import datasets, transforms
# Directory the CIFAR-10 files are stored in (download=True fetches them if absent).
data_path = 'data-unversioned/p1ch6/'

# Convert images to tensors, then normalize each channel with the given
# per-channel mean and standard deviation.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4915, 0.4823, 0.4468),
                         std=(0.2470, 0.2435, 0.2616)),
])

# Training split.
cifar10 = datasets.CIFAR10(
    root=data_path, train=True, download=True, transform=train_transform)

Files already downloaded and verified


In [4]:
# Same preprocessing as the training split: tensor conversion followed by
# per-channel normalization with identical statistics.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4915, 0.4823, 0.4468),
                         std=(0.2470, 0.2435, 0.2616)),
])

# Held-out split (train=False), used for validation.
cifar10_val = datasets.CIFAR10(
    root=data_path, train=False, download=True, transform=val_transform)

Files already downloaded and verified


In [None]:
# Reduce CIFAR-10 to a two-class problem: original labels 0 and 2 are kept
# and renumbered to 0 and 1; every other sample is dropped.
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

cifar2 = [
    (image, label_map[target])
    for image, target in cifar10
    if target in label_map
]
cifar2_val = [
    (image, label_map[target])
    for image, target in cifar10_val
    if target in label_map
]

In [None]:
# Fully connected classifier: 3072 -> 1024 -> 512 -> 128 -> 2,
# with a Tanh after each of the three hidden Linear layers.
hidden_widths = [3072, 1024, 512, 128]
connected_layers = []
for n_in, n_out in zip(hidden_widths[:-1], hidden_widths[1:]):
    connected_layers.extend([nn.Linear(n_in, n_out), nn.Tanh()])
connected_layers.append(nn.Linear(hidden_widths[-1], 2))

connected_model = nn.Sequential(*connected_layers)

In [None]:
# Element count of every trainable parameter tensor in connected_model.
# requires_grad is already a bool, so it is tested directly rather than
# compared against True.
numel_list = [p.numel()
              for p in connected_model.parameters()
              if p.requires_grad]
# Displayed as (total number of parameters, per-tensor counts).
sum(numel_list), numel_list

In [None]:
# Smaller fully connected model: a single 512-unit hidden layer, with
# LogSoftmax applied along dim 1 of the output.
first_model_layers = [
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1),
]
first_model = nn.Sequential(*first_model_layers)

In [None]:
# Per-tensor element counts for first_model, shown together with their total.
numel_list = [
    param.numel()
    for param in first_model.parameters()
]
sum(numel_list), numel_list

In [None]:
# A single fully connected layer, to inspect the shapes of its parameters.
linear = nn.Linear(in_features=3072, out_features=1024)
linear.weight.shape, linear.bias.shape

In [None]:
# 3 input channels -> 16 output channels, 3x3 kernel, no padding.
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
conv

In [None]:
# Show the shapes of the convolution's kernel (weight) and bias tensors.
conv.weight.shape, conv.bias.shape

In [None]:
# Take the first image of the two-class set and add a leading batch
# dimension, because nn.Conv2d is applied to batched input here.
img, _ = cifar2[0]
img_batch = img.unsqueeze(0)
output = conv(img_batch)
# Compare input and output shapes.
img_batch.shape, output.shape

In [None]:
# Show the first output channel of the first batch item in grayscale.
# detach() drops the autograd graph so the tensor can be plotted.
fig, ax = plt.subplots()
ax.imshow(output[0, 0].detach(), cmap='gray')
plt.show()

In [None]:
# Settings for a single-output-channel convolution; padding=1 with a 3x3
# kernel is used so the output shape can be compared with the input shape.
params = Namespace(in_values=3, out_values=1, kernel_size=3, padding=1)

conv = nn.Conv2d(
    params.in_values,
    params.out_values,
    kernel_size=params.kernel_size,
    padding=params.padding,
)
output = conv(img.unsqueeze(0))
img.unsqueeze(0).shape, output.shape

In [None]:
# Overwrite the convolution's parameters by hand, outside of autograd:
# zero bias and every kernel weight set to 1/9.
with torch.no_grad():
    conv.bias.zero_()
    conv.weight.fill_(1.0 / 9.0)

In [None]:
# Re-run the convolution with the hand-set weights and display the result.
output = conv(img.unsqueeze(0))
fig, ax = plt.subplots()
ax.imshow(output[0, 0].detach(), cmap='gray')
plt.show()

In [None]:
params = Namespace(in_values=3, out_values=1, kernel_size=3, padding=1)

conv = nn.Conv2d(
    params.in_values,
    params.out_values,
    kernel_size=params.kernel_size,
    padding=params.padding,
)

# Right column minus left column: the kernel responds to intensity changes
# along the horizontal direction.
edge_kernel = torch.tensor([[-1.0, 0.0, 1.0],
                            [-1.0, 0.0, 1.0],
                            [-1.0, 0.0, 1.0]])

with torch.no_grad():
    # The 3x3 kernel is broadcast over every (output, input) channel pair.
    conv.weight[:] = edge_kernel
    conv.bias.zero_()

In [None]:
# Max pooling with a 2x2 window, applied to the batched image.
pool = nn.MaxPool2d(kernel_size=2)
pool_input = img.unsqueeze(0)
output = pool(pool_input)

# Compare shapes before and after pooling.
pool_input.shape, output.shape

In [None]:
# Convolutional front end: two conv -> tanh -> max-pool stages.
# The Tanh after the second convolution was missing; it is added so this
# Sequential matches the Net classes defined later in the notebook.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    nn.Conv2d(16, 8, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    #...
    )

In [None]:
# Same convolutional front end followed by the fully connected head.
# The Tanh after the second convolution was missing; it is added so this
# Sequential matches the Net classes defined later in the notebook.
# NOTE: nothing reshapes the pooled feature map before the first Linear
# layer, so a forward pass on an image batch is expected to fail; the
# notebook demonstrates that failure in a following cell.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    nn.Conv2d(16, 8, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    #...
    nn.Linear(8 * 8 * 8, 32),
    nn.Tanh(),
    nn.Linear(32, 2)
    )

In [None]:
# Per-tensor element counts for the current model, shown with their total.
numel_list = [
    param.numel()
    for param in model.parameters()
]
sum(numel_list), numel_list

In [None]:
# Attempt a forward pass on one image; if the model raises a RuntimeError,
# show it as cell output instead of aborting the notebook.
single_image_batch = img.unsqueeze(0)
try:
    model(single_image_batch)
except RuntimeError as shape_error:
    display(shape_error)

In [None]:
# Complete convolutional classifier expressed as a single nn.Sequential.
# Changes from the previous version of this cell:
#   * the redundant chained assignment (model = model = ...) is removed;
#   * the Tanh after the second convolution, which was missing, is added so
#     the network matches the Net classes defined later in the notebook.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    nn.Conv2d(16, 8, kernel_size=3, padding=1),
    nn.Tanh(),
    nn.MaxPool2d(2),
    nn.Flatten(),  # added to keep using nn.Sequential in models
    nn.Linear(8 * 8 * 8, 32),
    nn.Tanh(),
    nn.Linear(32, 2)
    )

In [None]:
# Repeat the forward-pass check on the current model; any RuntimeError is
# displayed rather than propagated.
single_image_batch = img.unsqueeze(0)
try:
    model(single_image_batch)
except RuntimeError as forward_error:
    display(forward_error)

In [None]:
# Forward pass on a single image (unsqueeze adds the batch dimension);
# the model output is shown as the cell result.
model(img.unsqueeze(0))

In [None]:
# Per-tensor element counts for the current model, shown with their total.
numel_list = [
    param.numel()
    for param in model.parameters()
]
sum(numel_list), numel_list

In [None]:
class Net(nn.Module):
    """Small convolutional classifier built from nn.Module submodules.

    Two conv -> tanh -> max-pool stages are followed by two fully connected
    layers that produce two output scores per sample. ``forward`` reshapes
    the pooled features to 8 * 8 * 8 = 512 values per sample, which matches
    the input size of ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # First convolutional stage: 3 -> 16 channels.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        # Second convolutional stage: 16 -> 8 channels.
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected head: 512 -> 32 -> 2.
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two output scores for each sample in batch ``x``."""
        features = self.conv1(x)
        features = self.act1(features)
        features = self.pool1(features)

        features = self.conv2(features)
        features = self.act2(features)
        features = self.pool2(features)

        # Collapse channel and spatial dimensions into one vector per sample.
        flat = features.view(-1, 8 * 8 * 8)
        hidden = self.act3(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Instantiate the module-based network and count its parameters.
model = Net()

numel_list = [
    param.numel()
    for param in model.parameters()
]
sum(numel_list), numel_list

In [None]:
# Forward pass on a single image (unsqueeze adds the batch dimension);
# the model output is shown as the cell result.
model(img.unsqueeze(0))

In [None]:
import torch.nn.functional as F

class Net(nn.Module):
    """Convolutional classifier using the functional API for stateless ops.

    Only the layers that own parameters (two convolutions, two linear
    layers) are registered as submodules; tanh and max pooling are called as
    functions inside ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 3 -> 16 -> 8 channels.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        # Fully connected head: 512 -> 32 -> 2.
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        """Return the two output scores for each sample in batch ``x``."""
        stage1 = torch.tanh(self.conv1(x))
        stage1 = F.max_pool2d(stage1, 2)

        stage2 = torch.tanh(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)

        # Collapse channel and spatial dimensions into one vector per sample.
        flat = stage2.view(-1, 8 * 8 * 8)
        hidden = torch.tanh(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Create a fresh instance of the most recently defined Net and run one
# image through it (unsqueeze adds the batch dimension).
model = Net()
model(img.unsqueeze(0))

In [None]:
import datetime  # <1>

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    For every batch the loss is computed, gradients are reset and
    recomputed, and the optimizer takes one step. On the first epoch and
    every tenth epoch the mean per-batch loss is printed with a timestamp.

    Args:
        n_epochs: number of passes over ``train_loader``.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        model: callable mapping a batch of inputs to outputs.
        loss_fn: callable ``(outputs, labels) -> loss``; the loss must
            support ``backward()`` and ``item()``.
        train_loader: iterable of ``(imgs, labels)`` batches; must support
            ``len()`` for the reported average.

    Returns:
        None.
    """
    for epoch in range(1, n_epochs + 1):
        running_loss = 0.0

        for batch_imgs, batch_labels in train_loader:
            predictions = model(batch_imgs)
            batch_loss = loss_fn(predictions, batch_labels)

            # Clear gradients left over from the previous step before
            # back-propagating this batch.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # item() converts the loss to a plain Python number.
            running_loss += batch_loss.item()

        is_report_epoch = epoch == 1 or epoch % 10 == 0
        if not is_report_epoch:
            continue

        mean_loss = running_loss / len(train_loader)
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss))

In [None]:
# Train a fresh network for 100 epochs with SGD on shuffled batches of 64.
model = Net()

params = Namespace(
    n_epochs=100,
    optimizer=optim.SGD(model.parameters(), lr=1e-2),
    model=model,
    loss_fn=nn.CrossEntropyLoss(),
    train_loader=torch.utils.data.DataLoader(
        cifar2, batch_size=64, shuffle=True),
)

# The Namespace fields carry the same names as training_loop's parameters,
# so they are forwarded as keyword arguments.
training_loop(**vars(params))

In [None]:
# Loader settings for evaluation: batches of 64, no shuffling.
train_params = Namespace(data=cifar2, batch_size=64, shuffle=False)
val_params = Namespace(data=cifar2_val, batch_size=64, shuffle=False)

train_loader = torch.utils.data.DataLoader(
    train_params.data,
    batch_size=train_params.batch_size,
    shuffle=train_params.shuffle,
)
val_loader = torch.utils.data.DataLoader(
    val_params.data,
    batch_size=val_params.batch_size,
    shuffle=val_params.shuffle,
)

def validate(model, train_loader, val_loader):
    '''Print the accuracy of ``model`` on the training and validation loaders.

    Each loader yields ``(imgs, labels)`` batches. The predicted class of a
    sample is the index of the largest value along dim 1 of the model
    output. One line per loader is printed; nothing is returned.
    '''
    named_loaders = {'train': train_loader, 'val': val_loader}

    for name, loader in named_loaders.items():
        n_correct, n_seen = 0, 0

        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for imgs, labels in loader:
                scores = model(imgs)
                predicted = torch.max(scores, dim=1).indices
                n_seen += labels.size(0)
                n_correct += (predicted == labels).sum().item()

        print("Accuracy {}: {:.2f}".format(name, n_correct / n_seen))
            
# Print the accuracy of the current `model` on the training and validation loaders.
validate(model, train_loader, val_loader)