In [4]:
# Install PyTorch and download data
!pip3 install torch torchvision

!wget -c http://ufldl.stanford.edu/housenumbers/train_32x32.mat http://ufldl.stanford.edu/housenumbers/test_32x32.mat

--2020-05-14 13:54:02--  http://ufldl.stanford.edu/housenumbers/train_32x32.mat
Resolving ufldl.stanford.edu (ufldl.stanford.edu)... 171.64.68.10
Connecting to ufldl.stanford.edu (ufldl.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182040794 (174M) [text/plain]
Saving to: ‘train_32x32.mat’


2020-05-14 13:54:06 (48.2 MB/s) - ‘train_32x32.mat’ saved [182040794/182040794]

--2020-05-14 13:54:06--  http://ufldl.stanford.edu/housenumbers/test_32x32.mat
Reusing existing connection to ufldl.stanford.edu:80.
HTTP request sent, awaiting response... 200 OK
Length: 64275384 (61M) [text/plain]
Saving to: ‘test_32x32.mat’


2020-05-14 13:54:08 (32.6 MB/s) - ‘test_32x32.mat’ saved [64275384/64275384]

FINISHED --2020-05-14 13:54:08--
Total wall clock time: 5.6s
Downloaded: 2 files, 235M in 5.5s (42.8 MB/s)


In [0]:
from collections import namedtuple

import matplotlib.pyplot as plt
import numpy as np
import PIL
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
from torch.utils.data.sampler import SubsetRandomSampler

from torchvision import transforms

In [0]:
# Use the first CUDA GPU when one is actually available; otherwise fall back to
# the CPU so that `device` always refers to hardware that exists.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [0]:
# Training data: convert every image to a tensor and normalize each of the
# three colour channels with the fixed mean/std given below.
data_train = dset.SVHN(
    './',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.43, 0.44, 0.47], std=[0.20, 0.20, 0.20]),
    ]),
)

In [0]:
# Test data uses the same preprocessing as the training data. The images are
# kept as 3-channel inputs: Normalize below carries three per-channel
# statistics and the models in this notebook start with Conv2d(in_channels=3),
# so a grayscale conversion would not match either of them.
data_test = dset.SVHN('./', split='test', transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean=[0.43,0.44,0.47],
                                               std=[0.20,0.20,0.20])
                       ]))

In [0]:
batch_size = 64

# Seed for the train/validation split. With a fixed seed, re-running this cell
# reproduces the same split instead of moving already-trained-on samples into
# the validation subset.
split_seed = 42

data_size = data_train.data.shape[0]
validation_split = .2
split = int(np.floor(validation_split * data_size))

# A dedicated RandomState keeps the split independent of the global NumPy RNG.
indices = np.random.RandomState(split_seed).permutation(data_size).tolist()

# The first `split` shuffled indices form the validation subset, the rest train.
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Both loaders read from data_train; the samplers restrict them to disjoint subsets.
train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size,
                                           sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size,
                                         sampler=val_sampler)

In [0]:
class Flattener(nn.Module):
    """
    Flattens every sample of a batch into a single vector

    Input of shape (batch_size, d1, d2, ...) becomes (batch_size, d1*d2*...).
    """
    def forward(self, x):
        batch_size, *_ = x.shape
        # reshape gives the same result as view for contiguous input, and also
        # works for non-contiguous tensors, where view raises an error
        return x.reshape(batch_size, -1)

In [0]:
# LeNet-like architecture for the SVHN task (3-channel 32x32 inputs, 10 classes)
lenet_model = nn.Sequential(
    # 3@32x32 -> 6@28x28 -> 6@14x14
    nn.Conv2d(3, 6, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),

    # 6@14x14 -> 16@10x10 -> 16@5x5
    nn.Conv2d(6, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),

    # 16@5x5 -> 400
    Flattener(),

    # Classifier head: 400 -> 120 -> 84 -> 10
    nn.Linear(5 * 5 * 16, 120),
    nn.ReLU(inplace=True),
    nn.Linear(120, 84),
    nn.ReLU(inplace=True),
    nn.Linear(84, 10),
)

In [30]:
# Put the parameters on the configured device as float32. The returned module
# is the last expression, so its structure is echoed as the cell output.
lenet_model.to(device=device, dtype=torch.float32)

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flattener()
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU(inplace=True)
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU(inplace=True)
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [31]:
# Move the model to the configured device (the module itself is echoed as output)
lenet_model.to(device=device)

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flattener()
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU(inplace=True)
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU(inplace=True)
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [0]:
# Cross-entropy loss on the same device as the model
loss = nn.CrossEntropyLoss().to(device)

In [0]:
# Plain SGD with L2 regularization (weight decay) over all model parameters
optimizer = optim.SGD(
    params=lenet_model.parameters(),
    lr=0.1,
    weight_decay=1e-4,
)

In [0]:
def train_model(model, train_loader, val_loader, loss, optimizer, num_epochs, step_size=1, gamma=1):
    """
    Trains the model and evaluates it on the validation data after every epoch

    Arguments:
    model - module to train; it is called on batches moved to `device`
    train_loader - iterable of (x, y) training batches
    val_loader - loader passed to compute_accuracy after each epoch
    loss - callable computing the loss from (prediction, y)
    optimizer - optimizer updating the model parameters
    num_epochs - number of passes over train_loader
    step_size, gamma - parameters of the StepLR scheduler created here;
        the defaults (gamma=1) keep the learning rate constant

    Returns: (loss_history, train_history, val_history), lists holding one
    float per epoch
    """
    loss_history = []
    train_history = []
    val_history = []

    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    for epoch in range(num_epochs):
        model.train() # Enter train mode

        loss_accum = 0.0
        correct_samples = 0
        total_samples = 0
        num_batches = 0
        for x, y in train_loader:
            x_gpu = x.to(device)
            y_gpu = y.to(device)
            prediction = model(x_gpu)
            loss_value = loss(prediction, y_gpu)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            indices = torch.argmax(prediction, dim=1)
            correct_samples += torch.sum(indices == y_gpu)
            total_samples += y.shape[0]

            # detach() so the running sum does not keep autograd history
            # of every batch alive for the whole epoch
            loss_accum += loss_value.detach()
            num_batches += 1

        # The learning rate schedule advances once per epoch
        scheduler.step()

        # Average over the number of batches actually processed (not the last
        # batch index); an empty loader yields NaN loss and zero accuracy
        ave_loss = float(loss_accum) / num_batches if num_batches else float("nan")
        train_accuracy = float(correct_samples) / total_samples if total_samples else 0.0
        val_accuracy = compute_accuracy(model, val_loader)

        loss_history.append(ave_loss)
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)

        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))

    return loss_history, train_history, val_history

In [0]:
def compute_accuracy(model, loader):
    """
    Computes accuracy on the dataset wrapped in a loader

    The model is switched to evaluation mode and is left in that mode.

    Arguments:
    model - module producing per-class scores for a batch
    loader - iterable of (x, y) batches

    Returns: accuracy as a float value between 0 and 1
    (0.0 if the loader yields no samples)
    """
    model.eval() # Evaluation mode

    correct_samples = 0
    total_samples = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for x, y in loader:
            x_gpu = x.to(device)
            y_gpu = y.to(device)

            prediction = torch.argmax(model(x_gpu), dim=1)

            # Count the matches of the whole batch in one vectorized comparison
            correct_samples += (prediction == y_gpu).sum().item()
            total_samples += y_gpu.shape[0]

    if total_samples == 0:
        return 0.0

    return correct_samples / total_samples

In [0]:
# Data augmentation

# The random perturbations come first in the pipeline; tensor conversion and
# normalization follow and use the same statistics as the plain training data.
# NOTE(review): newer torchvision releases renamed `resample` to `interpolation`
# in RandomRotation - confirm against the installed version.
tfs = transforms.Compose([
    transforms.ColorJitter(saturation=0.5, hue=0.5),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(degrees=50, resample=PIL.Image.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.43, 0.44, 0.47], std=[0.20, 0.20, 0.20]),
])

# Create augmented train dataset
data_aug_train = dset.SVHN('./', transform=tfs)

# Same sampler as the plain training loader, so augmentation is only ever
# applied to the training part of the split.
train_aug_loader = torch.utils.data.DataLoader(
    data_aug_train,
    batch_size=batch_size,
    sampler=train_sampler,
)

In [0]:
# Let's train it! (10 epochs on the augmented training data)
loss_history, train_history, val_history = train_model(
    model=lenet_model,
    train_loader=train_aug_loader,
    val_loader=val_loader,
    loss=loss,
    optimizer=optimizer,
    num_epochs=10,
)

In [0]:
# Deeper LeNet-like architecture with batch normalization for the SVHN task
lenet_model = nn.Sequential(
    # 3@32x32 -> 16@32x32 (padding keeps the spatial size)
    nn.Conv2d(3, 16, kernel_size=5, padding=2),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),

    # Pooling: 16@32x32 -> 16@16x16
    nn.MaxPool2d(kernel_size=2),

    # 16@16x16 -> 32@14x14
    nn.Conv2d(16, 32, kernel_size=3),
    nn.BatchNorm2d(32),
    nn.ReLU(inplace=True),

    # Pooling: 32@14x14 -> 32@7x7
    nn.MaxPool2d(kernel_size=2),

    # 32@7x7 -> 64@5x5
    nn.Conv2d(32, 64, kernel_size=3),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),

    # 64@5x5 -> 1600
    Flattener(),

    # Classifier head: 1600 -> 120 -> 84 -> 10
    nn.Linear(5 * 5 * 64, 120),
    nn.ReLU(inplace=True),
    nn.Linear(120, 84),
    nn.ReLU(inplace=True),
    nn.Linear(84, 10),
)

In [61]:
# Put the parameters on the configured device as float32. The returned module
# is the last expression, so its structure is echoed as the cell output.
lenet_model.to(device=device, dtype=torch.float32)

Sequential(
  (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU(inplace=True)
  (11): Flattener()
  (12): Linear(in_features=1600, out_features=120, bias=True)
  (13): ReLU(inplace=True)
  (14): Linear(in_features=120, out_features=84, bias=True)
  (15): ReLU(inplace=True)
  (16): Linear(in_features=84, out_features=10, bias=True)
)

In [62]:
# Move the model to the configured device (the module itself is echoed as output)
lenet_model.to(device=device)

Sequential(
  (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU(inplace=True)
  (11): Flattener()
  (12): Linear(in_features=1600, out_features=120, bias=True)
  (13): ReLU(inplace=True)
  (14): Linear(in_features=120, out_features=84, bias=True)
  (15): ReLU(inplace=True)
  (16): Linear(in_features=84, out_features=10, bias=True)
)

In [0]:
# Cross-entropy loss on the same device as the model
loss = nn.CrossEntropyLoss().to(device)

In [0]:
# Adam over all model parameters, with the optimizer's default hyperparameters
optimizer = optim.Adam(params=lenet_model.parameters())

In [65]:
# 30 epochs; the learning rate is multiplied by 0.9 after every epoch
loss_history, train_history, val_history = train_model(
    lenet_model,
    train_loader,
    val_loader,
    loss,
    optimizer,
    num_epochs=30,
    step_size=1,
    gamma=0.9,
)

Average loss: 0.607806, Train accuracy: 0.806402, Val accuracy: 0.874753
Average loss: 0.335215, Train accuracy: 0.897929, Val accuracy: 0.896253
Average loss: 0.271822, Train accuracy: 0.918575, Val accuracy: 0.900416
Average loss: 0.231103, Train accuracy: 0.930826, Val accuracy: 0.909562
Average loss: 0.197812, Train accuracy: 0.942224, Val accuracy: 0.917071
Average loss: 0.167546, Train accuracy: 0.950944, Val accuracy: 0.916934
Average loss: 0.143599, Train accuracy: 0.959066, Val accuracy: 0.917821
Average loss: 0.119724, Train accuracy: 0.966164, Val accuracy: 0.917275
Average loss: 0.102581, Train accuracy: 0.971112, Val accuracy: 0.915023
Average loss: 0.083539, Train accuracy: 0.976470, Val accuracy: 0.916661
Average loss: 0.068096, Train accuracy: 0.981691, Val accuracy: 0.912634
Average loss: 0.057384, Train accuracy: 0.984711, Val accuracy: 0.917207
Average loss: 0.045870, Train accuracy: 0.988056, Val accuracy: 0.917548
Average loss: 0.035623, Train accuracy: 0.991417, V

In [67]:
# As always, finish by checking the model on the test set
data_test = dset.SVHN(
    './',
    split='test',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.43, 0.44, 0.47], std=[0.20, 0.20, 0.20]),
    ]),
)
# No sampler: the test loader walks over the whole test split
test_loader = torch.utils.data.DataLoader(data_test, batch_size=batch_size)
test_accuracy = compute_accuracy(lenet_model, test_loader)
print("Test accuracy: %2.4f" % test_accuracy)

Test accuracy: 0.9113


In [0]:
def get_model():
    """
    Builds a fresh, randomly initialized LeNet-like network with batch normalization

    Returns: nn.Sequential with three conv blocks followed by three linear layers
    and 10 outputs
    """
    return nn.Sequential(
        # 3@32x32 -> 16@32x32 (padding keeps the spatial size)
        nn.Conv2d(3, 16, kernel_size=5, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(inplace=True),

        # Pooling: 16@32x32 -> 16@16x16
        nn.MaxPool2d(kernel_size=2),

        # 16@16x16 -> 32@14x14
        nn.Conv2d(16, 32, kernel_size=3),
        nn.BatchNorm2d(32),
        nn.ReLU(inplace=True),

        # Pooling: 32@14x14 -> 32@7x7
        nn.MaxPool2d(kernel_size=2),

        # 32@7x7 -> 64@5x5
        nn.Conv2d(32, 64, kernel_size=3),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),

        # 64@5x5 -> 1600
        Flattener(),

        # Classifier head: 1600 -> 120 -> 84 -> 10
        nn.Linear(5 * 5 * 64, 120),
        nn.ReLU(inplace=True),
        nn.Linear(120, 84),
        nn.ReLU(inplace=True),
        nn.Linear(84, 10),
    )

In [0]:
def train_model(model, train_loader, val_loader, loss, optimizer, num_epochs, scheduler):
    """
    Trains the model using an externally created learning rate scheduler

    Arguments:
    model - module to train; it is called on batches moved to `device`
    train_loader - iterable of (x, y) training batches
    val_loader - loader passed to compute_accuracy after each epoch
    loss - callable computing the loss from (prediction, y)
    optimizer - optimizer updating the model parameters
    num_epochs - number of passes over train_loader
    scheduler - object whose step() is called once after every epoch

    Returns: None, the per-epoch metrics are only printed
    """
    for epoch in range(num_epochs):
        model.train() # Enter train mode

        loss_accum = 0.0
        correct_samples = 0
        total_samples = 0
        num_batches = 0
        for x, y in train_loader:
            x_gpu = x.to(device)
            y_gpu = y.to(device)
            prediction = model(x_gpu)
            loss_value = loss(prediction, y_gpu)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            indices = torch.argmax(prediction, dim=1)
            correct_samples += torch.sum(indices == y_gpu)
            total_samples += y.shape[0]

            # detach() so the running sum does not keep autograd history
            # of every batch alive for the whole epoch
            loss_accum += loss_value.detach()
            num_batches += 1

        scheduler.step()

        # Average over the number of batches actually processed (not the last
        # batch index); an empty loader yields NaN loss and zero accuracy
        ave_loss = float(loss_accum) / num_batches if num_batches else float("nan")
        train_accuracy = float(correct_samples) / total_samples if total_samples else 0.0
        val_accuracy = compute_accuracy(model, val_loader)

        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" % (ave_loss, train_accuracy, val_accuracy))

In [0]:
# Fresh model and loss on the configured device; a single .to(device) replaces
# the CUDA-specific .type(...) call followed by a second move.
lenet_model = get_model().to(device)
loss = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(lenet_model.parameters(), lr=1e-3, weight_decay = 0)
# Multiply the learning rate by 0.9 after every epoch
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
train_model(
            model=lenet_model,
            train_loader=train_loader,
            val_loader=val_loader,
            loss=loss,
            optimizer=optimizer,
            num_epochs=10,
            scheduler=scheduler
            )

In [82]:
# Grid search over the learning rate and the weight decay strength
learning_rates = [1e-1, 1e-3, 1e-5]
reg = [1e-3, 1e-5, 1e-7]

epoch_num = 5

for lr in learning_rates:
    for reg_strength in reg:
        # Every combination starts from a freshly initialized model, optimizer
        # and scheduler. Model and loss are placed on the configured device.
        lenet_model = get_model().to(device)
        loss = nn.CrossEntropyLoss().to(device)
        optimizer = optim.Adam(lenet_model.parameters(), lr=lr, weight_decay=reg_strength)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
        print('================')
        print(lr, reg_strength)
        print('================')
        train_model(
                    model=lenet_model,
                    train_loader=train_loader,
                    val_loader=val_loader,
                    loss=loss,
                    optimizer=optimizer,
                    num_epochs=epoch_num,
                    scheduler=scheduler
                    )

0.1 0.001
Average loss: 2.449093, Train accuracy: 0.187063, Val accuracy: 0.183810
Average loss: 2.245471, Train accuracy: 0.187575, Val accuracy: 0.118081
Average loss: 2.245936, Train accuracy: 0.187814, Val accuracy: 0.183810
Average loss: 2.245087, Train accuracy: 0.188889, Val accuracy: 0.183810
Average loss: 2.243063, Train accuracy: 0.189452, Val accuracy: 0.183810
0.1 1e-05
Average loss: 3.047603, Train accuracy: 0.186687, Val accuracy: 0.183810
Average loss: 2.245769, Train accuracy: 0.187421, Val accuracy: 0.183810
Average loss: 2.244361, Train accuracy: 0.188513, Val accuracy: 0.183810
Average loss: 2.251459, Train accuracy: 0.188155, Val accuracy: 0.183810
Average loss: 2.244565, Train accuracy: 0.189827, Val accuracy: 0.183810
0.1 1e-07
Average loss: 2.735297, Train accuracy: 0.186090, Val accuracy: 0.183810
Average loss: 2.245817, Train accuracy: 0.189571, Val accuracy: 0.183810
Average loss: 2.245216, Train accuracy: 0.190100, Val accuracy: 0.183810
Average loss: 2.24392

In [0]:
# Final run: fresh model and loss on the configured device, trained for 30 epochs
# with lr=1e-3 and weight_decay=1e-7
lenet_model = get_model().to(device)
loss = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(lenet_model.parameters(), lr=1e-3, weight_decay = 1e-7)
# Multiply the learning rate by 0.9 after every epoch
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
train_model(
            model=lenet_model,
            train_loader=train_loader,
            val_loader=val_loader,
            loss=loss,
            optimizer=optimizer,
            num_epochs=30,
            scheduler=scheduler
            )

Average loss: 0.640346, Train accuracy: 0.797819, Val accuracy: 0.870726
Average loss: 0.343763, Train accuracy: 0.895181, Val accuracy: 0.897004
Average loss: 0.278658, Train accuracy: 0.917193, Val accuracy: 0.900416
Average loss: 0.237584, Train accuracy: 0.929478, Val accuracy: 0.914409
Average loss: 0.201493, Train accuracy: 0.940996, Val accuracy: 0.916866
Average loss: 0.171460, Train accuracy: 0.950688, Val accuracy: 0.915774
Average loss: 0.146668, Train accuracy: 0.957752, Val accuracy: 0.917890
Average loss: 0.123091, Train accuracy: 0.965464, Val accuracy: 0.917275
Average loss: 0.102671, Train accuracy: 0.971215, Val accuracy: 0.917412
Average loss: 0.085089, Train accuracy: 0.976538, Val accuracy: 0.917139
Average loss: 0.070373, Train accuracy: 0.981111, Val accuracy: 0.916866


In [0]:
# As always, finish by checking the model on the test set
data_test = dset.SVHN(
    './',
    split='test',
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.43, 0.44, 0.47], std=[0.20, 0.20, 0.20]),
    ]),
)
# No sampler: the test loader walks over the whole test split
test_loader = torch.utils.data.DataLoader(data_test, batch_size=batch_size)
test_accuracy = compute_accuracy(lenet_model, test_loader)
print("Test accuracy: %2.4f" % test_accuracy)