# 4. Convolutional networks

- Used as part of INFO8010 Deep Learning (Gilles Louppe, 2018-2019).
- Originally adapted from [Pytorch tutorial for Deep Learning researchers](https://github.com/yunjey/pytorch-tutorial) (Yunvey Choi, 2018).

---

In [0]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

# Hyper-parameters

In [0]:
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# Data

In [0]:
# MNIST Dataset (Images and Labels)
train_dataset = dsets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

# Dataset Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:02, 3901209.68it/s]                             


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 134267.69it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2016527.30it/s]                            
0it [00:00, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 51294.98it/s]            

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!





# Model

In [0]:
# Model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding = 2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding = 2),# CONV,
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

model = CNN()

# Loss and optimizer

In [0]:
criterion = nn.CrossEntropyLoss()  
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  
# Set parameters to be updated.


# Training the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' 
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss))

Epoch: [1/5], Step: [100/600], Loss: 0.1497
Epoch: [1/5], Step: [200/600], Loss: 0.1004
Epoch: [1/5], Step: [300/600], Loss: 0.1092
Epoch: [1/5], Step: [400/600], Loss: 0.0211
Epoch: [1/5], Step: [500/600], Loss: 0.0444
Epoch: [1/5], Step: [600/600], Loss: 0.0167
Epoch: [2/5], Step: [100/600], Loss: 0.0268
Epoch: [2/5], Step: [200/600], Loss: 0.0402
Epoch: [2/5], Step: [300/600], Loss: 0.0333
Epoch: [2/5], Step: [400/600], Loss: 0.0672
Epoch: [2/5], Step: [500/600], Loss: 0.0463
Epoch: [2/5], Step: [600/600], Loss: 0.0567
Epoch: [3/5], Step: [100/600], Loss: 0.0214
Epoch: [3/5], Step: [200/600], Loss: 0.0300
Epoch: [3/5], Step: [300/600], Loss: 0.0086
Epoch: [3/5], Step: [400/600], Loss: 0.0242
Epoch: [3/5], Step: [500/600], Loss: 0.0334
Epoch: [3/5], Step: [600/600], Loss: 0.0077
Epoch: [4/5], Step: [100/600], Loss: 0.0266
Epoch: [4/5], Step: [200/600], Loss: 0.0096
Epoch: [4/5], Step: [300/600], Loss: 0.0134
Epoch: [4/5], Step: [400/600], Loss: 0.0298
Epoch: [4/5], Step: [500/600], L

# Test the model

In [0]:
# Test the Model
model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0

for images, labels in test_loader:
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    
print('Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the model on the 10000 test images: 98 %


# Using a pretrained model

In [0]:
# Download and load pretrained vgg16.
import torchvision

We load vgg16 a CNN trained on imagenet. 
![VGG16:](http://zike.io/upload_img/cnn/vgg-16-receptive-field.png)

In [0]:
vgg16 = torchvision.models.vgg16(pretrained=True).eval()

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [00:05<00:00, 109123027.00it/s]


## Simple "real life" example with a pre-trained neural network.

We download a mapping between the class label as an integer into text.

In [0]:
!wget https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json
import json
class_idx = json.load(open("imagenet_class_index.json"))
idx2label = np.array([class_idx[str(k)][1] for k in range(len(class_idx))])

--2019-07-15 07:22:45--  https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.19.35
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.19.35|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 35363 (35K) [application/octet-stream]
Saving to: ‘imagenet_class_index.json’


2019-07-15 07:22:46 (429 KB/s) - ‘imagenet_class_index.json’ saved [35363/35363]



vgg16 was trained on imagenet dataset which inputs have been normalized as follows.

In [0]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.485, 0.456, 0.406), 
                                                                            std=(0.229, 0.224, 0.225))])

In [0]:
from PIL import Image
import requests
from io import BytesIO
response = requests.get('https://lafeber.com/pet-birds/wp-content/uploads/2018/06/Indian-Ring-Necked-Parakeet-300x300.jpg')
im = Image.open(BytesIO(response.content))
im = transform(im)
print(im.shape)
probas = nn.Softmax()(vgg16(im.unsqueeze(0))).view(-1)
sorted_ouputs = torch.argsort(probas, descending=True)
for i in range(5):
    print("Class label: {:s} - Probability: {:4f}".format(
        idx2label[sorted_ouputs[i]], probas[sorted_ouputs[i]]))

torch.Size([3, 300, 300])
Class label: lorikeet - Probability: 0.505242
Class label: bee_eater - Probability: 0.186730
Class label: macaw - Probability: 0.113632
Class label: toucan - Probability: 0.033876
Class label: fig - Probability: 0.015141


  


## Fine-tuning of pre-trained neural network

<div class="alert alert-success">
<b>EXERCISE</b>:

Investigate the output that is given by the network.
Is it interpretable in terms of probabilities? If not, transform it in a way that matches what is done in [deep learning](https://en.wikipedia.org/wiki/Softmax_function).

</div>

Download CIFAR10

In [0]:
# MNIST Dataset (Images and Labels)
train_dataset = dsets.CIFAR10(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.CIFAR10(root='./data', 
                           train=False, 
                           transform=transforms.ToTensor())

# Dataset Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170500096it [00:06, 27688949.79it/s]                               


In [0]:
iterator = iter(train_loader)    
samples, labels = iterator.next()

print(vgg16.avgpool(vgg16.features(samples)).shape)
# Replace top layer for finetuning, e.g. for CIFAR10.
vgg16.classifier = nn.Linear(25088, 10)

torch.Size([100, 512, 7, 7])


In [0]:
criterion = nn.CrossEntropyLoss()# Loss function
optimizer = torch.optim.Adam(vgg16.classifier.parameters(), lr=1e-3)# Optimizer

# Training the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = vgg16(images)
        #print(outputs.shape)
        #break
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 1 == 0:
            print('Epoch: [%d/%d], Step: [%d/%d], Loss: %.4f' 
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss))

Epoch: [2/5], Step: [19/500], Loss: 1.4260
Epoch: [2/5], Step: [20/500], Loss: 1.8924
Epoch: [2/5], Step: [21/500], Loss: 1.5116
Epoch: [2/5], Step: [22/500], Loss: 1.8302
Epoch: [2/5], Step: [23/500], Loss: 1.7025
Epoch: [2/5], Step: [24/500], Loss: 2.9106
Epoch: [2/5], Step: [25/500], Loss: 1.9810
Epoch: [2/5], Step: [26/500], Loss: 1.4249
Epoch: [2/5], Step: [27/500], Loss: 2.1832
Epoch: [2/5], Step: [28/500], Loss: 1.7107
Epoch: [2/5], Step: [29/500], Loss: 1.9202
Epoch: [2/5], Step: [30/500], Loss: 1.5315
Epoch: [2/5], Step: [31/500], Loss: 1.7533
Epoch: [2/5], Step: [32/500], Loss: 2.1113
Epoch: [2/5], Step: [33/500], Loss: 1.6176
Epoch: [2/5], Step: [34/500], Loss: 1.7087
Epoch: [2/5], Step: [35/500], Loss: 2.4554
Epoch: [2/5], Step: [36/500], Loss: 2.0677
Epoch: [2/5], Step: [37/500], Loss: 2.0490
Epoch: [2/5], Step: [38/500], Loss: 1.7464
Epoch: [2/5], Step: [39/500], Loss: 1.7873
Epoch: [2/5], Step: [40/500], Loss: 2.1472
Epoch: [2/5], Step: [41/500], Loss: 1.7758
Epoch: [2/5