# Transfer Learning with Resnet

In this notebook we load a small dataset that contains images of dolphins and elephants. We classify the images using CNNs, compare two approaches, and see which works better:
1. Training a CNN from scratch.
2. Fine-tuning a pretrained ResNet.

In [1]:
import torch
from torchvision import transforms
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets

# Seed PyTorch's global random number generator so that results are
# repeatable when the notebook is run top to bottom on a fresh kernel.
torch.manual_seed(0)

<torch._C.Generator at 0x7fcab066d450>

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

Let's load our data and have a look at the shape of some images:

In [3]:
# Peek at the first 11 raw samples (no transform applied yet); each sample is
# printed as returned by the dataset, i.e. an (image, class index) pair.
dataset = datasets.ImageFolder(root='../data/animals')
n_preview = min(11, len(dataset))
for idx in range(n_preview):
    print(dataset[idx])

(<PIL.Image.Image image mode=RGB size=300x179 at 0x7FCAAF5CBDF0>, 0)
(<PIL.Image.Image image mode=RGB size=300x179 at 0x7FCAAF5CBE50>, 0)
(<PIL.Image.Image image mode=RGB size=300x166 at 0x7FCAAF5CBF40>, 0)
(<PIL.Image.Image image mode=RGB size=300x259 at 0x7FCAAF5CB9A0>, 0)
(<PIL.Image.Image image mode=RGB size=300x225 at 0x7FCAAF5CBDF0>, 0)
(<PIL.Image.Image image mode=RGB size=300x277 at 0x7FCAAF5CBFA0>, 0)
(<PIL.Image.Image image mode=RGB size=300x183 at 0x7FCAAF5CBF40>, 0)
(<PIL.Image.Image image mode=RGB size=300x225 at 0x7FCAAF5CBE50>, 0)
(<PIL.Image.Image image mode=RGB size=300x214 at 0x7FCAAF5CBDF0>, 0)
(<PIL.Image.Image image mode=RGB size=300x223 at 0x7FCAAF5CB9A0>, 0)
(<PIL.Image.Image image mode=RGB size=300x277 at 0x7FCAAF5CBF40>, 0)


We see that all pictures have width=300 but varying heights. To use them in transfer learning they need to have the standard shape (224, 224), which is the data format of ImageNet (on which most pretrained models are trained).  

To get them into this shape, we first resize each image so that its shorter side (here the height) becomes 224 (this rescales the width by the same factor) and then take the 224×224 square centered in the middle.

In [4]:
# Per-channel mean / standard deviation used for the standard normalization
# in transfer learning with ImageNet-pretrained models.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize, crop to a 224x224 square, convert to a
# tensor and normalize.
preprocessing_steps = [
    transforms.Resize(size=224),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
]
image_transforms = transforms.Compose(preprocessing_steps)

In [5]:
# Load the image folder again, this time applying the preprocessing pipeline
# to every sample.
data = datasets.ImageFolder(
    root='../data/animals',
    transform=image_transforms,
)

n_points = len(data)
print(n_points, "data points")

129 data points


Next, we split the data into train and test and define the data loaders.

In [6]:
# Use 100 randomly chosen samples for training and everything else for testing.
# The test size is derived from len(data) because random_split requires the
# lengths to add up to the dataset size; hard-coding 29 would break as soon as
# an image is added to or removed from the folder.
n_train = 100
n_test = len(data) - n_train
train_set, test_set = torch.utils.data.random_split(data, [n_train, n_test])

batch_size = 10
trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                          shuffle=True)
# The whole test set is evaluated as one single batch.
testloader = torch.utils.data.DataLoader(test_set, batch_size=len(test_set),
                                         shuffle=False)

## Tasks:
### Task 1:
Train a small CNN from scratch for 20 epochs and evaluate it on the test set.

In [7]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Minimal CNN baseline: one 3x3 convolution, 2x2 max-pooling, one linear layer.

    The linear layer is sized for 3x224x224 inputs: the unpadded 3x3
    convolution produces 4x222x222 feature maps, which the pooling step halves
    to 4x111x111. The network outputs raw scores (logits) for two classes.
    """

    # Shape of the pooled feature maps (channels, height, width) that the
    # linear layer was sized for.
    _FEATURE_SHAPE = (4, 111, 111)

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 4, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc = nn.Linear(4*111*111, 2)

    def forward(self, x):
        """Compute class logits for a batch of 3x224x224 images.

        Raises:
            ValueError: if the pooled feature maps are not 4x111x111, i.e. the
                input images do not have the expected spatial size.
        """
        x = F.relu(self.conv(x))
        x = self.pool(x)
        # view(-1, ...) alone would silently fold a wrongly sized input into
        # additional batch rows whenever the element count happens to divide
        # evenly, so the shape is checked explicitly first.
        if tuple(x.shape[-3:]) != self._FEATURE_SHAPE:
            raise ValueError(
                f"expected pooled features of shape {self._FEATURE_SHAPE}, "
                f"got {tuple(x.shape[-3:])}; inputs must be 3x224x224 images"
            )
        x = x.view(-1, 4*111*111)
        return self.fc(x)

In [8]:
import torch.optim as optim

# Train the small CNN from scratch with plain SGD.
net = Net()
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

for epoch in range(20):

    # Sum of the per-batch losses over this epoch (not averaged over batches).
    running_loss = 0
    # The batch is unpacked directly in the loop header. Binding it to the
    # name `data` would overwrite the dataset object of the same name and
    # break a re-run of the train/test split cell.
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate statistics
        running_loss += loss.item()

    print(f'Epoch:{epoch + 1}, loss: {running_loss:.5f}')

print('Finished Training')

Epoch:1, loss: 327.91980
Epoch:2, loss: 18.60620
Epoch:3, loss: 6.36712
Epoch:4, loss: 4.56148
Epoch:5, loss: 4.13382
Epoch:6, loss: 3.93716
Epoch:7, loss: 3.84604
Epoch:8, loss: 3.66799
Epoch:9, loss: 3.49616
Epoch:10, loss: 3.59663
Epoch:11, loss: 3.19508
Epoch:12, loss: 3.03328
Epoch:13, loss: 2.87331
Epoch:14, loss: 2.71855
Epoch:15, loss: 2.60966
Epoch:16, loss: 2.42548
Epoch:17, loss: 2.27410
Epoch:18, loss: 2.13496
Epoch:19, loss: 1.99203
Epoch:20, loss: 1.87148
Finished Training


In [9]:
# Measure classification accuracy of `net` on the held-out test set.
correct = 0
total = 0

# Switch to evaluation mode so that any layers which behave differently
# during training are in inference mode.
net.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    # Unpack the batch in the loop header rather than binding it to `data`,
    # which would overwrite the dataset object of the same name.
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # The predicted class is the index of the largest logit per sample.
        predicted = torch.argmax(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct // total} %')

Accuracy of the network on the test images: 82 %


### Task 2:
Instead of training a CNN from scratch, load a pretrained ResNet18 and only train the last layer. Train again for 20 epochs and compare the results.
Use this PyTorch tutorial to see how this works (section `ConvNet as fixed feature extractor`):
https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

In [13]:
# Start from a ResNet18 with pretrained weights.
model = torchvision.models.resnet18(weights='IMAGENET1K_V1')

# Freeze every existing parameter. This has to happen before the new head is
# attached so that the head's parameters are the only ones left trainable.
model.requires_grad_(False)

# Swap the original classifier for a fresh 2-class linear layer.
num_features = model.fc.in_features
my_layer = nn.Linear(num_features, 2)
model.fc = my_layer
model.to(device);

In [11]:
import torch.optim as optim

# Train only the new last layer of the pretrained ResNet.
net = model
# Select training mode explicitly so that re-running this cell after an
# evaluation cell behaves the same as on a fresh kernel.
net.train()

criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that are actually trainable; the
# frozen ones never receive gradients.
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.01)

for epoch in range(20):

    # Sum of the per-batch losses over this epoch (not averaged over batches).
    loss_batch = 0.0
    # Unpack the batch in the loop header rather than binding it to `data`,
    # which would overwrite the dataset object of the same name.
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        loss_batch += loss.item()

    print(f'{epoch + 1}, loss: {loss_batch:.5f}')

print('Finished Training')

1, loss: 6.86336
2, loss: 2.45239
3, loss: 2.25217
4, loss: 2.71460
5, loss: 1.18777
6, loss: 1.98528
7, loss: 1.68125
8, loss: 0.67823
9, loss: 2.76517
10, loss: 2.69081
11, loss: 0.58167
12, loss: 0.46639
13, loss: 0.85015
14, loss: 0.52710
15, loss: 0.63971
16, loss: 0.65163
17, loss: 0.31829
18, loss: 0.55537
19, loss: 0.39761
20, loss: 0.75264
Finished Training


In [12]:
# Measure classification accuracy of `net` on the held-out test set.
correct = 0
total = 0

# Switch to evaluation mode. Without this, batch-normalization layers keep
# normalizing with the statistics of the current (test) batch and keep
# updating their running statistics while we evaluate.
net.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    # Unpack the batch in the loop header rather than binding it to `data`,
    # which would overwrite the dataset object of the same name.
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # The predicted class is the index of the largest logit per sample.
        predicted = torch.argmax(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct // total} %')

Accuracy of the network on the test images: 100 %
