### In the lecture you have been introduced to VGG16. For this problem your task is to implement a VGG like CNN architecture for classification on the CIFAR10 dataset.

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

### 1. Load the dataset (0.5 point)
To load the dataset, you can use the inbuilt dataloader for CIFAR10 provided in the torchvision package. Load both test set and trainset separately. Define the transformations you might need to load the data appropriately.

In [2]:

batch_size = 128
transform = transforms.Compose([transforms.ToTensor(), 
             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 
trainset = torchvision.datasets.CIFAR10(root='./cifar10', train=True,
                                                    download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                                    shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./cifar10', train=False,
                                                    download=True, transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                    shuffle=True, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')






Files already downloaded and verified
Files already downloaded and verified


### Create the model architecture (1.0 point)
Implement the class below such that the final architecture follows the same pattern of layers as VGG16.

In [3]:
class Net(nn.Module):
    def __init__(self, input_channels = 3, n_classes= 10):
        super(Net, self).__init__()

        layers = [64, 64, "maxpool", 128, 128, "maxpool", 256, 256, 256, "maxpool", 512, 512, 512, "maxpool", 512, 512, 512, "maxpool"]
        feature_extracters = []

        for out_channels in layers:
          if out_channels == "maxpool":
            feature_extracters.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)))

          else:
            feature_extracters.append(nn.Conv2d(in_channels=input_channels, out_channels=out_channels, 
                                              kernel_size=(3,3), padding=(1,1), stride=(1,1)))
            feature_extracters.append(nn.BatchNorm2d(out_channels))
            feature_extracters.append(nn.ReLU())
            input_channels = out_channels
        
        self.conv_layers = nn.Sequential(*feature_extracters)

        self.fc_layers = nn.Sequential(
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, n_classes),
        )

      
        

    def forward(self, x):
        out = self.conv_layers(x)
        out = out.view(-1, 512*1*1)
        out = self.fc_layers(out)
        return out


net = Net(input_channels=3, n_classes = 10).cuda()

### Loss function and optimizer (0.5 point)
Define the loss function and optimizer to train the model.

In [4]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

### Train the model (1.0 point)

In [5]:
epochs = 10
device = "cuda" if torch.cuda.is_available() else "cpu"
for epoch in range(epochs): 

    running_loss = 0.0
    running_corrects = 0
    for i, (inputs, labels) in enumerate(trainloader):
      inputs = inputs.to(device)
      labels = labels.to(device)
      
      optimizer.zero_grad()
      
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      _, preds = torch.max(outputs, 1)

      loss.backward()
      optimizer.step()

      # statistics
      running_loss += loss.item() * inputs.size(0)
      running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(trainloader.dataset)
    epoch_acc = running_corrects.double() / len(trainloader.dataset)

    print('{} Loss: {:.4f} Acc: {:.4f}'.format(epoch, epoch_loss, epoch_acc))

        

print('Finished Training')

0 Loss: 1.9749 Acc: 0.1996
1 Loss: 1.5684 Acc: 0.3613
2 Loss: 1.3050 Acc: 0.5066
3 Loss: 1.0633 Acc: 0.6152
4 Loss: 0.8985 Acc: 0.6894
5 Loss: 0.7603 Acc: 0.7479
6 Loss: 0.6662 Acc: 0.7834
7 Loss: 0.5805 Acc: 0.8146
8 Loss: 0.4999 Acc: 0.8422
9 Loss: 0.4488 Acc: 0.8593
Finished Training


Code below generates the class wise accuracy of the model. You can use the results from the code below to decide the values of hyperparametrs.

In [6]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = images.cuda()
        labels = labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 84 %
Accuracy of   car : 84 %
Accuracy of  bird : 73 %
Accuracy of   cat : 57 %
Accuracy of  deer : 76 %
Accuracy of   dog : 85 %
Accuracy of  frog : 80 %
Accuracy of horse : 74 %
Accuracy of  ship : 96 %
Accuracy of truck : 79 %
