<a href="https://colab.research.google.com/github/tkbharadwaj/vggnet/blob/main/VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Modules, Initialize Hyperparameters

In [1]:
'''
https://github.com/tkbharadwaj/vggnet
Tejas Bharadwaj
7/6/23
Classifier for CIFAR-10 Dataset
Goal: 90% testing accuracy
Some code borrowed & modified from:
https://github.com/patrickloeber/pytorchTutorial/blob/master/14_cnn.py
'''

import torch
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.optim as optim
from torchsummary import summary
import torchvision
import torchvision.transforms as transforms
import math

device = torch.device("cuda:0")
batch_size = 128
learning_rate = .001
num_epochs = 50

Load Dataset,
normalize + transform using RandomCrop and HorizontalFlip for model's resilience

Dataset used is CIFAR-10


In [2]:
#load and normalize dataset
transform = transforms.Compose(
    [ transforms.ToTensor(), transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))],)
train_set = torchvision.datasets.CIFAR10(root = "/content", download = True, train = True, transform =transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transform]))
test_set = torchvision.datasets.CIFAR10(root = "/content", download = True, train = False, transform = transform)

train_load = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True)
test_load = torch.utils.data.DataLoader(test_set, batch_size = batch_size, shuffle = True)

classes = ('airplane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',  'truck')

print(train_load)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29486395.01it/s]


Extracting /content/cifar-10-python.tar.gz to /content
Files already downloaded and verified
<torch.utils.data.dataloader.DataLoader object at 0x7feca068c5e0>


Model Architecture

In [None]:
#Implementation of VGG-13 Network by Tejas Bharadwaj
#Original Paper: https://arxiv.org/pdf/1409.1556v6.pdf

class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3,64, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64,64, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64,128, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128,128, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128,256, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256,256, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(256,512, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512,512, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(512,512, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512,512, kernel_size=3, stride =1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(512, 10)
        )

  #initialize weights (paper says variance of .01, 0.1 works better)
        for m in self.modules():
            if isinstance(m, nn.Conv2d): #or isinstance(m, nn.Linear):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                #m.weight.data.normal_(0,  math.sqrt(2. / n))
                #m.weight.data.normal_(0, 0.01)
                torch.nn.init.xavier_uniform_(m.weight.data, gain=.1) #Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010)
                m.bias.data.zero_()

    def forward(self, x):
      #conv layers
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
      #fully connected layers
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


model = VGG()
model = model.to(device)
summary(model, (3, 32 , 32))

In [9]:
def learning_rate_decay(model, optimizer, decay = 0.5):
  for param_group in optimizer.param_groups:
        param_group['lr'] = decay*param_group['lr']

Train the model

In [None]:
#train the model
num_epochs = 250
count = 1
learning_rate = .01
lrs = np.random.rand(20)*.005+.001
decay_after_epochs = 20
print("\nlearning rate: ", learning_rate)
print("\n")
model = VGG().to(device)
wd = 5*10**(-4) #from original paper
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, weight_decay = wd)

n_total_steps = len(train_load)


for epoch in range(num_epochs):
    model.train()
    #learning rate decay ** make into function
    if count % decay_after_epochs == 0:
      learning_rate_decay(model, optimizer)
    count += 1
    for i, (images, labels) in enumerate(train_load):
        images = images.to(device)
        outputs = model(images)
        outputs = outputs.to(device)
        labels = labels.to(device)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 50 == 0:
            print(images.shape)
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

    model.eval()
    with torch.no_grad():
      n_correct = 0
      n_samples = 0
      n_class_correct = [0 for i in range(10)]
      n_class_samples = [0 for i in range(10)]
      for images, labels in test_load:
          images = images.to(device)
          outputs = model(images)
          labels = labels.to(device)
          outputs = outputs.to(device)
          _, predicted = torch.max(outputs, 1)
          n_samples += labels.size(0)
          n_correct += (predicted == labels).sum().item()
          for i in range(batch_size):
              if i >= len(labels):
                break;
              #print(labels.shape)
              label = labels[i]
              pred = predicted[i]
              if (label == pred):
                  n_class_correct[label] += 1
              n_class_samples[label] += 1

      acc = 100.0 * n_correct / n_samples
      print(f'Accuracy of the network: {acc} %')



learning rate:  0.01


torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [50/391], Loss: 1.9682
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [100/391], Loss: 1.9911
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [150/391], Loss: 1.6952
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [200/391], Loss: 1.5244
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [250/391], Loss: 1.5366
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [300/391], Loss: 1.3928
torch.Size([128, 3, 32, 32])
Epoch [1/250], Step [350/391], Loss: 1.4072
Accuracy of the network: 48.48 %
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [50/391], Loss: 1.4235
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [100/391], Loss: 1.3112
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [150/391], Loss: 1.1779
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [200/391], Loss: 1.0078
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [250/391], Loss: 1.0235
torch.Size([128, 3, 32, 32])
Epoch [2/250], Step [300/391], Loss: 1.1