# VGG classifier training and fine tunning comparison

In this notebook I want to compare the accuracy of a VGG model build from scratch and trained on the target dataset (bike category recognition) with a pre trained model (ImageNet) and fine tunned model.

I'll be using discriminative fine-tunning in this notebook 

In [None]:
!pip3 install -r requirements.txt

In [38]:
from dotenv import load_dotenv
from utils.load_dataset import load_dataset
import os

load_dotenv()
dataset_url = os.getenv("DATASET_REPO_URL")

load_dataset(dataset_url, extract_dir="dataset")



‚ÑπÔ∏è  Detected classes: ['road', 'gravel', 'fold', 'mtb', 'hybrid']
üîÑ Processing: road...
üîÑ Processing: gravel...
üîÑ Processing: fold...
üîÑ Processing: mtb...
üîÑ Processing: hybrid...

‚úÖ Done! Dataset organized at: dataset_split


In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import _LRScheduler
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import numpy as np

import copy
import random
import time
import os

In [12]:
SEED = 43

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

## Architecture definition

In [13]:
class VGG(nn.Module):
    def __init__(self, features, output_dim):
        super().__init__()

        self.features = features

        self.avgpool = nn.AdaptiveAvgPool2d(7)

        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, output_dim),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        h = x.view(x.shape[0], -1)
        x = self.classifier(h)
        return x, h

### VGG configuration arrays

In [14]:
vgg11_config = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']

vgg13_config = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512,
                512, 'M']

vgg16_config = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512,
                'M', 512, 512, 512, 'M']

vgg19_config = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512,
                512, 512, 'M', 512, 512, 512, 512, 'M']

In [15]:
def get_vgg_layers(config, batch_norm):

    layers = []
    in_channels = 3

    for c in config:
        assert c == 'M' or isinstance(c, int)
        if c == 'M':
            layers += [nn.MaxPool2d(kernel_size=2)]
        else:
            conv2d = nn.Conv2d(in_channels, c, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(c), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = c

    return nn.Sequential(*layers)

In [41]:
vgg11_layers = get_vgg_layers(vgg11_config, batch_norm=True)

vgg16_layers = get_vgg_layers(vgg16_config, batch_norm=True)

print(vgg11_layers)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace=True)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU(inplace=True)
  (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): ReLU(inplace=True)
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, cei

In [17]:
from torch.utils.data import DataLoader

def data_loader(data_dir, batch_size, set_type='train', shuffle=True):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # sprawdzic dodanie normalizacji
    ])

    set_path = os.path.join(data_dir, set_type)

    # ImageFolder
    dataset = datasets.ImageFolder(set_path, transform=transform)

    # DataLoader
    loader = DataLoader(dataset, batch_size, shuffle=shuffle)

    size = len(dataset)
    return loader, size

## Define train set augmentations

In [None]:
# train_transforms = transforms.Compose([
# 			transforms.RandomRotation(5),
# 			transforms.RandomHorizontalFlip(0.5),
# 			transforms.RandomCrop(32, padding=2),
# 			transforms.ToTensor(),
# 			transforms.Normalize(mean=means,
# 													std=stds)
# 	])

# test_transforms = transforms.Compose([
# 			transforms.ToTensor(),
# 			transforms.Normalize(mean=means,
# 													std=stds)
# 	])

## VGG Training

In [44]:
num_classes = 5
num_epochs = 50
batch_size = 32
learning_rate = 1e-4
weight_decay = 1e-4

model = VGG(vgg11_layers, num_classes)

model16 = VGG(vgg16_layers, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = weight_decay)
optimizer16 = torch.optim.Adam(model16.parameters(), lr=learning_rate, weight_decay = weight_decay)  

### Training loop

In [36]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs):
	total_step = len(train_loader)
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	for epoch in range(num_epochs):
		model.train()

		for images, labels in train_loader:  
			# Move tensors to the configured device
			images = images.to(device)
			labels = labels.to(device)
			
			# Forward pass
			outputs, h = model(images)
			loss = criterion(outputs, labels)
			
			# Backward and optimize
			optimizer.zero_grad()
			loss.backward()
			optimizer.step()

		print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
										.format(epoch+1, num_epochs, epoch+1, total_step, loss.item()))
						
		# Validation
		model.eval()
		with torch.no_grad():
			correct = 0
			total = 0
			
			for images, labels in valid_loader:
				images = images.to(device)
				labels = labels.to(device)
				outputs, h = model(images)
				_, predicted = torch.max(outputs.data, 1)
				total += labels.size(0)
				correct += (predicted == labels).sum().item()
				del images, labels, outputs
	
			print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total)) 

### Data loaders

In [45]:
DATASET_SPLIT_PATH = 'dataset_split/real'

train_loader, train_size = data_loader(DATASET_SPLIT_PATH, batch_size, set_type='train', shuffle=True)
valid_loader, valid_size = data_loader(DATASET_SPLIT_PATH, batch_size, set_type='val', shuffle=False)


In [46]:
# result = train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs)

result = train_model(model16, train_loader, valid_loader, criterion, optimizer16, num_epochs)
print("-" * 30)
print(result)

Epoch [1/50], Step [1/2], Loss: 1.7130
Accuracy of the network on the 5000 validation images: 33.333333333333336 %
Epoch [2/50], Step [2/2], Loss: 1.0697
Accuracy of the network on the 5000 validation images: 40.0 %
Epoch [3/50], Step [3/2], Loss: 0.9825
Accuracy of the network on the 5000 validation images: 46.666666666666664 %
Epoch [4/50], Step [4/2], Loss: 1.3874
Accuracy of the network on the 5000 validation images: 60.0 %
Epoch [5/50], Step [5/2], Loss: 0.6614
Accuracy of the network on the 5000 validation images: 46.666666666666664 %
Epoch [6/50], Step [6/2], Loss: 0.9899
Accuracy of the network on the 5000 validation images: 53.333333333333336 %
Epoch [7/50], Step [7/2], Loss: 0.6997
Accuracy of the network on the 5000 validation images: 46.666666666666664 %
Epoch [8/50], Step [8/2], Loss: 0.4332
Accuracy of the network on the 5000 validation images: 60.0 %
Epoch [9/50], Step [9/2], Loss: 0.5098
Accuracy of the network on the 5000 validation images: 60.0 %
Epoch [10/50], Step [

In [None]:
MODEL_DIR = 'models'

torch.save(model16.state_dict(), os.path.join(MODEL_DIR, 'vgg16_bike_classifier.pth'))
print("Model saved to vgg16_bike_classifier.pth")

Model saved to vgg16_bike_classifier.pth
