# 08-VGG

[1. 100 Sports Image Classification](#1)  

[2. Data Preprocessing](#2)  
 - [2-1.Compute Mean, Std of Training Set](#2-1)
 - [2-2.Define Torch Dataset](#2-2)

[3.Define VGG19](#3)

[4.Training](#4)
 - [4-1.Hyper Parameters](#4-1)
 - [4-2.Torch DataLoader](#4-2)
 - [4-3.Train function](#4-3)
 - [4-4.Valid function](#4-4)
 - [4-5.Train Loop](#4-5)

In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import albumentations as A
import matplotlib.pyplot as plt

from torch import nn
from tqdm import tqdm
from torchsummary import summary
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

<a id="1"></a>
## 1. 100 Sports Image Classification
[https://www.kaggle.com/datasets/gpiosenka/sports-classification/data](https://www.kaggle.com/datasets/gpiosenka/sports-classification/data)

In [None]:
# Root of the training split; each class lives in its own sub-directory.
data_dir = "/home/pervinco/Datasets/sports-classification/train"

# Keep only sub-directories so stray files in data_dir (index CSVs, .DS_Store,
# ...) are not mistaken for class names. sorted() fixes the class -> index
# mapping independently of the filesystem's listing order.
classes = sorted(
    entry
    for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
print(classes)

In [None]:
# Collect the path of every .jpg file together with its class name (the name
# of the directory that contains it). images[i] and labels[i] stay aligned.
images, labels = [], []
for str_label in classes:
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # sample order (and any later seeded shuffle/split) reproducible.
    img_files = sorted(os.listdir(f"{data_dir}/{str_label}"))
    for file in img_files:
        if file.endswith(".jpg"):
            images.append(f"{data_dir}/{str_label}/{file}")
            labels.append(str_label)

print(f"Num of Classes : {len(classes)}")
print(f"Num of files & labels : {len(images)}, {len(labels)}")
# Guard the preview so an empty/mis-pointed data_dir reports 0 files instead
# of failing with IndexError.
if images:
    print(images[0])

In [None]:
classes = list(classes)

# Count how many samples each class has. The first occurrence of a label must
# count as 1 (starting at 0 would under-report every class by one sample).
sample_per_class = {}
for label in labels:
    sample_per_class[label] = sample_per_class.get(label, 0) + 1

# Bar chart of the class distribution to spot class imbalance at a glance.
plt.figure(figsize=(16, 8))
plt.bar(sample_per_class.keys(), sample_per_class.values(), color='skyblue')
plt.xlabel('Class')
plt.ylabel('Number of Samples')
plt.title('Number of Samples per Class')
plt.xticks(rotation=90)
plt.show()

<a id="2"></a>
## 2. Data Preprocessing

In [None]:
# Hold out 10% of the samples for validation; the fixed random_state keeps the
# shuffle identical between runs for the same input order.
train_x, valid_x, train_y, valid_y = train_test_split(
    images,
    labels,
    test_size=0.1,
    shuffle=True,
    random_state=42,
)

# Sanity check: paths and labels must have the same length in each split.
print(f"{len(train_x)} {len(train_y)}")
print(f"{len(valid_x)} {len(valid_y)}")

<a id="2-1"></a>
### 2-1.Compute Mean, Std of Training Set

In [None]:
def compute_mean_std(files, size=(224, 224)):
    """Compute per-channel mean and standard deviation of a set of images.

    Each file is read with OpenCV, converted from BGR to RGB and resized to
    ``size`` before its pixels are added to the statistics. The statistics
    are accumulated as running sums, so only one image is held in memory at
    a time (stacking all images and calling ``np.std`` on the stack needs a
    float64 copy of the whole dataset).

    Args:
        files: Sequence of image file paths.
        size: Target size passed to ``cv2.resize`` as ``dsize``. Defaults to
            ``(224, 224)``.

    Returns:
        Tuple ``(mean_rgb, std_rgb)`` of float64 arrays with three entries
        each (R, G, B order). Values are in raw pixel units (0-255 for the
        8-bit images ``cv2.imread`` returns by default), not scaled to
        [0, 1]. The standard deviation is the population one (ddof=0).

    Raises:
        ValueError: If ``files`` is empty.
        OSError: If a file is missing or cannot be decoded by OpenCV.
    """
    if len(files) == 0:
        raise ValueError("compute_mean_std() needs at least one image file")

    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    pixel_count = 0

    for file in files:
        image = cv2.imread(file)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise OSError(f"cv2.imread() could not read image: {file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, size)

        # Flatten to (num_pixels, 3) and accumulate in float64 so the sums of
        # squares cannot overflow the uint8 source dtype.
        pixels = image.reshape(-1, 3).astype(np.float64)
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

    mean_rgb = channel_sum / pixel_count
    # Var[x] = E[x^2] - E[x]^2; clamp at 0 to absorb rounding error before
    # taking the square root.
    variance = channel_sq_sum / pixel_count - np.square(mean_rgb)
    std_rgb = np.sqrt(np.maximum(variance, 0.0))

    return mean_rgb, std_rgb

# Statistics are computed from the training split only (train_x), so the
# validation images do not influence the normalisation constants.
mean, std = compute_mean_std(train_x)
print(mean, std)

<a id="2-2"></a>
### 2-2.Define Torch Dataset

In [None]:
class PetDataset(Dataset):
    """Image-classification dataset that reads files from disk with OpenCV.

    Each item is loaded with ``cv2.imread``, converted from BGR to RGB, passed
    through an albumentations pipeline ending in ``A.Normalize`` and
    ``ToTensorV2``, and returned together with the integer index of its class.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of class names, aligned with ``images``.
        classes: Sequence of all class names; a label's position in this
            sequence is the integer target returned by ``__getitem__``.
        mean: Per-channel mean used for normalisation. May be given either in
            raw pixel units (0-255) or already scaled to [0, 1].
        std: Per-channel standard deviation, in the same units as ``mean``.
        augmentation: If True, use the training pipeline (random resize,
            random crop, horizontal flip, RGB shift); otherwise only resize
            to 224x224.
    """

    def __init__(self, images, labels, classes, mean, std, augmentation=False):
        self.classes = classes
        self.images = images
        self.labels = labels

        # Name -> index lookup built once, instead of an O(len(classes))
        # list.index() call per sample. setdefault keeps the first index for
        # a duplicated name, which is what list.index() would return.
        self.class_to_idx = {}
        for class_idx, class_name in enumerate(classes):
            self.class_to_idx.setdefault(class_name, class_idx)

        # A.Normalize computes (img - mean * max_pixel_value) / (std * max_pixel_value).
        # With its default max_pixel_value=255, statistics that are already in
        # 0-255 units would be scaled a second time and every image would
        # collapse to an almost constant tensor, so match the factor to the
        # units of `mean`.
        normalize = A.Normalize(mean=mean, std=std,
                                max_pixel_value=self._max_pixel_value(mean))

        if augmentation:
            self.augmentation = A.Compose([
                # Either squash straight to 224x224 or rescale so the shorter
                # side is 256 or 512 (albumentations picks one value from the
                # list), then take a random 224x224 crop.
                A.OneOf([
                    A.Resize(224, 224, p=0.5),
                    A.SmallestMaxSize(max_size=[256, 512], p=0.5)
                ], p=1),
                A.RandomCrop(width=224, height=224),
                A.HorizontalFlip(),
                A.RGBShift(),
                normalize,
                ToTensorV2()
            ])
        else:
            self.augmentation = A.Compose([
                A.Resize(224, 224, p=1),
                normalize,
                ToTensorV2()
            ])

    @staticmethod
    def _max_pixel_value(mean):
        """Return the A.Normalize scale factor matching the units of ``mean``.

        A mean above 1.0 can only be expressed in raw pixel units, so no
        further scaling is needed (1.0); otherwise the statistics are taken
        to be in [0, 1] and the albumentations default (255.0) applies.
        """
        return 1.0 if float(np.max(mean)) > 1.0 else 255.0

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, class_index)``.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
            ValueError: If the sample's label is not present in ``classes``.
        """
        path = self.images[idx]
        label = self.labels[idx]

        image = cv2.imread(path)
        # cv2.imread returns None on failure instead of raising.
        if image is None:
            raise OSError(f"cv2.imread() could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        try:
            label = self.class_to_idx[label]
        except KeyError:
            # Same exception type list.index() raises for an unknown label.
            raise ValueError(f"{label!r} is not in classes") from None

        image = self.augmentation(image=image)["image"]

        return image, label

In [None]:
# Random augmentation is enabled for the training split only; the validation
# split uses the resize-and-normalise pipeline. Both share the same mean/std.
train_dataset = PetDataset(train_x, train_y, classes, mean, std, augmentation=True)
valid_dataset = PetDataset(valid_x, valid_y, classes, mean, std, augmentation=False)

In [None]:
# Quick check of one training item: tensor shape and integer class index.
sample_image, sample_label = train_dataset[0]
print(sample_image.shape, sample_label)

<a id="3"></a>
## 3. Define VGG19

In [None]:
def conv_bn_act(in_channels, out_channels, num_layers):
    """Build one VGG stage: ``num_layers`` x (Conv3x3 -> BatchNorm -> ReLU), then MaxPool.

    Args:
        in_channels: Channels of the tensor entering the stage.
        out_channels: Channels produced by every convolution in the stage.
        num_layers: Number of Conv-BN-ReLU triples before the pooling layer.

    Returns:
        ``nn.Sequential`` holding the layers in execution order. The 3x3
        convolutions use padding 1 and stride 1; the final 2x2 max-pool uses
        stride 2.
    """
    layers = []
    current_in = in_channels
    for _ in range(num_layers):
        layers.append(nn.Conv2d(current_in, out_channels, kernel_size=3, padding=1, stride=1))
        layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU(inplace=True))
        # Only the first convolution sees the stage input; the rest chain
        # out_channels -> out_channels.
        current_in = out_channels

    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

    return nn.Sequential(*layers)

In [None]:
class VGG19(nn.Module):
    """VGG-19 style classifier with batch normalisation after every convolution.

    The feature extractor is five ``conv_bn_act`` stages with 2, 2, 4, 4 and 4
    convolutions (64 -> 128 -> 256 -> 512 -> 512 channels), followed by an
    adaptive average pool to 7x7 and a three-layer fully connected classifier.

    Args:
        num_classes: Size of the output logit vector.
        drop_rate: Dropout probability used between the fully connected layers.
        init_weights: If True, re-initialise Conv2d, BatchNorm2d and Linear
            parameters (see ``_initialize_weights``); otherwise keep the
            PyTorch defaults.
    """

    def __init__(self, num_classes, drop_rate=0.5, init_weights=False):
        super().__init__()

        self.block1 = conv_bn_act(in_channels=3, out_channels=64, num_layers=2)
        self.block2 = conv_bn_act(in_channels=64, out_channels=128, num_layers=2)
        self.block3 = conv_bn_act(in_channels=128, out_channels=256, num_layers=4)
        self.block4 = conv_bn_act(in_channels=256, out_channels=512, num_layers=4)
        self.block5 = conv_bn_act(in_channels=512, out_channels=512, num_layers=4)

        # Adaptive pooling fixes the classifier input at 512 x 7 x 7 for any
        # input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                        nn.ReLU(inplace=True),
                                        nn.Dropout(drop_rate),
                                        nn.Linear(4096, 4096),
                                        nn.ReLU(inplace=True),
                                        nn.Dropout(drop_rate),
                                        nn.Linear(4096, num_classes))

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        """Initialise all Conv2d, BatchNorm2d and Linear parameters in place.

        Conv2d: Kaiming-normal weights (fan_in, ReLU gain), zero bias.
        BatchNorm2d: weight 1, bias 0.
        Linear: weights from N(0, 0.01), zero bias.
        """
        for m in self.modules():
            # The three branches must be siblings: a module is exactly one of
            # these types, so nesting the BatchNorm/Linear checks under the
            # Conv2d branch would make them unreachable.
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_in", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1) ## [512, 7, 7] to 25088

        x = self.classifier(x)

        return x

In [None]:
# Build the network on the CPU and print a per-layer summary for a
# 3x224x224 input.
model = VGG19(len(classes), init_weights=True)
summary(model, input_size=(3, 224, 224), device="cpu")

# Smoke test: push one random image through the model to confirm the forward
# pass runs end to end. The result is not used further.
dummy_input = torch.randn(1, 3, 224, 224)
output = model(dummy_input)

<a id="4"></a>
## 4.Training

<a id="4-1"></a>
### 4-1.Hyper Parameters

In [None]:
# Optimisation settings.
epochs = 100            # number of passes over the training set
batch_size = 64         # samples per DataLoader batch
learning_rate = 0.01    # SGD step size
weight_decay = 0.0005   # L2 regularisation strength intended for the optimizer
momentum = 0.9          # SGD momentum

# os.cpu_count() returns None when the count cannot be determined; fall back
# to 0 (load data in the main process) because DataLoader needs an integer.
num_workers = os.cpu_count() or 0
# Use the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

<a id="4-2"></a>
### 4-2. Torch DataLoader

In [None]:
# Only the training loader shuffles; the validation loader keeps dataset order.
# NOTE: the name `train_dataloder` is misspelled, but the training loop refers
# to it under exactly this name, so it must be renamed in both places at once.
train_dataloder = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, num_workers=num_workers)

<a id="4-3"></a>
### 4-3.Train function

In [None]:
def train(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one epoch.

    Args:
        model: Network to optimise; switched to training mode here.
        dataloader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute with a length.
        criterion: Loss callable taking ``(predictions, targets)``.
        optimizer: Optimizer updating the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``, both averaged over
        ``len(dataloader.dataset)`` samples.
    """
    model.train()

    loss_sum = 0.0
    correct_sum = 0.0
    for inputs, targets in tqdm(dataloader, desc="Train", leave=False):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)

        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the per-sample average.
        loss_sum += loss.item() * inputs.size(0)

        predicted = torch.max(logits, 1)[1]
        correct_sum += (predicted == targets).sum().item()

    num_samples = len(dataloader.dataset)

    return loss_sum / num_samples, correct_sum / num_samples

<a id="4-4"></a>
### 4-4.Valid function

In [None]:
def valid(model, dataloader, criterion, device):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        dataloader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute with a length.
        criterion: Loss callable taking ``(predictions, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``, both averaged over
        ``len(dataloader.dataset)`` samples.
    """
    model.eval()

    loss_sum = 0.0
    correct_sum = 0.0
    with torch.no_grad():
        for inputs, targets in tqdm(dataloader, desc="Valid", leave=False):
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            loss = criterion(logits, targets)

            # Weight the batch loss by the batch size so that a smaller final
            # batch does not skew the per-sample average.
            loss_sum += loss.item() * inputs.size(0)

            predicted = torch.max(logits, 1)[1]
            correct_sum += (predicted == targets).sum().item()

    num_samples = len(dataloader.dataset)

    return loss_sum / num_samples, correct_sum / num_samples

<a id="4-5"></a>
### 4-5.Train Loop

In [None]:
# Create a fresh model for training, print its summary on the CPU, then move
# it to the selected device.
model = VGG19(len(classes), init_weights=True)
summary(model, input_size=(3, 224, 224), device="cpu")

model = model.to(device)

In [None]:
# Cross-entropy over raw logits with integer class targets.
criterion = nn.CrossEntropyLoss()
# Pass weight_decay explicitly: it is configured with the other
# hyperparameters but has no effect unless the optimizer receives it.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# One training pass followed by one validation pass per epoch; both report the
# per-sample average loss and the accuracy.
for epoch in range(epochs):
    print(f"\nEpoch : {epoch + 1} | {epochs}")
    train_cost, train_acc = train(model, train_dataloder, criterion, optimizer, device)
    print(f"\tTrain Loss : {train_cost:.4f}, Train Acc : {train_acc:.4f}")

    valid_cost, valid_acc = valid(model, valid_dataloader, criterion, device)
    print(f"\tValid Loss : {valid_cost:.4f}, Valid Acc : {valid_acc:.4f}")