# 1. Setup


In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
from torchvision.transforms import Resize, ToTensor, Lambda
from sklearn.metrics import confusion_matrix

In [34]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [None]:
if device == "cuda":
    # Cap the amount of GPU 0 memory this process may allocate.
    memory_limit_mb = 4095.5
    # Total memory of GPU 0, in bytes
    total_memory = torch.cuda.get_device_properties(0).total_memory
    # Requested limit converted from MB to bytes
    memory_limit = memory_limit_mb * 1024 ** 2
    # The API only accepts fractions in [0, 1]; on a card with less memory than
    # the requested limit the raw ratio exceeds 1 and the call would raise,
    # so clamp it to "use everything".
    memory_fraction = min(memory_limit / total_memory, 1.0)
    torch.cuda.set_per_process_memory_fraction(memory_fraction, device=0)
    print(f"Set GPU 0 memory fraction to {memory_fraction:.2%}")

Set GPU 0 memory fraction to 100.00%


# 2. Load & Transform Data


In [36]:
# Un-normalised pipeline used only for the statistics pass: resize to a fixed
# 256x256, convert to an image tensor and rescale to float32.
initial_transforms = v2.Compose(
    [
        v2.Resize(size=(256, 256)),
        v2.ToImage(),
        v2.ToDtype(dtype=torch.float32, scale=True),
    ]
)

In [37]:
# Target transform: turn an integer label into a length-54 float vector that is
# 1 at the label's position and 0 elsewhere.
# NOTE(review): 54 is presumably the number of classes in the dataset - confirm.
label_transforms = v2.Lambda(
    lambda class_idx: torch.zeros(54, dtype=torch.float).scatter_(
        dim=0,
        index=torch.tensor(class_idx, dtype=torch.long),
        value=1,
    )
)

In [38]:
# Throw-away dataset and loader used only to measure the per-channel mean/std.
temp_dataset = ImageFolder(
    root='../data/data_pool',
    transform=initial_transforms,
    target_transform=label_transforms,
)
temp_loader = DataLoader(
    temp_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=6,
    pin_memory=True,
)

In [39]:
# Per-channel running sums of E[x] and E[x^2], weighted by the number of images
# in each batch. Weighting matters because the last batch is usually smaller:
# averaging the per-batch means with equal weight would over-count its images.
channels_sum = torch.zeros(3, device=device)
channels_sqrd_sum = torch.zeros(3, device=device)
num_samples = 0

for data, _ in temp_loader:
    data = data.to(device)  # Move data to the specified device
    batch_count = data.size(0)
    # Reduce over batch, height and width, leaving one value per channel
    channels_sum += torch.mean(data, dim=[0, 2, 3]) * batch_count
    channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3]) * batch_count
    num_samples += batch_count

if num_samples == 0:
    raise ValueError("temp_loader yielded no samples; cannot compute dataset statistics")

# Compute final mean and standard deviation (Var[x] = E[x^2] - E[x]^2)
mean = channels_sum / num_samples
variance = channels_sqrd_sum / num_samples - mean ** 2
# Rounding can push a near-zero variance slightly negative; clamp before sqrt
std = torch.sqrt(variance.clamp(min=0))

# Move mean and std to CPU and convert to plain lists for v2.Normalize
mean = mean.cpu().tolist()
std = std.cpu().tolist()

# Report the statistics
print(f"Dataset mean: {mean}")
print(f"Dataset std: {std}")

Dataset mean: [0.3618468642234802, 0.3524152636528015, 0.348279744386673]
Dataset std: [0.2003939151763916, 0.21212677657604218, 0.2538084089756012]


In [40]:
# Training pipeline: resize, light random augmentation (flip, small rotation,
# small translation), then tensor conversion and per-channel normalisation.
train_transforms = v2.Compose(
    [
        v2.Resize(size=(256, 256)),
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomRotation(degrees=10),
        v2.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        v2.ToImage(),
        v2.ToDtype(dtype=torch.float32, scale=True),
        v2.Normalize(mean=mean, std=std),
    ]
)

In [41]:
# Evaluation pipeline: same resize and normalisation as training, but with no
# random augmentation.
val_transforms = v2.Compose(
    [
        v2.Resize(size=(256, 256)),
        v2.ToImage(),
        v2.ToDtype(dtype=torch.float32, scale=True),
        v2.Normalize(mean=mean, std=std),
    ]
)

In [42]:
# Full image-folder dataset with the training transforms attached. Despite its
# name this holds every sample; it is split into train/val/test further below.
train_dataset = ImageFolder(
    root='../data/data_pool',
    transform=train_transforms,
)

# 3. Split Data


In [43]:
# Fractions of the full dataset assigned to the train / validation / test splits.
train_ratio, val_ratio, test_ratio = 0.75, 0.15, 0.10

In [44]:
# Convert the ratios into absolute sample counts. Train and validation sizes are
# truncated to integers; the test split takes whatever remains so the three
# sizes always add up to the dataset length.
total_size = len(train_dataset)
train_size, val_size = (int(ratio * total_size) for ratio in (train_ratio, val_ratio))
test_size = total_size - (train_size + val_size)

In [45]:
# Draw one reproducible random partition of the sample indices.
train_data, val_split, test_split = random_split(
    train_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42)  # For reproducibility
)

# All three subsets returned by random_split wrap the SAME dataset object, so
# assigning `subset.dataset.transform` would replace the transform for the
# training subset as well and silently disable augmentation. Instead, back the
# validation and test subsets with a second ImageFolder over the same directory
# that carries the evaluation transforms, re-using the indices drawn above.
eval_dataset = ImageFolder(root=train_dataset.root, transform=val_transforms)
val_data = torch.utils.data.Subset(eval_dataset, val_split.indices)
test_data = torch.utils.data.Subset(eval_dataset, test_split.indices)

# Create DataLoaders (only the training loader shuffles)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

# 4. Building the Neural Network


In [46]:
# Shape of the images fed to the network (square inputs, three colour channels).
IMG_HEIGHT = IMG_WIDTH = 256
IMG_CHANNELS = 3

# Regularisation strength.
REG_FACTOR = 1e-4

In [47]:
class ConvolutionalNeuralNetwork(nn.Module):
    """Image classifier: three convolutional stages followed by a dense head.

    Each convolutional stage is (Conv3x3 -> BatchNorm -> ReLU) twice, then a
    2x2 max-pool that halves the spatial size; the stages produce 16, 32 and 64
    channels. The flattened feature map is fed through five
    Linear -> BatchNorm -> ReLU -> Dropout(0.3) layers (256, 128, 128, 64, 64
    units) and a final Linear layer that outputs one raw score per class.

    Args:
        num_classes: Number of output scores. Defaults to 54.
        in_channels: Number of channels in the input images. Defaults to 3.
        img_height: Input image height in pixels. When ``None`` the
            notebook-level ``IMG_HEIGHT`` constant is used.
        img_width: Input image width in pixels. When ``None`` the
            notebook-level ``IMG_WIDTH`` constant is used.

    Raises:
        ValueError: If the image is smaller than 8 pixels in either dimension,
            which would leave no features after the three pooling steps.

    Calling the constructor with no arguments builds exactly the same module
    layout (sub-module names and indices) as before the parameters were added,
    so previously saved state dicts remain loadable.
    """

    def __init__(self, num_classes=54, in_channels=3, img_height=None, img_width=None):
        super().__init__()

        # Fall back to the notebook constants only when no explicit size is given.
        if img_height is None:
            img_height = IMG_HEIGHT
        if img_width is None:
            img_width = IMG_WIDTH
        if img_height < 8 or img_width < 8:
            raise ValueError(
                f"input size must be at least 8x8, got {img_height}x{img_width}"
            )

        self.conv_block1 = self._conv_block(in_channels, 16)
        self.conv_block2 = self._conv_block(16, 32)
        self.conv_block3 = self._conv_block(32, 64)

        self.flatten = nn.Flatten()

        # Three 2x2 poolings shrink each spatial dimension by a factor of 8.
        flat_features = 64 * (img_height // 8) * (img_width // 8)
        widths = [flat_features, 256, 128, 128, 64, 64]
        head = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            head += [
                # No bias: the following BatchNorm supplies its own shift.
                nn.Linear(fan_in, fan_out, bias=False),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(0.3),
            ]
        # Output layer: raw scores, no activation.
        head.append(nn.Linear(widths[-1], num_classes))
        self.fc_layers = nn.Sequential(*head)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one (Conv3x3-BN-ReLU) x 2 + MaxPool(2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding='same', bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, 3, padding='same', bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.flatten(x)
        x = self.fc_layers(x)
        return x

In [48]:
# Build the network with its default configuration; the bare `model` expression
# makes the notebook display the layer summary.
# NOTE(review): the model stays on the CPU here - nothing in this notebook moves
# it to `device`. Moving it requires the input batches to be moved as well.
model = ConvolutionalNeuralNetwork()
model

ConvolutionalNeuralNetwork(
  (conv_block1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, 

# 5. Optimising the Model Parameters

In [49]:
# Hyperparameters for the optimisation run.
epochs = 50            # number of train + validation rounds
batch_size = 32        # samples per optimisation step
learning_rate = 1e-3   # optimiser step size

In [50]:
# Initialize the loss function: cross-entropy for multi-class classification.
# It is applied directly to the network's raw output scores (the model ends in
# a plain Linear layer, with no softmax).
loss_fn = nn.CrossEntropyLoss()

In [51]:
# Adam over every model parameter, with REG_FACTOR applied as weight decay.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=REG_FACTOR,
)

In [52]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one full pass over ``dataloader``.

    Every batch is moved to the device that holds the model's parameters, a
    forward/backward pass is run and the optimizer takes one step. The current
    loss and the number of samples processed so far are printed every 100
    batches.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches; must expose
            a sized ``.dataset`` attribute (used for the progress display).
        model: The ``nn.Module`` to optimise.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar loss.
        optimizer: Optimizer constructed over ``model``'s parameters.
    """
    size = len(dataloader.dataset)

    # Follow the model: whichever device its parameters live on is where the
    # batches must go, otherwise the forward pass fails with a device mismatch.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    # Training mode enables dropout and batch-statistics updates in BatchNorm.
    model.train()
    seen = 0  # samples processed so far; independent of any global batch size
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation. Gradients are cleared first so that leftovers from an
        # interrupted earlier run cannot leak into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Alternate one training pass with one validation pass, `epochs` times.
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(val_loader, model, loss_fn)
print("Done!")

# 6. Saving the Model

In [None]:
model = models.vgg16(weights='IMAGENET1K_V1')
torch.save(model.state_dict(), 'model_weights.pth')

In [None]:
model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
model.eval()

In [None]:
# Save the whole module object (structure + weights) rather than just its
# state dict.
torch.save(model, 'model.pth')

In [None]:
# Load the full module object back. No trailing comma: with one, `model` would
# become a 1-tuple instead of the module.
# weights_only=False unpickles arbitrary objects - only load files you trust.
model = torch.load('model.pth', weights_only=False)