# Import the necessary libraries

In [1]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import v2
import matplotlib.pyplot as plt
import time
from datetime import datetime
import os
import random
from collections import defaultdict
from PIL import Image
from tempfile import TemporaryDirectory
from functions import LoadEncoderDecoder, progress
from create_pytorch_dataset import CustomImageDataset

cudnn.benchmark = True
plt.ion()   # interactive mode

<contextlib.ExitStack at 0x25b7213aea0>

# Set up variables

In [2]:
########## VARIABLES ##########
encoder_dictionary_filename = 'image_decoder.pkl'   # File to save the encoder and decoder
datasource_filepath = 'cleaned_data/training_data.csv'    # File to save the training data
cleaned_img_dir = 'cleaned_data/images/'  # Folder containing the cleaned images
final_size = 224    # Final size of the images
batch_size = 64    # Batch size for the DataLoader, 

# Paths to save the training, validation and test datasets
train_indices_file = 'datasets/train_indices.pt'
val_indices_file = 'datasets/val_indices.pt'
test_indices_file = 'datasets/test_indices.pt'

# Data transformations
data_transforms = {
    'train': v2.Compose([
        v2.ToImage(), # Convert to tensor, only needed for PIL images
        v2.RandomResizedCrop(size=(final_size, final_size), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
        # v2.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
        # v2.RandomRotation(20),
        v2.ToDtype(torch.float32, scale=True), # this has replaced ToTensor()
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.255]),
    ]),
    'val': v2.Compose([
        v2.ToImage(), # Convert to tensor, only needed for PIL images
        v2.Resize(256),
        v2.CenterCrop(final_size),
        v2.ToDtype(torch.float32, scale=True), # this has replaced ToTensor()
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': v2.Compose([
        v2.ToImage(),  # Convert to tensor, only needed for PIL images
        v2.Resize(256),  # Resize to a standard size
        v2.CenterCrop(final_size),  # Center crop to the final input size
        v2.ToDtype(torch.float32, scale=True),  # Convert to tensor with dtype
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with the same mean and std as training
    ]),
}

# Encoder and Decoder

In [3]:
# Load the encoder and decoder
print('\n########## Loading encoder and decoder ##########')
encoder, decoder = LoadEncoderDecoder(encoder_dictionary_filename)



########## Loading encoder and decoder ##########


########## Load Decoder Dictionary ##########
----> Decoder dictionary loaded successfully
----> Encoder and decoder extracted from the dictionary
----> Encoder:
 {'Home & Garden': 0, 'Baby & Kids Stuff': 1, 'DIY Tools & Materials': 2, 'Music, Films, Books & Games': 3, 'Phones, Mobile Phones & Telecoms': 4, 'Clothes, Footwear & Accessories': 5, 'Other Goods': 6, 'Health & Beauty': 7, 'Sports, Leisure & Travel': 8, 'Appliances': 9, 'Computers & Software': 10, 'Office Furniture & Equipment': 11, 'Video Games & Consoles': 12}
----> Decoder:
 {0: 'Home & Garden', 1: 'Baby & Kids Stuff', 2: 'DIY Tools & Materials', 3: 'Music, Films, Books & Games', 4: 'Phones, Mobile Phones & Telecoms', 5: 'Clothes, Footwear & Accessories', 6: 'Other Goods', 7: 'Health & Beauty', 8: 'Sports, Leisure & Travel', 9: 'Appliances', 10: 'Computers & Software', 11: 'Office Furniture & Equipment', 12: 'Video Games & Consoles'}


# Dataset and Dataloder

## Create custom dataset

In [4]:
print('\n########## Custom Image Dataset ##########')
# Create the full dataset using training transformations (initially)
full_dataset = CustomImageDataset(datasource_file=datasource_filepath, img_dir=cleaned_img_dir, transform=None)
# full_dataset = CustomImageDataset(datasource_file=datasource_filepath, img_dir=cleaned_img_dir, transform=data_transforms['train'])
print('----> Custom Image Dataset created')
print(f"----> Number of samples in the dataset: {len(full_dataset)}")


########## Custom Image Dataset ##########
----> Custom Image Dataset created
----> Number of samples in the dataset: 12604


## OPTIONAL - use smaller dataset for testing

In [5]:
# Use a smaller subset of the dataset for quick testing
# small_dataset_size = 100  # Adjust as necessary
# full_dataset = Subset(full_dataset, range(small_dataset_size))

## Split the dataset

### Check the distribution of the labels

In [6]:
# Count the number of images per label
label_counts = full_dataset.img_labels['label'].value_counts()

# Get the minimum number of images per class
min_images_per_class = label_counts.min()
print(f"Minimum number of images per class: {min_images_per_class}\n")

# Map label numbers to names using the decoder
label_names = label_counts.index.map(decoder)

# Print the number of images for each label
print("Number of images per label:")
for label, count in zip(label_names, label_counts):
    print(f"{count} - {label}")

Minimum number of images per class: 691

Number of images per label:
1471 - Home & Garden
1177 - Office Furniture & Equipment
1136 - Computers & Software
1088 - Health & Beauty
1033 - Music, Films, Books & Games
938 - DIY Tools & Materials
917 - Appliances
908 - Other Goods
860 - Sports, Leisure & Travel
828 - Video Games & Consoles
786 - Phones, Mobile Phones & Telecoms
771 - Clothes, Footwear & Accessories
691 - Baby & Kids Stuff


### Divide indices and save or load if previously created

In [7]:
# Function to save indices
def save_indices(indices, filename):
    # check if the folder exitsts and create if not
    target_folder = filename.split('/')[0]
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
        print(f'----> Target folder created successfully: {target_folder}')
        
    # save the file
    torch.save(indices, filename)
    
# Function to load indices
def load_indices(filename):
    return torch.load(filename)


# Check if saved datasets exist
if os.path.exists(train_indices_file) and os.path.exists(val_indices_file) and os.path.exists(test_indices_file):
    print('----> Loading existing datasets...')
    train_indices = load_indices(train_indices_file)
    val_indices = load_indices(val_indices_file)
    test_indices = load_indices(test_indices_file)
    print('----> Existing Datasets loaded\n')
else:
    print('----> Creating new datasets...')
    # Group indices by class
    class_indices = defaultdict(list)
    for idx, (_, label) in enumerate(full_dataset):
        class_indices[label.item()].append(idx)
    
    # Calculate the minimum number of images per class
    min_images_per_class = min(len(indices) for indices in class_indices.values())
    train_limit = int(min_images_per_class * 0.95)  # 95% for training
    
    train_indices = []
    val_indices = []
    test_indices = []

    for class_label, indices in class_indices.items():
        random.shuffle(indices)  # Shuffle the indices
        
        train_indices.extend(indices[:train_limit])  # First 95% for training
        remaining_indices = indices[train_limit:]    # Remaining 5%
        half_remaining = len(remaining_indices) // 2
        
        val_indices.extend(remaining_indices[:half_remaining])  # 50% of remaining for validation
        test_indices.extend(remaining_indices[half_remaining:]) # 50% of remaining for testing
    
    # Save the indices
    save_indices(train_indices, train_indices_file)
    save_indices(val_indices, val_indices_file)
    save_indices(test_indices, test_indices_file)
    print('----> Datasets saved successfully\n')

----> Loading existing datasets...
----> Existing Datasets loaded



  return torch.load(filename)


### Create train, validataion and test datasets

In [8]:
# Create datasets using the loaded or newly created indices
train_dataset = Subset(CustomImageDataset(datasource_file=datasource_filepath, img_dir=cleaned_img_dir, transform=data_transforms['train']), train_indices)
val_dataset = Subset(CustomImageDataset(datasource_file=datasource_filepath, img_dir=cleaned_img_dir, transform=data_transforms['val']), val_indices)
test_dataset = Subset(CustomImageDataset(datasource_file=datasource_filepath, img_dir=cleaned_img_dir, transform=data_transforms['test']), test_indices)

print('----> Dataset split into training, validation, and test sets completed')
print(f"----> Training dataset size: {len(train_dataset)}")
print(f"----> Validation dataset size: {len(val_dataset)}")
print(f"----> Test dataset size: {len(test_dataset)}")

----> Dataset split into training, validation, and test sets completed
----> Training dataset size: 8528
----> Validation dataset size: 2035
----> Test dataset size: 2041


## Create dataloaders

In [9]:
# Create DataLoaders
print('\n\n########## Creating DataLoaders ##########')
# Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

print('----> DataLoaders created successfully')
print(f"----> Number of batches in the training DataLoader: {len(train_dataloader)}")
print(f"----> Number of batches in the validation DataLoader: {len(val_dataloader)}")
print(f"----> Number of batches in the test DataLoader: {len(test_dataloader)}")

# Create a dictionary of DataLoaders and dataset sizes
dataloaders = {
    'train': train_dataloader,
    'val': val_dataloader
}

dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset)
}



########## Creating DataLoaders ##########
----> DataLoaders created successfully
----> Number of batches in the training DataLoader: 134
----> Number of batches in the validation DataLoader: 32
----> Number of batches in the test DataLoader: 32


# Model

## Initialise model

In [10]:
print('\n\n########## Preparing pretrained ResNet-50 model ##########')
# Load the pre-trained ResNet-50 model with the 'weights' parameter
model = resnet50(weights=ResNet50_Weights.DEFAULT) # Best available weights (currently alias for IMAGENET1K_V2)
print('----> Model loaded successfully')

# Disable gradients on all model parameters to freeze the weights
for param in model.parameters():
    param.requires_grad = False

# Get the number of input features for the final linear layer
num_features = model.fc.in_features
print(f'----> Default number of features in the model: {num_features}')

# Replace the final linear layer with a new one (`num_classes` is the number of categories)
num_classes = len(encoder)  # Get the number of categories
print(f'----> Required number of classes in the model: {num_classes}')

# Replace the final linear layer

# Adding dropout in a fully connected layer
model.fc = nn.Sequential(
    nn.Linear(num_features, 1024),  # First linear layer (reduce dimensions)
    nn.ReLU(),                     # ReLU activation
    nn.Dropout(p=0.5),             # Dropout layer to prevent overfitting
    nn.Linear(1024, num_classes)    # Final linear layer to match the number of classes
)

# model.fc = torch.nn.Linear(num_features, num_classes)   
print(f'----> Final linear layer replaced with required number of classess')




########## Preparing pretrained ResNet-50 model ##########
----> Model loaded successfully
----> Default number of features in the model: 2048
----> Required number of classes in the model: 13
----> Final linear layer replaced with required number of classess


### Freezing / Unfreezing layers

In [11]:
# Unfreeze the last few layers of the model
for param in model.fc.parameters():
    param.requires_grad = True
    
for param in model.layer4.parameters():
    param.requires_grad = True

for param in model.layer3.parameters():
    param.requires_grad = True

print(f'----> Layers unfrozen')


----> Layers unfrozen


### Loss function

In [12]:
# Define the loss function
criterion = torch.nn.CrossEntropyLoss()


### Move model to device

In [13]:
print(torch.__version__)

2.4.0+cu118


In [14]:
# Move the model to the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   # Use the GPU if available
model = model.to(device)    # Move the model to the device
print(f"----> Model moved to {device}")   # Print the device
if torch.cuda.is_available():
    criterion.cuda()
    devNumber = torch.cuda.current_device() # Get the current device number
    print(f"  ----> Current device number is: {devNumber}") # Print the current device number
    devName = torch.cuda.get_device_name(devNumber) # Get the device name
    print(f"  ----> GPU name is: {devName}")    # Print the device name
    
print('-' * 75)

----> Model moved to cuda
  ----> Current device number is: 0
  ----> GPU name is: NVIDIA GeForce RTX 3060 Ti
---------------------------------------------------------------------------


### Optimizer & Scheduler

In [15]:
optimizer = torch.optim.SGD(model.parameters(), lr = 0.0015, momentum = 0.875, weight_decay = 3.0517578125e-05)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 50, eta_min = 1E-6, last_epoch = -1) 


## Training the model

### Initialise tensorboard writer

In [17]:
# Initialize TensorBoard SummaryWriter
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
model_dir = f'model_evaluation/model_{timestamp}'
os.makedirs(model_dir, exist_ok=True)
weights_dir = os.path.join(model_dir, 'weights')
os.makedirs(weights_dir, exist_ok=True)
writer = SummaryWriter(log_dir=model_dir)

### Training function

In [18]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    '''
    Function to train the model
    '''
    since = time.time()

    best_model_params_path = 'best_model_params.pt'
    torch.save(model.state_dict(), best_model_params_path)
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 75)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0           
            
            loop_no = 0
            no_of_dataloaders = len(dataloaders[phase])
            if phase == 'train':
                print('----> Training:')
            else:
                print('----> Validation:')
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                loop_no += 1
                
                # Show progress for the epoch
                progress(loop_no, no_of_dataloaders)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # print(f'  --> Loss: {loss.item()}')
                    
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'\n{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Log the loss and accuracy to TensorBoard
            writer.add_scalar(f'{phase} Loss', epoch_loss, epoch)
            writer.add_scalar(f'{phase} Accuracy', epoch_acc, epoch)
            
            # Save the model weights at the end of each epoch
            epoch_weights_path = os.path.join(weights_dir, f'epoch_{epoch}_weights.pth')
            torch.save(model.state_dict(), epoch_weights_path)
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), best_model_params_path)

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(torch.load(best_model_params_path))
    writer.flush() # Flush the TensorBoard writer
    writer.close()  # Close the TensorBoard writer
    return model


### Model training

In [19]:
model_ft = train_model(model, criterion, optimizer, scheduler, num_epochs=50)

Epoch 0/49
---------------------------------------------------------------------------
----> Training:
[############################################################] 100.0%  [134 / 134]
train Loss: 2.5182 Acc: 0.1795
----> Validation:
[############################################################] 100.0%  [32 / 32]
val Loss: 2.4357 Acc: 0.3813

Epoch 1/49
---------------------------------------------------------------------------
----> Training:
[############################################################] 100.0%  [134 / 134]
train Loss: 2.3303 Acc: 0.3610
----> Validation:
[############################################################] 100.0%  [32 / 32]
val Loss: 2.1335 Acc: 0.4442

Epoch 2/49
---------------------------------------------------------------------------
----> Training:
[############################################################] 100.0%  [134 / 134]
train Loss: 1.9654 Acc: 0.4212
----> Validation:
[############################################################] 100.0%  [3

  model.load_state_dict(torch.load(best_model_params_path))


### Model evaluation

In [20]:
# Evaluate on the test dataset
model_ft.eval()
test_running_corrects = 0

for inputs, labels in test_dataloader:
    inputs = inputs.to(device)
    labels = labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

    test_running_corrects += torch.sum(preds == labels.data)

test_acc = test_running_corrects.double() / len(test_dataset)
print(f'Test Acc: {test_acc:.4f}')

# Save test accuracy to a file
test_metrics_path = os.path.join(model_dir, 'test_metrics.txt')
with open(test_metrics_path, 'w') as f:
    f.write(f'Test Accuracy: {test_acc:.4f}\n')

Test Acc: 0.6389


### Visualise the model predictions

In [21]:
def unnormalize(img, mean, std):
    """
    Unnormalize a tensor image with mean and std.
    """
    img = img.clone()  # clone the tensor to avoid altering the original one
    for t, m, s in zip(img, mean, std):
        t.mul_(s).add_(m)  # scale by std and add the mean
    return img

def tensor_to_image(tensor):
    """
    Convert a tensor to a numpy array suitable for display.
    """
    tensor = tensor.permute(1, 2, 0).numpy()  # Change to HWC format
    tensor = np.clip(tensor, 0, 1)  # Clip to [0, 1] if necessary
    return tensor

In [1]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()
    
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                                
                # Convert tensor to numpy array and permute dimensions
                img = inputs.cpu().data[j].permute(1, 2, 0).numpy()
                
                # Get the predicted and actual class names
                predicted_class = decoder[preds[j].item()]
                actual_class = decoder[labels[j].item()]
                
                # Set the title with both predicted and actual class
                ax.set_title(f'Predicted: {predicted_class}\nActual: {actual_class}')
                plt.imshow(img)

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [2]:
visualize_model(model_ft)

NameError: name 'model_ft' is not defined