# Import the necessary libraries

In [17]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
import json
import matplotlib.pyplot as plt
import os
from functions import LoadEncoderDecoder, progress
from create_pytorch_dataset import CustomImageDataset

# Let cuDNN benchmark the available convolution algorithms and reuse the fastest one
torch.backends.cudnn.benchmark = True
# Switch matplotlib to interactive mode
plt.ion()

<contextlib.ExitStack at 0x1bae83cf230>

# Set up variables

In [18]:
########## VARIABLES ##########
encoder_dictionary_filename = 'image_decoder.pkl'   # File the encoder and decoder are loaded from
datasource_filepath = 'cleaned_data/training_data.csv'    # CSV passed to CustomImageDataset as its data source
cleaned_img_dir = 'cleaned_data/images/'  # Folder containing the cleaned images
final_size = 224    # Final (square) size of the images fed to the model
batch_size = 64    # Batch size for the DataLoader

# Paths to save the training, validation and test datasets
train_indices_file = 'datasets/train_indices.pt'
val_indices_file = 'datasets/val_indices.pt'
test_indices_file = 'datasets/test_indices.pt'

# Final model
best_model_weights_path = 'best_model_params.pt'    # Path to the best model weights
final_model_dir = 'final_model' # Directory to save the final model
final_model_weights = 'image_model.pt' # Name of the final model weights file

# Embeddings
model_weights_path = f'{final_model_dir}/{final_model_weights}'  # Path to the model weights
embeddings_output_path = 'image_embeddings.json'    # Path to save the image embeddings

# ImageNet channel statistics expected by the pre-trained ResNet-50 weights.
# Defined once so the three pipelines below cannot drift apart (the 'train'
# pipeline previously used 0.255 instead of 0.225 for the last std value).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Data transformations
data_transforms = {
    # Training: random crop + horizontal flip for augmentation
    'train': v2.Compose([
        v2.ToImage(), # Convert to tensor, only needed for PIL images
        v2.RandomResizedCrop(size=(final_size, final_size), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
        # v2.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
        # v2.RandomRotation(20),
        v2.ToDtype(torch.float32, scale=True), # this has replaced ToTensor()
        v2.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]),
    # Validation: deterministic resize + centre crop
    'val': v2.Compose([
        v2.ToImage(), # Convert to tensor, only needed for PIL images
        v2.Resize(256),
        v2.CenterCrop(final_size),
        v2.ToDtype(torch.float32, scale=True), # this has replaced ToTensor()
        v2.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]),
    # Test: same deterministic pipeline as validation
    'test': v2.Compose([
        v2.ToImage(),  # Convert to tensor, only needed for PIL images
        v2.Resize(256),  # Resize to a standard size
        v2.CenterCrop(final_size),  # Center crop to the final input size
        v2.ToDtype(torch.float32, scale=True),  # Convert to float32 and scale values
        v2.Normalize(mean=imagenet_mean, std=imagenet_std),  # Same mean and std as training
    ]),
}

# Encoder and Decoder

In [19]:
# Restore the category-name -> index (encoder) and index -> category-name (decoder) mappings
print("\n########## Loading encoder and decoder ##########")
encoder, decoder = LoadEncoderDecoder(encoder_dictionary_filename)



########## Loading encoder and decoder ##########


########## Load Decoder Dictionary ##########
----> Decoder dictionary loaded successfully
----> Encoder and decoder extracted from the dictionary
----> Encoder:
 {'Home & Garden': 0, 'Baby & Kids Stuff': 1, 'DIY Tools & Materials': 2, 'Music, Films, Books & Games': 3, 'Phones, Mobile Phones & Telecoms': 4, 'Clothes, Footwear & Accessories': 5, 'Other Goods': 6, 'Health & Beauty': 7, 'Sports, Leisure & Travel': 8, 'Appliances': 9, 'Computers & Software': 10, 'Office Furniture & Equipment': 11, 'Video Games & Consoles': 12}
----> Decoder:
 {0: 'Home & Garden', 1: 'Baby & Kids Stuff', 2: 'DIY Tools & Materials', 3: 'Music, Films, Books & Games', 4: 'Phones, Mobile Phones & Telecoms', 5: 'Clothes, Footwear & Accessories', 6: 'Other Goods', 7: 'Health & Beauty', 8: 'Sports, Leisure & Travel', 9: 'Appliances', 10: 'Computers & Software', 11: 'Office Furniture & Equipment', 12: 'Video Games & Consoles'}


# Model

## Initialise model

In [20]:
# Rebuild the architecture the checkpoint was saved from, then restore its weights
print('########## Initialising the model ##########')
# Start from torchvision's ResNet-50 with the best available pre-trained weights
# (DEFAULT is currently an alias for IMAGENET1K_V2)
model = resnet50(weights=ResNet50_Weights.DEFAULT)
print('----> Model loaded successfully')

# Freeze the network: no parameter present at this point tracks gradients
model.requires_grad_(False)

# Width of the features entering the original classification layer
num_features = model.fc.in_features
print(f'----> Default number of features in the model: {num_features}')

# One output per category known to the encoder
num_classes = len(encoder)
print(f'----> Required number of classes in the model: {num_classes}')

# Classification head matching the checkpoint layout:
# Linear -> ReLU -> Dropout -> Linear(num_classes)
model.fc = nn.Sequential(
    nn.Linear(num_features, 1024),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(1024, num_classes),
)

print('----> Final linear layer replaced with required number of classess')

# Restore the best model parameters; remap tensors to the CPU when no GPU is present
checkpoint_location = None if torch.cuda.is_available() else torch.device('cpu')
best_state_dict = torch.load(
    best_model_weights_path,
    weights_only=True,
    map_location=checkpoint_location,
)
model.load_state_dict(best_state_dict)

# Swap the classification head for a single 1000-unit linear layer used for feature extraction.
# NOTE: this layer is constructed after the checkpoint is loaded, so its weights
# are freshly initialised rather than taken from the checkpoint.
model.fc = nn.Linear(num_features, 1000)



########## Initialising the model ##########
----> Model loaded successfully
----> Default number of features in the model: 2048
----> Required number of classes in the model: 13
----> Final linear layer replaced with required number of classess


### Move model to device

In [21]:
# Pick the compute device: CUDA when present, otherwise the CPU
cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if cuda_available else 'cpu')

# Relocate the model and report where it now lives
model = model.to(device)
print(f"----> Model moved to {device}")

# On a GPU machine, also report which CUDA device is active
if cuda_available:
    devNumber = torch.cuda.current_device()
    print(f"  ----> Current device number is: {devNumber}")
    devName = torch.cuda.get_device_name(devNumber)
    print(f"  ----> GPU name is: {devName}")

print('-' * 75)

----> Model moved to cuda
  ----> Current device number is: 0
  ----> GPU name is: NVIDIA GeForce RTX 3060 Ti
---------------------------------------------------------------------------


## Save the final model weights

In [22]:
# Persist the current model state to <final_model_dir>/<final_model_weights>
final_model_weights_path = os.path.join(final_model_dir, final_model_weights)
os.makedirs(final_model_dir, exist_ok=True)  # create the target folder if it is missing
torch.save(model.state_dict(), final_model_weights_path)
print(f"----> Final model weights saved to {final_model_weights_path}")


----> Final model weights saved to final_model\image_model.pt


# Dataset and Dataloader

## Create dataset

In [23]:
# Build the dataset using the 'val' pipeline, which contains no random transforms
print('########## Custom Image Dataset ##########')
dataset = CustomImageDataset(
    datasource_file=datasource_filepath,
    img_dir=cleaned_img_dir,
    transform=data_transforms['val'],
)
print('----> Custom Image Dataset created')
print(f"----> Number of samples in the dataset: {len(dataset)}")


########## Custom Image Dataset ##########
----> Custom Image Dataset created
----> Number of samples in the dataset: 12604


## Create dataloader

In [24]:
# Batch the dataset in its stored order (no shuffling), loading with 4 worker processes
print('\n\n########## Creating DataLoader ##########')
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
print('----> DataLoader created successfully')
print(f"----> Number of batches in the DataLoader: {len(dataloader)}")




########## Creating DataLoader ##########
----> DataLoader created successfully
----> Number of batches in the DataLoader: 197


# Extract embeddings

In [25]:
# Extract embeddings
# Maps image ID -> embedding (a plain list of floats, so it can be JSON-serialised)
image_embeddings = {}

current_image = 0   # Counter to keep track of the number of images processed
no_of_images = len(dataset)

# Switch to evaluation mode before inference. In training mode the BatchNorm
# layers normalise with per-batch statistics (and keep updating their running
# statistics even under no_grad), so an image's embedding would depend on which
# other images share its batch and the model would drift from the saved weights.
model.eval()

with torch.no_grad():
    for images, _labels in dataloader:
        # Forward pass on the selected device, then bring the batch back as a numpy array
        outputs = model(images.to(device)).cpu().numpy()

        for output in outputs:
            # The DataLoader does not shuffle, so the running counter is the dataset index.
            # Assumes dataset index i corresponds to row i of dataset.img_labels — TODO confirm.
            row_index = current_image
            # Advance unconditionally: if storing one embedding fails, the following
            # images must still be paired with their own rows rather than shifted by one.
            current_image += 1
            try:
                image_id = dataset.img_labels.iloc[row_index, 1]  # Column 1 is used as the image ID
                image_embeddings[image_id] = output.tolist()  # Convert numpy array to list for JSON serialization
            except Exception as e:
                # Best effort: report the row and carry on with the remaining images
                print(f"----> Image processing failed for dataset row {row_index}")
                print(f'----> Error occurred while storing the embedding: {e}')
            progress(current_image, no_of_images)

# Save embeddings dictionary as JSON
with open(embeddings_output_path, 'w') as f:
    json.dump(image_embeddings, f)

print(f"----> Total of {len(image_embeddings)} image embeddings saved to {embeddings_output_path}")

----> Total of 12604 image embeddings saved to image_embeddings.json%  [12604 / 12604]
