**1. Importing Libraries**

In [29]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch.optim as optim
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split

**Create a Dataset Class for the CSV File**

In [None]:
class CustomImageDataset(Dataset):
    """Image-classification dataset indexed by a CSV file.

    The first CSV column holds each image path relative to ``root_dir``; the
    second column holds the class label. Labels are converted to integer
    class indices through ``label_map``.

    Args:
        csv_file: Path of the CSV file, read with ``pd.read_csv``.
        root_dir: Directory that the first-column paths are joined onto.
        transform: Optional callable applied to the loaded RGB image.
        label_map: Optional ``{label: index}`` mapping. Pass the training
            dataset's ``label_map`` when building a validation/test dataset so
            every split uses the same class indices. When omitted, the mapping
            is built from the sorted distinct labels of this CSV.
    """

    def __init__(self, csv_file, root_dir, transform=None, label_map=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

        if label_map is None:
            # unique() returns labels in order of first appearance, which
            # depends on row order and so differs between CSV files. Sorting
            # makes the mapping deterministic for a given set of labels.
            labels = self.data_frame.iloc[:, 1].unique()
            try:
                labels = sorted(labels)
            except TypeError:
                # Mixed, non-comparable label types: order by string form.
                labels = sorted(labels, key=str)
            label_map = {label: idx for idx, label in enumerate(labels)}

        # Copy so later edits to the caller's dict do not affect this dataset.
        self.label_map = dict(label_map)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(image, label_index)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image file listed in the CSV is missing.
            KeyError: If the row's label is absent from ``label_map``.
        """
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])  # Get filename
        image = Image.open(img_name).convert('RGB')  # Load the image
        label = self.data_frame.iloc[idx, 1]  # Get the label

        if self.transform:
            image = self.transform(image)

        # Convert the label to its corresponding integer
        label = self.label_map[label]

        return image, label

**2. Load and Preprocess Data**

In [None]:
# Define transformations for training and validation sets
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
])

# Locations of the CSV index files and of the image root directory
train_csv_file = r'D:\dev\work\dataset\imdb_wiki\imdb_train_new_1024.csv'
val_csv_file = r'D:\dev\work\dataset\imdb_wiki\imdb_valid_new_1024.csv'
root_dir = r'D:\dev\work\dataset\imdb_wiki\imdb-clean-1024'

# Alternative for a single CSV file: split it into training and validation sets
#data_frame = pd.read_csv()
#train_df, val_df = train_test_split(data_frame, test_size=0.2, random_state=42)

# Create datasets
train_dataset = CustomImageDataset(csv_file=train_csv_file, root_dir=root_dir, transform=transform)
val_dataset = CustomImageDataset(csv_file=val_csv_file, root_dir=root_dir, transform=transform)

# Each dataset derives its label -> index mapping from its own CSV, so the two
# mappings can disagree. Reuse the training mapping for validation so that a
# predicted class index refers to the same label in both splits. A validation
# label that never occurs in the training CSV will raise KeyError when read.
val_dataset.label_map = train_dataset.label_map


# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1024, shuffle=False)

**3. Load Pre-trained ResNet Model**

In [None]:
# Select the compute device first: CUDA when available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One output unit per distinct label in the training dataset's label map
num_classes = len(train_dataset.label_map)

# ResNet-50 with pre-trained weights; its final fully connected layer is
# replaced by a new linear head sized to num_classes
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

# Place the whole network on the selected device
model = model.to(device)



**4. Define Loss Function and Optimiser**

In [33]:
# Cross-entropy over the class logits is the classification objective
criterion = nn.CrossEntropyLoss()

# Adam updates every model parameter with a learning rate of 1e-3
optimizer = optim.Adam(model.parameters(), lr=1e-3)


**5. Model Training**

In [None]:
num_epochs = 10
accumulation_steps = 4  # Accumulate gradients over 4 batches; adjust accordingly

# Mixed precision is only used on CUDA; on CPU the scaler and autocast are
# disabled and act as pass-throughs.
use_amp = device.type == "cuda"
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    num_batches = len(train_loader)
    optimizer.zero_grad()  # Start every epoch with clean gradients

    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass and loss must run for EVERY batch, i.e. inside this loop.
        # Only the forward pass and loss belong under autocast.
        with torch.cuda.amp.autocast(enabled=use_amp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Divide by accumulation_steps so the summed gradients average over the
        # accumulated batches; the scaler guards against fp16 underflow.
        scaler.scale(loss / accumulation_steps).backward()

        # Step after every accumulation_steps batches, and also on the final
        # batch so a trailing partial group of gradients is not discarded.
        if (i + 1) % accumulation_steps == 0 or (i + 1) == num_batches:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # loss is the mean over the batch; weight it by batch size so the
        # epoch figure is a per-sample average.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\dev\\work\\dataset\\imdb_wiki\\imdb-clean-1024\\01\\nm1107001_rm2678573824_1978-9-23_2014.jpg'

**6. Model Validation**

In [None]:
# Evaluation mode: the model is switched out of training behaviour
model.eval()

correct, total = 0, 0

# Gradients are not needed while scoring the validation set
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

print(f'Validation Accuracy: {100 * correct / total:.2f}%')
