### 1. Imports and Setup
Import necessary libraries such as PyTorch, torchvision, and any other dependencies used for data handling, transformations, and model training.

**Description**: This section loads all required modules and libraries needed for setting up data transformations, model creation, and training processes.

In [None]:
from torchvision import transforms
import numpy as np
def imshow(img, label, prediction=None, bin_size=10):
    """Show a normalised image tensor, titled with its age range.

    Args:
        img: Image tensor in C x H x W layout that was normalised with the
            per-channel mean/std constants used below.
        label: True class (age-bin) index; a plain int or a one-element
            tensor.
        prediction: Optional predicted class index. When given, the title
            shows both the true and the predicted age range.
        bin_size: Width of one age bin in years, used to turn a class index
            into a "low-high" range.
    """
    # Imported locally so the function also works when this cell runs before
    # the cell that imports matplotlib at module level.
    import matplotlib.pyplot as plt

    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    def age_range(class_index):
        # int() unwraps 0-dim tensors; formatting a tensor directly would
        # render as "tensor(30)-tensor(40)" in the title.
        low = int(class_index) * bin_size
        return f"{low}-{low + bin_size}"

    pixels = img.detach().cpu().numpy().transpose((1, 2, 0))  # C x H x W -> H x W x C
    pixels = np.clip(pixels * std + mean, 0, 1)  # undo normalisation, clamp to displayable range
    plt.imshow(pixels)
    if prediction is not None:
        plt.title(f"True Label: {age_range(label)}\nPredicted Label: {age_range(prediction)}")
    else:
        plt.title(f"Label: {age_range(label)}")
    plt.axis('off')
    plt.show()

## Hyperparameters
We will use the following hyperparameters for training the model:
- Batch size: 128
- Learning rate: 0.001
- Number of epochs: 1000 
<br>
<br>
<br>
 We don’t have data for certain ages (e.g., there is no image of a 94-year-old person), so we group ages into ranges (like 90–100, 100–110) instead of predicting each specific age. 
This is called class binning. By using age ranges, the model can handle missing data more effectively and make more accurate predictions, even with a smaller dataset. 

In [None]:
import math
# Upper bound on the ages the labels must cover (presumably the oldest age in
# the dataset -- confirm against the data).
MAX_AGE = 116
# Width of one age bin in years; a sample's class index is age // SIZE_OF_BIN.
SIZE_OF_BIN = 10
# Class indices run from 0 to MAX_AGE // SIZE_OF_BIN inclusive, hence the +1.
# math.ceil(MAX_AGE / SIZE_OF_BIN) gives the same value for a bin size of 10
# but is one class short whenever SIZE_OF_BIN divides MAX_AGE exactly.
NUM_OF_CLASSES = MAX_AGE // SIZE_OF_BIN + 1
BATCH_SIZE = 128
LEARNING_RATE = 0.001
EPOCHS = 1000

### 3. Dataset and DataLoader Setup
**Custom Dataset Class**: Define a custom dataset class if required, to load data from a specific directory or dataset.

**DataLoader Initialization**: Initialize DataLoaders for training and testing, setting batch size and shuffling options.

**Description**: Here, we define the dataset and data loader. A custom dataset class helps load the data according to specific requirements, while DataLoader batches and prepares the data for the model during training and evaluation.

In [None]:
from torch.utils.data import Dataset, DataLoader, random_split, WeightedRandomSampler

import matplotlib.pyplot as plt
from PIL import Image
import os
import torch
from torchvision import transforms
# container of classes

class CustomDataset(Dataset):
    """Dataset of ``.jpg`` images whose file names start with the age.

    Each item is ``(image_tensor, age_bin)``: the image after the transform
    pipeline below, and the age parsed from the file name (the text before
    the first ``_``) integer-divided by ``SIZE_OF_BIN``.

    Args:
        images_path: Directory that directly contains the image files.
    """

    def __init__(self, images_path):
        # Sorted because os.listdir returns entries in arbitrary order; a
        # stable order keeps index -> file mapping identical between runs.
        self.image_files = sorted(
            os.path.join(images_path, f)
            for f in os.listdir(images_path)
            if os.path.isfile(os.path.join(images_path, f)) and f.lower().endswith('.jpg')
        )
        # NOTE(review): the random flip/rotation below run for every sample
        # this dataset serves, so subsets created from it with random_split
        # (validation/test) are augmented as well.
        self.transform = transforms.Compose([
            transforms.Resize([256], transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop([224]),

            # Subtle augmentations only: a person normally does not, e.g., stand upside down
            transforms.RandomHorizontalFlip(p=0.2),       # Small chance of flipping
            transforms.RandomRotation(degrees=5),         # Small rotation range
            # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),  # Subtle color jitter

            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_image, age_bin)``."""
        path = self.image_files[idx]
        # convert() loads the pixel data, so the file can be closed right
        # after. The former image.verify() call was dropped: it ran on the
        # converted in-memory copy, where it does not check the file.
        with Image.open(path) as raw:
            image = raw.convert("RGB")
        image = self.transform(image)

        # basename handles whatever separator os.path.join produced, unlike
        # splitting on a literal backslash.
        age = os.path.basename(path).split('_')[0]
        return image, int(age) // SIZE_OF_BIN


dataset = CustomDataset(r"C:\Users\morit\Downloads\UTKface_inthewild-20241024T082001Z-001\UTKface_inthewild")
# Unshuffled loader over the whole dataset; in this notebook it is only
# referenced by the commented-out age-distribution cell.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)

# 80 % training, 10 % validation, the remainder for testing.
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Split the dataset. The fixed seed makes the split reproducible, so a
# checkpoint loaded in a later session is evaluated on the same held-out
# samples instead of a fresh split that overlaps its training data.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Preview the first training batch; imshow requires the label as well.
for images, labels in train_loader:
    for image, label in zip(images, labels):
        imshow(image, label)

    break


<h1> Calculate Class Weights </h1>
Since calculating the class weights takes a long time, we use the hardcoded output of the calculation.



Due to the large imbalance of the dataset, we need to calculate the class weights to ensure that the model does not overfit to the majority class.
 
We will use the inverse frequency of each class as the class weight. This will help the model to pay more attention to the minority classes.

In [None]:
from torch import tensor
# Hardcoded result of the (slow) counting pass over the training data. The
# values sum to ~1, i.e. they are relative class frequencies.
class_frequencies = tensor([0.1378, 0.0637, 0.3042, 0.1891, 0.0946, 0.0966, 0.0550, 0.0298, 0.0220,
        0.0058, 0.0007, 0.0006], dtype=torch.float)

# Inverse-frequency weights: rare classes get large weights. Using the raw
# frequencies as loss weights would do the opposite and favour the majority
# classes. Rescaled so the weights average to 1.
class_weights_tensor = 1.0 / class_frequencies
class_weights_tensor = class_weights_tensor / class_weights_tensor.sum() * len(class_weights_tensor)

In [None]:
# import torch
# from collections import defaultdict
# 
# # Step 1: Count occurrences of each class
# class_counts = defaultdict(int)
# 
# for _, labels in train_loader:
#     for label in labels.numpy():
#             
#         class_counts[label] += 1
# 
# # Step 2: Calculate class weights
# total_samples = sum(class_counts.values())
# 
# # NOTE: this computes the relative class frequency (count / total), not its inverse
# class_weights = {cls: count/ total_samples for cls, count in class_counts.items()}
# # Convert to a tensor for PyTorch
# class_weights_tensor = torch.tensor([class_weights[i] for i in range(NUM_OF_CLASSES)], dtype=torch.float)
# 
# print("Class weights:", class_weights_tensor)


In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# 
# all_labels = []
# 
# for _, labels in dataloader:
#     all_labels.extend(labels.numpy())  
# 
# all_labels = np.array(all_labels)
# 
# plt.figure(figsize=(10, 6))
# plt.hist(all_labels, bins=range(int(all_labels.min()), int(all_labels.max()) + 2), edgecolor='black', alpha=0.7)
# plt.title('Age Distribution in Dataset')
# plt.xlabel('Age')
# plt.ylabel('Frequency')
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# plt.show()

### 4. Model Definition
**Define Neural Network Architecture**: Specify the layers, activation functions, and architecture of the neural network.

**Description**: This section builds the neural network model. Layers and activation functions are structured to capture the underlying patterns in the input data for classification tasks.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class highLevelNN(nn.Module):
    """Convolutional feature extractor made of three double-conv stages.

    Channel widths go 3 -> 32 -> 64 -> 128. Each stage is
    conv, batch-norm, ReLU, conv, batch-norm, stride-2 max-pool, ReLU,
    dropout.
    """

    @staticmethod
    def _stage(c_in, c_out, p_drop):
        """Return the eight layers of one stage, in execution order."""
        return [
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.Conv2d(in_channels=c_out, out_channels=c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Dropout(p_drop),
        ]

    def __init__(self):
        super().__init__()
        stage_specs = ((3, 32, 0.3), (32, 64, 0.3), (64, 128, 0.4))
        layers = []
        for c_in, c_out, p_drop in stage_specs:
            layers.extend(self._stage(c_in, c_out, p_drop))
        # The positional indices inside the Sequential are part of the
        # state_dict keys, so the layer order must stay exactly as built here.
        self.CNN = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch through all three stages."""
        features = self.CNN(x)
        return features


class lowLevelNN(nn.Module):
    """Classification head: two conv/pool steps followed by three linear layers.

    Args:
        num_out: Number of output logits produced by the last linear layer.
    """

    def __init__(self, num_out):
        super().__init__()
        # Attribute names double as state_dict keys; keep them unchanged.
        self.conv1 = nn.Conv2d(128, 128, kernel_size=4, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.conv2 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(256)

        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 64)
        self.fc6 = nn.Linear(64, num_out)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map a 128-channel feature map to ``num_out`` logits."""
        # Both conv steps share the same conv -> batch-norm -> ReLU -> pool shape.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            activated = F.relu(norm(conv(x)))
            x = F.max_pool2d(activated, kernel_size=6, stride=3, padding=1)

        # Flatten everything except the batch dimension for the linear layers.
        x = x.flatten(1)
        for hidden in (self.fc4, self.fc5):
            x = self.dropout(F.relu(hidden(x)))
        return self.fc6(x)


class AgeNN(nn.Module):
    """Age-bin classifier: ``highLevelNN`` features fed into a ``lowLevelNN`` head.

    Args:
        num_age: Number of age classes, i.e. the number of output logits.
    """

    def __init__(self, num_age):
        super().__init__()
        self.CNN = highLevelNN()
        self.ageNN = lowLevelNN(num_out=num_age)
        self._initialize_weights()

    def _initialize_weights(self):
        """Give every linear layer Xavier-uniform weights and a zero bias."""
        linear_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.constant_(layer.bias, 0)

    def forward(self, x):
        """Return the class logits for an image batch."""
        return self.ageNN(self.CNN(x))


# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

if __name__ == '__main__':
    # Smoke test: build the network and print a per-layer summary for a
    # 3 x 224 x 224 input.
    from torchsummary import summary

    print('Testing out Multi-Label NN')
    mlNN = AgeNN(NUM_OF_CLASSES)
    mlNN = mlNN.to(device)

    summary(mlNN, input_size=(3, 224, 224))


### 5. Training Configuration
**Loss Function and Optimizer**: Define the loss function (e.g., Cross-Entropy Loss for classification) and optimizer (e.g., Adam or SGD) to minimize the error during training.

**Learning Rate Scheduler**: Configure any learning rate scheduler, such as StepLR, to adjust the learning rate at certain intervals.

**Description**: This part of the code sets up the parameters for model training. The loss function calculates the error between predictions and actual labels, while the optimizer updates the model weights. The scheduler gradually reduces the learning rate as training progresses to stabilize convergence.

In [None]:
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import f1_score, balanced_accuracy_score
from torch.cuda.amp import GradScaler, autocast

def train(trainloader, testloader, model, opt, scheduler, num_epoch, save_path, class_weights=None):
    """Train ``model`` with cross-entropy loss and save a checkpoint per epoch.

    Args:
        trainloader: Iterable of ``(images, labels)`` training batches.
        testloader: Loader passed to ``evaluate`` after every 10th epoch.
        model: Network to train; moved to the GPU when one is available.
        opt: Optimizer that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epoch: Number of epochs to run.
        save_path: Directory for ``model_epoch_<n>.pth`` checkpoints; created
            if it does not exist.
        class_weights: Optional 1-D tensor of per-class loss weights. ``None``
            (the default) keeps the loss unweighted.
    """
    import os  # local import keeps this cell runnable on its own

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    os.makedirs(save_path, exist_ok=True)  # torch.save does not create directories

    if class_weights is not None:
        class_weights = class_weights.to(device)
    age_loss = nn.CrossEntropyLoss(weight=class_weights)

    # Mixed precision only helps on CUDA. The scaler multiplies the loss so
    # that small float16 gradients do not underflow to zero; autocast without
    # it can silently lose gradient information.
    use_amp = device == 'cuda'
    scaler = GradScaler(enabled=use_amp)

    for epoch in range(num_epoch):
        model.train()

        loop = tqdm(trainloader, total=len(trainloader), leave=False)
        age_correct = 0
        total = 0

        for X, y in loop:
            age = y.to(device).long()
            X = X.to(device)

            opt.zero_grad()
            with autocast(enabled=use_amp):
                pred = model(X)
                loss = age_loss(pred, age)

            scaler.scale(loss).backward()
            scaler.step(opt)   # unscales gradients, then calls opt.step()
            scaler.update()

            age_correct += (pred.argmax(1) == age).sum().item()
            total += age.size(0)

            loop.set_description(f"Epoch [{epoch+1}/{num_epoch}]")
            loop.set_postfix(loss=loss.item())

        scheduler.step()
        # Guard against an empty loader instead of dividing by zero.
        age_acc = age_correct / total if total else 0.0
        print(f'Epoch: {epoch+1}/{num_epoch}, Age Accuracy: {age_acc * 100:.2f}%')

        torch.save(model.state_dict(), os.path.join(save_path, f"model_epoch_{epoch+1}.pth"))

        if (epoch + 1) % 10 == 0:
            evaluate(testloader, model, epoch + 1)
            
def evaluate(loader, model, epoch, max_good=10, max_bad=50):
    """Print accuracy, weighted F1 and balanced accuracy, then show examples.

    Args:
        loader: Iterable of ``(images, labels)`` batches to evaluate on.
        model: Trained network; expected to already be on the device chosen
            below (CUDA when available, otherwise CPU).
        epoch: Epoch number, used only in the printed messages.
        max_good: Maximum number of correctly classified images to display.
        max_bad: Maximum number of misclassified images to display.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.eval()
    test_correct = 0
    test_total = 0
    all_preds = []
    all_labels = []
    good_images = []
    bad_images = []

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for X_test, y_test in loader:
            X_test, y_test = X_test.to(device), y_test.to(device).long()
            preds = model(X_test).argmax(1)

            test_correct += (preds == y_test).sum().item()
            test_total += y_test.size(0)

            # Copy to host once per batch; plain ints also give clean titles
            # when passed to imshow.
            batch_preds = preds.cpu().tolist()
            batch_labels = y_test.cpu().tolist()
            all_preds.extend(batch_preds)
            all_labels.extend(batch_labels)

            for image, target, guess in zip(X_test, batch_labels, batch_preds):
                # Stop only when BOTH galleries are full; a full "good" list
                # must not prevent later misclassified samples from being kept.
                if len(good_images) >= max_good and len(bad_images) >= max_bad:
                    break
                if guess == target:
                    if len(good_images) < max_good:
                        good_images.append((image.cpu(), target))
                elif len(bad_images) < max_bad:
                    bad_images.append((image.cpu(), target, guess))

    if test_total == 0:
        # An empty loader would otherwise raise ZeroDivisionError below.
        print(f'No samples to evaluate after Epoch {epoch}.')
        return

    test_acc = test_correct / test_total
    f1 = f1_score(all_labels, all_preds, average='weighted')
    balanced_acc = balanced_accuracy_score(all_labels, all_preds)

    print(f'Test Accuracy after Epoch {epoch}: {test_acc * 100:.2f}%')
    print(f'Test F1 Score after Epoch {epoch}: {f1 * 100:.2f}%')
    print(f'Test Balanced Accuracy after Epoch {epoch}: {balanced_acc * 100:.2f}%\n')

    print("Good Examples (Correctly Classified):")
    for img, label in good_images:
        imshow(img, label)

    print("Bad Examples (Incorrectly Classified):")
    for img, label, pred in bad_images:
        imshow(img, label, prediction=pred)

In [None]:
# import torch
# import torch.optim as optim
# from torch.utils.data import DataLoader
# from torchvision import transforms
# from torch.optim.lr_scheduler import StepLR
# # Decrease the learning rate by 0.1 every 10 epochs
# import os   
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# model = AgeNN(NUM_OF_CLASSES)  
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
# scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
# # Train the model
# train(train_loader, test_loader, model, optimizer,scheduler, EPOCHS, "model_weights")

### 7. Evaluation and Testing
**Test Model Performance**: Evaluate the trained model on held-out data (the cell below uses the validation split) to determine its accuracy, F1 score, and balanced accuracy.

**Description**: The evaluation section tests the model on unseen data to assess its generalization capabilities. Metrics like accuracy, F1 score, and balanced accuracy provide insights into model performance.

In [None]:
# Checkpoint to evaluate (machine-specific absolute path).
model_path = r"C:\Users\morit\PycharmProjects\AgePrediction\model_weights\model_epoch_78.pth"
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = AgeNN(NUM_OF_CLASSES).to(device)
# map_location lets a checkpoint saved during a GPU run load on a CPU-only
# machine; without it torch.load tries to restore tensors to their original device.
model.load_state_dict(torch.load(model_path, map_location=device))

evaluate(val_loader, model, 0)

# Single Image Prediction with Pretrained Model

This cell allows you to load a pretrained model from a specified path and make predictions on a single image. The image is preprocessed to match the model's input requirements, and the model outputs the predicted class label.

### Code Overview

- **Model Path Input**: Specify the path to the trained model file.
- **Image Path Input**: Specify the path to the image file you want to classify.
- **Class Names**: No separate list of class names is needed; the predicted class index is converted to its age range (e.g., class 3 → `30-40`).


In [None]:
from PIL import Image
import torch
import torchvision.transforms as transforms

def predict_image(model_path, image_path):
    """Predict the age range of a single image with a saved ``AgeNN`` checkpoint.

    Args:
        model_path: Path to a ``state_dict`` file saved from an ``AgeNN`` model.
        image_path: Path to the image file to classify.

    Returns:
        The predicted age range as a ``"low-high"`` string. The same string is
        also printed.
    """
    model = AgeNN(NUM_OF_CLASSES)
    # Inference runs on the CPU here, so map GPU-saved tensors to the CPU.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    model.eval()

    # Same deterministic preprocessing as the training dataset: resize the
    # shorter side to 256 and centre-crop 224. Resizing straight to 224x224
    # would distort the aspect ratio of non-square photos.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # convert() loads the pixel data, so the file can be closed right after.
    with Image.open(image_path) as raw:
        image = raw.convert('RGB')
    batch = transform(image).unsqueeze(0)  # Add batch dim

    with torch.no_grad():
        predicted = model(batch).argmax(1).item()

    # Class index -> age range, using the same bin width as the labels.
    low = predicted * SIZE_OF_BIN
    predicted_class = f"{low}-{low + SIZE_OF_BIN}"

    print(f'Predicted class: {predicted_class}')
    return predicted_class

# Inputs for the demo prediction (machine-specific absolute paths).
image_path = r"C:\Users\morit\Pictures\BNMW8637.JPG"
model_path = r"C:\Users\morit\PycharmProjects\AgePrediction\model_weights\model_epoch_78.pth"

# Classify the single image with the saved checkpoint.
predict_image(model_path=model_path, image_path=image_path)