In [15]:
import numpy
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import models, transforms
from doodleLoaderSimple import DoodleDatasetSimple
from matplotlib import pyplot as plt

## Training and validation for the house image classifier

In [16]:
# Number to class labels mapping
class_dict = {
    0: 'stress',
    1: 'introvert',
    2: 'extrovert'
}

# Loading the data from the .csv file
# First row is a header
data = np.genfromtxt(r'D:\COLLEGE_STUDIES\SEM-6\ML_NLP_project\data\houseData.csv', dtype=int, delimiter=',', names=True)

print(data[:5]) 

[(0, 0) (1, 1) (2, 2) (3, 0) (4, 1)]


In [17]:
# # Visualization: Plot the distribution of classes
# def plot_class_distribution(translation_dict):
#     """
#     Plots the distribution of class labels in the dataset.
    
#     :param translation_dict: Dictionary mapping image filenames to class labels.
#     """
#     class_counts = np.bincount(list(translation_dict.values()))  # Count occurrences of each class
#     class_labels = [class_dict[i] for i in range(len(class_counts))]  # Get class names
    
#     plt.figure(figsize=(4, 3))
#     plt.bar(class_labels, class_counts, color=['red', 'blue', 'green'])
#     plt.xlabel("Class Labels")
#     plt.ylabel("Number of Images")
#     plt.title("Class Distribution in Dataset")
#     plt.show()

# # Call the function to visualize the dataset distribution
# plot_class_distribution(translation_dict)

In [18]:
def count_classes(dictClass, arr):
    """
    Redundant method that counts the occurrences of each class in the dataset
    Can be used to create weights if the class distribution is unbalanced
    :param dictClass: Dictionary that maps number to class labels
    :param arr: The array that contains the data
    :return: The number of occurrences for each class in the given array
    """
    unique, count = numpy.unique(arr, return_counts=True)
    print(dict(zip(dictClass.values(), count)))
    count = 1 / count
    count = count / sum(count)
    return count

In [19]:
# Match the image IDs to the ID values in the .csv file.
translation_dict = dict( zip([f'{id}.png' for id in data['id']], data['class']))

# Prepare each image to be passed as a Tensor product to the model.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Prepare the data by matching it to its label and transforming it to a Tensor product.
housedata = DoodleDatasetSimple(r'D:\\COLLEGE_STUDIES\\SEM-6\\ML_NLP_project\\images\\house\\', data_transforms, translation_dict)


In [20]:
# 80% of the data for training.
train_len = int(housedata.__len__() * 0.7)
# 20% of the data for validation.
test_len = int(housedata.__len__() * 0.3 + 1)
# Split the data at a random point.
train_set, val_set = torch.utils.data.random_split(housedata, [train_len, test_len])
# Shuffle and load the labeled images in batches of 4 for training.
train_loader = DataLoader(train_set, batch_size=4, shuffle=True, num_workers=0, drop_last=True)
# Load the labeled images in batches of 4 for validation after training the model.
test_loader = DataLoader(val_set, batch_size=4, shuffle=False, num_workers=0, drop_last=True)


## Model Definition

In [21]:
import torch
from torch import nn

class MultilabelClassifier(nn.Module):
    """
    Custom CNN model to replace ResNet-34.
    It extracts hierarchical features and replaces the last layer with a classification head.
    """
    def __init__(self, n_features):
        super(MultilabelClassifier, self).__init__()

        self.feature_extractor = nn.Sequential(
            # First Convolution Block
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Second Convolution Block
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Third Convolution Block
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Fourth Convolution Block
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Adaptive Pooling to match ResNet output size
            nn.AdaptiveAvgPool2d((1, 1))
        )

        # Fully Connected Classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(512, n_features)
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x, 1)  # Flatten for FC layer
        return {'class': self.classifier(x)}



In [22]:
# Set the device to use as the GPU if there is compatible hardware
# Otherwise run the model on the cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultilabelClassifier(3).to(device)


In [23]:
def criterion(outputs, pictures):
    """
    Method used by the model as the criterion for training.
    Cross entropy loss used as the loss function
    :param outputs: Predicted labels by the model
    :param pictures: Actual labeled images from the dataset
    :return: The sum of the cross entropy loss function.
    """
    losses = 0

    for i, key in enumerate(outputs):
        loss_func = nn.CrossEntropyLoss()
        labelsTensor = pictures['class'].clone().detach()
        losses += loss_func(outputs[key], labelsTensor.long().to(device))

    return losses


In [24]:
def training(model, device, lr_rate, epochs, train_loader):
    """
    Method used by the model for training
    :param model: The model to train
    :param device: Which device to use for computation, GPU or CPU
    :param lr_rate: The learning rate used by the optimizing function
    :param epochs: How many epochs to train the model for
    :param train_loader: The loader that provides the labeled images in batches
    :return: An array containing the losses after each epoch
    """
    num_epochs = epochs
    losses = []
    checkpoint_losses = []

    optimizer = torch.optim.Adam(model.parameters(), lr=lr_rate)
    n_total_steps = len(train_loader)

    for epoch in range(num_epochs):
        for i, pictures in enumerate(train_loader):
            images = pictures['image'].to(device)

            output = model(images)

            loss = criterion(output, pictures)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i + 1) % (int(n_total_steps / 1)) == 0:
                checkpoint_loss = torch.tensor(losses).mean().item()
                checkpoint_losses.append(checkpoint_loss)
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {checkpoint_loss:.4f}')

    # Snippet used to save the models for inferring during runtime.
    # model_save_path = r'D:\COLLEGE_STUDIES\SEM-6\ML_NLP_project\model\house\house_model_12.tar'
    # torch.save({
    #     'model_state_dict': model.state_dict(),
    #     'optimizer_state_dict': optimizer.state_dict(),
    #     'loss': checkpoint_losses,
    # }, model_save_path)



    return checkpoint_losses


In [25]:
# Call the method to train the model
checkpoint_losses = training(model, device, 0.0001, 25, train_loader)


Epoch [1/25], Step [94/94], Loss: 0.8799
Epoch [2/25], Step [94/94], Loss: 0.8277
Epoch [3/25], Step [94/94], Loss: 0.7847
Epoch [4/25], Step [94/94], Loss: 0.7413
Epoch [5/25], Step [94/94], Loss: 0.7100
Epoch [6/25], Step [94/94], Loss: 0.6808
Epoch [7/25], Step [94/94], Loss: 0.6537
Epoch [8/25], Step [94/94], Loss: 0.6384
Epoch [9/25], Step [94/94], Loss: 0.6167
Epoch [10/25], Step [94/94], Loss: 0.5994
Epoch [11/25], Step [94/94], Loss: 0.5781
Epoch [12/25], Step [94/94], Loss: 0.5613
Epoch [13/25], Step [94/94], Loss: 0.5485
Epoch [14/25], Step [94/94], Loss: 0.5382
Epoch [15/25], Step [94/94], Loss: 0.5240
Epoch [16/25], Step [94/94], Loss: 0.5095
Epoch [17/25], Step [94/94], Loss: 0.4953
Epoch [18/25], Step [94/94], Loss: 0.4822
Epoch [19/25], Step [94/94], Loss: 0.4711
Epoch [20/25], Step [94/94], Loss: 0.4591
Epoch [21/25], Step [94/94], Loss: 0.4471
Epoch [22/25], Step [94/94], Loss: 0.4374
Epoch [23/25], Step [94/94], Loss: 0.4270
Epoch [24/25], Step [94/94], Loss: 0.4162
E

In [26]:
def validation(model, dataloader, device):
    """
    Validates the model after training.

    :param model: The trained model.
    :param dataloader: Dataloader providing labeled images in batches.
    :param device: Device (CPU or GPU) for computation.
    :return: Model accuracy in percentage.
    """
    model.eval()  # Set model to evaluation mode
    n_correct = 0
    n_samples = 0

    with torch.no_grad():
        for pictures in dataloader:
            images = pictures['image'].to(device)
            labels = pictures['class'].to(device)

            outputs = model(images)['class']  # Extract class predictions
            _, predicted = torch.max(outputs, 1)  # Get predicted class indices

            n_correct += (predicted == labels).sum().item()
            n_samples += labels.size(0)

    acc = 100.0 * n_correct / n_samples if n_samples > 0 else 0.0  # Avoid division by zero
    print(f"Validation Accuracy: {acc:.2f}%")
        
    return acc


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Ensure correct device
model.to(device)  # Move model to device

# Call the validation function
accuracy = validation(model, test_loader, device)

Validation Accuracy: 75.00%
