In [None]:
# import libraries here
import torch
import torch.nn as nn
import torchvision
from torchvision.models import resnet18, ResNet18_Weights
import torchvision.transforms as transforms
from torchvision import models
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, random_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns

In [None]:
# Write your proposed solution code here. Create more code cells if you find it necessary

# Channel statistics used by the Normalize steps below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training-time pipeline: random flip / crop / rotation for augmentation,
# then tensor conversion and per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Deterministic pipeline for the held-out test split: same size and
# normalisation as above, but no random augmentation, so repeated test
# evaluations see exactly the same pixels.
eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Use the first GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load Dataset
dataset = torchvision.datasets.ImageFolder(root='./Images', transform=transform)
loader = DataLoader(dataset, batch_size=32, shuffle=False)
class_names = dataset.classes


# Split into train/val/test
total_size = len(dataset)
test_size = int(0.2 * total_size)
train_val_size = total_size - test_size

# The seeded generator makes the split reproducible across kernel restarts.
train_val_dataset, test_split = random_split(
    dataset,
    [train_val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# Re-wrap the test indices around a second ImageFolder over the same root so
# that the test samples are read through `eval_transform` instead of the
# augmenting `transform`.
# NOTE(review): this relies on ImageFolder enumerating the same directory in
# the same order both times - confirm against the torchvision documentation.
# NOTE: `train_val_dataset` still reads through the augmenting `transform`, so
# any validation subset carved out of it is augmented as well.
eval_dataset = torchvision.datasets.ImageFolder(root='./Images', transform=eval_transform)
test_dataset = Subset(eval_dataset, test_split.indices)

# Create DataLoader
test_loader = DataLoader(test_dataset, batch_size=32)
# K-Fold Cross-Validation
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

In [26]:
# Define Custom CNN

class CustomCNN(nn.Module):
    """Small five-stage convolutional classifier.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    with channel widths 3 -> 8 -> 16 -> 32 -> 64 -> 128. Every stage halves the
    spatial size, and the first fully connected layer expects ``128 * 8 * 8``
    features, so inputs must be 3 x 256 x 256 images.

    Args:
        num_classes: Number of output logits. Defaults to 21, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=21):
        super().__init__()

        # Built in the same order as the original explicit listing so the
        # Sequential indices (and therefore state_dict keys) are unchanged.
        widths = [3, 8, 16, 32, 64, 128]
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # NOTE(review): there is no activation between the two Linear layers,
        # so apart from dropout the head is a composition of two linear maps.
        # Left as-is because inserting a layer would shift the Sequential
        # indices and break previously saved checkpoints.
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(128 * 8 * 8, 256),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [19]:
# Define an existing CNN ResNet18

def resnetModel(strategy='fine_tune', num_classes=21):
    """Build a ResNet18 with default pretrained weights and a new classification head.

    Args:
        strategy: ``'feature_extractor'`` freezes every pretrained parameter so
            that only the new ``fc`` layer is trainable. Any other value
            (including the default ``'fine_tune'``) leaves all parameters
            trainable.
        num_classes: Output size of the replacement ``fc`` layer. Defaults to
            21, the value that was previously hard-coded.

    Returns:
        The configured ``torchvision`` ResNet18 module.
    """
    model = models.resnet18(weights=ResNet18_Weights.DEFAULT)

    if strategy == 'feature_extractor':
        # Freeze the backbone first; the head created below is a fresh
        # nn.Linear whose parameters require gradients by default.
        for param in model.parameters():
            param.requires_grad = False

    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    return model

In [27]:

def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Both statistics are normalised by the number of samples
    actually yielded by the loader.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, correct, seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # criterion returns the batch mean, so weight it by the batch size.
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError("DataLoader yielded no samples")
    return loss_sum / seen, correct / seen


def trainModel(model, train_loader, val_loader, epochs=30, lr=0.01, momentum=0.9, weight_decay=0.00001):
    """Train ``model`` with SGD and track loss/accuracy on both loaders.

    Uses cross-entropy loss and a ReduceLROnPlateau scheduler (factor 0.1,
    patience 5) stepped on the validation loss after every epoch. The model is
    moved to the module-level ``device``. One summary line is printed per epoch.

    Args:
        model: The ``nn.Module`` to train.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        epochs: Number of passes over ``train_loader``.
        lr: Initial SGD learning rate.
        momentum: SGD momentum.
        weight_decay: SGD weight decay.

    Returns:
        ``(model, train_losses, val_losses, train_accuracies, val_accuracies)``
        where each list holds one float per epoch.

    Raises:
        ValueError: If either loader yields no samples.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    # Hand the optimizer only the parameters that can actually be updated
    # (frozen backbone parameters are skipped).
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable_params, lr=lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.1)

    train_accuracies = []
    train_losses = []
    val_accuracies = []
    val_losses = []

    for epoch in range(epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, optimizer)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        scheduler.step(val_loss)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_accuracy:.3f} | Validation Loss: {val_loss:.3f} | Validation Acc: {val_accuracy:.3f}")

    return model, train_losses, val_losses, train_accuracies, val_accuracies

In [28]:
# Evaluating the CNNs

def evaluateModel(model, test_loader, return_predictions=False):
    """Score ``model`` on ``test_loader`` and print weighted precision, recall and F1.

    The model is moved to the module-level ``device`` and switched to eval
    mode; predictions are the arg-max class of the model outputs.

    Args:
        model: The ``nn.Module`` to evaluate.
        test_loader: Loader yielding ``(images, labels)`` batches.
        return_predictions: When true, return the collected labels,
            predictions and metrics; otherwise return ``None``.

    Returns:
        ``(y_true, y_pred, accuracy, precision, recall, f1)`` if
        ``return_predictions`` is true, else ``None``.
    """
    model = model.to(device)
    model.eval()

    targets = []
    predictions = []
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            logits = model(batch_images.to(device))
            batch_predictions = torch.max(logits, 1)[1]
            targets.extend(batch_labels.numpy())
            predictions.extend(batch_predictions.cpu().numpy())

    # Class-frequency-weighted averages; undefined ratios are reported as 0.
    weighted = {"average": 'weighted', "zero_division": 0}
    acc = accuracy_score(targets, predictions)
    prec = precision_score(targets, predictions, **weighted)
    rec = recall_score(targets, predictions, **weighted)
    f1 = f1_score(targets, predictions, **weighted)
    print(f"Precision: {prec:.3f} | Recall: {rec:.3f} | F1 Score: {f1:.3f}")

    if not return_predictions:
        return None

    return targets, predictions, acc, prec, rec, f1

def denormalize(tensor, mean, std):
    """Undo a per-channel normalisation: return ``tensor * std + mean``.

    Args:
        tensor: Image tensor shaped ``(C, H, W)`` or ``(N, C, H, W)``.
        mean: Sequence of ``C`` per-channel means used during normalisation.
        std: Sequence of ``C`` per-channel standard deviations.

    Returns:
        A new tensor with the normalisation reversed.
    """
    # Build the statistics on the input's device and size them from the
    # sequences themselves, so CUDA tensors and non-RGB inputs both work.
    mean = torch.as_tensor(mean, device=tensor.device).reshape(-1, 1, 1)
    std = torch.as_tensor(std, device=tensor.device).reshape(-1, 1, 1)

    return tensor * std + mean

In [None]:
# Evaluating the custom CNN

def crossValidation(dataset, k=5, batch_size=32, model_fn=None):
    """Run k-fold cross-validation and report metrics, example predictions and curves.

    For each fold a fresh model is created, trained with ``trainModel`` and
    scored with ``evaluateModel`` on that fold's validation subset. Five
    example validation images are shown per fold with predicted and actual
    class names. After all folds the function shows a confusion matrix over
    the pooled validation predictions, prints averaged metrics and plots the
    per-epoch loss/accuracy curves averaged over folds.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs; indexed through
            ``Subset`` with the fold indices.
        k: Number of folds.
        batch_size: Batch size of the per-fold train and validation loaders.
        model_fn: Zero-argument callable returning a new, untrained model.
            Defaults to ``CustomCNN`` when omitted.

    Returns:
        None. All results are printed or plotted.
    """
    if model_fn is None:
        model_fn = CustomCNN

    # Must match the statistics passed to transforms.Normalize when the
    # dataset was loaded, otherwise the example images are shown with wrong colours.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    n_examples = 5

    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    all_y_true, all_y_pred = [], []
    all_train_losses, all_val_losses = [], []
    all_train_accuracies, all_val_accuracies = [], []
    all_accs, all_precs, all_recs, all_f1s = [], [], [], []

    # KFold only needs the number of samples, so split a plain index array.
    sample_indices = np.arange(len(dataset))
    for fold, (train_idx, valid_idx) in enumerate(kf.split(sample_indices)):
        print(f"Fold {fold+1}/{k}")
        train_dataset = Subset(dataset, train_idx)
        valid_dataset = Subset(dataset, valid_idx)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

        model_custom = model_fn()
        model_custom, train_losses, val_losses, train_accuracies, val_accuracies = trainModel(model_custom, train_loader, valid_loader)
        y_true, y_pred, acc, prec, rec, f1 = evaluateModel(model_custom, valid_loader, return_predictions=True)

        all_train_losses.append(train_losses)
        all_val_losses.append(val_losses)
        all_train_accuracies.append(train_accuracies)
        all_val_accuracies.append(val_accuracies)
        all_accs.append(acc)
        all_precs.append(prec)
        all_recs.append(rec)
        all_f1s.append(f1)

        all_y_true.extend(y_true)
        all_y_pred.extend(y_pred)

        # Example image display for the current fold
        example_loader = DataLoader(valid_dataset, batch_size=n_examples, shuffle=True)
        images, labels = next(iter(example_loader))
        model_custom.eval()
        with torch.no_grad():  # inference only; no autograd graph needed
            outputs = model_custom(images.to(device))
        preds = outputs.argmax(dim=1).cpu()
        # Clamp to the [0, 1] range that imshow expects for float images.
        denorm_images = denormalize(images.cpu(), norm_mean, norm_std).clamp(0, 1)

        fig, axes = plt.subplots(1, n_examples, figsize=(20, 8))
        for idx, ax in enumerate(axes):
            ax.axis("off")
            if idx >= images.size(0):
                # The validation fold held fewer than n_examples images.
                continue
            img = denorm_images[idx].permute(1, 2, 0).numpy()
            pred_label = class_names[preds[idx].item()]
            actual_label = class_names[labels[idx].item()]
            ax.imshow(img)
            ax.set_title(f"Pred: {pred_label} | Actual: {actual_label}", fontsize=8)
        plt.show()

    # Generate and display the overall confusion matrix
    print("Overall Confusion Matrix:")
    confusion_matrix_all = confusion_matrix(all_y_true, all_y_pred)
    plt.figure(figsize=(15, 12))
    sns.heatmap(confusion_matrix_all, annot=True, fmt="d", cmap="twilight")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Overall Confusion Matrix")
    plt.show()

    # Per-epoch curves averaged across folds (axis 0 is the fold axis).
    avg_train_losses = np.mean(np.array(all_train_losses), axis=0)
    avg_val_losses = np.mean(np.array(all_val_losses), axis=0)
    avg_train_accuracies = np.mean(np.array(all_train_accuracies), axis=0)
    avg_val_accuracies = np.mean(np.array(all_val_accuracies), axis=0)
    # Scalar means over every fold AND every epoch.
    overall_train_accuracy = np.mean(all_train_accuracies)
    overall_val_accuracy = np.mean(all_val_accuracies)
    # Means of the final per-fold scores returned by evaluateModel.
    avg_acc = np.mean(all_accs)
    avg_prec = np.mean(all_precs)
    avg_rec = np.mean(all_recs)
    avg_f1 = np.mean(all_f1s)

    print("\nOverall Metrics:")
    print(f"Average Accuracy: {avg_acc:.3f}")
    print(f"Average Traning Accuracy: {overall_train_accuracy:.3f}")
    print(f"Average Validation Accuracy: {overall_val_accuracy:.3f}")

    print(f"Average Precision: {avg_prec:.3f}")
    print(f"Average Recall: {avg_rec:.3f}")
    print(f"Average F1 Score: {avg_f1:.3f}")

    # Plotting average loss and accuracy
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(avg_train_losses, label='Average Train Loss', color="teal")
    plt.plot(avg_val_losses, label='Average Validation Loss', color="m")
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(avg_train_accuracies, label='Average Train Accuracy', color="darkblue")
    plt.plot(avg_val_accuracies, label='Average Validation Accuracy', color="crimson")
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# Cross-validate on the train/validation split only: `dataset` also contains
# the samples that random_split assigned to `test_dataset`, and folding those
# in would leak the held-out test data into training.
crossValidation(train_val_dataset)

In [None]:
# Evaluating the ResNet18

def crossValidation(dataset, k=5, batch_size=32, strategy='feature_extractor'):
    """Run k-fold cross-validation of the pretrained ResNet18 and report the results.

    NOTE: this definition has the same name as the cross-validation function
    defined for the custom CNN and replaces it once this cell has been run.

    For each fold a fresh model is created with ``resnetModel(strategy=...)``,
    trained with ``trainModel`` and scored with ``evaluateModel`` on that
    fold's validation subset. Five example validation images are shown per
    fold with predicted and actual class names. After all folds the function
    shows a confusion matrix over the pooled validation predictions, prints
    averaged metrics and plots the per-epoch curves averaged over folds.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs; indexed through
            ``Subset`` with the fold indices.
        k: Number of folds.
        batch_size: Batch size of the per-fold train and validation loaders.
        strategy: Passed to ``resnetModel``; ``'feature_extractor'`` (default,
            the previously hard-coded choice) or ``'fine_tune'``.

    Returns:
        None. All results are printed or plotted.
    """
    # Must match the statistics passed to transforms.Normalize when the
    # dataset was loaded.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    n_examples = 5

    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    all_y_true, all_y_pred = [], []
    all_train_losses, all_val_losses = [], []
    all_train_accuracies, all_val_accuracies = [], []
    all_accs, all_precs, all_recs, all_f1s = [], [], [], []

    # KFold only needs the number of samples, so split a plain index array.
    sample_indices = np.arange(len(dataset))
    for fold, (train_idx, valid_idx) in enumerate(kf.split(sample_indices)):
        print(f"Fold {fold+1}/{k}")
        train_dataset = Subset(dataset, train_idx)
        valid_dataset = Subset(dataset, valid_idx)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

        model_resnet = resnetModel(strategy=strategy)
        model_resnet, train_losses, val_losses, train_accuracies, val_accuracies = trainModel(model_resnet, train_loader, valid_loader)
        y_true, y_pred, acc, prec, rec, f1 = evaluateModel(model_resnet, valid_loader, return_predictions=True)

        all_train_losses.append(train_losses)
        all_val_losses.append(val_losses)
        all_train_accuracies.append(train_accuracies)
        all_val_accuracies.append(val_accuracies)
        all_accs.append(acc)
        all_precs.append(prec)
        all_recs.append(rec)
        all_f1s.append(f1)

        all_y_true.extend(y_true)
        all_y_pred.extend(y_pred)

        # Example image display for the current fold
        example_loader = DataLoader(valid_dataset, batch_size=n_examples, shuffle=True)
        images, labels = next(iter(example_loader))
        model_resnet.eval()
        with torch.no_grad():  # inference only; no autograd graph needed
            outputs = model_resnet(images.to(device))
        preds = outputs.argmax(dim=1).cpu()
        # Clamp to the [0, 1] range that imshow expects for float images.
        denorm_images = denormalize(images.cpu(), norm_mean, norm_std).clamp(0, 1)

        fig, axes = plt.subplots(1, n_examples, figsize=(20, 8))
        for idx, ax in enumerate(axes):
            ax.axis("off")
            if idx >= images.size(0):
                # The validation fold held fewer than n_examples images.
                continue
            img = denorm_images[idx].permute(1, 2, 0).numpy()
            pred_label = class_names[preds[idx].item()]
            actual_label = class_names[labels[idx].item()]
            ax.imshow(img)
            ax.set_title(f"Pred: {pred_label} | Actual: {actual_label}", fontsize=8)
        plt.show()

    # Generate and display the overall confusion matrix
    print("Overall Confusion Matrix:")
    confusion_matrix_all = confusion_matrix(all_y_true, all_y_pred)
    plt.figure(figsize=(15, 12))
    sns.heatmap(confusion_matrix_all, annot=True, fmt="d", cmap="twilight")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Overall Confusion Matrix")
    plt.show()

    # Per-epoch curves averaged across folds (axis 0 is the fold axis).
    avg_train_losses = np.mean(np.array(all_train_losses), axis=0)
    avg_val_losses = np.mean(np.array(all_val_losses), axis=0)
    avg_train_accuracies = np.mean(np.array(all_train_accuracies), axis=0)
    avg_val_accuracies = np.mean(np.array(all_val_accuracies), axis=0)
    # Scalar means over every fold AND every epoch.
    overall_train_accuracy = np.mean(all_train_accuracies)
    overall_val_accuracy = np.mean(all_val_accuracies)
    # Means of the final per-fold scores returned by evaluateModel.
    avg_acc = np.mean(all_accs)
    avg_prec = np.mean(all_precs)
    avg_rec = np.mean(all_recs)
    avg_f1 = np.mean(all_f1s)

    print("\nOverall Metrics:")
    print(f"Average Accuracy: {avg_acc:.3f}")
    print(f"Average Traning Accuracy: {overall_train_accuracy:.3f}")
    print(f"Average Validation Accuracy: {overall_val_accuracy:.3f}")
    print(f"Average Precision: {avg_prec:.3f}")
    print(f"Average Recall: {avg_rec:.3f}")
    print(f"Average F1 Score: {avg_f1:.3f}")

    # Plotting average loss and accuracy
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(avg_train_losses, label='Average Train Loss', color="teal")
    plt.plot(avg_val_losses, label='Average Validation Loss', color="m")
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(avg_train_accuracies, label='Average Train Accuracy', color="darkblue")
    plt.plot(avg_val_accuracies, label='Average Validation Accuracy', color="crimson")
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

# Runs the crossValidation defined in this cell (the ResNet18 version) on the
# train/validation portion produced by random_split; the samples assigned to
# `test_dataset` are not passed in.
crossValidation(train_val_dataset)