In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.utils.data import DataLoader,random_split
from torchvision import datasets, transforms
import torchmetrics
from sklearn.model_selection import ParameterGrid
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F

# For evaluation metrics
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from torchmetrics.classification import F1Score

# Seed every random number generator this notebook draws from (Python's
# `random`, NumPy's legacy global RNG and PyTorch) with one shared value so a
# fresh "Restart & Run All" replays the same random draws.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Choose the compute backend in order of preference: CUDA, then Apple MPS,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Define the dataset root directory (ImageFolder layout: one sub-folder per class)
root_dir = "./Apples"

# Define batch size
batch_size = 64

# Target size every image is resized to. The models' first Linear layer
# defaults to 3 * 128 * 128 inputs, so change both together.
image_size = (128, 128)

# Un-normalised view of the data (resize + tensor conversion only); it is the
# base of the random split below.
dataset = datasets.ImageFolder(
    root=root_dir,
    transform=transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()]),
)

# Define dataset split sizes
train_size = int(0.7 * len(dataset))  # 70% training
val_size = int(0.15 * len(dataset))   # 15% validation
test_size = len(dataset) - train_size - val_size  # Remaining for testing

# Split dataset. A dedicated, explicitly seeded generator makes the split
# independent of how much of the global RNG stream was consumed before this
# cell ran, so re-running the cell reproduces the same partition.
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(seed),
)

# Compute mean and std for each split
def compute_mean_std(loader):
    """Return the per-channel mean and standard deviation of all pixels in ``loader``.

    The statistics are accumulated from running sums and sums of squares, so
    the result equals the statistics of the concatenated data regardless of
    batch size or batch composition. (Averaging per-batch standard deviations
    does not give the standard deviation of the full set.)

    Args:
        loader: iterable yielding ``(images, labels)`` pairs where ``images``
            is a tensor shaped ``(batch, channels, ...)``.

    Returns:
        ``(mean, std)`` as two float32 tensors of shape ``(channels,)``; ``std``
        is the population (biased) standard deviation.

    Raises:
        ValueError: if the loader yields no pixels at all.
    """
    channel_sum = None
    channel_sq_sum = None
    pixel_count = 0

    for images, _ in loader:
        # (B, C, ...) -> (C, B * ...): one row of pixel values per channel.
        # Accumulate in float64 to limit rounding error over many batches.
        flat = images.transpose(0, 1).reshape(images.size(1), -1).double()
        if channel_sum is None:
            channel_sum = torch.zeros(flat.size(0), dtype=torch.float64, device=flat.device)
            channel_sq_sum = torch.zeros_like(channel_sum)

        channel_sum += flat.sum(dim=1)
        channel_sq_sum += (flat ** 2).sum(dim=1)
        pixel_count += flat.size(1)

    if pixel_count == 0:
        raise ValueError("compute_mean_std received an empty loader")

    mean = channel_sum / pixel_count
    # E[x^2] - E[x]^2 can dip marginally below zero through rounding.
    variance = (channel_sq_sum / pixel_count - mean ** 2).clamp_min(0.0)
    return mean.float(), variance.sqrt().float()

# Create temporary DataLoaders for mean/std calculation (sample order does not
# affect the statistics, so nothing is shuffled)
temp_train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
temp_val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
temp_test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Compute statistics. Only the training statistics are used for normalisation
# below; the validation/test statistics are kept for inspection only.
train_mean, train_std = compute_mean_std(temp_train_loader)
val_mean, val_std = compute_mean_std(temp_val_loader)
test_mean, test_std = compute_mean_std(temp_test_loader)

# Training pipeline: light augmentation, then normalisation with training stats
train_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(train_mean.tolist(), train_std.tolist())
])

# Evaluation pipeline: no random augmentation (metrics must be repeatable) and
# the SAME normalisation as training, so the model sees validation/test inputs
# on the scale it was trained on and no held-out statistics leak into the
# preprocessing.
val_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(train_mean.tolist(), train_std.tolist())
])
test_transform = val_transform

# Reload datasets with their respective transforms.
# The three Subsets produced by random_split all wrap the same ImageFolder, so
# assigning `<subset>.dataset.transform` three times leaves every split with
# whichever transform was assigned last. Give each split its own ImageFolder
# instead and reuse the split's indices.
train_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=root_dir, transform=train_transform), train_dataset.indices
)
val_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=root_dir, transform=val_transform), val_dataset.indices
)
test_dataset = torch.utils.data.Subset(
    datasets.ImageFolder(root=root_dir, transform=test_transform), test_dataset.indices
)

# Create final DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

In [None]:
class Model1(nn.Module):
    """Fully connected classifier: input -> 256 -> 128 -> 32 -> ``num_classes``.

    Every hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout; the output
    layer returns raw logits (no softmax).

    Args:
        input_dim: number of features after flattening one sample
            (default ``3 * 128 * 128``).
        num_classes: number of output logits (default 10).
        dropout: dropout probability shared by all hidden layers (default 0.3).
    """

    def __init__(self, input_dim=3 * 128 * 128, num_classes=10, dropout=0.3):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a batch shaped ``(batch, ...)`` to logits shaped ``(batch, num_classes)``."""
        x = self.flatten(x)
        x = self.dropout(self.relu(self.bn1(self.fc1(x))))
        x = self.dropout(self.relu(self.bn2(self.fc2(x))))
        x = self.dropout(self.relu(self.bn3(self.fc3(x))))
        return self.fc4(x)

def initialize_model():
    """Build and return a new ``Model1`` with its default constructor arguments."""
    fresh_model = Model1()
    return fresh_model

In [None]:
class Model2(nn.Module):
    """Fully connected classifier: input -> 256 -> 128 -> 64 -> ``num_classes``.

    Every hidden layer is Linear -> BatchNorm1d -> ReLU -> Dropout; the output
    layer returns raw logits.

    Args:
        input_dim: number of features after flattening one sample
            (default ``128 * 128 * 3``).
        num_classes: number of output logits (default 10).
        dropout: dropout probability shared by all hidden layers (default 0.3).
    """

    def __init__(self, input_dim=128 * 128 * 3, num_classes=10, dropout=0.3):
        super().__init__()

        # Define layers
        self.layer1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm1d(256)

        self.layer2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)

        self.layer3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)

        self.output_layer = nn.Linear(64, num_classes)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a batch shaped ``(batch, ...)`` to logits shaped ``(batch, num_classes)``."""
        # torch.flatten also accepts non-contiguous input (e.g. a permuted
        # tensor), where Tensor.view would raise.
        x = torch.flatten(x, 1)

        x = self.dropout(torch.relu(self.bn1(self.layer1(x))))
        x = self.dropout(torch.relu(self.bn2(self.layer2(x))))
        x = self.dropout(torch.relu(self.bn3(self.layer3(x))))

        return self.output_layer(x)


In [None]:
class Model3(nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> ``num_classes``.

    Both hidden layers are Linear -> BatchNorm1d -> ReLU -> Dropout; the output
    layer returns raw logits.

    Args:
        input_dim: number of features after flattening one sample
            (default ``3 * 128 * 128``).
        num_classes: number of output logits (default 10).
        dropout: dropout probability used after each hidden layer (default 0.5).
    """

    def __init__(self, input_dim=3 * 128 * 128, num_classes=10, dropout=0.5):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)  # Batch normalization after first layer.
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)  # Dropout regularization after first layer.
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)     # Batch normalization after second layer.
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map a batch shaped ``(batch, ...)`` to logits shaped ``(batch, num_classes)``."""
        x = self.flatten(x)
        x = self.dropout1(self.relu(self.bn1(self.fc1(x))))
        x = self.dropout2(self.relu(self.bn2(self.fc2(x))))
        return self.fc3(x)

In [None]:
class FocalLoss(nn.Module):
    """Multi-class focal loss computed from raw logits.

    Per sample the loss is ``alpha * (1 - p_t) ** gamma * CE``, where ``p_t`` is
    the softmax probability of the true class and ``CE`` is the cross-entropy
    of that sample.

    Args:
        alpha: constant multiplier applied to every sample's loss.
        gamma: exponent of the ``(1 - p_t)`` modulating factor.
        reduction: ``'mean'`` or ``'sum'``; any other value returns the
            unreduced per-sample losses.
    """

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha, self.gamma, self.reduction = alpha, gamma, reduction

    def forward(self, inputs, targets):
        """Return the focal loss for logits ``inputs`` (N, C) and class indices ``targets`` (N,)."""
        log_p = F.log_softmax(inputs, dim=1)
        class_probs = log_p.exp()
        per_sample_ce = F.nll_loss(log_p, targets, reduction='none')

        # Probability the model assigned to each sample's true class.
        p_true = class_probs.gather(1, targets.unsqueeze(1)).squeeze()
        per_sample = self.alpha * (1 - p_true) ** self.gamma * per_sample_ce

        if self.reduction == 'sum':
            return per_sample.sum()
        if self.reduction == 'mean':
            return per_sample.mean()
        return per_sample

In [None]:
def plot_misclassified(model, test_loader, device, n=36, title="Misclassified Examples"):
    """Show up to ``n`` misclassified samples from ``test_loader`` in a grid.

    Works for channel-first colour images (C, H, W) as well as single-channel
    images. Each image is min-max rescaled for display because the loader may
    yield normalised tensors whose values fall outside the displayable range.

    Args:
        model: classifier returning logits shaped ``(batch, classes)``.
        test_loader: iterable of ``(images, targets)`` batches.
        device: device the batches are moved to before the forward pass.
        n: maximum number of misclassified samples to show; the grid is sized
            to hold ``n`` cells (6x6 for the default of 36).
        title: figure title.

    Returns:
        None. Displays a matplotlib figure; does nothing when ``n <= 0``.
    """
    if n <= 0:
        return

    model.eval()
    images, pred_labels, true_labels = [], [], []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            preds = model(data).argmax(dim=1)
            wrong_positions = (preds != target).nonzero(as_tuple=True)[0].tolist()
            for i in wrong_positions:
                if len(images) >= n:
                    break
                images.append(data[i].cpu().numpy())
                pred_labels.append(preds[i].item())
                true_labels.append(target[i].item())
            if len(images) >= n:
                break

    # Near-square grid large enough for n cells.
    cols = int(np.ceil(np.sqrt(n)))
    rows = int(np.ceil(n / cols))
    fig, axes = plt.subplots(
        rows, cols,
        figsize=(max(8 * cols / 6, 2), max(8 * rows / 6, 2)),
        squeeze=False,
    )
    fig.suptitle(title)

    for idx, ax in enumerate(axes.ravel()):
        ax.axis('off')
        if idx >= len(images):
            # Fewer misclassifications than requested: leave the cell empty.
            if idx < n:
                ax.set_title("Blank", fontsize=8)
            continue

        image = images[idx]
        if image.ndim == 3:
            image = np.transpose(image, (1, 2, 0))  # CHW -> HWC, as imshow expects
            if image.shape[2] == 1:
                image = image[:, :, 0]

        # Rescale to [0, 1] so normalised pixel values render sensibly.
        lo, hi = image.min(), image.max()
        image = (image - lo) / (hi - lo) if hi > lo else np.zeros_like(image)

        ax.imshow(image, cmap='gray' if image.ndim == 2 else None)
        ax.set_title(f"P:{pred_labels[idx]}\nT:{true_labels[idx]}", fontsize=8)

    plt.tight_layout()
    plt.show()

In [None]:
def initialize_weights(model, method=None, constant_value=0.1):
    """Re-initialise ``model``'s parameters in place.

    Args:
        model: module whose ``named_parameters()`` are initialised.
        method: ``None`` (leave the model untouched), ``'he_uniform'`` or
            ``'constant'``.
        constant_value: value written to every weight when
            ``method == 'constant'``.

    Behaviour per parameter (selected by name):
        * name contains ``'weight'``:
            - ``'constant'``: filled with ``constant_value``.
            - ``'he_uniform'``: Kaiming-uniform (ReLU gain) for tensors with at
              least two dimensions. One-dimensional weights, such as the
              BatchNorm scale, are left as they are because fan-in/fan-out is
              undefined for them and ``kaiming_uniform_`` would raise.
        * name contains ``'bias'``: set to 0.

    Raises:
        ValueError: if ``method`` is not one of the supported values.
    """
    if method is None:
        print("No initialization required")
        return

    # Validate up front so an invalid method never half-initialises the model.
    if method not in ('he_uniform', 'constant'):
        raise ValueError(f"Unsupported initialization method: {method}")

    for name, param in model.named_parameters():
        if 'weight' in name:
            if method == 'constant':
                init.constant_(param, constant_value)
            elif param.dim() >= 2:
                init.kaiming_uniform_(param, nonlinearity='relu')
        elif 'bias' in name:
            init.constant_(param, 0.0)  # Initialize biases to 0

In [None]:

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs, patience, num_classes=10):
    """Train ``model`` with early stopping on validation loss.

    Args:
        model: network to optimise (updated in place).
        train_loader / val_loader: loaders yielding ``(inputs, targets)``;
            ``len(loader.dataset)`` is used to average the loss.
        criterion: loss returning a batch-mean scalar.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the batches are moved to.
        num_epochs: maximum number of epochs.
        patience: stop after this many consecutive epochs without a new best
            validation loss.
        num_classes: number of classes for the weighted F1 metric (default 10).

    Returns:
        ``(train_losses, val_losses, train_f1_scores, val_f1_scores)``, one
        entry per completed epoch. On return the model holds the weights of
        the epoch with the lowest validation loss.
    """
    import copy  # local import keeps this cell self-contained

    train_losses, val_losses = [], []
    train_f1_scores, val_f1_scores = [], []
    f1_metric = F1Score(task='multiclass', num_classes=num_classes, average='weighted').to(device)

    best_val_loss = float('inf')
    epochs_without_improvement = 0
    best_model_weights = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        f1_metric.reset()  # start a fresh accumulation for this phase

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Accumulate predictions so F1 is computed over the whole epoch
            # rather than as an average of per-batch F1 values.
            f1_metric.update(torch.argmax(outputs, dim=1), targets)
            running_loss += loss.item() * inputs.size(0)

        # Compute epoch averages
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_f1 = f1_metric.compute().item()

        train_losses.append(epoch_loss)
        train_f1_scores.append(epoch_f1)

        # Validation phase
        model.eval()
        val_loss = 0.0
        f1_metric.reset()
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)

                f1_metric.update(torch.argmax(outputs, dim=1), targets)
                val_loss += loss.item() * inputs.size(0)

        val_loss = val_loss / len(val_loader.dataset)
        val_f1 = f1_metric.compute().item()

        val_losses.append(val_loss)
        val_f1_scores.append(val_f1)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {epoch_loss:.4f}, Train F1: {epoch_f1:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            # state_dict() returns references to the live tensors; without a
            # deep copy later optimizer steps would overwrite this snapshot.
            best_model_weights = copy.deepcopy(model.state_dict())
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f'Early stopping at epoch {epoch+1} (no improvement for {patience} epochs).')
            break

    if best_model_weights is not None:
        model.load_state_dict(best_model_weights)
        print('Loaded the best model weights.')

    return train_losses, val_losses, train_f1_scores, val_f1_scores

In [None]:
def evaluate_model(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader``.

    Returns a 3-tuple:
        * loss summed per sample (batch loss times batch size) and divided by
          ``len(test_loader.dataset)``,
        * the confusion matrix of targets versus predictions,
        * the classification report as a dictionary.
    """
    model.eval()
    predicted_labels, true_labels = [], []
    loss_sum = 0.0

    with torch.no_grad():
        for batch, labels in test_loader:
            batch = batch.to(device)
            labels = labels.to(device)

            logits = model(batch)
            loss_sum += criterion(logits, labels).item() * batch.size(0)

            # Index of the largest logit is the predicted class.
            batch_preds = torch.max(logits, 1)[1]
            predicted_labels.extend(batch_preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    mean_loss = loss_sum / len(test_loader.dataset)
    cm = confusion_matrix(true_labels, predicted_labels)
    report = classification_report(true_labels, predicted_labels, output_dict=True)

    return mean_loss, cm, report

In [None]:
def grid_search_adam(model, train_loader, val_loader, criterion, param_grid, device):
    """Grid-search Adam hyper-parameters for ``model``.

    For every combination in ``param_grid`` a deep copy of ``model`` is
    trained (7 epochs at most, early-stopping patience 3), so each run uses
    the architecture and initial weights that were passed in and starts from
    the same state. ``model`` itself is never modified.

    Recognised grid keys: ``'lr'``, ``'momentum'`` (used as Adam's beta1) and
    ``'weight_decay'``.

    Returns:
        ``(best_model, best_params, best_score)``, where the score is the mean
        of the per-epoch validation F1 values of a run. ``best_model`` and
        ``best_params`` are ``None`` if no run scored above 0.
    """
    import copy  # local import keeps this cell self-contained

    best_score = 0
    best_params = None
    best_model = None
    best_train_losses, best_val_losses = [], []
    best_train_f1_scores, best_val_f1_scores = [], []

    for counter, params in enumerate(ParameterGrid(param_grid), start=1):
        # Train a copy of the model that was passed in instead of a
        # hard-coded architecture.
        candidate = copy.deepcopy(model).to(device)
        optimizer = torch.optim.Adam(
            candidate.parameters(),
            lr=params.get('lr', 0.001),
            betas=(params.get('momentum', 0.9), 0.999),
            weight_decay=params.get('weight_decay', 0.0)
        )
        print("Combination number", counter)

        print("Current Parameters are ", params)
        train_losses, val_losses, train_f1_scores, val_f1_scores = train_model(
            candidate, train_loader, val_loader, criterion, optimizer, device, num_epochs=7, patience=3
        )

        # Calculate average of val_f1_scores
        avg_val_f1_score = sum(val_f1_scores) / len(val_f1_scores)

        # Check if the average F1 score is higher than the best_score
        if avg_val_f1_score > best_score:
            best_score = avg_val_f1_score
            print("New best score", best_score)
            best_params = params
            best_model = candidate
            best_train_losses = train_losses
            best_val_losses = val_losses
            best_train_f1_scores = train_f1_scores
            best_val_f1_scores = val_f1_scores

    print("Best Parameters for Adam Optimizer:", best_params)
    print("Train Losses:", best_train_losses)
    print("Validation Losses:", best_val_losses)

    all_losses = best_train_losses + best_val_losses
    if all_losses:  # nothing to plot when no combination scored above 0
        plt.figure(figsize=(12, 6))

        # First subplot: loss curves
        plt.subplot(1, 2, 1)
        plt.plot(best_train_losses, label='Train Loss')
        plt.plot(best_val_losses, label='Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training and Validation Loss Curves')
        plt.legend()
        plt.ylim(0, max(all_losses) * 1.1)  # Ensure y-axis starts from 0

        # Second subplot: F1-Score Curves
        plt.subplot(1, 2, 2)
        plt.plot(best_train_f1_scores, label='Train F1-Score')
        plt.plot(best_val_f1_scores, label='Validation F1-Score')
        plt.xlabel('Epochs')
        plt.ylabel('F1-Score')
        plt.title('Training and Validation F1-Score Curves')
        plt.legend()
        plt.ylim(0, 1)  # F1-score is typically between 0 and 1

        # Show both subplots in a single figure
        plt.tight_layout()  # Adjusts layout to prevent overlap
        plt.show()

    return best_model, best_params, best_score

In [None]:
def grid_search_RMSprop(model, train_loader, val_loader, criterion, param_grid, device):
    """Grid-search RMSprop hyper-parameters for ``model``.

    For every combination in ``param_grid`` a deep copy of ``model`` is
    trained (7 epochs at most, early-stopping patience 3), so each run uses
    the architecture and initial weights that were passed in and starts from
    the same state. ``model`` itself is never modified.

    Recognised grid keys: ``'lr'``, ``'alpha'``, ``'momentum'`` and
    ``'weight_decay'``.

    Returns:
        ``(best_model, best_params, best_score)``, where the score is the mean
        of the per-epoch validation F1 values of a run. ``best_model`` and
        ``best_params`` are ``None`` if no run scored above 0.
    """
    import copy  # local import keeps this cell self-contained

    best_score = 0
    best_params = None
    best_model = None
    best_train_losses, best_val_losses = [], []
    best_train_f1_scores, best_val_f1_scores = [], []

    for params in ParameterGrid(param_grid):
        # Train a copy of the model that was passed in instead of a
        # hard-coded architecture.
        candidate = copy.deepcopy(model).to(device)

        optimizer = torch.optim.RMSprop(
            candidate.parameters(),
            lr=params.get('lr', 0.001),
            alpha=params.get('alpha', 0.99),
            momentum=params.get('momentum', 0.0),
            weight_decay=params.get('weight_decay', 0.0),
        )

        train_losses, val_losses, train_f1_scores, val_f1_scores = train_model(
            candidate, train_loader, val_loader, criterion, optimizer, device, num_epochs=7, patience=3
        )

        avg_val_f1_score = sum(val_f1_scores) / len(val_f1_scores)

        if avg_val_f1_score > best_score:
            best_score = avg_val_f1_score
            print("New best score", best_score)
            best_params = params
            best_model = candidate
            best_train_losses = train_losses
            best_val_losses = val_losses
            best_train_f1_scores = train_f1_scores
            best_val_f1_scores = val_f1_scores

    print("Best Parameters for RMSprop optimizer:", best_params)
    print("Train Losses:", best_train_losses)
    print("Validation Losses:", best_val_losses)

    all_losses = best_train_losses + best_val_losses
    if all_losses:  # nothing to plot when no combination scored above 0
        plt.figure(figsize=(12, 6))

        # First subplot: loss curves
        plt.subplot(1, 2, 1)
        plt.plot(best_train_losses, label='Train Loss')
        plt.plot(best_val_losses, label='Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training and Validation Loss Curves')
        plt.legend()
        plt.ylim(0, max(all_losses) * 1.1)  # Ensure y-axis starts from 0

        # Second subplot: F1-Score Curves
        plt.subplot(1, 2, 2)
        plt.plot(best_train_f1_scores, label='Train F1-Score')
        plt.plot(best_val_f1_scores, label='Validation F1-Score')
        plt.xlabel('Epochs')
        plt.ylabel('F1-Score')
        plt.title('Training and Validation F1-Score Curves')
        plt.legend()
        plt.ylim(0, 1)  # F1-score is typically between 0 and 1

        # Show both subplots in a single figure
        plt.tight_layout()  # Adjusts layout to prevent overlap
        plt.show()

    return best_model, best_params, best_score

In [None]:
# Experiment 1_1: Model 1, Adam optimizer, cross-entropy loss
criterion = nn.CrossEntropyLoss()
model1 = Model1().to(device)
initialize_weights(model1, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

best_model_1_1, best_params_1_1, best_score_1_1 = grid_search_adam(
    model1, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_1_1, cm, report_1_1 = evaluate_model(
    best_model_1_1, test_loader, criterion, device
)

print("Classification Report for Model 1_1:")
print(report_1_1)

# Confusion-matrix heatmap for run 1_1
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 1_1')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 1_2: Model 1, Adam optimizer, focal loss
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')
model1 = Model1().to(device)
initialize_weights(model1, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

best_model_1_2, best_params_1_2, best_score_1_2 = grid_search_adam(
    model1, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_1_2, cm, report_1_2 = evaluate_model(
    best_model_1_2, test_loader, criterion, device
)

print("Classification Report for Model 1_2:")
print(report_1_2)

# Confusion-matrix heatmap for run 1_2
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 1_2')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 1_3: Model 1, RMSprop optimizer, cross-entropy loss
criterion = nn.CrossEntropyLoss()
model1 = Model1().to(device)
initialize_weights(model1, method='constant', constant_value=0.1)

# RMSprop search space
param_grid = dict(
    lr=[0.002, 0.0008],
    alpha=[0.99, 0.95],
    momentum=[0.9, 0.8],
    weight_decay=[1e-4, 1e-5],
)

best_model_1_3, best_params_1_3, best_score_1_3 = grid_search_RMSprop(
    model1, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_1_3, cm, report_1_3 = evaluate_model(
    best_model_1_3, test_loader, criterion, device
)

print("Classification Report for Model 1_3:")
print(report_1_3)

# Confusion-matrix heatmap for run 1_3
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 1_3')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 1_4: Model 1, RMSprop optimizer, focal loss
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')
model1 = Model1().to(device)
initialize_weights(model1, method='constant', constant_value=0.1)

# RMSprop search space
param_grid = dict(
    lr=[0.002, 0.0008],
    alpha=[0.99, 0.95],
    momentum=[0.9, 0.8],
    weight_decay=[1e-4, 1e-5],
)

best_model_1_4, best_params_1_4, best_score_1_4 = grid_search_RMSprop(
    model1, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_1_4, cm, report_1_4 = evaluate_model(
    best_model_1_4, test_loader, criterion, device
)

print("Classification Report for Model 1_4:")
print(report_1_4)

# Confusion-matrix heatmap for run 1_4
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 1_4')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Gather the four Model 1 runs into lookup tables that share one key per run.
# Key order is the run order and determines the row order of the table below.
run_keys = ['best_score_1_1', 'best_score_1_2', 'best_score_1_3', 'best_score_1_4']

best_scores = dict(zip(run_keys, [best_score_1_1, best_score_1_2, best_score_1_3, best_score_1_4]))
test_losses = dict(zip(run_keys, [test_loss_1_1, test_loss_1_2, test_loss_1_3, test_loss_1_4]))
best_models = dict(zip(run_keys, [best_model_1_1, best_model_1_2, best_model_1_3, best_model_1_4]))
best_params = dict(zip(run_keys, [best_params_1_1, best_params_1_2, best_params_1_3, best_params_1_4]))
best_report = dict(zip(run_keys, [report_1_1, report_1_2, report_1_3, report_1_4]))
optimizers = dict(zip(run_keys, ["Adam", "Adam", "RMSprop", "RMSprop"]))
loss_functions = dict(zip(run_keys, ["Cross Entropy", "Focal Loss", "Cross Entropy", "Focal Loss"]))
models = dict(zip(run_keys, ["Model_1_1", "Model_1_2", "Model_1_3", "Model_1_4"]))

# Find the key corresponding to the best score
best_score_key = max(best_scores, key=best_scores.get)

# Pull out everything belonging to the winning run
test_loss1 = test_losses[best_score_key]
best_model1 = best_models[best_score_key]
best_params1 = best_params[best_score_key]
best_optimizer1 = optimizers[best_score_key]
best_model_name1 = models[best_score_key]
best_loss_function1 = loss_functions[best_score_key]
best_report1 = best_report[best_score_key]

# Print assigned values (the score itself, not just the key it is stored under)
print(f"Best F1 value for best Model 1: {best_scores[best_score_key]} ({best_score_key})")
print(f"Test Loss for best Model 1: {test_loss1}")
print(f"Best Model 1 is : {best_model_name1}")
print(f"Best Params used on best Model 1 are: {best_params1}")

# Create a table with all model details
data = {
    "Model": list(models.values()),
    "Optimizer": list(optimizers.values()),
    "Loss Function": list(loss_functions.values()),
    "Parameters": [str(params) for params in best_params.values()],
    "F1 Score": list(best_scores.values())
}

df = pd.DataFrame(data)
print(df)

In [None]:
# Experiment 2_1: Model 2, Adam optimizer, cross-entropy loss
criterion = nn.CrossEntropyLoss()
model2 = Model2().to(device)
initialize_weights(model2, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

# Perform grid search using Adam optimizer
best_model_2_1, best_params_2_1, best_score_2_1 = grid_search_adam(
    model2, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_2_1, cm, report_2_1 = evaluate_model(
    best_model_2_1, test_loader, criterion, device
)

print("Classification Report for Model 2_1 :")
print(report_2_1)

# Confusion-matrix heatmap for run 2_1
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 2_1')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 2_2: Model 2, Adam optimizer, focal loss
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')
model2 = Model2().to(device)
initialize_weights(model2, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

best_model_2_2, best_params_2_2, best_score_2_2 = grid_search_adam(
    model2, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_2_2, cm, report_2_2 = evaluate_model(
    best_model_2_2, test_loader, criterion, device
)

print("Classification Report for Model 2_2:")
print(report_2_2)

# Confusion-matrix heatmap for run 2_2
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 2_2')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 2_3: Model 2, RMSprop optimizer, cross-entropy loss
criterion = nn.CrossEntropyLoss()
model2 = Model2().to(device)
initialize_weights(model2, method='constant', constant_value=0.1)

# RMSprop search space
param_grid = dict(
    lr=[0.002, 0.0008],
    alpha=[0.99, 0.95],
    momentum=[0.9, 0.8],
    weight_decay=[1e-4, 1e-5],
)

best_model_2_3, best_params_2_3, best_score_2_3 = grid_search_RMSprop(
    model2, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_2_3, cm, report_2_3 = evaluate_model(
    best_model_2_3, test_loader, criterion, device
)

print("Classification Report for Model 2_3:")
print(report_2_3)

# Confusion-matrix heatmap for run 2_3
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 2_3')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 2_4: Model 2, RMSprop optimizer, focal loss
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')
model2 = Model2().to(device)
initialize_weights(model2, method='constant', constant_value=0.1)

# RMSprop search space
param_grid = dict(
    lr=[0.002, 0.0008],
    alpha=[0.99, 0.95],
    momentum=[0.9, 0.8],
    weight_decay=[1e-4, 1e-5],
)

best_model_2_4, best_params_2_4, best_score_2_4 = grid_search_RMSprop(
    model2, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_2_4, cm, report_2_4 = evaluate_model(
    best_model_2_4, test_loader, criterion, device
)

print("Classification Report for Model 2_4:")
print(report_2_4)

# Confusion-matrix heatmap for run 2_4
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 2_4')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Gather the four Model 2 runs into lookup tables that share one key per run.
# Key order is the run order and determines the row order of the table below.
run_keys = ['best_score_2_1', 'best_score_2_2', 'best_score_2_3', 'best_score_2_4']

best_scores = dict(zip(run_keys, [best_score_2_1, best_score_2_2, best_score_2_3, best_score_2_4]))
test_losses = dict(zip(run_keys, [test_loss_2_1, test_loss_2_2, test_loss_2_3, test_loss_2_4]))
best_models = dict(zip(run_keys, [best_model_2_1, best_model_2_2, best_model_2_3, best_model_2_4]))
best_params = dict(zip(run_keys, [best_params_2_1, best_params_2_2, best_params_2_3, best_params_2_4]))
best_report = dict(zip(run_keys, [report_2_1, report_2_2, report_2_3, report_2_4]))
optimizers = dict(zip(run_keys, ["Adam", "Adam", "RMSprop", "RMSprop"]))
loss_functions = dict(zip(run_keys, ["Cross Entropy", "Focal Loss", "Cross Entropy", "Focal Loss"]))
models = dict(zip(run_keys, ["Model_2_1", "Model_2_2", "Model_2_3", "Model_2_4"]))

# Find the key corresponding to the best score
best_score_key = max(best_scores, key=best_scores.get)

# Pull out everything belonging to the winning run
test_loss2 = test_losses[best_score_key]
best_model2 = best_models[best_score_key]
best_params2 = best_params[best_score_key]
best_optimizer2 = optimizers[best_score_key]
best_model_name2 = models[best_score_key]
best_loss_function2 = loss_functions[best_score_key]
best_report2 = best_report[best_score_key]

# Print assigned values (the score itself, not just the key it is stored under)
print(f"Best F1 value for best Model 2: {best_scores[best_score_key]} ({best_score_key})")
print(f"Test Loss for best Model 2: {test_loss2}")
print(f"Best Model 2 is : {best_model_name2}")
print(f"Best Params used on best Model 2 are: {best_params2}")

# Create a table with all model details
data = {
    "Model": list(models.values()),
    "Optimizer": list(optimizers.values()),
    "Loss Function": list(loss_functions.values()),
    "Parameters": [str(params) for params in best_params.values()],
    "F1 Score": list(best_scores.values())
}

df = pd.DataFrame(data)
print(df)

In [None]:
# Experiment 3_1: Model 3, Adam optimizer, cross-entropy loss
criterion = nn.CrossEntropyLoss()
model3 = Model3().to(device)
initialize_weights(model3, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

best_model_3_1, best_params_3_1, best_score_3_1 = grid_search_adam(
    model3, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_3_1, cm, report_3_1 = evaluate_model(
    best_model_3_1, test_loader, criterion, device
)

print("Classification Report for Model 3_1:")
print(report_3_1)

# Confusion-matrix heatmap for run 3_1
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 3_1')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Experiment 3_2: Model 3, Adam optimizer, focal loss
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')
model3 = Model3().to(device)
initialize_weights(model3, method='constant', constant_value=0.1)

# Search space; grid_search_adam passes 'momentum' to Adam as beta1.
param_grid = dict(
    lr=[0.001, 0.0005],
    momentum=[0.95, 0.9],
    weight_decay=[1e-4, 1e-5],
)

best_model_3_2, best_params_3_2, best_score_3_2 = grid_search_adam(
    model3, train_loader, val_loader, criterion, param_grid, device
)

# Score the selected model on the held-out test split
test_loss_3_2, cm, report_3_2 = evaluate_model(
    best_model_3_2, test_loader, criterion, device
)

print("Classification Report for Model 3_2:")
print(report_3_2)

# Confusion-matrix heatmap for run 3_2
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Model 3_2')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Model 3_3: Model 3 tuned with the RMSprop grid search and cross-entropy loss
model3 = Model3().to(device)
initialize_weights(model3, method='constant', constant_value=0.1)
criterion = nn.CrossEntropyLoss()

# Two candidate values per hyperparameter, handed to grid_search_RMSprop
param_grid = {
    'lr': [0.002, 0.0008],
    'alpha': [0.99, 0.95],
    'momentum': [0.9, 0.8],
    'weight_decay': [1e-4, 1e-5],
}
best_model_3_3, best_params_3_3, best_score_3_3 = grid_search_RMSprop(
    model3, train_loader, val_loader, criterion, param_grid, device
)

# Evaluate the selected model on the test loader
test_loss_3_3, cm, report_3_3 = evaluate_model(
    best_model_3_3, test_loader, criterion, device
)

print("Classification Report for Model 3_3:", report_3_3, sep="\n")

# Confusion matrix heatmap for Model 3_3
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.gca().set(
    title='Confusion Matrix for Model 3_3',
    xlabel='Predicted Label',
    ylabel='True Label',
)
plt.show()

In [None]:
# Model 3_4: Model 3 tuned with the RMSprop grid search and focal loss
model3 = Model3().to(device)
initialize_weights(model3, method='constant', constant_value=0.1)
criterion = FocalLoss(alpha=1, gamma=2, reduction='mean')

# Two candidate values per hyperparameter, handed to grid_search_RMSprop
param_grid = {
    'lr': [0.002, 0.0008],
    'alpha': [0.99, 0.95],
    'momentum': [0.9, 0.8],
    'weight_decay': [1e-4, 1e-5],
}
best_model_3_4, best_params_3_4, best_score_3_4 = grid_search_RMSprop(
    model3, train_loader, val_loader, criterion, param_grid, device
)

# Evaluate the selected model on the test loader
test_loss_3_4, cm, report_3_4 = evaluate_model(
    best_model_3_4, test_loader, criterion, device
)

print("Classification Report for Model 3_4:", report_3_4, sep="\n")

# Confusion matrix heatmap for Model 3_4
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.gca().set(
    title='Confusion Matrix for Model 3_4',
    xlabel='Predicted Label',
    ylabel='True Label',
)
plt.show()

In [None]:
# Collect the results of the four Model 3 runs into parallel dictionaries.
# Every dictionary uses the same four keys in the same order, so one key
# selects all artefacts of a run and the .values() lists line up row by row.

# Best score returned by each grid search (shown as "F1 Score" in the table)
best_scores = {
    'best_score_3_1': best_score_3_1,
    'best_score_3_2': best_score_3_2,
    'best_score_3_3': best_score_3_3,
    'best_score_3_4': best_score_3_4
}

# Test loss reported by evaluate_model for each run
test_losses = {
    'best_score_3_1': test_loss_3_1,
    'best_score_3_2': test_loss_3_2,
    'best_score_3_3': test_loss_3_3,
    'best_score_3_4': test_loss_3_4
}

# Model object returned by each grid search
best_models = {
    'best_score_3_1': best_model_3_1,
    'best_score_3_2': best_model_3_2,
    'best_score_3_3': best_model_3_3,
    'best_score_3_4': best_model_3_4
}

# Hyperparameters returned by each grid search
best_params = {
    'best_score_3_1': best_params_3_1,
    'best_score_3_2': best_params_3_2,
    'best_score_3_3': best_params_3_3,
    'best_score_3_4': best_params_3_4
}

# Classification report produced by evaluate_model for each run
best_report = {
    'best_score_3_1': report_3_1,
    'best_score_3_2': report_3_2,
    'best_score_3_3': report_3_3,
    'best_score_3_4': report_3_4
}

# Human-readable optimizer label per run
optimizers = {
    'best_score_3_1': "Adam",
    'best_score_3_2': "Adam",
    'best_score_3_3': "RMSprop",
    'best_score_3_4': "RMSprop"
}

# Human-readable loss-function label per run
loss_functions = {
    'best_score_3_1': "Cross Entropy",
    'best_score_3_2': "Focal Loss",
    'best_score_3_3': "Cross Entropy",
    'best_score_3_4': "Focal Loss"
}

# Display name per run
models = {
    'best_score_3_1': "Model_3_1",
    'best_score_3_2': "Model_3_2",
    'best_score_3_3': "Model_3_3",
    'best_score_3_4': "Model_3_4"
}

# Find the key corresponding to the best score (on a tie, max keeps the
# first key in insertion order)
best_score_key = max(best_scores, key=best_scores.get)

# Assign test_loss3, best_model3, and best_params3 to the corresponding values
test_loss3 = test_losses[best_score_key]
best_model3 = best_models[best_score_key]
best_params3 = best_params[best_score_key]
best_optimizer3 = optimizers[best_score_key]
best_model_name3 = models[best_score_key]
best_loss_function3 = loss_functions[best_score_key]
best_report3 = best_report[best_score_key]

# Print assigned values.
# best_score_key is only the dictionary key that identifies the run; the
# score itself is stored in best_scores, so that is what gets reported here.
print(f"Best F1 value for best Model 3: {best_scores[best_score_key]}")
print(f"Test Loss for best Model 3: {test_loss3}")
print(f"Best Model 3 is : {best_model_name3}")
print(f"Best Params used on best Model 3 are: {best_params3}")

# Create a table with one row per run
data = {
    "Model": list(models.values()),
    "Optimizer": list(optimizers.values()),
    "Loss Function": list(loss_functions.values()),
    # Parameter dicts are stringified so each one fits in a single table cell.
    "Parameters": [str(params) for params in best_params.values()],
    "F1 Score": list(best_scores.values())
}

df = pd.DataFrame(data)
print(df)

In [None]:

def extract_metrics(report):
    """Flatten the headline figures of a classification report into one dict.

    Args:
        report: Mapping with an 'accuracy' entry plus 'macro avg' and
            'weighted avg' sub-mappings, each holding 'precision', 'recall'
            and 'f1-score'. Presumably the dict form of sklearn's
            classification_report -- confirm against evaluate_model.

    Returns:
        dict with the keys "Accuracy", "Macro Precision", "Macro Recall",
        "Macro F1-Score", "Weighted Precision", "Weighted Recall" and
        "Weighted F1-Score", in that order.

    Raises:
        KeyError: if any of the entries listed above is missing from report.
    """
    averages = (("Macro", 'macro avg'), ("Weighted", 'weighted avg'))
    fields = (("Precision", 'precision'), ("Recall", 'recall'), ("F1-Score", 'f1-score'))

    summary = {"Accuracy": report['accuracy']}
    for prefix, section in averages:
        for label, field in fields:
            summary[f"{prefix} {label}"] = report[section][field]
    return summary

# Headline metrics of the best run of each architecture
metrics1 = extract_metrics(best_report1)
metrics2 = extract_metrics(best_report2)
metrics3 = extract_metrics(best_report3)

# One spec per architecture: (name, layer layout, optimizer, loss function, metrics)
summary_specs = [
    ("Model1", "3*128*128 - 256 - 32 - 10", best_optimizer1, best_loss_function1, metrics1),
    ("Model2", "3*128*128 - 256 - 128 - 64 -10", best_optimizer2, best_loss_function2, metrics2),
    ("Model3", "3*128*128 - 128 - 64 - 10", best_optimizer3, best_loss_function3, metrics3),
]

# Create DataFrame: the descriptive columns come first, then the metric columns
results_df = pd.DataFrame([
    {
        "Model Name": model_name,
        "Layers": layer_layout,
        "Activation": "ReLU",
        "Optimizer": optimizer_name,
        "Loss function": loss_name,
        **model_metrics,
    }
    for model_name, layer_layout, optimizer_name, loss_name, model_metrics in summary_specs
])

# Save to CSV (index column omitted) and echo the table
results_df.to_csv('model_summary.csv', index=False)
print("CSV report generated: model_summary.csv")
print(results_df)