## Imports

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import models, datasets, transforms
from sklearn.model_selection import StratifiedKFold
import numpy as np
from utils import train_model, evaluate_model
import random

## Set seed for reproducibility

In [10]:
def set_seed(seed):
    """
    Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch;
    when CUDA is available, all CUDA devices are seeded as well.

    Args:
        seed (int): Value handed to each library's seeding function.
    """
    # The three CPU-side seeders share the same call shape, so apply them in turn.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Fix the seed at 42 for the rest of the notebook
set_seed(42)

## EfficientNet-B2

In [11]:
# Use CUDA if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the EfficientNet model to use
def create_efficientnet_model(num_classes):
    """
    Build an EfficientNet-B2 without pretrained weights and give it a new
    classification head.

    Args:
        num_classes (int): Number of output units of the replacement
            ``nn.Linear`` head.

    Returns:
        torch.nn.Module: The model, moved to the module-level ``device``.
    """
    # Called with no arguments on purpose: the default is "no pretrained
    # weights" in both the legacy (`pretrained=False`) and the current
    # (`weights=None`) torchvision API, and it avoids the deprecation warning
    # that `pretrained=` triggers on newer torchvision releases.
    model = models.efficientnet_b2()
    # Swap the final classifier layer for one sized to this task, keeping the
    # input width of the layer being replaced.
    original_head = model.classifier[1]
    model.classifier[1] = nn.Linear(original_head.in_features, num_classes)
    return model.to(device)

In [19]:
# Sanity check: display the input width of the classifier layer that
# create_efficientnet_model replaces (the recorded output is 1408).
models.efficientnet_b2(pretrained=False).classifier[1].in_features

1408

## Helper Functions

In [4]:
import itertools
import torch
import numpy as np

def load_dataset(dataset_folder):
    """
    Create an ``ImageFolder`` dataset with the notebook's fixed preprocessing.

    Each image is resized to 224x224, converted to a tensor and normalised
    with the hard-coded per-channel mean/std below.

    Args:
        dataset_folder: Root directory passed to ``datasets.ImageFolder``.

    Returns:
        The ``datasets.ImageFolder`` instance with the transform attached.
    """
    # NOTE(review): the normalisation statistics are hard-coded; presumably
    # they were measured on this dataset — confirm before reusing elsewhere.
    preprocessing_steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.436, 0.385, 0.344], std=[0.296, 0.269, 0.261]),
    ]
    return datasets.ImageFolder(
        root=dataset_folder,
        transform=transforms.Compose(preprocessing_steps),
    )
    
def grid_search_tuning(dataset, num_classes, learning_rate, batch_sizes, num_epochs=5):
    """
    Grid-search learning rate and batch size with 5-fold stratified
    cross-validation.

    For every (learning rate, batch size) pair a fresh model is created and
    trained on each fold. The pair with the highest mean validation accuracy
    over the folds is kept as the best one; its mean F1 score is reported
    alongside.

    Args:
        dataset: Dataset exposing a ``targets`` sequence (one label per
            sample) and usable with ``torch.utils.data.Subset``.
        num_classes (int): Forwarded to ``create_efficientnet_model``.
        learning_rate: Iterable of learning rates to try. A single number is
            also accepted. The parameter keeps its original (singular) name so
            existing callers continue to work.
        batch_sizes: Iterable of batch sizes to try. A single int is also
            accepted.
        num_epochs (int): Forwarded to ``train_model`` as ``num_epochs``.

    Returns:
        tuple: ``(best_params, best_accuracy, best_f1)`` where ``best_params``
        is ``{'learning_rate': ..., 'batch_size': ...}`` (empty if the grid is
        empty) and the two scores are the mean fold accuracy / F1 of that
        combination. The same values are also printed.
    """
    # Bug fix: the body used to iterate over a *global* called `learning_rates`
    # and ignored this function's own argument, so it raised NameError on a
    # fresh kernel and otherwise tuned over whatever the global happened to be.
    if isinstance(learning_rate, (int, float)):
        candidate_rates = [learning_rate]
    else:
        candidate_rates = list(learning_rate)
    if isinstance(batch_sizes, int):
        candidate_batch_sizes = [batch_sizes]
    else:
        candidate_batch_sizes = list(batch_sizes)

    # Labels and fold indices do not depend on the hyperparameters, so build
    # them once. With an integer random_state the splitter yields the same
    # folds on every call, so every grid point is scored on identical folds,
    # exactly as before.
    targets = np.asarray(dataset.targets)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    folds = list(skf.split(np.zeros(len(targets)), targets))

    best_accuracy = 0
    best_f1 = 0
    best_params = {}

    for lr, bs in itertools.product(candidate_rates, candidate_batch_sizes):
        print(f"Testing LR: {lr}, BS: {bs}")

        fold_accuracies = []
        fold_f1_scores = []

        for fold, (train_idx, val_idx) in enumerate(folds, start=1):
            print(f'Fold {fold}')

            train_subset = Subset(dataset, train_idx)
            val_subset = Subset(dataset, val_idx)

            train_dataloader = DataLoader(train_subset, batch_size=bs, shuffle=True)
            val_dataloader = DataLoader(val_subset, batch_size=bs, shuffle=False)

            # A new model per fold so no weights carry over between folds.
            model = create_efficientnet_model(num_classes)
            trained_model = train_model(model, train_dataloader, val_dataloader, num_epochs=num_epochs, lr=lr)

            fold_accuracy, fold_f1 = evaluate_model(trained_model, val_dataloader)
            fold_accuracies.append(fold_accuracy)
            fold_f1_scores.append(fold_f1)
            print(f'Fold {fold} Accuracy: {fold_accuracy:.4f}, F1 Score: {fold_f1:.4f}')

        # Plain floats keep the returned values readable when echoed by a cell.
        mean_accuracy = float(np.mean(fold_accuracies))
        mean_f1 = float(np.mean(fold_f1_scores))
        print(f'Mean Accuracy for LR: {lr}, BS: {bs}: {mean_accuracy:.4f}')
        print(f'Mean F1 Score for LR: {lr}, BS: {bs}: {mean_f1:.4f}')

        # Selection is by mean accuracy only; F1 is recorded but not used.
        # NOTE(review): the saved logs show folds with accuracy ~0.36 but
        # F1 ~0.05 — consider whether accuracy is the right criterion here.
        # `not best_params` makes sure the first grid point is always recorded,
        # even if its mean accuracy is 0.
        if not best_params or mean_accuracy > best_accuracy:
            best_accuracy = mean_accuracy
            best_f1 = mean_f1
            best_params = {'learning_rate': lr, 'batch_size': bs}

    print(f'Best Parameters: {best_params}')
    print(f'Best Accuracy: {best_accuracy:.4f}')
    print(f'Best F1 Score: {best_f1:.4f}')

    return best_params, best_accuracy, best_f1

## Start the tuning

In [5]:
# Set up device and dataset
# NOTE(review): `device` repeats the assignment already made in the
# EfficientNet-B2 cell; the value is computed the same way in both places.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Relative path, so it is resolved against the kernel's working directory.
dataset_folder = '../data/lfw'
dataset = load_dataset(dataset_folder)
# Number of classes reported by the dataset; used to size the model's head.
num_classes = len(dataset.classes)

In [5]:
# Define ranges for hyperparameters
# These module-level names are reassigned by the later tuning cells, so their
# values depend on which cell ran last.
learning_rates = [0.01, 0.1]
batch_sizes = [32, 64, 16]

# Perform grid search tuning
# num_epochs=50 is an upper bound: the recorded logs show training stopping
# early (e.g. after 15 epochs).
# NOTE(review): the saved output below starts at "LR: 0.01, BS: 64", but the
# first combination produced from the lists above is (0.01, 32) — the output
# appears to come from an earlier version of this cell; re-run to refresh it.
grid_search_tuning(dataset, num_classes, learning_rates, batch_sizes, num_epochs=50)

Testing LR: 0.01, BS: 64
Fold 1
Epoch 1/50, Train Loss: 3.0670, Val Loss: 12.3375
Epoch 2/50, Train Loss: 2.6262, Val Loss: 5.7012
Epoch 3/50, Train Loss: 2.6251, Val Loss: 3.3625
Epoch 4/50, Train Loss: 2.5130, Val Loss: 2.5753
Epoch 5/50, Train Loss: 2.3822, Val Loss: 2.3078
Epoch 6/50, Train Loss: 2.3674, Val Loss: 2.2801
Epoch 7/50, Train Loss: 2.3441, Val Loss: 2.2851
Epoch 8/50, Train Loss: 2.3343, Val Loss: 2.2980
Epoch 9/50, Train Loss: 2.3196, Val Loss: 2.3098
Epoch 10/50, Train Loss: 2.3163, Val Loss: 2.2731
Epoch 11/50, Train Loss: 2.3151, Val Loss: 2.2882
Epoch 12/50, Train Loss: 2.2930, Val Loss: 2.2917
Epoch 13/50, Train Loss: 2.2965, Val Loss: 2.2776
Epoch 14/50, Train Loss: 2.2875, Val Loss: 2.3057
Epoch 15/50, Train Loss: 2.3087, Val Loss: 2.2846
Early stopping triggered after 15 epochs
Training complete
Accuracy: 0.0993
F1 Score: 0.0578
Fold 1 Accuracy: 0.0993, F1 Score: 0.0578
Fold 2
Epoch 1/50, Train Loss: 3.0340, Val Loss: 2.4740
Epoch 2/50, Train Loss: 2.6161, Val

## Refined Tuning
Try even smaller batch sizes, then a lower learning rate (0.001) at batch size 64.

In [6]:
# Define ranges for hyperparameters
# Learning rate is held at 0.01 while smaller batch sizes are explored.
# NOTE(review): (0.01, 16) was already part of the first grid, so that
# combination is trained a second time here.
learning_rates = [0.01]
batch_sizes = [4, 8, 16]

# Perform grid search tuning
grid_search_tuning(dataset, num_classes, learning_rates, batch_sizes, num_epochs=50)

Testing LR: 0.01, BS: 4
Fold 1
Epoch 1/50, Train Loss: 2.8062, Val Loss: 2.2313
Epoch 2/50, Train Loss: 2.4148, Val Loss: 2.1563
Epoch 3/50, Train Loss: 2.3710, Val Loss: 2.1377
Epoch 4/50, Train Loss: 2.3759, Val Loss: 2.2570
Epoch 5/50, Train Loss: 2.3581, Val Loss: 2.2222
Epoch 6/50, Train Loss: 2.3748, Val Loss: 2.1869
Epoch 7/50, Train Loss: 2.4070, Val Loss: 2.2000
Epoch 8/50, Train Loss: 2.3642, Val Loss: 2.0884
Epoch 9/50, Train Loss: 2.4005, Val Loss: 2.0965
Epoch 10/50, Train Loss: 2.4019, Val Loss: 2.3324
Epoch 11/50, Train Loss: 2.4180, Val Loss: 2.4383
Epoch 12/50, Train Loss: 2.4182, Val Loss: 2.3084
Epoch 13/50, Train Loss: 2.3853, Val Loss: 2.1691
Early stopping triggered after 13 epochs
Training complete
Accuracy: 0.3630
F1 Score: 0.0533
Fold 1 Accuracy: 0.3630, F1 Score: 0.0533
Fold 2
Epoch 1/50, Train Loss: 2.8047, Val Loss: 4.5692
Epoch 2/50, Train Loss: 2.4659, Val Loss: 2.2379
Epoch 3/50, Train Loss: 2.4006, Val Loss: 2.1755
Epoch 4/50, Train Loss: 2.3605, Val Los

In [None]:
# Define ranges for hyperparameters
# Single combination: a 10x lower learning rate than the earlier grids, at
# batch size 64.
# NOTE(review): this cell sits under the "smaller batch sizes" heading but
# varies the learning rate instead, and it has no recorded execution count —
# confirm the run finished before relying on its numbers.
learning_rates = [0.001]
batch_sizes = [64]

# Perform grid search tuning
grid_search_tuning(dataset, num_classes, learning_rates, batch_sizes, num_epochs=50)

Testing LR: 0.001, BS: 64
Fold 1
Epoch 1/50, Train Loss: 2.4751, Val Loss: 2.2950
Epoch 2/50, Train Loss: 2.4154, Val Loss: 2.3019
Epoch 3/50, Train Loss: 2.3468, Val Loss: 2.3018
Epoch 4/50, Train Loss: 2.3084, Val Loss: 2.2579
Epoch 5/50, Train Loss: 2.2743, Val Loss: 3.0812
Epoch 6/50, Train Loss: 2.2550, Val Loss: 2.6783
Epoch 7/50, Train Loss: 2.1580, Val Loss: 2.5046
Epoch 8/50, Train Loss: 2.1271, Val Loss: 2.3027
Epoch 9/50, Train Loss: 2.0666, Val Loss: 2.1249
Epoch 10/50, Train Loss: 1.9765, Val Loss: 2.6768
Epoch 11/50, Train Loss: 1.9072, Val Loss: 2.3050
Epoch 12/50, Train Loss: 1.8453, Val Loss: 2.0948
Epoch 13/50, Train Loss: 1.6182, Val Loss: 2.0356
Epoch 14/50, Train Loss: 1.5096, Val Loss: 2.2054
Epoch 15/50, Train Loss: 1.5955, Val Loss: 2.0021
Epoch 16/50, Train Loss: 1.4358, Val Loss: 2.5640
Epoch 17/50, Train Loss: 1.2947, Val Loss: 2.2783
Epoch 18/50, Train Loss: 1.2286, Val Loss: 2.4463
Epoch 19/50, Train Loss: 1.0742, Val Loss: 1.8978
Epoch 20/50, Train Loss: 0

## Results

```
Best Parameters: {'learning_rate': 0.01, 'batch_size': 4}

```