## Evaluated on the test set

In [1]:
import torch
from torchvision import datasets
from torchvision.transforms import v2
import os
import glob


In [2]:
data_transforms = {
    'train': v2.Compose([
        v2.PILToTensor(),
        v2.RandomResizedCrop(size=(224, 224), antialias=True),
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomAffine(degrees=30, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=10),
        v2.ToDtype(torch.float32, scale=True),  # to float32 in [0, 1]
        v2.Normalize(mean=(0.704, 0.740, 0.781), std=(0.115, 0.135, 0.160))
            ]),
    'val': v2.Compose([
        v2.PILToTensor(),
        v2.Resize(256, antialias=True),
        v2.CenterCrop(224),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=(0.704, 0.740, 0.781), std=(0.115, 0.135, 0.160))
    ]),
    'test': v2.Compose([
        v2.PILToTensor(),
        v2.Resize(256, antialias=True),
        v2.CenterCrop(224),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=(0.704, 0.740, 0.781), std=(0.115, 0.135, 0.160))
    ]),
}


data_dir = r'../0-Data/2-blobs'

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val','test']}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32,
                                              shuffle=True, num_workers=4, pin_memory=True)
               for x in ['train', 'val', 'test']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val','test']}
class_names = image_datasets['train'].classes

In [3]:
model_paths = glob.glob('models/*_best.pt')
model_paths

['models/efficientnetv2l_Adam_lr0.001_best.pt',
 'models/efficientnetv2l_SGD_lr0.001_best.pt',
 'models/efficientnetv2m_Adam_lr0.001_best.pt',
 'models/efficientnetv2m_SGD_lr0.001_best.pt',
 'models/efficientnetv2s_Adam_lr0.001_best.pt',
 'models/efficientnetv2s_SGD_lr0.001_best.pt',
 'models/resnet101_Adam_lr0.001_best.pt',
 'models/resnet101_SGD_lr0.001_best.pt',
 'models/resnet18_Adam_lr0.001_best.pt',
 'models/resnet18_SGD_lr0.001_best.pt',
 'models/resnet50_Adam_lr0.001_best.pt',
 'models/resnet50_SGD_lr0.001_best.pt',
 'models/swinv2b_Adam_lr0.001_best.pt',
 'models/swinv2b_SGD_lr0.001_best.pt',
 'models/swinv2s_Adam_lr0.001_best.pt',
 'models/swinv2s_SGD_lr0.001_best.pt',
 'models/swinv2t_Adam_lr0.001_best.pt',
 'models/swinv2t_SGD_lr0.001_best.pt']

In [6]:
import torch
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming `model_paths` is already defined in a previous notebook cell

results = []
data_type = 'test'  # Only 'test' dataset in this case

for model_path in model_paths:
    # Parse the filename to extract the model name, optimizer, and learning rate
    filename = model_path.split('/')[-1].replace('_best.pt', '')
    parts = filename.split('_')
    model_name = parts[0]       # Model name
    optimizer = parts[1]        # Optimizer
    learning_rate = parts[2].replace('lr', '')  # Learning rate

    # Remove any existing 'model' variable from globals
    if 'model' in globals():
        del globals()['model']
        
    model = torch.load(model_path, weights_only=False)
    model = model.cuda()
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloaders[data_type]:
            inputs = inputs.cuda()
            labels = labels.cuda()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate evaluation metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    # Append results to a list
    results.append({
        'Model Name': model_name,
        'Optimizer': optimizer,
        'Learning Rate': learning_rate,
        'Data Type': data_type,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    })

    # Print all metrics in one single line output
    print(f"Model {model_path} on {data_type} dataset: Accuracy: {accuracy:.2f}% | Precision: {precision:.2f} | Recall: {recall:.2f} | F1 Score: {f1:.2f}")

# Create a DataFrame from the results and print it
df = pd.DataFrame(results)
print(df)

Model models/efficientnetv2l_Adam_lr0.001_best.pt on test dataset: Accuracy: 0.94% | Precision: 0.90 | Recall: 0.87 | F1 Score: 0.88
Model models/efficientnetv2l_SGD_lr0.001_best.pt on test dataset: Accuracy: 0.96% | Precision: 0.92 | Recall: 0.93 | F1 Score: 0.92
Model models/efficientnetv2m_Adam_lr0.001_best.pt on test dataset: Accuracy: 0.94% | Precision: 0.91 | Recall: 0.89 | F1 Score: 0.89
Model models/efficientnetv2m_SGD_lr0.001_best.pt on test dataset: Accuracy: 0.96% | Precision: 0.93 | Recall: 0.93 | F1 Score: 0.92
Model models/efficientnetv2s_Adam_lr0.001_best.pt on test dataset: Accuracy: 0.94% | Precision: 0.88 | Recall: 0.88 | F1 Score: 0.88
Model models/efficientnetv2s_SGD_lr0.001_best.pt on test dataset: Accuracy: 0.95% | Precision: 0.90 | Recall: 0.91 | F1 Score: 0.90
Model models/resnet101_Adam_lr0.001_best.pt on test dataset: Accuracy: 0.93% | Precision: 0.86 | Recall: 0.84 | F1 Score: 0.84
Model models/resnet101_SGD_lr0.001_best.pt on test dataset: Accuracy: 0.95% | 

In [8]:
df.to_excel(r'3-evaluation_metrics.xlsx') 