In [2]:
# Imports
import torch
import timm
import random
import csv   
import random
import numpy as np
import torch.nn as nn
import pandas as pd
from pathlib import Path
import pickle
import sys
import os
import configparser
import ast
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report


sys.path.append('../training')  # This adds the parent directory to the system path
from utils import *
from dataloaders import *

In [2]:
def eval_dataset_v2(model, criterion, loader, input_data, device, batch_size, export_preds=False):
    """Run ``model`` over every batch of ``loader`` and collect evaluation results.

    The model is switched to eval mode for the duration of the call (and its
    previous train/eval mode is restored afterwards), and no gradients are
    tracked.

    Args:
        model: ``nn.Module`` called as ``model(inputs)``. Its output is reduced
            with ``torch.max(outputs, 1)``, i.e. one score per class along dim 1.
        criterion: Loss called as ``criterion(outputs, labels)``. The running
            total multiplies the returned value by the batch's sample count, so
            it assumes a batch-mean loss (the ``nn.CrossEntropyLoss`` default)
            -- TODO confirm if another reduction is ever used.
        loader: Iterable yielding ``(inputs, fixation, labels)`` batches. The
            fixation element is unpacked but not used by this function.
        input_data: Sized dataset behind ``loader``; only ``len(input_data)`` is
            read, as the accuracy denominator.
        device: Device the model, inputs and labels are moved to.
        batch_size: Nominal loader batch size. Kept for interface
            compatibility; the loss is weighted by each batch's real size.
        export_preds: When true, also collect the raw model outputs and the
            predicted class index of every sample.

    Returns:
        Tuple ``(all_labels, all_probs, all_preds, accuracy, total_loss)``:
        ``all_labels`` is an array of the ground-truth labels in loader order;
        ``all_probs`` holds the raw model outputs (no softmax is applied here)
        and ``all_preds`` the arg-max class indices, both empty unless
        ``export_preds`` is true; ``accuracy`` is a percentage of
        ``len(input_data)`` (0.0 when that length is 0); ``total_loss`` is the
        sample-weighted sum of the batch losses.
    """
    n_correct = 0
    total_loss = 0.0
    all_labels = []
    all_probs = []
    all_preds = []

    model = model.to(device)
    # Evaluate with dropout / batch-norm in inference behaviour, but do not
    # leave the caller's model in a different mode than it arrived in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, _fixation, labels in loader:
                all_labels.extend(labels.detach().cpu().numpy())
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                # Weight by the real batch size: the last batch may be smaller
                # than the nominal ``batch_size``.
                total_loss += criterion(outputs, labels).item() * labels.size(0)

                _, predicted = torch.max(outputs, 1)
                if export_preds:
                    all_probs.extend(outputs.cpu().numpy())
                    all_preds.extend(predicted.cpu().numpy())
                # Accumulate a plain int so an empty loader still yields a number.
                n_correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    n_samples = len(input_data)
    accuracy = 100 * n_correct / n_samples if n_samples else 0.0

    return np.array(all_labels), np.array(all_probs), np.array(all_preds), accuracy, total_loss

def get_metrics(y_true, y_pred):
    """Compute binary-classification metrics from true and predicted labels.

    Args:
        y_true: Array-like of ground-truth class labels.
        y_pred: Array-like of predicted class labels (hard predictions).

    Returns:
        Tuple ``(accuracy, precision, recall, f1, roc_auc, conf_matrix)``.
        ``accuracy`` is a percentage; precision, recall and F1 use
        ``average='binary'``; ``roc_auc`` is computed from the hard predicted
        labels (not from scores) and is ``nan`` when ``y_true`` does not contain
        exactly two distinct labels; ``conf_matrix`` is the confusion matrix.

    Side effects:
        Prints the accuracy.
    """
    accuracy = accuracy_score(y_true, y_pred) * 100
    precision = precision_score(y_true, y_pred, average='binary')
    recall = recall_score(y_true, y_pred, average='binary')
    f1 = f1_score(y_true, y_pred, average='binary')

    # ROC-AUC is only defined when both classes occur in y_true. Return NaN
    # (not a string) otherwise so callers can still aggregate numerically.
    if len(set(y_true)) == 2:
        roc_auc = roc_auc_score(y_true, y_pred)
    else:
        roc_auc = float('nan')

    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f"Accuracy: {accuracy:.2f}")

    return accuracy, precision, recall, f1, roc_auc, conf_matrix

In [None]:
# Define parameters
model_type      = 'vit'      # 'vit' or 'fax'
dataset_type    = 'dreyeve'  # 'vr' or 'dreyeve'
subset_layers   = 5          # number of leading transformer blocks kept; a falsy value keeps all

# In this notebook lambda_value is only used to build checkpoint / CSV file
# names and in the summary print below.
if model_type == 'vit':
    lambda_value = None
elif model_type == 'fax':
    lambda_value    = 0.1 # 0.01, 0.1, 0.2, 0.8, 1
else:
    # Fail here instead of with a NameError further down.
    raise ValueError(f"Unsupported model_type: {model_type!r} (expected 'vit' or 'fax')")

train_data_cond = 'full'
test_data_cond  = 'full'
runs            = 10

data_path       = 'fixatt/data' # path to datasets
vit_version     = 'vit_base_patch16_224'
model_load_type = 'early_stopped'
validation      = True

# Prefer the second GPU as before, but fall back to whatever is available so
# the notebook still runs on single-GPU or CPU-only machines.
if torch.cuda.is_available():
    device      = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device      = torch.device("cpu")

if dataset_type == 'vr':
    batch_size = 64
elif dataset_type == 'dreyeve':
    batch_size = 16
else:
    raise ValueError(f"Unsupported dataset_type: {dataset_type!r} (expected 'vr' or 'dreyeve')")

print(f'model_type: {model_type}, dataset_type: {dataset_type}, subset_layers: {subset_layers}, lambda_value: {lambda_value}')


# Initialize lists to store metrics for each run
accuracies = []
f1_scores = []
roc_auc_scores = []

for run in range(runs):
    # Seed every RNG with the run index. This is done unconditionally: run 0
    # is a valid seed, and a truthiness check would silently leave it unseeded.
    random_state = run
    torch.manual_seed(random_state)
    random.seed(random_state)
    np.random.seed(random_state)

    # Load data
    # NOTE(review): dataset_type is passed both as the 1st and the 4th argument;
    # the 4th may have been meant to be test_data_cond -- confirm against
    # get_split_data's signature.
    train_list, valid_list, test_list = get_split_data(dataset_type, data_path, train_data_cond, dataset_type, validation, random_state)
    train_loader, valid_loader, test_loader, train_data, valid_data, test_data = get_loaders(dataset_type, model_type, train_list, valid_list, test_list, 
                                                                                            batch_size, train_data_cond, test_data_cond)

    # Load model and loss function
    pretrained_path = f'fixatt/training/pre_trained_models/{dataset_type}_{test_data_cond}_{model_type}'    
    pretrained_weights = f'{pretrained_path}/trained_LR_driving_{run}_{subset_layers}_layer_{lambda_value}_lambda_{model_load_type}.pt'
    model = timm.create_model(vit_version, num_classes=2).to(device)
    if subset_layers:
        # Keep only the first `subset_layers` transformer blocks so the module
        # structure matches the state dict loaded below.
        model.blocks = nn.Sequential(*[model.blocks[i] for i in range(subset_layers)])
    model.load_state_dict(torch.load(pretrained_weights, map_location=device))

    criterion = nn.CrossEntropyLoss()

    # Inference using test set
    all_labels, all_probs, all_preds, test_accuracy, test_total_loss = eval_dataset_v2(model, criterion, test_loader, test_data, device, batch_size, export_preds=True)

    accuracy, precision, recall, f1, roc_auc, conf_matrix = get_metrics(all_labels, all_preds)

    # Store the metrics. A non-numeric ROC-AUC (reported when the score is not
    # applicable) is stored as NaN so the aggregation below cannot crash.
    accuracies.append(accuracy)
    f1_scores.append(f1)
    roc_auc_scores.append(roc_auc if isinstance(roc_auc, (int, float)) else float('nan'))

# Compute mean and std for each metric
accuracy_mean = np.mean(accuracies)
accuracy_std = np.std(accuracies)

f1_mean = np.mean(f1_scores)
f1_std = np.std(f1_scores)

# NaN-aware so runs without a defined ROC-AUC are ignored rather than
# poisoning the summary.
roc_auc_mean = np.nanmean(roc_auc_scores)
roc_auc_std = np.nanstd(roc_auc_scores)

# Print the mean ± std for accuracy, f1, and roc_auc scores
print(f'Accuracy: {accuracy_mean:.2f} ± {accuracy_std:.2f}')
print(f'ROC AUC Score: {roc_auc_mean:.2f} ± {roc_auc_std:.2f}')
print(f'F1 Score: {f1_mean:.2f} ± {f1_std:.2f}')

In [None]:
# Sentinel meaning "read this value from the notebook-level variable".
_FROM_NOTEBOOK = object()


def plot_accuracies_from_csv(dataset, main_path, num_runs=10, row_index=-20,
                             layers=_FROM_NOTEBOOK, lam=_FROM_NOTEBOOK):
    """Print per-run, average and best test accuracy read from training-log CSVs.

    Despite its name this function does not plot anything; it only prints.

    For each run it looks for
    ``trained_LR_driving_{run}_{layers}_layer_{lam}_lambda.csv`` under
    ``main_path`` and reads the ``test_accuracy`` column at ``row_index``.
    Missing files are skipped silently; files too short to contain
    ``row_index`` are skipped with a message.

    Args:
        dataset: Dataset name, used only in the printed messages.
        main_path: Directory containing the per-run CSV files.
        num_runs: Number of run indices (``0 .. num_runs - 1``) to look for.
        row_index: Positional row whose ``test_accuracy`` is reported. The
            default of -20 is the original hard-coded value -- presumably the
            early-stopping patience; TODO confirm.
        layers: Layer count used in the file name. Defaults to the
            notebook-level ``subset_layers``.
        lam: Lambda value used in the file name. Defaults to the notebook-level
            ``lambda_value``.

    Returns:
        None.
    """
    if layers is _FROM_NOTEBOOK:
        layers = subset_layers
    if lam is _FROM_NOTEBOOK:
        lam = lambda_value

    total_accuracy = 0
    count_runs = 0
    best_run = None
    best_accuracy = None

    for run in range(num_runs):
        csv_file_path = Path(main_path) / f"trained_LR_driving_{run}_{layers}_layer_{lam}_lambda.csv"
        if not csv_file_path.exists():
            continue

        df = pd.read_csv(csv_file_path)
        # Guard the positional lookup: a short log would raise IndexError.
        if not -len(df) <= row_index < len(df):
            print(f"Skipping {dataset}, run {run}: {csv_file_path.name} has only {len(df)} rows")
            continue

        accuracy = df.iloc[row_index]['test_accuracy']
        print(f'Accuracy for {dataset}, run {run}: {accuracy:.2f}')

        total_accuracy += accuracy
        count_runs += 1

        # Track the best run without assuming accuracies are strictly positive.
        if best_run is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_run = run

    if count_runs == 0:
        # Nothing to average and no best run to report.
        print(f"No runs found for {dataset}")
        return

    average_accuracy = total_accuracy / count_runs
    print(f"For {dataset}: The average accuracy is {average_accuracy:.2f}")
    print(f"For {dataset}: The best run is {best_run} with an accuracy of {best_accuracy:.2f}")

# Summarise the per-run training logs stored alongside the pretrained weights.
# NOTE(review): the data condition is hard-coded as "full" here, whereas the
# checkpoint path in the evaluation loop uses test_data_cond -- confirm that
# these are meant to stay in sync.
main_path = f'fixatt/training/pre_trained_models/{dataset_type}_full_{model_type}'
plot_accuracies_from_csv(dataset=dataset_type, main_path=main_path)