In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Subset
from poutyne import Model
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix
from torchvision.datasets import ImageFolder
from custom_lib.data_prep import data_transformation_pipeline, data_loader
from custom_lib.eval_tools import bootstrap_evaluation_poutyne, tb_metrics_generator
import importlib
import torch
from torch import nn
import os
import re





# Set Bootstrap Params

In [2]:
data_dir = "~/Documents/data/"
external_data_folder = "mendeley_expanded_tb"
tb_class_index = 1
model_folder = "tb_results"
n_bootstraps = 1000

# Define Custom Functions

In [3]:
def load_model(model_name, **kwargs):
    """Dynamically loads and instantiates a model from custom_lib.custom_models."""
    module = importlib.import_module(f"custom_lib.custom_models.{model_name}")
    
    # Find the first class in the module (assuming only one model class per file)
    model_class = getattr(module, model_name, None)
    
    if model_class is None:
        raise ValueError(f"Could not find a class named '{model_name}' in '{module.__name__}'")

    return model_class(**kwargs)



 

In [4]:
def bootstrap_evaluation_poutyne(model, data, save_logs, n_bootstraps, seed, tb_class_index, results_dir=None):
    """
    Perform bootstrap evaluation of a model on a test dataset.

    Args:
        model: The trained Poutyne model to evaluate.
        data: The dataset to evaluate on (e.g., ImageFolder dataset).
        save_logs: Whether to save the metric distributions to CSV.
        n_bootstraps: Number of bootstrap samples to generate.
        seed: Random seed for reproducibility.
        results_dir: Directory to save the bootstrap distribution CSV.

    Returns:
        A pandas DataFrame with mean and confidence intervals for:
        - Accuracy
        - F1 Score
        - Sensitivity (Recall)
        - Specificity
        - Test Loss
    """
    rng = np.random.RandomState(seed)

    # Store bootstrapped metrics
    metrics = {
        "accuracy": [],
        "f1_score": [],
        "sensitivity": [],
        "specificity": [],
        "loss": [],
    }

    # Calculate 10% of the dataset size
    # subset_size = int(0.1 * len(data))

    for i in range(n_bootstraps):
        print(f"step {i + 1}/{n_bootstraps}")
        # Sample 10% of the data with replacement
        sampled_indices = rng.choice(len(data), len(data), replace=True)
        sampled_subset = Subset(data, sampled_indices)
        sampled_loader = DataLoader(sampled_subset, batch_size=32 * 2, shuffle=False)

        # Evaluate the model on the sampled subset
        sample_test_loss, sample_test_acc, sample_y_pred, sample_y_true = model.evaluate_generator(
            sampled_loader, 
            return_pred=True,
            return_ground_truth=True
        )

        sample_sens, sample_spec = tb_metrics_generator(y_pred=sample_y_pred, y_true=sample_y_true, tb_class_index=tb_class_index)
        sample_f1_score = 2 * (sample_sens * sample_spec) / (sample_spec + sample_sens)

        # Append metrics to the list
        metrics["accuracy"].append(sample_test_acc)
        metrics["loss"].append(sample_test_loss)
        metrics["sensitivity"].append(sample_sens)
        metrics["specificity"].append(sample_spec)
        metrics["f1_score"].append(sample_f1_score)

    # Convert metrics to a DataFrame
    metrics_df = pd.DataFrame(metrics)

    if save_logs:
        metrics_df.to_csv(f"{results_dir}/bootstrap_distribution.csv", index=False)

    # Calculate mean and confidence intervals
    mean_metrics = metrics_df.mean()
    confidence_intervals = metrics_df.apply(lambda x: np.percentile(x, [2.5, 97.5]))

    # Create a wide DataFrame for mean and confidence intervals
    results_df = pd.DataFrame()

    # Add mean, lower_ci, and upper_ci for each metric
    for metric in mean_metrics.index:
        results_df[f"{metric}_mean"] = [mean_metrics[metric]]
        results_df[f"{metric}_lower_ci"] = [confidence_intervals[metric][0]]  # 2.5th percentile
        results_df[f"{metric}_upper_ci"] = [confidence_intervals[metric][1]]  # 97.5th percentile

    if save_logs:
        results_df.to_csv(f"{results_dir}/metrics_df.csv", index=False)

    return results_df

In [5]:
device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
        )
print(f"Using {device} device")

model = load_model(
        "truncated_b0",
        num_classes=2,
        removed_layers=0,
        batch_size=32,
        image_size=224,
        pretrained=True,
        dropout_p=.2
                )

Using mps device


In [6]:


# Create val transform
val_transform = data_transformation_pipeline(image_size = 224,
                                            center_crop=224, 
                                            normalize=True,
                                            is_train=False)

external_data_path = f"{data_dir}/{external_data_folder}"

# Apply transformations to dataset
external_data = ImageFolder(external_data_path, transform=val_transform)

external_test_loader = DataLoader(
                external_data, batch_size=32 * 2, num_workers=4, pin_memory=True, drop_last=False)



In [7]:
# subdirs = ["truncated_b0_reduced_layers_5_2025-03-07_18-23", "truncated_b0_reduced_layers_4_2025-03-07_17-50", "truncated_b0_reduced_layers_0_2025-03-10_13-48"]

# bootstrap_results = pd.DataFrame()

# for subdir in subdirs:
#     results_path = f"external_bootstrap_results1k/{subdir}"

#     # Regular expression to extract the number after "_layers_"
#     match = re.search(r'_layers_(\d+)', subdir)
#     removed_layers = int(match.group(1))

#     # Create the directory, ensuring parent directories exist
#     os.makedirs(results_path, exist_ok=True)

#     model = load_model(
#                 "truncated_b0",
#                 num_classes=2,
#                 removed_layers=removed_layers,
#                 batch_size=32,
#                 image_size=224,
#                 pretrained=True,
#                 dropout_p=.2
#                         )


#     poutyne_model = Model(
#                         model,
#                         optimizer=torch.optim.Adam(model.parameters(), lr=.001),
#                         loss_function=nn.CrossEntropyLoss(),
#                         batch_metrics=["accuracy"],
#                         device=device
#                         )
    
#     poutyne_model.network.load_state_dict(torch.load(f"{model_folder}/{subdir}/best_model.pth"))

    
#     new_rows = bootstrap_evaluation_poutyne(model=poutyne_model, seed=42, 
#                              data=external_data,
#                              save_logs=True, 
#                              n_bootstraps=n_bootstraps,
#                              tb_class_index=tb_class_index,
#                              results_dir=results_path)

#     new_rows['model_name'] = subdir

#     new_rows['trunc_blocks'] = removed_layers

#     bootstrap_results = pd.concat([bootstrap_results, new_rows])


# bootstrap_results.to_csv(f"external_bootstrap_results1k/bootstrap_results.csv", index=False)

In [11]:
os.makedirs('external_bootstrap_results1k_new/truncated_b0_act1_reduced_layers_3_2025-03-16_13-28', exist_ok=True)


model = load_model(
                "truncated_b0_act1",
                num_classes=2,
                removed_layers=3,
                batch_size=32,
                image_size=224,
                pretrained=True,
                dropout_p=.2
                        )


poutyne_model = Model(
                    model,
                    optimizer=torch.optim.Adam(model.parameters(), lr=.001),
                    loss_function=nn.CrossEntropyLoss(),
                    batch_metrics=["accuracy"],
                    device=device
                    )
    
poutyne_model.network.load_state_dict(torch.load(f"tb_results_new/truncated_b0_act1_reduced_layers_3_2025-03-16_13-28/best_model.pth"))



  poutyne_model.network.load_state_dict(torch.load(f"tb_results_new/truncated_b0_act1_reduced_layers_3_2025-03-16_13-28/best_model.pth"))


<All keys matched successfully>

In [14]:
new_row =  bootstrap_evaluation_poutyne(model=poutyne_model, seed=42, 
                             data=external_data,
                             save_logs=True, 
                             n_bootstraps=1000,
                             tb_class_index=tb_class_index,
                             results_dir='external_bootstrap_results1k_new/truncated_b0_act1_reduced_layers_3_2025-03-16_13-28')

step 1/1000
Test steps: 76 10.92s test_loss: 0.084357 test_acc: 97.006237                                 
step 2/1000
Test steps: 76 10.24s test_loss: 0.074349 test_acc: 97.463617                                
step 3/1000
Test steps: 76 10.22s test_loss: 0.074839 test_acc: 97.151767                                
step 4/1000
Test steps: 76 10.37s test_loss: 0.071837 test_acc: 97.463617                                
step 5/1000
Test steps: 76 10.42s test_loss: 0.089625 test_acc: 96.923077                                
step 6/1000
Test steps: 76 10.28s test_loss: 0.079257 test_acc: 97.027027                                
step 7/1000
Test steps: 76 10.23s test_loss: 0.065345 test_acc: 97.588358                                
step 8/1000
Test steps: 76 10.20s test_loss: 0.069853 test_acc: 97.484407                                 
step 9/1000
Test steps: 76 10.30s test_loss: 0.065735 test_acc: 97.879418                                
step 10/1000
Test steps: 76 10.46s test_loss

In [15]:
new_row

Unnamed: 0,accuracy_mean,accuracy_lower_ci,accuracy_upper_ci,f1_score_mean,f1_score_lower_ci,f1_score_upper_ci,sensitivity_mean,sensitivity_lower_ci,sensitivity_upper_ci,specificity_mean,specificity_lower_ci,specificity_upper_ci,loss_mean,loss_lower_ci,loss_upper_ci
0,97.392495,96.943347,97.837838,0.973039,0.968211,0.977717,0.989652,0.985317,0.993228,0.956985,0.948695,0.964807,0.071865,0.059795,0.083496
