In [13]:
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import numpy as np
from torchvision import transforms

# Experiment configuration values imported from settings.py
from settings import img_size, prototype_shape, num_classes, \
                     prototype_activation_function, \
                     add_on_layers_type, \
                     num_test_examples, img_size, test_batch_size

from dataset_class import ECGImageDataset
import model_for_superclasses as model

# Pick the compute device: use CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [14]:
# Image preprocessing applied to every test sample:
#   1. resize to a square of img_size x img_size pixels,
#   2. convert the image to a tensor,
#   3. normalise each of the three channels with the fixed mean/std below
#      (these values match the commonly used ImageNet channel statistics).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose(
    [
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        normalize,
    ]
)
# Function to create a subset of the dataset
def create_subset(dataset, num_examples, seed=None):
    """Return a random ``torch.utils.data.Subset`` drawn without replacement.

    Args:
        dataset: Any object that supports ``len()`` and integer indexing.
        num_examples: Requested number of examples. Values larger than
            ``len(dataset)`` are clamped to the dataset length.
        seed: Optional integer seed. When given, the same indices are drawn on
            every call, which makes the evaluated subset reproducible. When
            ``None`` (the default) NumPy's global random state is used, so
            existing callers (including ones that call ``np.random.seed``)
            behave as before.

    Returns:
        A ``Subset`` wrapping ``dataset`` with ``min(len(dataset),
        num_examples)`` distinct, randomly chosen indices.

    Raises:
        ValueError: If ``num_examples`` is negative.
    """
    if num_examples < 0:
        raise ValueError(f"num_examples must be non-negative, got {num_examples}")

    # Ensure num_examples doesn't exceed the dataset length
    num_examples = min(len(dataset), num_examples)
    if num_examples == 0:
        # Nothing to sample (empty dataset or zero requested); skip the RNG.
        return torch.utils.data.Subset(dataset, [])

    # A private RandomState keeps a seeded draw from disturbing global state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    indices = rng.choice(len(dataset), num_examples, replace=False)

    # Plain Python ints index every kind of dataset (lists, custom __getitem__).
    return torch.utils.data.Subset(dataset, indices.tolist())

# Initialize dataset and dataloader for testing
test_dataset = ECGImageDataset('test-100.json', transform=transform)

# num_test_examples is None -> evaluate on the full test set;
# otherwise evaluate on a random subset of (at most) that many examples.
if num_test_examples is not None:
    test_subset = create_subset(test_dataset, num_test_examples)
else:
    test_subset = test_dataset

# Create data loader for the subset.
# shuffle=False: evaluation gains nothing from shuffling (AUROC does not depend
# on sample order), and a fixed order keeps the collected prediction/label
# arrays identical from run to run, which downstream resampling relies on for
# reproducibility.
test_loader = torch.utils.data.DataLoader(
    test_subset,
    batch_size=test_batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=False,
)

In [20]:
# Checkpoint file whose weights are loaded into the network in a later cell
saved_model_path = 'saved_models/vgg19/14/20_17pushAUROC_0.9127.pth'

# Build the PPNet architecture from the imported settings and place it on the
# selected device in one expression
base_architecture = 'vgg19'
ppnet = model.construct_PPNet(
    base_architecture=base_architecture,
    pretrained=True,
    img_size=img_size,
    prototype_shape=prototype_shape,
    num_classes=num_classes,
    prototype_activation_function=prototype_activation_function,
    add_on_layers_type=add_on_layers_type,
).to(device)

In [21]:
# Load the trained weights into the network and switch it to evaluation mode.
# - map_location=device remaps tensors stored for another device (e.g. a
#   checkpoint written on a GPU machine) onto the device chosen above, so the
#   load also succeeds on a CPU-only machine.
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state_dict needs and avoids executing arbitrary pickled
#   code from the checkpoint file.
ppnet.load_state_dict(
    torch.load(saved_model_path, map_location=device, weights_only=True)
)
ppnet.eval()

  ppnet.load_state_dict(torch.load(saved_model_path))


PPNet(
	features: VGG19, batch_norm=False,
	img_size: 224,
	prototype_shape: (2272, 128, 1, 1),
	proto_layer_rf_info: [7, 32, 268, 16.0],
	num_classes: 5,
	epsilon: 0.0001
)

In [22]:
# Names of the heart conditions; position i in this list labels column i of
# the label/prediction matrices used for the per-class AUROC report
heart_conditions = [
    'CD',
    'HYP',
    'MI',
    'NORM',
    'STTC',
]

# Run the model over a data loader and print overall and per-class AUROC
def test_model(test_loader, model):
    """Evaluate ``model`` on ``test_loader`` and print AUROC scores.

    Every batch is moved to the module-level ``device`` and passed through the
    model with gradient tracking disabled. The scores and labels of all
    batches are concatenated, then the macro-averaged AUROC and one AUROC per
    entry of the module-level ``heart_conditions`` list are printed.

    Args:
        test_loader: Iterable yielding ``(images, labels)`` batches.
        model: Callable applied to an image batch; only element ``[0]`` of its
            return value is used (presumably the class scores - confirm
            against the model definition).

    Returns:
        None. Results are written to stdout.
    """
    pred_batches = []
    label_batches = []

    # Inference only, so gradient bookkeeping is switched off.
    with torch.no_grad():
        progress = tqdm(test_loader, desc="Testing", leave=False)
        for images, labels in progress:
            inputs = images.to(device)
            # Only element [0] of the batch labels is used; assumes it holds
            # the per-class target matrix - TODO confirm against the dataset.
            targets = labels[0].to(device)

            scores = model(inputs)[0]

            # Keep results on the CPU so they can be concatenated afterwards.
            pred_batches.append(scores.cpu())
            label_batches.append(targets.cpu())

    all_preds = torch.cat(pred_batches)
    all_labels = torch.cat(label_batches)

    # Macro-averaged AUROC over all label columns
    overall_auroc = roc_auc_score(
        all_labels,
        all_preds,
        average='macro',
        multi_class='ovr',
    )
    print(f"Overall AUROC: {overall_auroc:.4f}")

    # One AUROC per condition; column i belongs to heart_conditions[i]
    for i, condition in enumerate(heart_conditions):
        class_labels = all_labels[:, i]
        class_scores = all_preds[:, i]
        class_auroc = roc_auc_score(class_labels, class_scores)
        print(f"AUROC for {condition}: {class_auroc:.4f}")

# Evaluate the loaded network on the test loader and print its AUROC report
test_model(test_loader=test_loader, model=ppnet)

                                                                                                                                                                                     

Overall AUROC: 0.8970
AUROC for CD: 0.8651
AUROC for HYP: 0.8897
AUROC for MI: 0.8710
AUROC for NORM: 0.9357
AUROC for STTC: 0.9237




In [23]:
import torch
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import numpy as np

# Names of the heart conditions; position i in this list labels column i of
# the label/prediction matrices used for the per-class AUROC report
heart_conditions = [
    'CD',
    'HYP',
    'MI',
    'NORM',
    'STTC',
]

# Function to perform bootstrap sampling
def bootstrap_auroc(all_labels, all_preds, num_samples=1000, seed=None):
    """Bootstrap the macro AUROC and its 95% percentile confidence interval.

    The examples (rows) are resampled with replacement ``num_samples`` times;
    the macro AUROC is computed on each resample, and the mean together with
    the 2.5th / 97.5th percentiles of those scores is returned.

    Args:
        all_labels: Array-like of ground-truth labels, one row per example.
        all_preds: Array-like of predicted scores, aligned row-by-row with
            ``all_labels``.
        num_samples: Number of bootstrap resamples (must be >= 1).
        seed: Optional integer seed for reproducible resampling. When ``None``
            (the default) NumPy's global random state is used, so existing
            callers behave as before.

    Returns:
        Tuple ``(mean_auroc, ci_lower, ci_upper)``.

    Raises:
        ValueError: If the inputs are empty or of different lengths, if
            ``num_samples`` is smaller than 1, or if the AUROC could not be
            computed on any resample.
    """
    import warnings  # local import: only needed to report skipped resamples

    all_labels = np.asarray(all_labels)
    all_preds = np.asarray(all_preds)

    n_examples = len(all_labels)
    if n_examples == 0:
        raise ValueError("bootstrap_auroc requires at least one example")
    if len(all_preds) != n_examples:
        raise ValueError(
            f"all_labels and all_preds differ in length: "
            f"{n_examples} vs {len(all_preds)}"
        )
    if num_samples < 1:
        raise ValueError(f"num_samples must be >= 1, got {num_samples}")

    # A private RandomState keeps a seeded run from disturbing global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    auroc_scores = []
    last_error = None
    for _ in range(num_samples):
        # Sample row indices with replacement
        indices = rng.choice(n_examples, size=n_examples, replace=True)

        # AUROC is undefined for a resample in which some class has only one
        # outcome; depending on the scikit-learn version this surfaces as a
        # ValueError or as NaN. Such resamples are skipped rather than
        # aborting the whole bootstrap.
        try:
            sample_auroc = roc_auc_score(
                all_labels[indices],
                all_preds[indices],
                average='macro',
                multi_class='ovr',
            )
        except ValueError as err:
            last_error = err
            continue
        if not np.isfinite(sample_auroc):
            continue
        auroc_scores.append(sample_auroc)

    if not auroc_scores:
        # Every resample failed: report the underlying cause instead of
        # returning NaN statistics.
        raise ValueError(
            "AUROC could not be computed on any bootstrap resample"
        ) from last_error

    skipped = num_samples - len(auroc_scores)
    if skipped:
        warnings.warn(
            f"{skipped} of {num_samples} bootstrap resamples were skipped "
            f"because AUROC was undefined for them",
            RuntimeWarning,
        )

    # Convert to numpy array for easy statistics
    auroc_scores = np.array(auroc_scores)

    # Mean AUROC and the bounds of the 95% percentile interval
    mean_auroc = np.mean(auroc_scores)
    ci_lower = np.percentile(auroc_scores, 2.5)
    ci_upper = np.percentile(auroc_scores, 97.5)

    return mean_auroc, ci_lower, ci_upper

# Run the model over a data loader and print bootstrap AUROC statistics
def test_model_with_bootstrap(test_loader, model, num_samples=1000):
    """Evaluate ``model`` and print a bootstrapped overall AUROC with its CI.

    Every batch is moved to the module-level ``device`` and passed through the
    model with gradient tracking disabled. The concatenated scores and labels
    are converted to NumPy arrays and handed to ``bootstrap_auroc``; the
    resulting mean and confidence bounds are printed, followed by one AUROC
    per entry of the module-level ``heart_conditions`` list computed on the
    full (non-resampled) data.

    Args:
        test_loader: Iterable yielding ``(images, labels)`` batches.
        model: Callable applied to an image batch; only element ``[0]`` of its
            return value is used (presumably the class scores - confirm
            against the model definition).
        num_samples: Number of bootstrap resamples passed to
            ``bootstrap_auroc``.

    Returns:
        None. Results are written to stdout.
    """
    pred_batches = []
    label_batches = []

    # Inference only, so gradient bookkeeping is switched off.
    with torch.no_grad():
        progress = tqdm(test_loader, desc="Testing", leave=False)
        for images, labels in progress:
            inputs = images.to(device)
            # Only element [0] of the batch labels is used; assumes it holds
            # the per-class target matrix - TODO confirm against the dataset.
            targets = labels[0].to(device)

            scores = model(inputs)[0]

            # Keep results on the CPU so they can be concatenated afterwards.
            pred_batches.append(scores.cpu())
            label_batches.append(targets.cpu())

    # Concatenate all batches, then switch to NumPy for the resampling step
    all_preds_np = torch.cat(pred_batches).numpy()
    all_labels_np = torch.cat(label_batches).numpy()

    # Overall AUROC statistics from bootstrap resampling
    mean_auroc, ci_lower, ci_upper = bootstrap_auroc(
        all_labels_np,
        all_preds_np,
        num_samples,
    )
    print(f"Overall AUROC (Bootstrap Mean): {mean_auroc:.4f}")
    print(f"95% Confidence Interval: [{ci_lower:.4f}, {ci_upper:.4f}]")

    # One AUROC per condition; column i belongs to heart_conditions[i]
    for i, condition in enumerate(heart_conditions):
        class_labels = all_labels_np[:, i]
        class_scores = all_preds_np[:, i]
        class_auroc = roc_auc_score(class_labels, class_scores)
        print(f"AUROC for {condition}: {class_auroc:.4f}")

# Evaluate the loaded network and print its bootstrapped AUROC report
test_model_with_bootstrap(test_loader=test_loader, model=ppnet)


                                                                                                                                                                                     

Overall AUROC (Bootstrap Mean): 0.8969
95% Confidence Interval: [0.8873, 0.9058]
AUROC for CD: 0.8651
AUROC for HYP: 0.8897
AUROC for MI: 0.8710
AUROC for NORM: 0.9357
AUROC for STTC: 0.9237
