In [None]:
# Fetch the bearpaw/pytorch-classification repository; later cells import its
# `models.cifar` package for the locally trained network definitions.
!git clone https://github.com/bearpaw/pytorch-classification.git

Cloning into 'pytorch-classification'...
remote: Enumerating objects: 287, done.[K
remote: Total 287 (delta 0), reused 0 (delta 0), pack-reused 287 (from 1)[K
Receiving objects: 100% (287/287), 440.37 KiB | 27.52 MiB/s, done.
Resolving deltas: 100% (167/167), done.


In [None]:
# Mount Google Drive at /content/drive/ so the shared-drive archive unpacked in
# the next cell is reachable from the Colab filesystem.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Quietly unpack the project archive into /content/.
# NOTE(review): presumably this creates /content/Project/ with the checkpoints
# loaded by the calibration cells below -- confirm against the archive layout.
!unzip -q /content/drive/Shareddrives/FML_Project/Project.zip -d /content/

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from collections import OrderedDict
import matplotlib.pyplot as plt
import os
from sklearn.isotonic import IsotonicRegression # Import for Isotonic Regression

# Define device
# Use the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Normalization stats for CIFAR-10
# Evaluation-only preprocessing: tensor conversion followed by per-channel
# mean/std normalization (no augmentation).
transform_cifar = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
])

# --- Load and Split Data (for CIFAR-10) ---
# The official test split (train=False) is divided into two disjoint halves:
# one is used to fit the calibrator ("validation"), the other to report metrics.
print("\nLoading and splitting CIFAR-10 data...")
try:
    full_test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cifar)
    val_size, test_size = 5000, 5000
    # A fixed generator seed makes the split identical on every run.
    val_dataset, test_dataset = random_split(full_test_dataset, [val_size, test_size],
                                             generator=torch.Generator().manual_seed(42))
    # shuffle=False keeps the sample order stable between passes over a loader.
    val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)
    print(f"Data successfully split from original CIFAR-10 test set:")
    print(f"  -> New Validation samples: {len(val_dataset)}")
    print(f"  -> New Test samples:       {len(test_dataset)}")
except Exception as e:
    print(f"‚ùå ERROR: Could not load CIFAR-10 data. {e}")
    # NOTE(review): exit() inside a notebook cell ends the session instead of
    # surfacing a traceback; consider re-raising instead.
    exit()

# --- Helper Functions ---
def get_predictions(model, loader, device):
    """Run `model` over `loader` and collect top-1 confidence and correctness.

    Args:
        model: Module returning class logits; switched to eval mode here.
        loader: Iterable yielding `(inputs, targets)` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple `(confidences, correct)` of NumPy arrays with one entry per
        sample: the maximum softmax probability, and whether the arg-max class
        equals the target.
    """
    model.eval()
    confidences, hits = [], []
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax over the class dimension, then take the winning class.
            top_prob, top_class = F.softmax(model(inputs), dim=1).max(dim=1)
            confidences.extend(top_prob.cpu().numpy())
            hits.extend(top_class.eq(targets).cpu().numpy())
    return np.array(confidences), np.array(hits)

def calculate_ece(confidences, correct, n_bins=15):
    """Expected Calibration Error, in percent, over equal-width confidence bins.

    Samples are grouped into `n_bins` bins on [0, 1]; each non-empty bin
    contributes |mean confidence - accuracy| weighted by its share of samples.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width bins.

    Returns:
        ECE scaled to a percentage; 0.0 when there are no samples.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    if confidences.size == 0:
        # Nothing to bin; avoid the NaN/RuntimeWarning from averaging nothing.
        return 0.0

    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        bin_lower, bin_upper = bin_boundaries[i], bin_boundaries[i + 1]
        if i == 0:
            # The first bin is closed on the left: isotonic regression can emit
            # a calibrated confidence of exactly 0.0, and a half-open (0, ...]
            # bin would silently drop those samples from the estimate.
            in_bin = (confidences >= bin_lower) & (confidences <= bin_upper)
        else:
            in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(correct[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
    return ece * 100

def plot_reliability_diagram(confidences, correct, n_bins, model_name, suffix):
    """Save a reliability diagram (per-bin accuracy vs. confidence) as a PNG.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width confidence bins.
        model_name: Used in the title and, with spaces replaced, the file name.
        suffix: Tag appended to the title and file name.

    The image is written under /content/cifar-10 (created if missing) and the
    saved path is printed.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_lowers, bin_uppers = bin_boundaries[:-1], bin_boundaries[1:]
    bin_accs = np.zeros(n_bins)
    for i, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        # First bin is closed on the left so a calibrated confidence of exactly
        # 0.0 (possible after isotonic regression) is not dropped.
        above_lower = (confidences >= bin_lower) if i == 0 else (confidences > bin_lower)
        in_bin = above_lower & (confidences <= bin_upper)
        if in_bin.any():
            bin_accs[i] = np.mean(correct[in_bin])

    save_folder = "/content/cifar-10"; os.makedirs(save_folder, exist_ok=True)
    filename = f"{save_folder}/{model_name.replace(' ', '_')}_C10_reliability_{suffix}.png"

    fig = plt.figure(figsize=(8, 7))
    try:
        # Bars sit at bin centres; empty bins keep height 0.
        plt.bar(bin_lowers + 1/(2*n_bins), bin_accs, width=1/n_bins*0.9, alpha=0.3, color='red', label='Accuracy')
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.xlabel('Confidence'); plt.ylabel('Accuracy'); plt.title(f'Reliability Diagram: {model_name} ({suffix})')
        plt.legend(); plt.xlim(0, 1); plt.ylim(0, 1)
        plt.savefig(filename)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    print(f"‚úÖ Saved reliability diagram: {filename}")

def load_checkpoint(model, path, device):
    """Load weights from the checkpoint at `path` into `model` and return it.

    The checkpoint may be either a dict holding the weights under
    'state_dict' or the state dict itself. Every 'module.' substring is removed
    from parameter names before loading.

    NOTE(review): weights_only=False allows arbitrary pickled objects to be
    deserialized; only use this on checkpoints from a trusted source.
    """
    print(f"Loading checkpoint: {path}")
    checkpoint = torch.load(path, map_location=device, weights_only=False)
    weights = checkpoint.get('state_dict', checkpoint)
    cleaned = OrderedDict()
    for key, tensor in weights.items():
        cleaned[key.replace('module.', '')] = tensor
    model.load_state_dict(cleaned)
    return model

# ================================================
# ISOTONIC REGRESSION CALIBRATION CLASS
# ================================================
class IsotonicRegressionCalibrator:
    """Thin wrapper that calibrates confidences with isotonic regression."""

    def __init__(self):
        # out_of_bounds="clip" is forwarded to the regressor so that inputs
        # outside the fitted range are clipped rather than rejected.
        regressor = IsotonicRegression(out_of_bounds="clip")
        self.ir = regressor

    def fit(self, confidences, correct):
        """Fit the mapping from raw confidence to observed correctness."""
        # The inputs are handed over unsorted; ordering is left to the regressor.
        self.ir.fit(confidences, correct)

    def predict(self, confidences):
        """Return calibrated confidences for `confidences`."""
        calibrated = self.ir.predict(confidences)
        return calibrated

# --- Main Calibration Function for Isotonic Regression ---
def run_isotonic_calibration(model, model_name, n_bins=15):
    """Calibrate `model` with isotonic regression and report before/after metrics.

    Uses the module-level `device`, `val_loader` and `test_loader`: the
    calibrator is fitted on the validation loader and evaluated on the test
    loader. Reliability diagrams are saved for both stages.

    Args:
        model: Classifier to evaluate; moved to `device` and set to eval mode.
        model_name: Label used in printed output, plots and the result row.
        n_bins: Number of bins for ECE and the reliability diagrams.

    Returns:
        Dict with keys 'name', 'acc', 'ece_before', 'ece_after',
        'conf_before', 'conf_after' (all metrics in percent).
    """
    banner = "=" * 70
    print("\n" + banner)
    print(f"üìä Isotonic Regression Calibration for: {model_name} (CIFAR-10)")
    print(banner)

    model.to(device).eval()

    # --- Uncalibrated metrics on the test split ---
    test_conf, test_hits = get_predictions(model, test_loader, device)
    accuracy = np.mean(test_hits) * 100
    ece_before = calculate_ece(test_conf, test_hits, n_bins)
    mean_conf_before = np.mean(test_conf) * 100

    print(f"\nBefore Calibration ‚Üí Acc: {accuracy:.2f}%, Conf: {mean_conf_before:.2f}%, ECE: {ece_before:.3f}%")
    plot_reliability_diagram(test_conf, test_hits, n_bins, model_name, "before_iso")

    # --- Learn the calibration map from the validation split only ---
    val_conf, val_hits = get_predictions(model, val_loader, device)
    calibrator = IsotonicRegressionCalibrator()
    calibrator.fit(val_conf, val_hits)
    print("‚úÖ Isotonic Regression fitted on validation data")

    # --- Calibrated metrics on the test split ---
    # Predictions (and hence accuracy) are unchanged; only confidences move.
    test_conf_cal = calibrator.predict(test_conf)
    ece_after = calculate_ece(test_conf_cal, test_hits, n_bins)
    mean_conf_after = np.mean(test_conf_cal) * 100

    print(f"\nAfter Calibration ‚Üí Acc: {accuracy:.2f}%, Conf: {mean_conf_after:.2f}%, ECE: {ece_after:.3f}%")
    plot_reliability_diagram(test_conf_cal, test_hits, n_bins, model_name, "after_iso")

    # --- Row for the summary table ---
    return dict(
        name=model_name,
        acc=accuracy,
        ece_before=ece_before,
        ece_after=ece_after,
        conf_before=mean_conf_before,
        conf_after=mean_conf_after,
    )

# =============================================
# --- RUN CALIBRATION FOR CIFAR-10 MODELS ---
# =============================================
# One result dict per successfully calibrated model, in run order.
all_results_iso = []

try:
    # `models.cifar` lives inside the cloned pytorch-classification repo, which
    # is not importable from the notebook's working directory on its own
    # (the import previously failed with "No module named 'models'").
    # Put the repo root on sys.path before importing.
    import sys
    for _repo_dir in (os.path.abspath("pytorch-classification"), "/content/pytorch-classification"):
        if os.path.isdir(_repo_dir) and _repo_dir not in sys.path:
            sys.path.insert(0, _repo_dir)

    from models.cifar import resnet
    print("\n--- Running Isotonic Regression calibration for local ResNet-164 ---")
    model_resnet164 = resnet(depth=164, num_classes=10, block_name='Bottleneck')
    # NOTE(review): the folder name says "resnet110" while the model built above
    # is depth 164 -- confirm this checkpoint matches the architecture.
    path = '/content/Project/resnet110cifar10/model_best.pth.tar'
    model_resnet164 = load_checkpoint(model_resnet164, path, device)
    results = run_isotonic_calibration(model_resnet164, "ResNet-164")
    all_results_iso.append(results)
except Exception as e:
    # Best effort: report the failure and continue with the next model.
    print(f"‚ùå Skipping local ResNet-164: {e}")

try:
    print("\n--- Running Isotonic Regression calibration for ResNet-56 (torch.hub) ---")
    # Pretrained weights are downloaded through torch.hub.
    model_hub = torch.hub.load("chenyaofo/pytorch-cifar-models", "cifar10_resnet56", pretrained=True, trust_repo=True)
    results = run_isotonic_calibration(model_hub, "ResNet-56 (Hub)")
    all_results_iso.append(results)
except Exception as e:
    print(f"‚ùå Skipping ResNet-56 (Hub): {e}")

# Summary table over every model that completed.
if all_results_iso:
    print("\n" + "="*130)
    print("üìä Final Calibration Comparison on CIFAR-10 Test Set (Isotonic Regression)")
    print("="*130)
    print(f"{'Model':<22} | {'Accuracy':>10} | {'ECE (Before)':>13} | {'ECE (After)':>12} | {'Conf (Before)':>15} | {'Conf (After)':>15}")
    print("-"*130)
    for r in all_results_iso:
        print(f"{r['name']:<22} | {r['acc']:>9.2f}% | {r['ece_before']:>12.4f}% | {r['ece_after']:>11.4f}% | {r['conf_before']:>14.2f}% | {r['conf_after']:>14.2f}%")
    print("="*130)
else:
    print("\nNo models were successfully calibrated with Isotonic Regression.")

Using device: cuda

Loading and splitting CIFAR-10 data...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 170M/170M [00:14<00:00, 12.0MB/s]


Data successfully split from original CIFAR-10 test set:
  -> New Validation samples: 5000
  -> New Test samples:       5000
‚ùå Skipping local ResNet-164: No module named 'models'

--- Running Isotonic Regression calibration for ResNet-56 (torch.hub) ---
Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/chenyaofo/pytorch-cifar-models/releases/download/resnet/cifar10_resnet56-187c023a.pt" to /root/.cache/torch/hub/checkpoints/cifar10_resnet56-187c023a.pt


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3.39M/3.39M [00:00<00:00, 56.9MB/s]



üìä Isotonic Regression Calibration for: ResNet-56 (Hub) (CIFAR-10)

Before Calibration ‚Üí Acc: 94.12%, Conf: 98.07%, ECE: 3.964%
‚úÖ Saved reliability diagram: /content/cifar-10/ResNet-56_(Hub)_C10_reliability_before_iso.png
‚úÖ Isotonic Regression fitted on validation data

After Calibration ‚Üí Acc: 94.12%, Conf: 94.53%, ECE: 0.841%
‚úÖ Saved reliability diagram: /content/cifar-10/ResNet-56_(Hub)_C10_reliability_after_iso.png

üìä Final Calibration Comparison on CIFAR-10 Test Set (Isotonic Regression)
Model                  |   Accuracy |  ECE (Before) |  ECE (After) |   Conf (Before) |    Conf (After)
----------------------------------------------------------------------------------------------------------------------------------
ResNet-56 (Hub)        |     94.12% |       3.9644% |      0.8414% |          98.07% |          94.53%


In [None]:
# ========================================================
# ISOTONIC REGRESSION CALIBRATION FOR CIFAR-100 MODELS
# ========================================================
%cd pytorch-classification/
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from collections import OrderedDict
import matplotlib.pyplot as plt
import os
from sklearn.isotonic import IsotonicRegression # Ensure this is imported

# Define device (re-use from previous cells, ensuring it's available)
# Recomputed here so this cell does not depend on an earlier cell having run.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- CIFAR-100 Data Loading and Transforms ---
# Evaluation-only preprocessing: tensor conversion plus per-channel normalization.
# NOTE(review): confirm these mean/std values match the ones each checkpoint
# was trained with; a mismatch would shift both accuracy and confidence.
transform_cifar100 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

print("\nLoading and splitting CIFAR-100 data for Isotonic Regression...")
try:
    full_test_dataset_cifar100 = datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_cifar100)
    # 5000 samples fit the calibrator; whatever remains is the reporting split.
    val_size_cifar100 = 5000
    test_size_cifar100 = len(full_test_dataset_cifar100) - val_size_cifar100

    if test_size_cifar100 <= 0:
        raise ValueError("Validation size exceeds CIFAR-100 test dataset size!")

    # A fixed generator seed makes the split identical on every run.
    val_dataset_cifar100, test_dataset_cifar100 = random_split(
        full_test_dataset_cifar100, [val_size_cifar100, test_size_cifar100],
        generator=torch.Generator().manual_seed(42)
    )

    # shuffle=False keeps the sample order stable between passes over a loader.
    val_loader_cifar100 = DataLoader(val_dataset_cifar100, batch_size=100, shuffle=False, num_workers=2)
    test_loader_cifar100 = DataLoader(test_dataset_cifar100, batch_size=100, shuffle=False, num_workers=2)

    print(f"CIFAR-100 Data successfully split:")
    print(f"  -> Validation samples: {len(val_dataset_cifar100)}")
    print(f"  -> Test samples:       {len(test_dataset_cifar100)}")
except Exception as e:
    print(f"‚ùå ERROR: Could not load CIFAR-100 data. {e}")
    # NOTE(review): exit() inside a notebook cell ends the session instead of
    # surfacing a traceback; consider re-raising instead.
    exit()

# --- Helper Functions (copied for self-containment) ---
def get_predictions(model, loader, device):
    """Run `model` over `loader` and collect top-1 confidence and correctness.

    Args:
        model: Module returning class logits; switched to eval mode here.
        loader: Iterable yielding `(inputs, targets)` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple `(confidences, correct)` of NumPy arrays with one entry per
        sample: the maximum softmax probability, and whether the arg-max class
        equals the target.
    """
    model.eval()
    confidences, hits = [], []
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax over the class dimension, then take the winning class.
            top_prob, top_class = F.softmax(model(inputs), dim=1).max(dim=1)
            confidences.extend(top_prob.cpu().numpy())
            hits.extend(top_class.eq(targets).cpu().numpy())
    return np.array(confidences), np.array(hits)

def calculate_ece(confidences, correct, n_bins=15):
    """Expected Calibration Error, in percent, over equal-width confidence bins.

    Samples are grouped into `n_bins` bins on [0, 1]; each non-empty bin
    contributes |mean confidence - accuracy| weighted by its share of samples.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width bins.

    Returns:
        ECE scaled to a percentage; 0.0 when there are no samples.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    if confidences.size == 0:
        # Nothing to bin; avoid the NaN/RuntimeWarning from averaging nothing.
        return 0.0

    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        bin_lower, bin_upper = bin_boundaries[i], bin_boundaries[i + 1]
        if i == 0:
            # The first bin is closed on the left: isotonic regression can emit
            # a calibrated confidence of exactly 0.0, and a half-open (0, ...]
            # bin would silently drop those samples from the estimate.
            in_bin = (confidences >= bin_lower) & (confidences <= bin_upper)
        else:
            in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(correct[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
    return ece * 100

def plot_reliability_diagram(confidences, correct, n_bins, model_name, suffix):
    """Save a reliability diagram (per-bin accuracy vs. confidence) as a PNG.

    Only non-empty bins are drawn.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width confidence bins.
        model_name: Used in the title and, with spaces replaced, the file name.
        suffix: Tag appended to the title and file name.

    The image is written under /content/cifar-100 (created if missing) and the
    saved path is printed.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_lowers, bin_uppers = bin_boundaries[:-1], bin_boundaries[1:]
    bin_accs, bin_props = np.zeros(n_bins), np.zeros(n_bins)
    for i, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        # First bin is closed on the left so a calibrated confidence of exactly
        # 0.0 (possible after isotonic regression) is not dropped.
        above_lower = (confidences >= bin_lower) if i == 0 else (confidences > bin_lower)
        in_bin = above_lower & (confidences <= bin_upper)
        if in_bin.any():
            bin_props[i] = np.mean(in_bin)
            bin_accs[i] = np.mean(correct[in_bin])

    save_folder = "/content/cifar-100"; os.makedirs(save_folder, exist_ok=True) # Changed folder to cifar-100
    filename = f"{save_folder}/{model_name.replace(' ', '_')}_C100_reliability_{suffix}.png" # Changed filename for cifar-100

    fig = plt.figure(figsize=(8, 7))
    try:
        bar_width = 1.0 / n_bins
        bar_centers = bin_lowers + bar_width / 2
        # Skip bins that received no samples instead of drawing zero-height bars.
        non_empty_mask = bin_props > 0
        plt.bar(bar_centers[non_empty_mask], bin_accs[non_empty_mask], width=bar_width * 0.9, alpha=0.3, color='red', edgecolor='red', label='Accuracy')
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.xlabel('Confidence'); plt.ylabel('Accuracy'); plt.title(f'Reliability Diagram: {model_name} ({suffix})')
        plt.legend(); plt.xlim(0, 1); plt.ylim(0, 1)
        plt.savefig(filename)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    print(f"‚úÖ Saved reliability diagram: {filename}")

def load_checkpoint(model, path, device):
    """Load weights from the checkpoint at `path` into `model` and return it.

    Loading is first attempted with weights_only=False; if that raises, a
    second attempt is made with weights_only=True. When both fail, the error
    from the first attempt is re-raised. The checkpoint may hold the weights
    under 'state_dict' or be the state dict itself. Every 'module.' substring
    is removed from parameter names before loading.

    NOTE(review): weights_only=False allows arbitrary pickled objects to be
    deserialized; only use this on checkpoints from a trusted source. Trying
    the stricter mode second is unlikely to succeed where the permissive mode
    failed -- consider swapping the order.
    """
    print(f"Loading checkpoint: {path}")
    try:
        checkpoint = torch.load(path, map_location=device, weights_only=False)
    except Exception as first_error:
        print(f"Error loading checkpoint with weights_only=False: {first_error}")
        try:
            print("Attempting fallback with weights_only=True...")
            checkpoint = torch.load(path, map_location=device, weights_only=True)
        except Exception as retry_error:
            print(f"Fallback also failed: {retry_error}")
            # Surface the original failure, not the fallback's.
            raise first_error

    if 'state_dict' not in checkpoint:
        print("Warning: 'state_dict' key not found. Assuming checkpoint is the state_dict itself.")
        weights = checkpoint
    else:
        weights = checkpoint['state_dict']

    cleaned = OrderedDict()
    for key, tensor in weights.items():
        cleaned[key.replace('module.', '')] = tensor
    model.load_state_dict(cleaned)
    return model

# ================================================
# ISOTONIC REGRESSION CALIBRATION CLASS
# ================================================
class IsotonicRegressionCalibrator:
    """Thin wrapper that calibrates confidences with isotonic regression."""

    def __init__(self):
        # out_of_bounds="clip" is forwarded to the regressor so that inputs
        # outside the fitted range are clipped rather than rejected.
        regressor = IsotonicRegression(out_of_bounds="clip")
        self.ir = regressor

    def fit(self, confidences, correct):
        """Fit the mapping from raw confidence to observed correctness."""
        self.ir.fit(confidences, correct)

    def predict(self, confidences):
        """Return calibrated confidences for `confidences`."""
        calibrated = self.ir.predict(confidences)
        return calibrated

# Re-define run_isotonic_calibration to accept specific loaders
def run_isotonic_calibration_general(model, model_name, val_loader, test_loader, dataset_name, n_bins=15):
    """Calibrate `model` with isotonic regression and report before/after metrics.

    The calibrator is fitted on `val_loader` and evaluated on `test_loader`;
    the module-level `device` is used for inference. Reliability diagrams are
    saved for both stages.

    Args:
        model: Classifier to evaluate; moved to `device` and set to eval mode.
        model_name: Label used in printed output, plots and the result row.
        val_loader: Batches used only to fit the calibrator.
        test_loader: Batches used to compute the reported metrics.
        dataset_name: Label used in printed output and plot file suffixes.
        n_bins: Number of bins for ECE and the reliability diagrams.

    Returns:
        Dict with keys 'name', 'acc', 'ece_before', 'ece_after',
        'conf_before', 'conf_after' (all metrics in percent).
    """
    banner = "=" * 70
    dataset_tag = dataset_name.replace(' ', '_')
    print("\n" + banner)
    print(f"üìä Isotonic Regression Calibration for: {model_name} ({dataset_name})")
    print(banner)

    model.to(device).eval()

    # --- Uncalibrated metrics on the test split ---
    test_conf, test_hits = get_predictions(model, test_loader, device)
    accuracy = np.mean(test_hits) * 100
    ece_before = calculate_ece(test_conf, test_hits, n_bins)
    mean_conf_before = np.mean(test_conf) * 100

    print(f"\nBefore Calibration ‚Üí Acc: {accuracy:.2f}%, Conf: {mean_conf_before:.2f}%, ECE: {ece_before:.3f}%")
    plot_reliability_diagram(test_conf, test_hits, n_bins, model_name, f"before_iso_{dataset_tag}")

    # --- Learn the calibration map from the validation split only ---
    val_conf, val_hits = get_predictions(model, val_loader, device)
    calibrator = IsotonicRegressionCalibrator()
    calibrator.fit(val_conf, val_hits)
    print(f"‚úÖ Isotonic Regression fitted on validation data for {dataset_name}")

    # --- Calibrated metrics on the test split ---
    # Predictions (and hence accuracy) are unchanged; only confidences move.
    test_conf_cal = calibrator.predict(test_conf)
    ece_after = calculate_ece(test_conf_cal, test_hits, n_bins)
    mean_conf_after = np.mean(test_conf_cal) * 100

    print(f"\nAfter Calibration ‚Üí Acc: {accuracy:.2f}%, Conf: {mean_conf_after:.2f}%, ECE: {ece_after:.3f}%")
    plot_reliability_diagram(test_conf_cal, test_hits, n_bins, model_name, f"after_iso_{dataset_tag}")

    # --- Row for the summary table ---
    return dict(
        name=model_name,
        acc=accuracy,
        ece_before=ece_before,
        ece_after=ece_after,
        conf_before=mean_conf_before,
        conf_after=mean_conf_after,
    )

# One result dict per successfully calibrated CIFAR-100 model, in run order.
all_results_cifar100_iso = []

# Import the network definitions from the cloned repository; if they are not
# importable, bind the same names to placeholder factories so the cells below
# still execute.
# NOTE(review): a placeholder model cannot load a real checkpoint, so each
# model run below is expected to be skipped in that case -- consider failing
# loudly instead of substituting mocks.
try:
    from models.cifar import resnet
    from models.cifar.densenet import densenet, Bottleneck
    from models.cifar.wrn import WideResNet
    print("Successfully imported models from 'models.cifar' directory.")
except ImportError:
    print("WARNING: Could not import model definitions. Using mocks.")
    # Define simple MockModel for cases where actual models fail to import
    class MockModel(nn.Module):
        def __init__(self, num_classes=100):
            super().__init__()
            # Adjust input features for a common CIFAR-100 image size (3x32x32)
            # A simple linear layer might not be suitable for actual image data
            # but serves as a placeholder to prevent immediate crashes if imports fail.
            self.fc = nn.Linear(32*32*3, num_classes) # Assuming flattened input for mock
        def forward(self, x):
            # Flatten the input tensor
            x = x.view(x.size(0), -1)
            return self.fc(x)

    # Re-define functions to return MockModel with appropriate num_classes
    # Each factory accepts the keyword arguments used by the calls below and
    # ignores everything except num_classes.
    def resnet_mock(depth, num_classes, block_name): return MockModel(num_classes=num_classes)
    def densenet_mock(depth, num_classes, growthRate, compressionRate, block): return MockModel(num_classes=num_classes)
    def wideresnet_mock(depth, num_classes, widen_factor, dropRate): return MockModel(num_classes=num_classes)

    # Assign mocks to the original names
    resnet = resnet_mock
    densenet = densenet_mock
    WideResNet = wideresnet_mock
    Bottleneck = None # Bottleneck is a class used by densenet, can't mock easily without full definition

# Each model is built, loaded and calibrated in its own try block so that a
# failure in one (missing checkpoint, mismatched weights, ...) is reported and
# the remaining models still run.

# --- ResNet-164 (local checkpoint) ---
try:
    model_resnet164_cifar100 = resnet(depth=164, num_classes=100, block_name='Bottleneck')
    path_resnet164 = '/content/Project/resnet164Cifar100/checkpoint.pth.tar'
    model_resnet164_cifar100 = load_checkpoint(model_resnet164_cifar100, path_resnet164, device)
    results = run_isotonic_calibration_general(model_resnet164_cifar100, "ResNet-164", val_loader_cifar100, test_loader_cifar100, "CIFAR-100")
    all_results_cifar100_iso.append(results)
except Exception as e:
    print(f"‚ùå Skipping ResNet-164 (CIFAR-100): {e}")

# --- DenseNet-190 (local checkpoint) ---
try:
    model_densenet_cifar100 = densenet(depth=190, num_classes=100, growthRate=40,
                              compressionRate=2, block=Bottleneck)
    path_densenet = '/content/Project/densenet190Cifar100/checkpoint.pth.tar'
    model_densenet_cifar100 = load_checkpoint(model_densenet_cifar100, path_densenet, device)
    results = run_isotonic_calibration_general(model_densenet_cifar100, "DenseNet-190", val_loader_cifar100, test_loader_cifar100, "CIFAR-100")
    all_results_cifar100_iso.append(results)
except Exception as e:
    print(f"‚ùå Skipping DenseNet-190 (CIFAR-100): {e}")

# --- ResNet-56 (pretrained weights fetched through torch.hub) ---
try:
    model_hub_cifar100 = torch.hub.load("chenyaofo/pytorch-cifar-models",
                               "cifar100_resnet56", pretrained=True, trust_repo=True)
    results = run_isotonic_calibration_general(model_hub_cifar100, "ResNet-56 (Hub)", val_loader_cifar100, test_loader_cifar100, "CIFAR-100")
    all_results_cifar100_iso.append(results)
except Exception as e:
    print(f"‚ùå Skipping ResNet-56 (Hub) (CIFAR-100): {e}")

# --- WideResNet-28-10 (local checkpoint) ---
try:
    model_wrn_cifar100 = WideResNet(depth=28, num_classes=100, widen_factor=10, dropRate=0.3)
    path_wrn = '/content/Project/WRNCifar100/checkpoint.pth.tar'
    model_wrn_cifar100 = load_checkpoint(model_wrn_cifar100, path_wrn, device)
    results = run_isotonic_calibration_general(model_wrn_cifar100, "WideResNet-28-10", val_loader_cifar100, test_loader_cifar100, "CIFAR-100")
    all_results_cifar100_iso.append(results)
except Exception as e:
    print(f"‚ùå Skipping WideResNet-28-10 (CIFAR-100): {e}")


# Summary table over every model that completed.
if all_results_cifar100_iso:
    print("\n" + "="*130)
    print("üìä Final Isotonic Regression Comparison on CIFAR-100 Test Set")
    print("="*130)
    print(f"{'Model':<22} | {'Accuracy':>10} | {'ECE (Before)':>13} | {'ECE (After)':>12} | {'Conf (Before)':>15} | {'Conf (After)':>15}")
    print("-"*130)
    for r in all_results_cifar100_iso:
        print(f"{r['name']:<22} | {r['acc']:>9.2f}% | {r['ece_before']:>12.4f}% | {r['ece_after']:>11.4f}% | {r['conf_before']:>14.2f}% | {r['conf_after']:>14.2f}%")
    print("="*130)
else:
    print("\nNo CIFAR-100 models were successfully calibrated with Isotonic Regression.")

/content/pytorch-classification
Using device: cuda

Loading and splitting CIFAR-100 data for Isotonic Regression...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 169M/169M [00:13<00:00, 12.1MB/s]


CIFAR-100 Data successfully split:
  -> Validation samples: 5000
  -> Test samples:       5000
Successfully imported models from 'models.cifar' directory.
Loading checkpoint: /content/Project/resnet164Cifar100/checkpoint.pth.tar

üìä Isotonic Regression Calibration for: ResNet-164 (CIFAR-100)

Before Calibration ‚Üí Acc: 73.14%, Conf: 88.21%, ECE: 15.075%
‚úÖ Saved reliability diagram: /content/cifar-100/ResNet-164_C100_reliability_before_iso_CIFAR-100.png
‚úÖ Isotonic Regression fitted on validation data for CIFAR-100

After Calibration ‚Üí Acc: 73.14%, Conf: 74.05%, ECE: 1.903%
‚úÖ Saved reliability diagram: /content/cifar-100/ResNet-164_C100_reliability_after_iso_CIFAR-100.png
Loading checkpoint: /content/Project/densenet190Cifar100/checkpoint.pth.tar

üìä Isotonic Regression Calibration for: DenseNet-190 (CIFAR-100)

Before Calibration ‚Üí Acc: 82.26%, Conf: 89.55%, ECE: 7.335%
‚úÖ Saved reliability diagram: /content/cifar-100/DenseNet-190_C100_reliability_before_iso_CIFAR-100.pn

Using cache found in /root/.cache/torch/hub/chenyaofo_pytorch-cifar-models_master



Before Calibration ‚Üí Acc: 67.42%, Conf: 83.29%, ECE: 15.867%
‚úÖ Saved reliability diagram: /content/cifar-100/ResNet-56_(Hub)_C100_reliability_before_iso_CIFAR-100.png
‚úÖ Isotonic Regression fitted on validation data for CIFAR-100

After Calibration ‚Üí Acc: 67.42%, Conf: 67.20%, ECE: 1.941%
‚úÖ Saved reliability diagram: /content/cifar-100/ResNet-56_(Hub)_C100_reliability_after_iso_CIFAR-100.png
Loading checkpoint: /content/Project/WRNCifar100/checkpoint.pth.tar

üìä Isotonic Regression Calibration for: WideResNet-28-10 (CIFAR-100)

Before Calibration ‚Üí Acc: 81.74%, Conf: 87.70%, ECE: 6.450%
‚úÖ Saved reliability diagram: /content/cifar-100/WideResNet-28-10_C100_reliability_before_iso_CIFAR-100.png
‚úÖ Isotonic Regression fitted on validation data for CIFAR-100

After Calibration ‚Üí Acc: 81.74%, Conf: 81.54%, ECE: 1.323%
‚úÖ Saved reliability diagram: /content/cifar-100/WideResNet-28-10_C100_reliability_after_iso_CIFAR-100.png

üìä Final Isotonic Regression Comparison on CIF

In [None]:
import kagglehub
path = kagglehub.dataset_download("jutrera/stanford-car-dataset-by-classes-folder")
print(path)
# Corrected: Move the entire 'car_data' directory, not its contents directly into /content/
!mv {path}/* /content/

mv: inter-device move failed: '/kaggle/input/birds400/birds400' to '/content/birds400'; unable to remove target: Directory not empty


In [None]:
import kagglehub
path = kagglehub.dataset_download("antoniozarauzmoreno/birds400")
# Corrected: Move the entire 'birds400' directory, not its contents directly into /content/
!mv {path} /content/

Using Colab cache for faster access to the 'birds400' dataset.
mv: inter-device move failed: '/kaggle/input/birds400' to '/content/birds400'; unable to remove target: Directory not empty


In [None]:
# ========================================================
# ISOTONIC REGRESSION CALIBRATION FOR STANFORD CARS (MobileNetV2)
# ========================================================
%cd pytorch-classification/
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import OrderedDict
from sklearn.isotonic import IsotonicRegression # Ensure this is imported

# Define device
# Note: a plain string here, unlike the torch.device objects used in earlier cells.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"==> Using device: {device}")

# --- Stanford Cars Data Loading and Transforms ---
# Evaluation-only preprocessing: resize, centre-crop to 224x224, tensor
# conversion and per-channel normalization.
data_transforms_cars = {
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
}

print("\nLoading and splitting Stanford Cars data for Isotonic Regression...")
try:
    # Corrected path to point to the moved 'car_data' directory
    # ImageFolder derives one class per sub-directory of the given root.
    full_test_dataset_cars = datasets.ImageFolder(
        '/content/car_data/car_data/test',
        data_transforms_cars['test']
    )

    num_classes_cars = len(full_test_dataset_cars.classes)
    print(f"Found {num_classes_cars} classes in the Stanford Cars dataset.")

    # 4000 images fit the calibrator; whatever remains is the reporting split.
    val_size_cars = 4000
    test_size_cars = len(full_test_dataset_cars) - val_size_cars

    if test_size_cars <= 0:
        raise ValueError("Validation size exceeds Stanford Cars test dataset size!")

    # A fixed generator seed makes the split identical on every run.
    val_dataset_cars, test_dataset_cars = random_split(
        full_test_dataset_cars,
        [val_size_cars, test_size_cars],
        generator=torch.Generator().manual_seed(42)
    )

    # shuffle=False keeps the sample order stable between passes over a loader.
    val_loader_cars = DataLoader(val_dataset_cars, batch_size=32, shuffle=False, num_workers=2)
    test_loader_cars = DataLoader(test_dataset_cars, batch_size=32, shuffle=False, num_workers=2)

    print(f"Stanford Cars Data successfully split:")
    print(f"  -> Validation samples: {len(val_dataset_cars)}")
    print(f"  -> Test samples:       {len(test_dataset_cars)}")

except Exception as e:
    print(f"‚ùå ERROR loading Stanford Cars data: {e}")
    # Exit gracefully if data loading fails to prevent subsequent errors
    # NOTE(review): exit() inside a notebook cell ends the session instead of
    # surfacing a traceback; consider re-raising instead.
    exit()

# --- Helper Functions (copied for self-containment) ---
def get_predictions(model, loader, device):
    """Run `model` over `loader` and collect top-1 confidence and correctness.

    Args:
        model: Module returning class logits; switched to eval mode here.
        loader: Iterable yielding `(inputs, targets)` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple `(confidences, correct)` of NumPy arrays with one entry per
        sample: the maximum softmax probability, and whether the arg-max class
        equals the target.
    """
    model.eval()
    confidences, hits = [], []
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Softmax over the class dimension, then take the winning class.
            top_prob, top_class = F.softmax(model(inputs), dim=1).max(dim=1)
            confidences.extend(top_prob.cpu().numpy())
            hits.extend(top_class.eq(targets).cpu().numpy())
    return np.array(confidences), np.array(hits)

def calculate_ece(confidences, correct, n_bins=15):
    """Expected Calibration Error, in percent, over equal-width confidence bins.

    Samples are grouped into `n_bins` bins on [0, 1]; each non-empty bin
    contributes |mean confidence - accuracy| weighted by its share of samples.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width bins.

    Returns:
        ECE scaled to a percentage; 0.0 when there are no samples.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    if confidences.size == 0:
        # Nothing to bin; avoid the NaN/RuntimeWarning from averaging nothing.
        return 0.0

    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        bin_lower, bin_upper = bin_boundaries[i], bin_boundaries[i + 1]
        if i == 0:
            # The first bin is closed on the left: isotonic regression can emit
            # a calibrated confidence of exactly 0.0, and a half-open (0, ...]
            # bin would silently drop those samples from the estimate.
            in_bin = (confidences >= bin_lower) & (confidences <= bin_upper)
        else:
            in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(correct[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
    return ece * 100

def plot_reliability_diagram(confidences, correct, n_bins, model_name, suffix):
    """Save a reliability diagram (per-bin accuracy vs. confidence) as a PNG.

    Only non-empty bins are drawn.

    Args:
        confidences: Array-like of per-sample confidences in [0, 1].
        correct: Array-like of per-sample correctness (bool or 0/1).
        n_bins: Number of equal-width confidence bins.
        model_name: Used in the title and, with spaces replaced, the file name.
        suffix: Tag appended to the title and file name.

    The image is written under /content/cars (created if missing) and the
    saved path is printed.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_lowers, bin_uppers = bin_boundaries[:-1], bin_boundaries[1:]
    bin_accs, bin_props = np.zeros(n_bins), np.zeros(n_bins)
    for i, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        # First bin is closed on the left so a calibrated confidence of exactly
        # 0.0 (possible after isotonic regression) is not dropped.
        above_lower = (confidences >= bin_lower) if i == 0 else (confidences > bin_lower)
        in_bin = above_lower & (confidences <= bin_upper)
        if in_bin.any():
            bin_props[i] = np.mean(in_bin)
            bin_accs[i] = np.mean(correct[in_bin])

    save_folder = "/content/cars"; os.makedirs(save_folder, exist_ok=True)
    filename = f"{save_folder}/{model_name.replace(' ', '_')}_reliability_{suffix}.png"

    fig = plt.figure(figsize=(8, 7))
    try:
        bar_width = 1.0 / n_bins
        bar_centers = bin_lowers + bar_width / 2
        # Skip bins that received no samples instead of drawing zero-height bars.
        non_empty_mask = bin_props > 0
        plt.bar(bar_centers[non_empty_mask], bin_accs[non_empty_mask], width=bar_width * 0.9, alpha=0.3, color='red', edgecolor='red', label='Accuracy')
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.xlabel('Confidence'); plt.ylabel('Accuracy'); plt.title(f'Reliability Diagram: {model_name} ({suffix})')
        plt.legend(); plt.xlim(0, 1); plt.ylim(0, 1)
        plt.savefig(filename)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    print(f"‚úÖ Saved reliability diagram: {filename}")

def load_checkpoint(model, path, device):
    """Load weights from the checkpoint at `path` into `model` and return it.

    The checkpoint may be either a dict holding the weights under
    'state_dict' or the state dict itself. Every 'module.' substring is removed
    from parameter names before loading.

    NOTE(review): weights_only=False allows arbitrary pickled objects to be
    deserialized; only use this on checkpoints from a trusted source.
    """
    print(f"Loading checkpoint: {path}")
    checkpoint = torch.load(path, map_location=device, weights_only=False)
    weights = checkpoint.get('state_dict', checkpoint)
    cleaned = OrderedDict()
    for key, tensor in weights.items():
        cleaned[key.replace('module.', '')] = tensor
    model.load_state_dict(cleaned)
    return model

# ================================================
# ISOTONIC REGRESSION CALIBRATION CLASS
# ================================================
class IsotonicRegressionCalibrator:
    """Thin wrapper that calibrates confidences with isotonic regression."""

    def __init__(self):
        # out_of_bounds="clip" is forwarded to the regressor so that inputs
        # outside the fitted range are clipped rather than rejected.
        regressor = IsotonicRegression(out_of_bounds="clip")
        self.ir = regressor

    def fit(self, confidences, correct):
        """Fit the mapping from raw confidence to observed correctness."""
        self.ir.fit(confidences, correct)

    def predict(self, confidences):
        """Return calibrated confidences for `confidences`."""
        calibrated = self.ir.predict(confidences)
        return calibrated

# --- Main Calibration Function for Isotonic Regression ---
def run_isotonic_calibration_general(model, model_name, val_loader, test_loader, dataset_name, n_bins=15):
    """Calibrate ``model`` with isotonic regression and report test-set metrics.

    The function evaluates the uncalibrated model on ``test_loader``, fits an
    ``IsotonicRegressionCalibrator`` on the predictions for ``val_loader``,
    applies it to the test confidences, and prints/plots the metrics before
    and after calibration.

    Args:
        model: Model to evaluate; it is moved to the module-level ``device``
            and switched to eval mode.
        model_name: Label used in log lines, plot titles and the result dict.
        val_loader: Loader whose predictions are used to fit the calibrator.
        test_loader: Loader whose predictions are used for all reported metrics.
        dataset_name: Label used in log lines and (with spaces replaced by
            underscores) in the plot suffix.
        n_bins: Number of bins passed to ``calculate_ece`` and the plots.

    Returns:
        dict with keys ``name``, ``acc``, ``ece_before``, ``ece_after``,
        ``conf_before`` and ``conf_after`` (all metric values in percent).
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f"üìä Isotonic Regression Calibration for: {model_name} ({dataset_name})")
    print(rule)

    # ``device`` is read from module scope, not passed in.
    model.to(device).eval()

    # Uncalibrated baseline on the test split.
    test_conf, test_correct = get_predictions(model, test_loader, device)
    accuracy_pct = np.mean(test_correct) * 100
    ece_before = calculate_ece(test_conf, test_correct, n_bins)
    mean_conf_before = np.mean(test_conf) * 100

    print(f"\nBefore Calibration ‚Üí Acc: {accuracy_pct:.2f}%, Conf: {mean_conf_before:.2f}%, ECE: {ece_before:.3f}%")
    dataset_tag = dataset_name.replace(' ', '_')
    plot_reliability_diagram(test_conf, test_correct, n_bins, model_name, f"before_iso_{dataset_tag}")

    # Learn the confidence -> correctness mapping on the validation split only.
    val_conf, val_correct = get_predictions(model, val_loader, device)
    calibrator = IsotonicRegressionCalibrator()
    calibrator.fit(val_conf, val_correct)
    print(f"‚úÖ Isotonic Regression fitted on validation data for {dataset_name}")

    # Re-score the test confidences; the predicted labels (and therefore the
    # accuracy) are not touched by the calibrator.
    test_conf_calibrated = calibrator.predict(test_conf)
    ece_after = calculate_ece(test_conf_calibrated, test_correct, n_bins)
    mean_conf_after = np.mean(test_conf_calibrated) * 100

    print(f"\nAfter Calibration ‚Üí Acc: {accuracy_pct:.2f}%, Conf: {mean_conf_after:.2f}%, ECE: {ece_after:.3f}%")
    plot_reliability_diagram(test_conf_calibrated, test_correct, n_bins, model_name, f"after_iso_{dataset_tag}")

    # One row for the summary table printed by the caller.
    return dict(
        name=model_name,
        acc=accuracy_pct,
        ece_before=ece_before,
        ece_after=ece_after,
        conf_before=mean_conf_before,
        conf_after=mean_conf_after,
    )

# One result dict per Stanford Cars model that was calibrated successfully.
all_results_cars_iso = []

# Build MobileNetV2, load its weights from disk and run isotonic calibration.
# Any failure is reported as a printed message; the result list then stays empty.
try:
    print("\nRe-creating and loading MobileNetV2 model for Stanford Cars...")
    model_mobilenet_cars = models.mobilenet_v2(weights=None)

    # num_classes_cars is expected to come from the data-loading step earlier
    # in this cell; at module level locals() is the global namespace.
    if 'num_classes_cars' not in locals():
        raise NameError("num_classes_cars not defined. Data loading failed.")

    print(f"Building MobileNetV2 for {num_classes_cars} classes.")
    # Replace the last classifier layer so the output size matches the dataset.
    in_features_mobilenet = model_mobilenet_cars.classifier[1].in_features
    model_mobilenet_cars.classifier[1] = nn.Linear(in_features_mobilenet, num_classes_cars)
    model_mobilenet_cars.to(device)

    MODEL_PATH_MOBILENET = '/content/Project/MobilenetV2_Cars/model_best.pth'
    # NOTE(review): this loads the file directly as a state dict, so it presumably
    # expects no 'state_dict' wrapper and no 'module.' key prefix — confirm, or
    # route it through the load_checkpoint helper defined in this cell.
    model_mobilenet_cars.load_state_dict(torch.load(MODEL_PATH_MOBILENET, map_location=device))
    print("‚úÖ MobileNetV2 model loaded successfully.")

    results = run_isotonic_calibration_general(model_mobilenet_cars, "MobileNetV2", val_loader_cars, test_loader_cars, "Stanford Cars")
    all_results_cars_iso.append(results)

except FileNotFoundError:
    print(f"‚ùå CRITICAL ERROR: Could not find model for MobileNetV2 at '{MODEL_PATH_MOBILENET}'")
except Exception as e:
    # Catch-all: only the exception message is shown, the traceback is dropped.
    print(f"‚ùå An error occurred with MobileNetV2 (Stanford Cars): {e}")

# Summary table: one fixed-width row per collected result, all values in percent.
if all_results_cars_iso:
    print("\n" + "="*130)
    print("üìä Final Isotonic Regression Comparison on Stanford Cars Test Set")
    print("="*130)
    print(f"{'Model':<22} | {'Accuracy':>10} | {'ECE (Before)':>13} | {'ECE (After)':>12} | {'Conf (Before)':>15} | {'Conf (After)':>15}")
    print("-"*130)
    for r in all_results_cars_iso:
        print(f"{r['name']:<22} | {r['acc']:>9.2f}% | {r['ece_before']:>12.4f}% | {r['ece_after']:>11.4f}% | {r['conf_before']:>14.2f}% | {r['conf_after']:>14.2f}%")
    print("="*130)
else:
    print("\nNo Stanford Cars models were successfully calibrated with Isotonic Regression.")

/content/pytorch-classification
==> Using device: cuda

Loading and splitting Stanford Cars data for Isotonic Regression...
Found 196 classes in the Stanford Cars dataset.
Stanford Cars Data successfully split:
  -> Validation samples: 4000
  -> Test samples:       4041

Re-creating and loading MobileNetV2 model for Stanford Cars...
Building MobileNetV2 for 196 classes.
‚úÖ MobileNetV2 model loaded successfully.

üìä Isotonic Regression Calibration for: MobileNetV2 (Stanford Cars)

Before Calibration ‚Üí Acc: 44.35%, Conf: 53.77%, ECE: 9.421%
‚úÖ Saved reliability diagram: /content/cars/MobileNetV2_reliability_before_iso_Stanford_Cars.png
‚úÖ Isotonic Regression fitted on validation data for Stanford Cars

After Calibration ‚Üí Acc: 44.35%, Conf: 44.13%, ECE: 1.525%
‚úÖ Saved reliability diagram: /content/cars/MobileNetV2_reliability_after_iso_Stanford_Cars.png

üìä Final Isotonic Regression Comparison on Stanford Cars Test Set
Model                  |   Accuracy |  ECE (Before) |  E

In [None]:
# ========================================================
# ISOTONIC REGRESSION CALIBRATION FOR BIRDS DATASET (InceptionV3)
# ========================================================
%cd pytorch-classification/
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import OrderedDict
from sklearn.isotonic import IsotonicRegression # Ensure this is imported
import torchvision # Needed for torch.serialization.add_safe_globals

# Define device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"==> Using device: {device}")

# --- Birds Dataset Data Loading and Transforms ---
# Deterministic evaluation-time preprocessing: resize, centre-crop to 299x299,
# convert to tensor and normalise each channel with the mean/std given below.
data_transforms_birds = {
    'test': transforms.Compose([
        transforms.Resize(299),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
}

print("\nLoading and splitting Birds Dataset data for Isotonic Regression...")
# Corrected path to point to the moved 'birds400' directory
BIRDS_TEST_PATH = '/content/birds400/birds400/test'

try:
    full_test_dataset_birds = datasets.ImageFolder(BIRDS_TEST_PATH, data_transforms_birds['test'])
    num_classes_birds = len(full_test_dataset_birds.classes)
    print(f"Found {num_classes_birds} classes in the Birds Dataset.")

    # Hold out a fixed number of images to fit the calibrator; the rest is
    # used to report metrics.
    val_size_birds = 1000
    test_size_birds = len(full_test_dataset_birds) - val_size_birds

    if test_size_birds <= 0:
        raise ValueError("Validation size exceeds Birds Dataset test dataset size!")

    # Seeded generator so the validation/test split is the same on every run.
    val_dataset_birds, test_dataset_birds = random_split(full_test_dataset_birds, [val_size_birds, test_size_birds],
                                             generator=torch.Generator().manual_seed(42))

    val_loader_birds = DataLoader(val_dataset_birds, batch_size=32, shuffle=False, num_workers=2)
    test_loader_birds = DataLoader(test_dataset_birds, batch_size=32, shuffle=False, num_workers=2)

    print(f"Birds Dataset Data successfully split:")
    print(f"  -> Validation samples: {len(val_dataset_birds)}")
    print(f"  -> Test samples:       {len(test_dataset_birds)}")

except FileNotFoundError:
    print(f"‚ùå ERROR: Birds Dataset directory not found at: {BIRDS_TEST_PATH}")
    print("Please make sure you ran the data preparation cell first.")
    # Re-raise instead of calling exit(): inside a notebook kernel exit() shuts
    # the kernel down and discards all state, whereas re-raising just stops
    # this cell and keeps the traceback.
    raise
except Exception as e:
    print(f"‚ùå An error occurred during Birds Dataset data loading: {e}")
    # Same reasoning as above: abort the cell without killing the kernel.
    raise

# --- Helper Functions (copied for self-containment) ---
def get_predictions(model, loader, device):
    """Run ``model`` over ``loader`` and collect top-1 confidence and correctness.

    The model is switched to eval mode and evaluated without gradients. For
    each sample the softmax over dim 1 of the logits is taken; the largest
    probability is the confidence and its index is the predicted label.

    Args:
        model: Callable module mapping a batch of inputs to logits, or to a
            tuple whose logits are either exposed as a ``.logits`` attribute
            (namedtuple outputs) or stored as the first element (plain tuples).
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(confidences, correct)`` as two NumPy arrays with one entry per
        sample; ``correct`` holds ``pred == label``. Both are empty arrays
        when ``loader`` yields nothing.
    """
    model.eval()
    all_conf, all_corr = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            if isinstance(logits, tuple):
                # Namedtuple outputs carry a .logits field; for a plain tuple
                # fall back to the first element instead of raising
                # AttributeError.
                logits = logits.logits if hasattr(logits, 'logits') else logits[0]
            probs = F.softmax(logits, dim=1)
            conf, pred = torch.max(probs, 1)
            all_conf.extend(conf.cpu().numpy())
            all_corr.extend((pred == labels).cpu().numpy())
    return np.array(all_conf), np.array(all_corr)

def calculate_ece(confidences, correct, n_bins=15):
    """Expected Calibration Error (in percent) over equal-width confidence bins.

    The interval [0, 1] is split into ``n_bins`` equal-width bins. For every
    non-empty bin the absolute gap between mean confidence and accuracy is
    weighted by the fraction of samples in the bin; the weighted gaps are
    summed and multiplied by 100.

    Bins are half-open ``(lower, upper]``, except the first one, which also
    includes its lower edge so that samples with confidence exactly 0.0
    (which calibrators such as isotonic regression can produce) are counted
    instead of silently dropped.

    Args:
        confidences: Array-like of confidences in [0, 1].
        correct: Array-like of the same length; truthy/1 where the prediction
            was right.
        n_bins: Number of equal-width bins.

    Returns:
        ECE as a float percentage; ``0.0`` for empty input.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)
    if confidences.size == 0:
        # Nothing to score; avoids NumPy's mean-of-empty warning.
        return 0.0

    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    ece = 0.0
    for i in range(n_bins):
        bin_lower, bin_upper = bin_boundaries[i], bin_boundaries[i + 1]
        in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        if i == 0:
            # Close the first bin on the left so confidence == 0.0 is included.
            in_bin |= confidences == bin_lower
        prop_in_bin = np.mean(in_bin)
        if prop_in_bin > 0:
            accuracy_in_bin = np.mean(correct[in_bin])
            avg_confidence_in_bin = np.mean(confidences[in_bin])
            ece += np.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
    return ece * 100

def plot_reliability_diagram(confidences, correct, n_bins, model_name, suffix, save_folder="/content/birds"):
    """Save a reliability diagram (per-bin accuracy vs. confidence) as a PNG.

    Confidences are grouped into ``n_bins`` equal-width bins over [0, 1];
    bins are ``(lower, upper]`` except the first, which also includes 0.0 so
    zero-confidence samples are not left out of the plot. One bar per
    non-empty bin shows the accuracy in that bin, together with the diagonal
    of perfect calibration.

    Args:
        confidences: Array-like of confidences in [0, 1].
        correct: Array-like of the same length; truthy/1 where the prediction
            was right.
        n_bins: Number of equal-width bins.
        model_name: Used in the title and (spaces replaced by underscores) in
            the file name.
        suffix: Used in the title and the file name.
        save_folder: Directory the PNG is written to; created if missing.

    Returns:
        None. The figure is written to
        ``{save_folder}/{model_name}_reliability_{suffix}.png`` and closed.
    """
    confidences = np.asarray(confidences)
    correct = np.asarray(correct)

    bin_boundaries = np.linspace(0, 1, n_bins + 1)
    bin_lowers, bin_uppers = bin_boundaries[:-1], bin_boundaries[1:]
    bin_accs, bin_props = np.zeros(n_bins), np.zeros(n_bins)
    for i, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
        in_bin = (confidences > bin_lower) & (confidences <= bin_upper)
        if i == 0:
            # Close the first bin on the left so confidence == 0.0 is included.
            in_bin |= confidences == bin_lower
        if in_bin.any():
            bin_props[i] = np.mean(in_bin)
            bin_accs[i] = np.mean(correct[in_bin])

    # Explicit figure/axes so exactly this figure is saved and closed.
    fig, ax = plt.subplots(figsize=(8, 7))
    bar_width = 1.0 / n_bins
    bar_centers = bin_lowers + bar_width / 2
    non_empty_mask = bin_props > 0
    ax.bar(bar_centers[non_empty_mask], bin_accs[non_empty_mask], width=bar_width * 0.9,
           alpha=0.3, color='red', edgecolor='red', label='Accuracy')
    ax.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
    ax.set_xlabel('Confidence')
    ax.set_ylabel('Accuracy')
    ax.set_title(f'Reliability Diagram: {model_name} ({suffix})')
    ax.legend()
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    os.makedirs(save_folder, exist_ok=True)
    filename = f"{save_folder}/{model_name.replace(' ', '_')}_reliability_{suffix}.png"
    fig.savefig(filename)
    plt.close(fig)
    print(f"‚úÖ Saved reliability diagram: {filename}")

def load_checkpoint(model, path, device):
    """Load weights from the checkpoint file at ``path`` into ``model``.

    Three checkpoint layouts are accepted:
      * a bare state dict,
      * a dict that stores the state dict under the ``'state_dict'`` key,
      * a whole pickled ``torch.nn.Module`` (its ``state_dict()`` is used).

    A single leading ``'module.'`` prefix is removed from every key (such a
    prefix is typically present when the model was saved while wrapped in
    ``nn.DataParallel``). Only the *leading* prefix is stripped so that real
    submodules that happen to be named ``module`` keep their keys intact.

    Args:
        model: Module whose parameters are overwritten in place.
        path: Path of the checkpoint file.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        The same ``model`` object, with the loaded weights.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        RuntimeError: If the keys do not match ``model`` (from ``load_state_dict``).
    """
    print(f"Loading checkpoint: {path}")
    # SECURITY: weights_only=False unpickles arbitrary Python objects and can
    # execute code embedded in the file. Only load checkpoints you trust.
    ckpt = torch.load(path, map_location=device, weights_only=False)

    if isinstance(ckpt, torch.nn.Module):
        # The file holds a full pickled model rather than a dict of tensors.
        state_dict = ckpt.state_dict()
    else:
        state_dict = ckpt.get('state_dict', ckpt)

    prefix = 'module.'
    new_sd = OrderedDict(
        (k[len(prefix):] if k.startswith(prefix) else k, v)
        for k, v in state_dict.items()
    )
    model.load_state_dict(new_sd)
    return model

# ================================================
# ISOTONIC REGRESSION CALIBRATION CLASS
# ================================================
class IsotonicRegressionCalibrator:
    """Small wrapper that calibrates confidences with isotonic regression.

    The wrapped scikit-learn ``IsotonicRegression`` instance is kept on
    ``self.ir``; ``fit`` and ``predict`` simply delegate to it.
    """

    def __init__(self):
        # out_of_bounds="clip" is forwarded to scikit-learn; it controls how
        # inputs outside the range seen during fitting are handled.
        regressor = IsotonicRegression(out_of_bounds="clip")
        self.ir = regressor

    def fit(self, confidences, correct):
        """Fit the regressor with ``confidences`` as inputs and ``correct`` as targets."""
        fitted_inputs, fitted_targets = confidences, correct
        self.ir.fit(fitted_inputs, fitted_targets)

    def predict(self, confidences):
        """Return the fitted regressor's output for ``confidences``."""
        calibrated = self.ir.predict(confidences)
        return calibrated

# --- Main Calibration Function for Isotonic Regression ---
def run_isotonic_calibration_general(model, model_name, val_loader, test_loader, dataset_name, n_bins=15):
    """Calibrate ``model`` with isotonic regression and report test-set metrics.

    The function evaluates the uncalibrated model on ``test_loader``, fits an
    ``IsotonicRegressionCalibrator`` on the predictions for ``val_loader``,
    applies it to the test confidences, and prints/plots the metrics before
    and after calibration.

    Args:
        model: Model to evaluate; it is moved to the module-level ``device``
            and switched to eval mode.
        model_name: Label used in log lines, plot titles and the result dict.
        val_loader: Loader whose predictions are used to fit the calibrator.
        test_loader: Loader whose predictions are used for all reported metrics.
        dataset_name: Label used in log lines and (with spaces replaced by
            underscores) in the plot suffix.
        n_bins: Number of bins passed to ``calculate_ece`` and the plots.

    Returns:
        dict with keys ``name``, ``acc``, ``ece_before``, ``ece_after``,
        ``conf_before`` and ``conf_after`` (all metric values in percent).
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f"üìä Isotonic Regression Calibration for: {model_name} ({dataset_name})")
    print(rule)

    # ``device`` is read from module scope, not passed in.
    model.to(device).eval()

    # Uncalibrated baseline on the test split.
    test_conf, test_correct = get_predictions(model, test_loader, device)
    accuracy_pct = np.mean(test_correct) * 100
    ece_before = calculate_ece(test_conf, test_correct, n_bins)
    mean_conf_before = np.mean(test_conf) * 100

    print(f"\nBefore Calibration ‚Üí Acc: {accuracy_pct:.2f}%, Conf: {mean_conf_before:.2f}%, ECE: {ece_before:.3f}%")
    dataset_tag = dataset_name.replace(' ', '_')
    plot_reliability_diagram(test_conf, test_correct, n_bins, model_name, f"before_iso_{dataset_tag}")

    # Learn the confidence -> correctness mapping on the validation split only.
    val_conf, val_correct = get_predictions(model, val_loader, device)
    calibrator = IsotonicRegressionCalibrator()
    calibrator.fit(val_conf, val_correct)
    print(f"‚úÖ Isotonic Regression fitted on validation data for {dataset_name}")

    # Re-score the test confidences; the predicted labels (and therefore the
    # accuracy) are not touched by the calibrator.
    test_conf_calibrated = calibrator.predict(test_conf)
    ece_after = calculate_ece(test_conf_calibrated, test_correct, n_bins)
    mean_conf_after = np.mean(test_conf_calibrated) * 100

    print(f"\nAfter Calibration ‚Üí Acc: {accuracy_pct:.2f}%, Conf: {mean_conf_after:.2f}%, ECE: {ece_after:.3f}%")
    plot_reliability_diagram(test_conf_calibrated, test_correct, n_bins, model_name, f"after_iso_{dataset_tag}")

    # One row for the summary table printed by the caller.
    return dict(
        name=model_name,
        acc=accuracy_pct,
        ece_before=ece_before,
        ece_after=ece_after,
        conf_before=mean_conf_before,
        conf_after=mean_conf_after,
    )

# One result dict per Birds model that was calibrated successfully.
all_results_birds_iso = []

# Load the InceptionV3 checkpoint, rebuild the model if needed and run
# isotonic calibration. Any failure is reported as a printed message; the
# result list then stays empty.
try:
    model_name_birds = "InceptionV3_Fold9"
    MODEL_PATH_INCEPTION = "/content/Project/InceptionNetV3_Birds/inceptionv3_birds9.pth"

    print("\n" + "="*80)
    print(f"STARTING ISOTONIC REGRESSION FOR: {model_name_birds} from {MODEL_PATH_INCEPTION}")
    print("="*80)

    # NOTE(review): the torch.load call below passes weights_only=False, so this
    # allow-list registration presumably has no effect on it — confirm whether
    # it is still needed.
    torch.serialization.add_safe_globals([torchvision.models.inception.Inception3])
    # weights_only=False unpickles arbitrary objects; only use trusted files.
    loaded_object_inception = torch.load(MODEL_PATH_INCEPTION, map_location=device, weights_only=False)

    if isinstance(loaded_object_inception, nn.Module):
        # The file holds a whole pickled model: use it as-is.
        model_inception_birds = loaded_object_inception
        in_features_inception = model_inception_birds.fc.in_features
        # NOTE(review): if the head size differs, fc is replaced by a freshly
        # created nn.Linear whose weights are not loaded from the checkpoint —
        # confirm this branch is not expected to be hit.
        if model_inception_birds.fc.out_features != num_classes_birds:
            model_inception_birds.fc = nn.Linear(in_features_inception, num_classes_birds)
        model_inception_birds.to(device)
    elif isinstance(loaded_object_inception, dict):
        # The file holds a state dict (optionally under 'state_dict'): build the
        # architecture, resize the head, then load the weights.
        model_inception_birds = models.inception_v3(weights=None, aux_logits=False, init_weights=False)
        in_features_inception = model_inception_birds.fc.in_features
        model_inception_birds.fc = nn.Linear(in_features_inception, num_classes_birds)
        model_inception_birds.to(device)
        state_dict_inception = loaded_object_inception.get('state_dict', loaded_object_inception)
        # Removes every occurrence of 'module.' from the keys before loading.
        new_sd_inception = OrderedDict((k.replace('module.', ''), v) for k, v in state_dict_inception.items())
        model_inception_birds.load_state_dict(new_sd_inception)
    else:
        raise TypeError(f"Unexpected object type for InceptionV3: {type(loaded_object_inception)}")

    print("InceptionV3 model loaded successfully.")
    results = run_isotonic_calibration_general(model_inception_birds, model_name_birds, val_loader_birds, test_loader_birds, "Birds Dataset")
    all_results_birds_iso.append(results)

except FileNotFoundError:
    print(f"‚ùå CRITICAL ERROR: Could not find weights for InceptionV3 at '{MODEL_PATH_INCEPTION}'")
except Exception as e:
    # Catch-all: only the exception message is shown, the traceback is dropped.
    print(f"‚ùå An error occurred with InceptionV3 (Birds Dataset): {e}")

# Summary table: one fixed-width row per collected result, all values in percent.
if all_results_birds_iso:
    print("\n" + "="*130)
    print("üìä Final Isotonic Regression Comparison on Birds Dataset Test Set")
    print("="*130)
    print(f"{'Model':<22} | {'Accuracy':>10} | {'ECE (Before)':>13} | {'ECE (After)':>12} | {'Conf (Before)':>15} | {'Conf (After)':>15}")
    print("-"*130)
    for r in all_results_birds_iso:
        print(f"{r['name']:<22} | {r['acc']:>9.2f}% | {r['ece_before']:>12.4f}% | {r['ece_after']:>11.4f}% | {r['conf_before']:>14.2f}% | {r['conf_after']:>14.2f}%")
    print("="*130)
else:
    print("\nNo Birds Dataset models were successfully calibrated with Isotonic Regression.")

/content/pytorch-classification
==> Using device: cuda

Loading and splitting Birds Dataset data for Isotonic Regression...
Found 400 classes in the Birds Dataset.
Birds Dataset Data successfully split:
  -> Validation samples: 1000
  -> Test samples:       1000

STARTING ISOTONIC REGRESSION FOR: InceptionV3_Fold9 from /content/Project/InceptionNetV3_Birds/inceptionv3_birds9.pth
InceptionV3 model loaded successfully.

üìä Isotonic Regression Calibration for: InceptionV3_Fold9 (Birds Dataset)

Before Calibration ‚Üí Acc: 98.90%, Conf: 98.65%, ECE: 0.503%
‚úÖ Saved reliability diagram: /content/birds/InceptionV3_Fold9_reliability_before_iso_Birds_Dataset.png
‚úÖ Isotonic Regression fitted on validation data for Birds Dataset

After Calibration ‚Üí Acc: 98.90%, Conf: 99.72%, ECE: 0.825%
‚úÖ Saved reliability diagram: /content/birds/InceptionV3_Fold9_reliability_after_iso_Birds_Dataset.png

üìä Final Isotonic Regression Comparison on Birds Dataset Test Set
Model                  |   Accu