Cell 1: Imports and Global Configuration

In [1]:
import os
import json
import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import time
from pathlib import Path
import gc
from thop import profile
import torch_pruning as tp
import re
import traceback # Keep for detailed error messages

print("--- Notebook Setup: Imports completed ---")

# --- Configuration ---
ROOT_DIR = "saved_models_and_logs"
OUTPUT_CSV_NB = "model_optimization_summary_notebook.csv"
DEFAULT_NUM_CLASSES = 1000
FIXED_NUM_CLASSES = 1000 # For model reconstruction consistency

# --- Uniform Evaluation Configuration ---
VALIDATION_DATA_PATH = "imagenet-mini/val" # MAKE SURE THIS PATH IS CORRECT
BATCH_SIZE_EVAL = 32
NUM_WORKERS_EVAL = 0 # Set to 0 for Windows or if issues, >0 for Linux if beneficial
MAX_EVAL_BATCHES = 125 # Max batches for accuracy evaluation (set to float('inf') for all)

# --- Device and Input Tensors ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

INPUT_TENSOR_CPU = torch.randn(1, 3, 224, 224)
INPUT_TENSOR_GPU = None
if DEVICE.type == 'cuda':
    try:
        INPUT_TENSOR_GPU = INPUT_TENSOR_CPU.to(DEVICE)
    except Exception as e_cuda_init:
        print(f"ERROR initializing INPUT_TENSOR_GPU on CUDA: {e_cuda_init}")
        INPUT_TENSOR_GPU = None # Ensure it's None if failed

WARMUP_INFERENCES = 2
TIMED_INFERENCES = 5

GPU_UNSTABLE_QUANTIZED_MODELS = [
    "resnet18pretrained_distilled_quant_ptq_int8_perchannel_post",
    "resnet18pretrained_distilled_quant_ptq_int8_pertensor_post",
    "resnet18pretrained_distilled_quant_qat_int8_epochs8",
    "resnet50_quant_ptq_int8_perchannel_post",
    "resnet50_quant_ptq_int8_pertensor_post",
    "resnet50_quant_qat_int8_epochs8",
]

# --- DataFrame to store all results ---
# We'll populate this as we go
results_df = pd.DataFrame()
current_eval_experiment_id_nb = "" # For logging within evaluate_model_uniformly
baseline_metrics_nb = {} # To store baseline metrics for relative calculations

--- Notebook Setup: Imports completed ---
Using device: cuda


Cell 2: Core Helper Functions (Path, Size, Pruning Reconstruction Logic)

In [2]:
# --- Helper: Image Transforms ---
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
eval_transforms = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize,
])

# --- Helper: Model File and Size ---
def get_model_file_path_nb(experiment_path_str):
    experiment_path = Path(experiment_path_str)
    # Prioritize 'model_final.pth' as it's common in your structured pruning logs
    specific_model_file = experiment_path / "model_final.pth"
    if specific_model_file.exists():
        return str(specific_model_file)
        
    pth_files = list(experiment_path.glob("*.pth"))
    if pth_files:
        for common_name in ["model_quantized.pth"]: # Check other common names
            for p_file in pth_files:
                if p_file.name == common_name: return str(p_file)
        for p_file in pth_files: # Specific baseline names
            if "baseline_ft_imagenetmini_final.pth" in p_file.name: return str(p_file)
        # Fallback: return the first .pth file found if common names aren't present
        # print(f"    Note ({experiment_path.name}): Using first .pth file found: {pth_files[0]} as no common/specific name matched.")
        return str(pth_files[0])
    # print(f"    Warning ({experiment_path.name}): No .pth file found in {experiment_path_str}")
    return None

def get_model_size_mb_nb(model_path_str):
    if model_path_str and os.path.exists(model_path_str):
        return os.path.getsize(model_path_str) / (1024 * 1024)
    return None

# --- Model Definition and Pruning Application (FROM SCRIPT 1) ---
def get_base_resnet50_model_for_reconstruction_nb():
    model = models.resnet50(weights=None, num_classes=FIXED_NUM_CLASSES)
    return model

def apply_structured_pruning_to_model_for_reconstruction_nb(
    model_to_prune, example_inputs, target_pruning_rate_per_layer, device_obj
):
    model_to_prune.to(device_obj)
    example_inputs = example_inputs.to(device_obj)
    ignored_layers = []
    for name, m in model_to_prune.named_modules():
        if isinstance(m, nn.Linear) and m.out_features == FIXED_NUM_CLASSES:
            ignored_layers.append(m)
    try:
        importance = tp.importance.MagnitudeImportance(p=1) # L1 norm
        pruner = tp.pruner.MagnitudePruner(
            model=model_to_prune, example_inputs=example_inputs, importance=importance,
            iterative_steps=1, pruning_ratio=target_pruning_rate_per_layer,
            global_pruning=False, ignored_layers=ignored_layers,
        )
        pruner.step()
    except Exception as e_prune:
        print(f"      ERROR during tp.pruner.MagnitudePruner step (rate {target_pruning_rate_per_layer}): {e_prune}")
        return None # Indicate failure
    return model_to_prune

def get_pruning_config_from_log_for_reconstruction_nb(log_file_path_str):
    """Helper to load log and extract key pruning param for a single stage/one-shot."""
    log_file_path = Path(log_file_path_str)
    if not log_file_path.exists():
        # print(f"    Log file not found: {log_file_path}")
        return None
    try:
        with open(log_file_path, 'r') as f:
            log_data = json.load(f)

        # For one-shot, directly from config_details
        if 'config_details' in log_data and 'target_filter_pruning_rate_per_layer' in log_data['config_details']:
            rate = log_data['config_details']['target_filter_pruning_rate_per_layer']
            if rate is not None: return {'type': 'one-shot', 'rate': float(rate)}

        # For a single iterative stage, get its own applied rate
        if 'config_details' in log_data and 'applied_step_rate_for_this_stage' in log_data['config_details']:
            rate = log_data['config_details']['applied_step_rate_for_this_stage']
            if rate is not None: return {'type': 'iterative_step', 'rate': float(rate)}
        
        # Fallback for some iterative logs that might only have overall target at a specific stage
        if 'config_details' in log_data and 'target_overall_sparsity_approx_for_this_stage' in log_data['config_details']:
            rate = log_data['config_details']['target_overall_sparsity_approx_for_this_stage']
            if rate is not None:
                # print(f"    Warning: Found 'target_overall_sparsity_approx_for_this_stage' ({rate}) in {log_file_path}. Using for iterative_step reconstruction rate.")
                return {'type': 'iterative_step', 'rate': float(rate)}

    except json.JSONDecodeError:
        print(f"    Error decoding JSON from {log_file_path}")
    except Exception as e:
        print(f"    Error processing log {log_file_path}: {e}")
    return None

# Renamed and refactored: Takes a pre-compiled pruning_config
def _reconstruct_model_arch_and_load_weights_nb(model_path_str, device_obj, pruning_config, exp_id_for_log=""):
    """
    Reconstructs model architecture based on pruning_config and loads weights.
    pruning_config must be pre-determined (cumulative for iterative models).
    """
    # print(f"    ({exp_id_for_log}) _reconstruct_model_arch_and_load_weights_nb for: {model_path_str} with config: {pruning_config}")
    
    if not pruning_config:
        print(f"    ERROR ({exp_id_for_log}): No pruning_config provided for {model_path_str}.")
        return None

    reconstructed_model = get_base_resnet50_model_for_reconstruction_nb() # Assuming ResNet50 for structured
    reconstructed_model.to(device_obj)
    example_inputs_local = INPUT_TENSOR_CPU.to(device_obj) # Use CPU tensor, move to device

    try:
        if pruning_config['type'] == 'one-shot':
            rate = pruning_config['rate']
            # print(f"      ({exp_id_for_log}) Applying one-shot pruning for reconstruction with rate {rate}")
            reconstructed_model = apply_structured_pruning_to_model_for_reconstruction_nb(
                reconstructed_model, example_inputs_local, rate, device_obj)
        elif pruning_config['type'] == 'iterative':
            step_rates = pruning_config.get('step_rates', [])
            if not step_rates:
                print(f"    ERROR ({exp_id_for_log}): Iterative pruning_config for {model_path_str} has no step_rates.")
                return None
            current_arch_model = reconstructed_model
            # print(f"      ({exp_id_for_log}) Applying iterative reconstruction with rates: {step_rates}")
            for i, step_rate in enumerate(step_rates):
                # print(f"        ({exp_id_for_log}) Applying iterative step {i+1} with rate {step_rate}")
                current_arch_model = apply_structured_pruning_to_model_for_reconstruction_nb(
                    current_arch_model, example_inputs_local, step_rate, device_obj)
                if current_arch_model is None:
                    print(f"      ERROR ({exp_id_for_log}): Iterative pruning step {i+1} (rate {step_rate}) failed during reconstruction for {model_path_str}.")
                    return None
            reconstructed_model = current_arch_model
        else:
            print(f"    ERROR ({exp_id_for_log}): Unknown pruning_config type: {pruning_config.get('type')} for {model_path_str}")
            return None
        
        if reconstructed_model is None:
            print(f"    ERROR ({exp_id_for_log}): Model became None after pruning application for {model_path_str}")
            return None

        # Load state_dict with weights_only=True where possible
        state_dict = torch.load(model_path_str, map_location=device_obj, weights_only=True)
        
        if all(key.startswith('module.') for key in state_dict.keys()):
            state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
        if 'model' in state_dict and isinstance(state_dict['model'], dict): state_dict = state_dict['model']
        elif 'state_dict' in state_dict and isinstance(state_dict['state_dict'], dict): state_dict = state_dict['state_dict']
        
        reconstructed_model.load_state_dict(state_dict)
        # print(f"    ({exp_id_for_log}) State_dict loaded into RECONSTRUCTED structured model: {model_path_str}")
        reconstructed_model.eval()
        return reconstructed_model
    except Exception as e:
        print(f"    ERROR in _reconstruct_model_arch_and_load_weights_nb for {model_path_str} ({exp_id_for_log}): {e}")
        # import traceback; traceback.print_exc()
        return None

print("--- Helper functions defined ---")

--- Helper functions defined ---


Cell 3: Central Model Loading Function

In [3]:
# Added all_experiments_df to signature
def load_model_for_experiment_nb(exp_info, all_experiments_df, target_device_str='cpu'):
    """
    Loads a model for a given experiment. Handles structured pruning reconstruction.
    exp_info: Pandas Series for the current experiment.
    all_experiments_df: The complete DataFrame of all discovered experiments (for lookups).
    target_device_str: 'cpu' or 'cuda'.
    Returns: loaded_model_obj or None
    """
    model_path = exp_info.get('Model_File_Path')
    exp_path_str = exp_info.get('Experiment_Path') # Directory of the experiment
    base_arch = exp_info.get('Base_Model_Arch')
    num_classes = exp_info.get('Num_Classes', DEFAULT_NUM_CLASSES)
    is_structured = exp_info.get('Is_Structured_Pruning', False)
    exp_id = exp_info.get('Experiment_ID', 'Unknown_Exp')

    if not model_path or not os.path.exists(model_path):
        print(f"      ERROR ({exp_id}): Model file not found at {model_path}")
        return None
    if os.path.getsize(model_path) == 0:
        print(f"      ERROR ({exp_id}): Model file is 0 bytes: {model_path}")
        return None

    device_to_load_on = torch.device(target_device_str)
    loaded_model = None
    
    # Attempt 1: JIT load (common for some quantized/pruned models)
    try:
        loaded_model = torch.jit.load(model_path, map_location=device_to_load_on)
        # print(f"      INFO ({exp_id}): Model loaded using torch.jit.load() on {device_to_load_on}")
        loaded_model.eval()
        return loaded_model
    except Exception:
        pass # Silently try next method

    # Attempt 2: Structured Pruning Reconstruction (if applicable)
    if is_structured:
        # print(f"      INFO ({exp_id}): Structured pruning experiment. Constructing pruning config.")
        pruning_config_for_reconstruction = None
        
        base_exp_name_iter = exp_info.get('Base_Exp_Name_Iterative')
        stage_num_iter = exp_info.get('Stage_Num_Iterative')

        if base_exp_name_iter and stage_num_iter is not None: # It's an iterative model
            # print(f"        Iterative model detected: {base_exp_name_iter}, stage {stage_num_iter}")
            cumulative_step_rates = []
            
            # Find all stages of this iterative experiment from the main DataFrame
            # Ensure we only consider stages up to and including the current one
            relevant_stages_info = all_experiments_df[
                (all_experiments_df['Base_Exp_Name_Iterative'] == base_exp_name_iter) &
                (all_experiments_df['Stage_Num_Iterative'] <= stage_num_iter) &
                (all_experiments_df['Stage_Num_Iterative'].notna()) # Ensure stage number is not NaN
            ].sort_values(by='Stage_Num_Iterative')
            
            # print(f"          Found {len(relevant_stages_info)} stages up to current for '{exp_id}'.")

            for _, stage_row in relevant_stages_info.iterrows():
                stage_log_path = stage_row.get('Log_Path')
                stage_exp_id = stage_row.get('Experiment_ID') # For logging
                # print(f"            Processing previous/current stage {stage_row.get('Stage_Num_Iterative')} (log: {stage_log_path}) for rate.")
                stage_log_pruning_info = get_pruning_config_from_log_for_reconstruction_nb(stage_log_path)
                
                if stage_log_pruning_info and stage_log_pruning_info.get('type') == 'iterative_step':
                    cumulative_step_rates.append(stage_log_pruning_info['rate'])
                else:
                    print(f"        WARNING ({exp_id}): Could not get 'iterative_step' rate for sibling/self '{stage_exp_id}' (stage {stage_row.get('Stage_Num_Iterative')}). Full reconstruction may fail.")
                    cumulative_step_rates = [] # Invalidate if any stage's rate is missing
                    break 
            
            if cumulative_step_rates:
                pruning_config_for_reconstruction = {'type': 'iterative', 'step_rates': cumulative_step_rates}
            else:
                 print(f"        ERROR ({exp_id}): Failed to build cumulative step rates for iterative model. Reconstruction might fail.")

        else: # It's one-shot structured
            # print(f"        One-shot structured model: {exp_id}")
            log_path_current_exp = exp_info.get('Log_Path')
            one_shot_pruning_info = get_pruning_config_from_log_for_reconstruction_nb(log_path_current_exp)
            if one_shot_pruning_info and one_shot_pruning_info.get('type') == 'one-shot':
                pruning_config_for_reconstruction = one_shot_pruning_info
            # Handle case where an iterative log might be mistakenly parsed as one-shot if stage info is missing
            elif one_shot_pruning_info and one_shot_pruning_info.get('type') == 'iterative_step':
                print(f"        Warning ({exp_id}): Found 'iterative_step' in log but no base_exp_name/stage. Treating as one-shot with rate {one_shot_pruning_info['rate']}.")
                pruning_config_for_reconstruction = {'type': 'one-shot', 'rate': one_shot_pruning_info['rate']}
            else:
                print(f"        ERROR ({exp_id}): Could not get 'one-shot' pruning config from log {log_path_current_exp}.")

        if pruning_config_for_reconstruction:
            # print(f"      Attempting reconstruction for {exp_id} with derived config: {pruning_config_for_reconstruction}")
            reconstructed = _reconstruct_model_arch_and_load_weights_nb(
                model_path, device_to_load_on, pruning_config_for_reconstruction, exp_id
            )
            if reconstructed:
                # print(f"      INFO ({exp_id}): Successfully reconstructed structured model on {device_to_load_on}.")
                reconstructed.eval()
                return reconstructed
            else:
                print(f"      WARNING ({exp_id}): Failed to reconstruct structured model with derived config. Will try standard load as fallback.")
        else:
            print(f"      WARNING ({exp_id}): Could not determine a valid pruning_config_for_reconstruction for structured model. Will try standard load as fallback.")
            # Fall through to standard loading if config determination or reconstruction fails

    # Attempt 3: Standard torch.load (full model or state_dict) - Fallback
    try:
        # For full model (if torch.save(model, path) was used), weights_only must be False (current default)
        _raw_loaded_content = torch.load(model_path, map_location=device_to_load_on) # weights_only=False default
        if isinstance(_raw_loaded_content, torch.nn.Module):
            loaded_model = _raw_loaded_content
            # print(f"      INFO ({exp_id}): Model loaded as full nn.Module on {device_to_load_on} (fallback).")
        elif isinstance(_raw_loaded_content, dict): # Likely a state_dict
            # print(f"      INFO ({exp_id}): Content is dict, attempting state_dict load for {base_arch} on {device_to_load_on} (fallback).")
            if base_arch == "ResNet18": model_instance = models.resnet18(weights=None, num_classes=num_classes)
            elif base_arch == "ResNet50": model_instance = models.resnet50(weights=None, num_classes=num_classes)
            else:
                print(f"      ERROR ({exp_id}): Unknown base_arch '{base_arch}' for fallback state_dict load.")
                return None
            
            state_dict = _raw_loaded_content
            if any(k.startswith('module.') for k in state_dict.keys()):
                state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
            if 'model' in state_dict and isinstance(state_dict['model'], dict): state_dict = state_dict['model']
            elif 'state_dict' in state_dict and isinstance(state_dict['state_dict'], dict): state_dict = state_dict['state_dict']

            model_instance.load_state_dict(state_dict) # This might fail if arch is wrong
            loaded_model = model_instance
            # print(f"      INFO ({exp_id}): Fallback state_dict loaded into {base_arch} instance on {device_to_load_on}.")
        else:
            print(f"      ERROR ({exp_id}): Fallback loaded object is neither nn.Module nor dict: {type(_raw_loaded_content)}")
            return None
        
        if loaded_model:
            loaded_model.eval()
            return loaded_model.to(device_to_load_on)

    except RuntimeError as e_load:
        if "Error(s) in loading state_dict" in str(e_load):
             # This is expected if structured pruning reconstruction failed and we try to load into a base model.
             print(f"      INFO ({exp_id}): Fallback state_dict load failed (likely arch mismatch for structured model, as reconstruction may have failed): {str(e_load).splitlines()[0]}")
        else: # Other RuntimeError
            print(f"      ERROR ({exp_id}): During fallback torch.load or state_dict assignment: {str(e_load).splitlines()[0]}")
        return None
    except Exception as e_gen_load:
        print(f"      ERROR ({exp_id}): General error during fallback model loading: {str(e_gen_load).splitlines()[0]}")
        return None
        
    print(f"      ERROR ({exp_id}): Model could not be loaded by any method.")
    return None

print("--- Central model loader defined ---")

--- Central model loader defined ---


Cell 4: Experiment Discovery and DataFrame Initialization

In [4]:
def discover_experiments_nb():
    print(f"--- Discovering experiments in: {ROOT_DIR} ---")
    discovered_experiments = []
    if not os.path.exists(ROOT_DIR):
        print(f"ERROR: ROOT_DIR '{ROOT_DIR}' does not exist!")
        return pd.DataFrame()

    # First, process baselines
    for cat_name_outer in os.listdir(ROOT_DIR):
        cat_path_outer = os.path.join(ROOT_DIR, cat_name_outer)
        if os.path.isdir(cat_path_outer) and ("baseline" in cat_name_outer.lower()):
            exp_name = cat_name_outer
            exp_path = cat_path_outer
            
            base_arch = "Unknown"
            if "resnet18" in exp_name.lower(): base_arch = "ResNet18"
            elif "resnet50" in exp_name.lower(): base_arch = "ResNet50"

            model_file = get_model_file_path_nb(exp_path)
            log_path = os.path.join(exp_path, "log.json")
            num_classes = DEFAULT_NUM_CLASSES
            config_details, training_summary, original_eval_metrics = {}, {}, {}
            if os.path.exists(log_path):
                try:
                    with open(log_path, 'r') as f: log_data_temp = json.load(f)
                    config_details = log_data_temp.get('config_details', {})
                    training_summary = log_data_temp.get('training_summary', {})
                    original_eval_metrics = log_data_temp.get('original_evaluation_metrics_from_log', {})
                    num_classes = config_details.get('num_classes', DEFAULT_NUM_CLASSES)
                except json.JSONDecodeError:
                    print(f"    Warning: JSON error in {log_path} for baseline {exp_name}")
                except Exception as e_log_parse_base:
                     print(f"    Warning: Error parsing log {log_path} for baseline {exp_name}: {e_log_parse_base}")


            exp_data = {
                "Experiment_ID": exp_name,
                "Experiment_Path": exp_path,
                "Log_Path": log_path,
                "Model_File_Path": model_file,
                "Base_Model_Arch": base_arch,
                "Optimization_Category": "Baseline",
                "Specific_Technique": "Baseline",
                "Key_Parameters": "N/A",
                "Is_Structured_Pruning": False,
                "Base_Exp_Name_Iterative": None, # Iterative info not applicable to baselines
                "Stage_Num_Iterative": None,   # Iterative info not applicable to baselines
                "Num_Classes": num_classes,
                "Config_Details_From_Log": config_details,
                "Training_Summary_From_Log": training_summary,
                "Original_Eval_Metrics_From_Log": original_eval_metrics
            }
            discovered_experiments.append(exp_data)

    # Then process other experiments
    for cat_name in os.listdir(ROOT_DIR):
        cat_path = os.path.join(ROOT_DIR, cat_name)
        if not os.path.isdir(cat_path) or "baseline" in cat_name.lower():
            continue

        is_cat_structured = "pruning_structured_iterative" in cat_name.lower() or \
                            "pruning_structured_oneshot" in cat_name.lower()

        for exp_name in os.listdir(cat_path):
            exp_path_str = os.path.join(cat_path, exp_name) # Use string for Path() later if needed
            if not os.path.isdir(exp_path_str):
                continue

            base_arch = "ResNet50" # Default
            if "resnet18" in exp_name.lower(): base_arch = "ResNet18"
            if cat_name == "combined_distilled_quantized" and "resnet18" in exp_name.lower(): base_arch = "ResNet18" 

            model_file = get_model_file_path_nb(exp_path_str)
            log_path_str_current = os.path.join(exp_path_str, "log.json")
            
            current_exp_is_structured = is_cat_structured
            # More robust check for structured based on experiment name patterns
            if not current_exp_is_structured:
                if "prune_struct_it" in exp_name.lower() or \
                   "prune_struct_os" in exp_name.lower() or \
                   "structured_l1_filter" in exp_name.lower() or \
                   (base_arch == "ResNet50" and ("pruning_structured" in cat_name.lower())): # if base is R50 and in struct cat
                    current_exp_is_structured = True
            
            base_exp_name_iterative = None
            stage_num_iterative = None
            # Check if it's an iterative structured model to parse base name and stage
            # This regex tries to capture common patterns like 'name_stageNUMBER...' or 'name_sNUMBER'
            if current_exp_is_structured and \
               ("iterative" in cat_name.lower() or "it" in exp_name.lower() or "_stage" in exp_name.lower()):
                # Try to match '..._stage<number>...' pattern first
                match = re.search(r"(.+?)(?:_|-)(?:stage|s)(\d+)", exp_name.lower())
                if match:
                    base_exp_name_iterative = match.group(1)
                    stage_num_iterative = int(match.group(2))
                else: # Fallback if no explicit 'stage' or 's' prefix but seems iterative
                    # This part might need refinement based on your specific naming for iterative stages
                    # if no clear stage number is found, it won't be treated as iterative by the loader
                    # print(f"  Warning: Could not parse base_name/stage for potential iterative: {exp_name} in {cat_name}")
                    pass
            
            # Ensure base_arch is correct for structured ResNet50 if not already set
            if current_exp_is_structured and base_arch == "Unknown" and "resnet50" in exp_name.lower():
                 base_arch = "ResNet50"


            num_classes = DEFAULT_NUM_CLASSES
            config_details, training_summary, original_eval_metrics, quant_specific_details = {}, {}, {}, {}
            specific_tech_parts, key_params_parts = [], []

            if os.path.exists(log_path_str_current):
                try:
                    with open(log_path_str_current, 'r') as f: log_data_temp = json.load(f)
                    config_details = log_data_temp.get('config_details', {})
                    training_summary = log_data_temp.get('training_summary', {})
                    original_eval_metrics = log_data_temp.get('original_evaluation_metrics_from_log', {})
                    quant_specific_details = log_data_temp.get('quantization_specific_details', {})
                    
                    num_classes = config_details.get('num_classes', DEFAULT_NUM_CLASSES)
                    if 'student_config' in config_details and isinstance(config_details['student_config'], dict):
                        num_classes = config_details['student_config'].get('num_classes', num_classes)

                    # --- Populate Specific_Technique, Key_Parameters (from original script) ---
                    if config_details.get('teacher_model_architecture'):
                        specific_tech_parts.append("Knowledge Distillation")
                        teacher = config_details.get('teacher_model_architecture')
                        student = config_details.get('student_model_architecture', base_arch)
                        key_params_parts.append(f"T:{teacher}->S:{student}")
                        if base_arch == "ResNet50" and "resnet18" in student.lower(): base_arch = "ResNet18"

                    quant_method_cfg = str(config_details.get('quantization_method_type', '')).lower()
                    if "kmeans" in quant_method_cfg or "kmeans" in exp_name.lower():
                        specific_tech_parts.append("KMeans Quant")
                        clusters = config_details.get('kmeans_clusters') or quant_specific_details.get('kmeans_clusters')
                        if clusters: key_params_parts.append(f"Clusters: {clusters}")
                    elif "ptq" in quant_method_cfg or ("quant" in exp_name.lower() and "ptq" in exp_name.lower()):
                        tech = "PTQ INT8"
                        if "per_channel" in quant_method_cfg or "perchannel" in exp_name.lower(): tech += " (Per-Channel)"
                        elif "per_tensor" in quant_method_cfg or "pertensor" in exp_name.lower(): tech += " (Per-Tensor)"
                        else: 
                             if "perchannel" in exp_name.lower(): tech += " (Per-Channel)"
                             elif "pertensor" in exp_name.lower(): tech += " (Per-Tensor)"
                        specific_tech_parts.append(tech)
                    elif "qat" in quant_method_cfg or ("quant" in exp_name.lower() and "qat" in exp_name.lower()):
                        specific_tech_parts.append("QAT INT8")
                        epochs = config_details.get('qat_epochs')
                        if epochs is not None: key_params_parts.append(f"QAT Epochs: {epochs}")

                    pruning_tech_exp_name = exp_name.lower()
                    pruning_method_cfg = config_details.get('pruning_method_name', '').lower()
                    pruning_strat_cfg = config_details.get('pruning_strategy_type', '').lower()
                    
                    # Check if specifically structured from name or config
                    is_exp_name_struct_it = "prune_struct_it" in pruning_tech_exp_name or \
                                            "iterative_structured" in pruning_strat_cfg or \
                                            (base_exp_name_iterative is not None) # If parsed as iterative
                    is_exp_name_struct_os = ("prune_struct_os" in pruning_tech_exp_name or \
                                            "one_shot_structured" in pruning_strat_cfg or \
                                            "structured_l1_filter" in pruning_method_cfg) and \
                                            not is_exp_name_struct_it # Ensure it's not also iterative

                    if is_exp_name_struct_it:
                        specific_tech_parts.append("Iterative Structured Pruning (L1 Filter)")
                        # For iterative, the key_params (rates) are derived from the log of the specific stage
                        log_pr_conf = get_pruning_config_from_log_for_reconstruction_nb(Path(log_path_str_current))
                        if log_pr_conf and log_pr_conf.get('type') == 'iterative_step': 
                            key_params_parts.append(f"Stage Rate: {log_pr_conf['rate']*100:.1f}%")
                        # Also ensure current_exp_is_structured is True
                        current_exp_is_structured = True
                    elif is_exp_name_struct_os:
                        specific_tech_parts.append("One-Shot Structured Pruning (L1 Filter)")
                        log_pr_conf = get_pruning_config_from_log_for_reconstruction_nb(Path(log_path_str_current))
                        if log_pr_conf and log_pr_conf.get('type') == 'one-shot':
                             key_params_parts.append(f"Rate: {log_pr_conf['rate']*100:.1f}%")
                        current_exp_is_structured = True
                    elif "prune_nm" in pruning_tech_exp_name or "nm_sparsity" in pruning_method_cfg:
                        if "N:M Sparsity" not in specific_tech_parts: specific_tech_parts.append("N:M Sparsity")
                        n_val = config_details.get('nm_sparsity_n', 2); m_val = config_details.get('nm_sparsity_m', 4)
                        key_params_parts.append(f"N:{n_val}, M:{m_val}")
                    elif "prune_unstruct_it" in pruning_tech_exp_name or "iterative_unstructured" in pruning_strat_cfg:
                        specific_tech_parts.append("Iterative Unstructured Pruning (L1)")
                    elif "prune_unstruct_os" in pruning_tech_exp_name or "one_shot_unstructured" in pruning_strat_cfg:
                        specific_tech_parts.append("One-Shot Unstructured Pruning (L1)")

                    # General pruning sparsity parameter if not specifically handled by structured
                    if any("Pruning" in tech for tech in specific_tech_parts) and not (is_exp_name_struct_it or is_exp_name_struct_os):
                        target_sparsities = [
                            config_details.get('target_overall_sparsity_approx_for_this_stage'),
                            # config_details.get('target_filter_pruning_rate_per_layer'), # Usually for structured
                            config_details.get('target_sparsity_for_this_stage'),
                            config_details.get('target_sparsity')
                        ]
                        for sp_val in target_sparsities:
                            if sp_val is not None:
                                try: key_params_parts.append(f"Target Sparsity: {float(sp_val)*100:.1f}%")
                                except ValueError: key_params_parts.append(f"Target Sparsity: {sp_val}")
                                break
                except json.JSONDecodeError:
                    print(f"    Warning: JSON error in {log_path_str_current} for {exp_name}")
                except Exception as e_log_parse:
                    print(f"    Warning: Error parsing log {log_path_str_current} for {exp_name}: {e_log_parse}")


            opt_cat_map = {
                "combined_distilled_quantized": "Combined", "knowledge_distillation": "Knowledge Distillation",
                "pruning_nm_sparsity": "Pruning", "pruning_structured_iterative": "Pruning",
                "pruning_structured_oneshot": "Pruning", "pruning_unstructured_iterative": "Pruning",
                "pruning_unstructured_oneshot": "Pruning", "quantization_kmeans": "Quantization",
                "quantization_ptq_int8": "Quantization", "quantization_qat_int8": "Quantization",
            }
            optimization_category = opt_cat_map.get(cat_name, "Other")


            exp_data = {
                "Experiment_ID": exp_name,
                "Experiment_Path": exp_path_str,
                "Log_Path": log_path_str_current,
                "Model_File_Path": model_file,
                "Base_Model_Arch": base_arch,
                "Optimization_Category": optimization_category,
                "Specific_Technique": " + ".join(list(dict.fromkeys(specific_tech_parts))) if specific_tech_parts else "Other",
                "Key_Parameters": "; ".join(list(dict.fromkeys(key_params_parts))) if key_params_parts else "N/A", # Remove duplicates
                "Is_Structured_Pruning": current_exp_is_structured,
                "Base_Exp_Name_Iterative": base_exp_name_iterative,
                "Stage_Num_Iterative": stage_num_iterative,      
                "Num_Classes": num_classes,
                "Config_Details_From_Log": config_details, 
                "Training_Summary_From_Log": training_summary,
                "Original_Eval_Metrics_From_Log": original_eval_metrics
            }
            discovered_experiments.append(exp_data)

    df = pd.DataFrame(discovered_experiments)
    if not df.empty:
        # Ensure Stage_Num_Iterative is numeric for sorting, NaNs are fine
        if 'Stage_Num_Iterative' in df.columns:
             df['Stage_Num_Iterative'] = pd.to_numeric(df['Stage_Num_Iterative'], errors='coerce')
        df = df.set_index("Experiment_ID", drop=False) # Keep Experiment_ID also as a column
    print(f"--- Discovery finished. Found {len(df)} experiments. ---")
    return df

# Initialize/Re-initialize the global DataFrame
results_df = discover_experiments_nb() # This will be the single source of truth for experiment info
if not results_df.empty:
    # Display new columns for verification, especially for iterative models
    iterative_check_df = results_df[results_df['Base_Exp_Name_Iterative'].notna()]
    if not iterative_check_df.empty:
        print("\n--- Iterative Model Parsing Check (sample): ---")
        display(iterative_check_df[['Experiment_ID', 'Base_Exp_Name_Iterative', 'Stage_Num_Iterative', 'Log_Path']].head())
    else:
        print("\n--- No iterative models parsed with Base_Exp_Name_Iterative. ---")
    
    print("\n--- All Discovered Experiments Sample: ---")
    display(results_df[['Experiment_ID', 'Base_Model_Arch', 'Is_Structured_Pruning', 'Model_File_Path', 'Log_Path']].head())

else:
    print("No experiments discovered. Check ROOT_DIR and folder structure.")

--- Discovering experiments in: saved_models_and_logs ---
--- Discovery finished. Found 25 experiments. ---

--- Iterative Model Parsing Check (sample): ---


Unnamed: 0_level_0,Experiment_ID,Base_Exp_Name_Iterative,Stage_Num_Iterative,Log_Path
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
resnet50_prune_struct_it_l1filter_stage1_approx_sp50_ft,resnet50_prune_struct_it_l1filter_stage1_appro...,resnet50_prune_struct_it_l1filter,1.0,saved_models_and_logs\pruning_structured_itera...
resnet50_prune_struct_it_l1filter_stage2_approx_sp75_ft,resnet50_prune_struct_it_l1filter_stage2_appro...,resnet50_prune_struct_it_l1filter,2.0,saved_models_and_logs\pruning_structured_itera...
resnet50_prune_struct_it_l1filter_stage3_approx_sp90_ft,resnet50_prune_struct_it_l1filter_stage3_appro...,resnet50_prune_struct_it_l1filter,3.0,saved_models_and_logs\pruning_structured_itera...



--- All Discovered Experiments Sample: ---


Unnamed: 0_level_0,Experiment_ID,Base_Model_Arch,Is_Structured_Pruning,Model_File_Path,Log_Path
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
resnet18_baseline,resnet18_baseline,ResNet18,False,saved_models_and_logs\resnet18_baseline\resnet...,saved_models_and_logs\resnet18_baseline\log.json
resnet50_baseline,resnet50_baseline,ResNet50,False,saved_models_and_logs\resnet50_baseline\resnet...,saved_models_and_logs\resnet50_baseline\log.json
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,ResNet18,False,saved_models_and_logs\combined_distilled_quant...,saved_models_and_logs\combined_distilled_quant...
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,ResNet18,False,saved_models_and_logs\combined_distilled_quant...,saved_models_and_logs\combined_distilled_quant...
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,ResNet18,False,saved_models_and_logs\combined_distilled_quant...,saved_models_and_logs\combined_distilled_quant...


Cell 5: Calculate Model Disk Size

In [5]:
def calculate_and_store_disk_sizes(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Run discovery cell first.")
        return
    print("\n--- Calculating Model Disk Sizes ---")
    sizes_mb = {}
    for exp_id, row in df_to_update.iterrows():
        model_file = row.get('Model_File_Path')
        size = get_model_size_mb_nb(model_file)
        sizes_mb[exp_id] = size if size is not None else "N/A (File Missing/Error)"
        if exp_id.startswith("baseline") and size is not None and pd.notna(size): # For baselines_metrics_nb
             if row['Base_Model_Arch'] not in baseline_metrics_nb: baseline_metrics_nb[row['Base_Model_Arch']] = {}
             baseline_metrics_nb[row['Base_Model_Arch']]['model_size_mb_disk'] = size


    df_to_update['Model_Size_MB_Disk'] = pd.Series(sizes_mb)
    print("--- Disk sizes calculated and stored. ---")
    display(df_to_update[['Experiment_ID', 'Model_Size_MB_Disk']].head())

if not results_df.empty:
    calculate_and_store_disk_sizes(results_df)
else:
    print("Skipping disk size calculation as no experiments were discovered.")


--- Calculating Model Disk Sizes ---
--- Disk sizes calculated and stored. ---


Unnamed: 0_level_0,Experiment_ID,Model_Size_MB_Disk
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
resnet18_baseline,resnet18_baseline,44.669577
resnet50_baseline,resnet50_baseline,97.796141
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,44.667328
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,11.302094
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,11.300758


Cell 6: Calculate FLOPs and Parameters

In [6]:
def calculate_and_store_flops_params(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Run discovery cell first.")
        return
    print("\n--- Calculating FLOPs and Parameters ---")
    flops_list = {}
    params_list = {}

    for exp_id, row in df_to_update.iterrows():
        print(f"  Processing FLOPs/Params for: {exp_id}")
        # Load model on CPU for thop, unless it's JIT and we fallback to baseline
        # For structured pruning, load_model_for_experiment_nb should reconstruct on CPU if target_device_str='cpu'
        model_obj = load_model_for_experiment_nb(row, df_to_update, target_device_str='cpu')
        
        current_flops = "N/A (Load Error)"
        current_params = "N/A (Load Error)"

        if model_obj:
            is_jit_module = isinstance(model_obj, torch.jit.ScriptModule)
            thop_success = False
            if not is_jit_module:
                try:
                    # Ensure model is on CPU for thop
                    model_obj_cpu = model_obj.to(torch.device('cpu'))
                    macs, params = profile(model_obj_cpu, inputs=(INPUT_TENSOR_CPU,), verbose=False)
                    current_flops = macs / 1e9  # GMACs
                    current_params = params / 1e6 # Millions
                    thop_success = True
                    del model_obj_cpu
                except Exception as e_thop:
                    # print(f"      Warning ({exp_id}): thop failed: {e_thop}. Will try fallback.")
                    current_flops = "N/A (thop Error)"
                    current_params = "N/A (thop Error)"
            else: # Is JIT
                # print(f"      INFO ({exp_id}): Model is JIT ScriptModule. Skipping thop, will use baseline fallback if applicable.")
                current_flops = "N/A (JIT, thop N/A)" # Placeholder before fallback
                current_params = "N/A (JIT, thop N/A)"

            # Fallback for JIT or thop failure (quantized, kmeans, etc.)
            is_ao_quant_or_kmeans = "ptq" in exp_id.lower() or \
                                    "qat" in exp_id.lower() or \
                                    "kmeans" in exp_id.lower()
            
            if (is_jit_module or not thop_success) and (is_ao_quant_or_kmeans or exp_id.startswith("baseline")): # Baselines might be JIT saved
                base_arch = row['Base_Model_Arch']
                # Try to get baseline from already processed baselines if available
                # This part is tricky as baselines themselves are being processed in this loop.
                # We rely on baseline_metrics_nb potentially being populated if a baseline was processed by thop *before* this model.
                # Or, if this *is* a baseline and thop failed, this fallback won't help for itself.
                if base_arch in baseline_metrics_nb and baseline_metrics_nb[base_arch]:
                    # print(f"Attempting fallback to baseline for {exp_id} using {base_arch}")
                    baseline_f = baseline_metrics_nb[base_arch].get("flops_gmacs")
                    baseline_p = baseline_metrics_nb[base_arch].get("params_millions")
                    if pd.notna(baseline_f) and current_flops.startswith("N/A"): current_flops = baseline_f
                    if pd.notna(baseline_p) and current_params.startswith("N/A"): current_params = baseline_p
                elif not (current_flops != "N/A (thop Error)" and current_flops != "N/A (Load Error)"): # If not successfully calculated and no baseline
                    current_flops = "N/A (Fallback Miss)"
                    current_params = "N/A (Fallback Miss)"


            del model_obj
            if DEVICE.type == 'cuda': torch.cuda.empty_cache()
            gc.collect()

        flops_list[exp_id] = current_flops
        params_list[exp_id] = current_params
        
        # Store for baseline_metrics_nb if this is a baseline and successfully calculated
        if exp_id.startswith("baseline"):
            if row['Base_Model_Arch'] not in baseline_metrics_nb: baseline_metrics_nb[row['Base_Model_Arch']] = {}
            if pd.notna(current_flops) and isinstance(current_flops, (int,float)):
                 baseline_metrics_nb[row['Base_Model_Arch']]['flops_gmacs'] = current_flops
            if pd.notna(current_params) and isinstance(current_params, (int,float)):
                 baseline_metrics_nb[row['Base_Model_Arch']]['params_millions'] = current_params


    df_to_update['FLOPs_GMACs'] = pd.Series(flops_list)
    df_to_update['Params_Millions'] = pd.Series(params_list)
    print("--- FLOPs and Parameters calculated and stored. ---")
    # print("DEBUG: baseline_metrics_nb after FLOPs/Params:", baseline_metrics_nb)
    display(df_to_update[['Experiment_ID', 'FLOPs_GMACs', 'Params_Millions']].head())

if not results_df.empty:
    calculate_and_store_flops_params(results_df)
else:
    print("Skipping FLOPs/Params calculation as no experiments were discovered.")


--- Calculating FLOPs and Parameters ---
  Processing FLOPs/Params for: resnet18_baseline


  _raw_loaded_content = torch.load(model_path, map_location=device_to_load_on) # weights_only=False default


  Processing FLOPs/Params for: resnet50_baseline
  Processing FLOPs/Params for: resnet18pretrained_distilled_quant_kmeans_256clusters_post
  Processing FLOPs/Params for: resnet18pretrained_distilled_quant_ptq_int8_perchannel_post
  Processing FLOPs/Params for: resnet18pretrained_distilled_quant_ptq_int8_pertensor_post
  Processing FLOPs/Params for: resnet18pretrained_distilled_quant_qat_int8_epochs8
  Processing FLOPs/Params for: resnet50_to_resnet18pretrained_kd
  Processing FLOPs/Params for: resnet50_to_resnet18scratch_kd
  Processing FLOPs/Params for: resnet50_prune_nm24_ft
  Processing FLOPs/Params for: resnet50_prune_struct_it_l1filter_stage1_approx_sp50_ft
  Processing FLOPs/Params for: resnet50_prune_struct_it_l1filter_stage2_approx_sp75_ft
  Processing FLOPs/Params for: resnet50_prune_struct_it_l1filter_stage3_approx_sp90_ft
  Processing FLOPs/Params for: resnet50_prune_struct_os_l1filter_fp30_ft
  Processing FLOPs/Params for: resnet50_prune_struct_os_l1filter_fp55_ft
  Process

Unnamed: 0_level_0,Experiment_ID,FLOPs_GMACs,Params_Millions
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
resnet18_baseline,resnet18_baseline,1.824034,11.689512
resnet50_baseline,resnet50_baseline,4.133743,25.557032
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,1.824034,11.689512
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,"N/A (JIT, thop N/A)","N/A (JIT, thop N/A)"
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,"N/A (JIT, thop N/A)","N/A (JIT, thop N/A)"


Cell 7: Calculate Accuracy

In [7]:
# Define evaluate_model_uniformly (from original script, slightly adapted for notebook context)
@torch.no_grad()
def evaluate_model_uniformly_nb(model, device_str_eval, num_classes_eval, max_batches_to_eval, exp_id_for_log):
    global current_eval_experiment_id_nb # Use the notebook-specific global
    current_eval_experiment_id_nb = exp_id_for_log # For logging inside this function

    if not os.path.exists(VALIDATION_DATA_PATH):
        print(f"      ERROR ({current_eval_experiment_id_nb}): Val data path not found: {VALIDATION_DATA_PATH}")
        return "N/A (Val Data Missing)"
    try:
        val_dataset = ImageFolder(VALIDATION_DATA_PATH, eval_transforms)
        if len(val_dataset.classes) != num_classes_eval and num_classes_eval != FIXED_NUM_CLASSES : # Allow FIXED_NUM_CLASSES for imagenet default
             # Only warn if model's num_classes is truly different and not the standard 1000 for ImageNet pretrains
            print(f"      WARNING ({current_eval_experiment_id_nb}): Dataset classes ({len(val_dataset.classes)}) vs Model classes ({num_classes_eval}). Accuracy may be misleading.")
        if len(val_dataset) == 0:
            print(f"      WARNING ({current_eval_experiment_id_nb}): Validation dataset at '{VALIDATION_DATA_PATH}' is empty.")
            return 0.0 # Or "N/A (Val Data Empty)"
        
        # Limit workers for DataLoader if on Windows or for stability
        current_num_workers = NUM_WORKERS_EVAL if DEVICE.type == 'cuda' else 0 # Often 0 is safer on Windows

        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE_EVAL, shuffle=False,
                                num_workers=current_num_workers, pin_memory=(True if device_str_eval=='cuda' else False))
    except Exception as e:
        print(f"      ERROR ({current_eval_experiment_id_nb}): Could not load validation data: {e}")
        # traceback.print_exc()
        return f"N/A (Val Data Load Error: {str(e).splitlines()[0]})"

    device_obj_eval = torch.device(device_str_eval)
    model.to(device_obj_eval)
    model.eval()
    correct = 0
    total = 0
    batches_processed = 0
    
    # print(f"      INFO ({current_eval_experiment_id_nb}): Evaluating on device {device_str_eval} for max {max_batches_to_eval} batches.")

    for images, labels in val_loader:
        try:
            images, labels = images.to(device_obj_eval), labels.to(device_obj_eval)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            batches_processed += 1
            if batches_processed >= max_batches_to_eval:
                break
        except Exception as e_batch:
            print(f"      ERROR ({current_eval_experiment_id_nb}) during batch {batches_processed} eval: {e_batch}")
            # traceback.print_exc()
            return "N/A (Batch Eval Error)"

    accuracy = (correct / total) * 100.0 if total > 0 else 0.0 # As percentage
    # print(f"      INFO ({current_eval_experiment_id_nb}): Accuracy = {accuracy:.2f}% ({correct}/{total}) on {batches_processed} batches.")
    return accuracy


def calculate_and_store_accuracy(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Run discovery cell first.")
        return
    if not os.path.exists(VALIDATION_DATA_PATH):
        print(f"ERROR: Validation data path '{VALIDATION_DATA_PATH}' not found. Cannot calculate accuracy.")
        df_to_update['Final_Val_Accuracy'] = "N/A (Val Data Missing)"
        return

    print("\n--- Calculating Model Accuracies ---")
    accuracies = {}
    for exp_id, row in df_to_update.iterrows():
        print(f"  Processing Accuracy for: {exp_id}")
        
        is_gpu_unstable = exp_id in GPU_UNSTABLE_QUANTIZED_MODELS
        eval_device_str = 'cpu' if is_gpu_unstable else DEVICE.type
        if is_gpu_unstable: print(f"      INFO ({exp_id}): Known GPU unstable. Forcing CPU evaluation.")

        # Load model onto the evaluation device
        model_obj = load_model_for_experiment_nb(row, df_to_update, target_device_str=eval_device_str)
        
        current_acc = "N/A (Load Error)"
        if model_obj:
            num_classes = row.get('Num_Classes', DEFAULT_NUM_CLASSES)
            # The model_obj from load_model_for_experiment_nb is already on eval_device_str
            current_acc = evaluate_model_uniformly_nb(model_obj, eval_device_str, num_classes, MAX_EVAL_BATCHES, exp_id)
            
            del model_obj
            if DEVICE.type == 'cuda': torch.cuda.empty_cache()
            gc.collect()
        
        accuracies[exp_id] = current_acc
        if exp_id.startswith("baseline") and isinstance(current_acc, (float, int)): # For baselines_metrics_nb
            if row['Base_Model_Arch'] not in baseline_metrics_nb: baseline_metrics_nb[row['Base_Model_Arch']] = {}
            baseline_metrics_nb[row['Base_Model_Arch']]['val_accuracy'] = current_acc


    df_to_update['Final_Val_Accuracy'] = pd.Series(accuracies)
    print("--- Accuracies calculated and stored. ---")
    # print("DEBUG: baseline_metrics_nb after Accuracy:", baseline_metrics_nb)
    display(df_to_update[['Experiment_ID', 'Final_Val_Accuracy']].head())

if not results_df.empty:
    calculate_and_store_accuracy(results_df)
else:
    print("Skipping accuracy calculation as no experiments were discovered.")


--- Calculating Model Accuracies ---
  Processing Accuracy for: resnet18_baseline


  _raw_loaded_content = torch.load(model_path, map_location=device_to_load_on) # weights_only=False default


  Processing Accuracy for: resnet50_baseline
  Processing Accuracy for: resnet18pretrained_distilled_quant_kmeans_256clusters_post
  Processing Accuracy for: resnet18pretrained_distilled_quant_ptq_int8_perchannel_post
      INFO (resnet18pretrained_distilled_quant_ptq_int8_perchannel_post): Known GPU unstable. Forcing CPU evaluation.
  Processing Accuracy for: resnet18pretrained_distilled_quant_ptq_int8_pertensor_post
      INFO (resnet18pretrained_distilled_quant_ptq_int8_pertensor_post): Known GPU unstable. Forcing CPU evaluation.
  Processing Accuracy for: resnet18pretrained_distilled_quant_qat_int8_epochs8
      INFO (resnet18pretrained_distilled_quant_qat_int8_epochs8): Known GPU unstable. Forcing CPU evaluation.
  Processing Accuracy for: resnet50_to_resnet18pretrained_kd
  Processing Accuracy for: resnet50_to_resnet18scratch_kd
  Processing Accuracy for: resnet50_prune_nm24_ft
  Processing Accuracy for: resnet50_prune_struct_it_l1filter_stage1_approx_sp50_ft
  Processing Accurac

Unnamed: 0_level_0,Experiment_ID,Final_Val_Accuracy
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
resnet18_baseline,resnet18_baseline,50.089217
resnet50_baseline,resnet50_baseline,64.950293
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,53.683406
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,50.777466
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,53.352027


Cell 8: Calculate CPU Inference Time

In [8]:
def calculate_and_store_cpu_inference_time(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Run discovery cell first.")
        return
    print("\n--- Calculating CPU Inference Times ---")
    cpu_times = {}

    for exp_id, row in df_to_update.iterrows():
        print(f"  Processing CPU Time for: {exp_id}")
        # Load model on CPU
        model_obj = load_model_for_experiment_nb(row, df_to_update, target_device_str='cpu')
        
        current_cpu_time = "N/A (Load Error)"
        if model_obj:
            try:
                model_obj.eval() # Ensure eval mode
                with torch.no_grad():
                    for _ in range(WARMUP_INFERENCES): _ = model_obj(INPUT_TENSOR_CPU)
                    
                    timings = []
                    for _ in range(TIMED_INFERENCES):
                        start_time = time.perf_counter()
                        _ = model_obj(INPUT_TENSOR_CPU)
                        end_time = time.perf_counter()
                        timings.append((end_time - start_time) * 1000) # milliseconds
                    current_cpu_time = sum(timings) / len(timings) if timings else "N/A (Timing Error)"
            except Exception as e_cpu_time:
                current_cpu_time = f"N/A (CPU Time Error: {str(e_cpu_time).splitlines()[0]})"
            
            del model_obj
            # No CUDA cache clear needed for CPU, but gc.collect is good
            gc.collect()
            
        cpu_times[exp_id] = current_cpu_time
        if exp_id.startswith("baseline") and isinstance(current_cpu_time, (float, int)): # For baselines_metrics_nb
            if row['Base_Model_Arch'] not in baseline_metrics_nb: baseline_metrics_nb[row['Base_Model_Arch']] = {}
            baseline_metrics_nb[row['Base_Model_Arch']]['inference_cpu_ms'] = current_cpu_time


    df_to_update['Inference_Time_ms_CPU (Batch 1)'] = pd.Series(cpu_times)
    print("--- CPU Inference Times calculated and stored. ---")
    # print("DEBUG: baseline_metrics_nb after CPU Time:", baseline_metrics_nb)
    display(df_to_update[['Experiment_ID', 'Inference_Time_ms_CPU (Batch 1)']].head())

if not results_df.empty:
    calculate_and_store_cpu_inference_time(results_df)
else:
    print("Skipping CPU inference time calculation as no experiments were discovered.")


--- Calculating CPU Inference Times ---
  Processing CPU Time for: resnet18_baseline


  _raw_loaded_content = torch.load(model_path, map_location=device_to_load_on) # weights_only=False default


  Processing CPU Time for: resnet50_baseline
  Processing CPU Time for: resnet18pretrained_distilled_quant_kmeans_256clusters_post
  Processing CPU Time for: resnet18pretrained_distilled_quant_ptq_int8_perchannel_post
  Processing CPU Time for: resnet18pretrained_distilled_quant_ptq_int8_pertensor_post
  Processing CPU Time for: resnet18pretrained_distilled_quant_qat_int8_epochs8
  Processing CPU Time for: resnet50_to_resnet18pretrained_kd
  Processing CPU Time for: resnet50_to_resnet18scratch_kd
  Processing CPU Time for: resnet50_prune_nm24_ft
  Processing CPU Time for: resnet50_prune_struct_it_l1filter_stage1_approx_sp50_ft
  Processing CPU Time for: resnet50_prune_struct_it_l1filter_stage2_approx_sp75_ft
  Processing CPU Time for: resnet50_prune_struct_it_l1filter_stage3_approx_sp90_ft
  Processing CPU Time for: resnet50_prune_struct_os_l1filter_fp30_ft
  Processing CPU Time for: resnet50_prune_struct_os_l1filter_fp55_ft
  Processing CPU Time for: resnet50_prune_struct_os_l1filter_

Unnamed: 0_level_0,Experiment_ID,Inference_Time_ms_CPU (Batch 1)
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
resnet18_baseline,resnet18_baseline,35.31372
resnet50_baseline,resnet50_baseline,88.06
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,37.22856
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,19.01976
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,18.31738


Cell 9: Calculate GPU Inference Time

In [9]:
def calculate_and_store_gpu_inference_time(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Run discovery cell first.")
        return
    if DEVICE.type != 'cuda' or INPUT_TENSOR_GPU is None:
        print("CUDA not available or INPUT_TENSOR_GPU not initialized. Skipping GPU inference times.")
        df_to_update['Inference_Time_ms_GPU (Batch 1)'] = "N/A (CUDA unavailable or init error)"
        return

    print("\n--- Calculating GPU Inference Times ---")
    gpu_times = {}

    for exp_id, row in df_to_update.iterrows():
        print(f"  Processing GPU Time for: {exp_id}")
        
        if exp_id in GPU_UNSTABLE_QUANTIZED_MODELS:
            print(f"      INFO ({exp_id}): Known JIT GPU unstable. Skipping GPU timing.")
            gpu_times[exp_id] = "N/A (Known JIT GPU Unstable)"
            continue

        # Load model on GPU
        model_obj = load_model_for_experiment_nb(row, df_to_update, target_device_str='cuda')
        current_gpu_time = "N/A (Load Error)"

        if model_obj:
            try:
                model_obj.eval() # Ensure eval mode
                with torch.no_grad():
                    for _ in range(WARMUP_INFERENCES):
                        _ = model_obj(INPUT_TENSOR_GPU)
                        torch.cuda.synchronize(DEVICE) # Ensure warmup op is complete
                    
                    timings = []
                    for _ in range(TIMED_INFERENCES):
                        torch.cuda.synchronize(DEVICE) # Synchronize before starting timer
                        start_time = time.perf_counter()
                        _ = model_obj(INPUT_TENSOR_GPU)
                        torch.cuda.synchronize(DEVICE) # Synchronize after op to ensure it's complete
                        end_time = time.perf_counter()
                        timings.append((end_time - start_time) * 1000) # milliseconds
                    current_gpu_time = sum(timings) / len(timings) if timings else "N/A (Timing Error)"
            except Exception as e_gpu_time:
                current_gpu_time = f"N/A (GPU Time Error: {str(e_gpu_time).splitlines()[0]})"
                # traceback.print_exc()
            
            del model_obj
            torch.cuda.empty_cache()
            gc.collect()
        
        gpu_times[exp_id] = current_gpu_time
        if exp_id.startswith("baseline") and isinstance(current_gpu_time, (float, int)): # For baselines_metrics_nb
            if row['Base_Model_Arch'] not in baseline_metrics_nb: baseline_metrics_nb[row['Base_Model_Arch']] = {}
            baseline_metrics_nb[row['Base_Model_Arch']]['inference_gpu_ms'] = current_gpu_time


    df_to_update['Inference_Time_ms_GPU (Batch 1)'] = pd.Series(gpu_times)
    print("--- GPU Inference Times calculated and stored. ---")
    # print("DEBUG: baseline_metrics_nb after GPU Time:", baseline_metrics_nb)
    display(df_to_update[['Experiment_ID', 'Inference_Time_ms_GPU (Batch 1)']].head())

if not results_df.empty:
    calculate_and_store_gpu_inference_time(results_df)
else:
    print("Skipping GPU inference time calculation as no experiments were discovered.")


--- Calculating GPU Inference Times ---
  Processing GPU Time for: resnet18_baseline


  _raw_loaded_content = torch.load(model_path, map_location=device_to_load_on) # weights_only=False default


  Processing GPU Time for: resnet50_baseline
  Processing GPU Time for: resnet18pretrained_distilled_quant_kmeans_256clusters_post
  Processing GPU Time for: resnet18pretrained_distilled_quant_ptq_int8_perchannel_post
      INFO (resnet18pretrained_distilled_quant_ptq_int8_perchannel_post): Known JIT GPU unstable. Skipping GPU timing.
  Processing GPU Time for: resnet18pretrained_distilled_quant_ptq_int8_pertensor_post
      INFO (resnet18pretrained_distilled_quant_ptq_int8_pertensor_post): Known JIT GPU unstable. Skipping GPU timing.
  Processing GPU Time for: resnet18pretrained_distilled_quant_qat_int8_epochs8
      INFO (resnet18pretrained_distilled_quant_qat_int8_epochs8): Known JIT GPU unstable. Skipping GPU timing.
  Processing GPU Time for: resnet50_to_resnet18pretrained_kd
  Processing GPU Time for: resnet50_to_resnet18scratch_kd
  Processing GPU Time for: resnet50_prune_nm24_ft
  Processing GPU Time for: resnet50_prune_struct_it_l1filter_stage1_approx_sp50_ft
  Processing GPU 

Unnamed: 0_level_0,Experiment_ID,Inference_Time_ms_GPU (Batch 1)
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
resnet18_baseline,resnet18_baseline,3.52352
resnet50_baseline,resnet50_baseline,7.87376
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,3.33568
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,N/A (Known JIT GPU Unstable)
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,N/A (Known JIT GPU Unstable)


Cell 10: Add Log-Based Information and Calculate Relative Metrics

In [10]:
def add_log_based_and_relative_metrics(df_to_update):
    if df_to_update.empty:
        print("Experiment DataFrame is empty. Cannot add log/relative metrics.")
        return
    
    print("\n--- Adding Log-Based Information and Calculating Relative Metrics ---")

    # Columns to extract from logs (if not already present from discovery)
    # Some might be slightly different from your original script, adjust as needed
    # Training_Summary_From_Log and Original_Eval_Metrics_From_Log are already in df_to_update from discovery
    
    acc_before_ft_list = {}
    model_size_log_list = {}
    ach_sparsity_list = {}
    ft_epochs_list = {}
    ft_time_list = {}
    notes_list = {}

    for exp_id, row in df_to_update.iterrows():
        training_summary = row.get('Training_Summary_From_Log', {})
        original_eval_metrics = row.get('Original_Eval_Metrics_From_Log', {})

        # Accuracy Before FT
        acc_b_ft = training_summary.get('accuracy_before_ft')
        if acc_b_ft is None: acc_b_ft = training_summary.get('accuracy_before_ft_this_stage')
        if acc_b_ft is None: acc_b_ft = training_summary.get('evaluation_accuracy_after_pruning_before_ft')
        acc_before_ft_list[exp_id] = acc_b_ft if acc_b_ft is not None else "N/A"
        
        # Model Size from Log
        model_size_log_list[exp_id] = original_eval_metrics.get('model_size_mb', "N/A")
        
        # Achieved Sparsity
        ach_sp = training_summary.get('achieved_overall_parameter_sparsity_percent')
        if ach_sp is None: ach_sp = training_summary.get('achieved_overall_sparsity_percent_after_stage')
        ach_sparsity_list[exp_id] = ach_sp if ach_sp is not None else "N/A"
        
        # FT Epochs
        ft_ep = training_summary.get('num_epochs_trained') or training_summary.get('num_epochs_trained_in_stage', 0)
        ft_epochs_list[exp_id] = ft_ep if ft_ep is not None else 0
        
        # FT Time
        ft_t = training_summary.get('total_training_time_seconds')
        if ft_t is None: ft_t = training_summary.get('total_training_time_seconds_in_stage')
        ft_time_list[exp_id] = ft_t if ft_t is not None and ft_epochs_list[exp_id] > 0 else 0.0
        
        # Notes
        notes_list[exp_id] = training_summary.get('notes', '')

    df_to_update['Accuracy_Before_FT'] = pd.Series(acc_before_ft_list)
    df_to_update['Model_Size_MB_Log'] = pd.Series(model_size_log_list)
    df_to_update['Achieved_Sparsity_Percent'] = pd.Series(ach_sparsity_list)
    df_to_update['FT_Epochs_Run'] = pd.Series(ft_epochs_list)
    df_to_update['FT_Time_seconds'] = pd.Series(ft_time_list)
    df_to_update['Notes_from_Log'] = pd.Series(notes_list)
    
    # --- Calculate Relative Metrics ---
    # Ensure baseline_metrics_nb is populated by running previous cells, especially for baselines.
    print("  Populated baseline_metrics_nb for relative calculation:", baseline_metrics_nb)

    for index, row_series in df_to_update.iterrows():
        exp_id = row_series.get("Experiment_ID")
        opt_cat_str = str(row_series.get("Optimization_Category","")).strip()
        base_model_arch_str = str(row_series.get("Base_Model_Arch", "")).strip()

        # Determine which baseline arch to use (logic from your script)
        baseline_arch_to_use = "ResNet50" # Default
        if opt_cat_str in ["Knowledge Distillation", "Combined"] and base_model_arch_str == "ResNet18":
            baseline_arch_to_use = "ResNet18"
        elif base_model_arch_str == "ResNet18": # If model is ResNet18, compare to ResNet18 baseline
            baseline_arch_to_use = "ResNet18"
        # else: use default ResNet50

        if opt_cat_str == "Baseline":
            # For baselines, their own values are the "baseline" values. Reductions are 0, speedups are 1.
            df_to_update.loc[index, "Baseline_Val_Accuracy"] = pd.to_numeric(row_series.get("Final_Val_Accuracy"), errors='coerce')
            df_to_update.loc[index, "Accuracy_Change_vs_Baseline_pp"] = 0.0
            df_to_update.loc[index, "Accuracy_Retention_Percent"] = 100.0
            df_to_update.loc[index, "Baseline_Model_Size_MB_Disk"] = pd.to_numeric(row_series.get("Model_Size_MB_Disk"), errors='coerce')
            df_to_update.loc[index, "Model_Size_Reduction_vs_Baseline_Percent"] = 0.0
            df_to_update.loc[index, "Baseline_Params_Millions"] = pd.to_numeric(row_series.get("Params_Millions"), errors='coerce')
            df_to_update.loc[index, "Params_Reduction_vs_Baseline_Percent"] = 0.0
            df_to_update.loc[index, "Baseline_FLOPs_GMACs"] = pd.to_numeric(row_series.get("FLOPs_GMACs"), errors='coerce')
            df_to_update.loc[index, "FLOPs_Reduction_vs_Baseline_Percent"] = 0.0
            df_to_update.loc[index, "Baseline_Inference_Time_ms_CPU"] = pd.to_numeric(row_series.get("Inference_Time_ms_CPU (Batch 1)"), errors='coerce')
            df_to_update.loc[index, "Inference_Speedup_vs_Baseline_CPU"] = 1.0
            if DEVICE.type == 'cuda' and "Baseline_Inference_Time_ms_GPU" in df_to_update.columns:
                df_to_update.loc[index, "Baseline_Inference_Time_ms_GPU"] = pd.to_numeric(row_series.get("Inference_Time_ms_GPU (Batch 1)"), errors='coerce')
                df_to_update.loc[index, "Inference_Speedup_vs_Baseline_GPU"] = 1.0
            continue

        if baseline_arch_to_use not in baseline_metrics_nb or not baseline_metrics_nb[baseline_arch_to_use]:
            print(f"    Warning: Baseline metrics for {baseline_arch_to_use} not found for exp {exp_id}. Skipping relative metrics.")
            continue
        
        current_baseline = baseline_metrics_nb[baseline_arch_to_use]
        # print(f"Exp: {exp_id}, Base_Arch_Model: {base_model_arch_str}, Using Baseline: {baseline_arch_to_use}, Metrics: {current_baseline}")

        # Accuracy
        baseline_acc = pd.to_numeric(current_baseline.get("val_accuracy"), errors='coerce')
        df_to_update.loc[index, "Baseline_Val_Accuracy"] = baseline_acc
        final_acc = pd.to_numeric(row_series.get("Final_Val_Accuracy"), errors='coerce')
        if pd.notna(final_acc) and pd.notna(baseline_acc):
            df_to_update.loc[index, "Accuracy_Change_vs_Baseline_pp"] = (final_acc - baseline_acc) # Already in pp if acc is %
            if baseline_acc != 0: df_to_update.loc[index, "Accuracy_Retention_Percent"] = (final_acc / baseline_acc) * 100
            else: df_to_update.loc[index, "Accuracy_Retention_Percent"] = pd.NA
        
        # Model Size (Disk)
        baseline_size_disk = pd.to_numeric(current_baseline.get("model_size_mb_disk"), errors='coerce')
        df_to_update.loc[index, "Baseline_Model_Size_MB_Disk"] = baseline_size_disk
        model_size_disk = pd.to_numeric(row_series.get("Model_Size_MB_Disk"), errors='coerce')
        if pd.notna(model_size_disk) and pd.notna(baseline_size_disk) and baseline_size_disk != 0:
            df_to_update.loc[index, "Model_Size_Reduction_vs_Baseline_Percent"] = ((baseline_size_disk - model_size_disk) / baseline_size_disk) * 100
        
        # Params
        baseline_params = pd.to_numeric(current_baseline.get("params_millions"), errors='coerce')
        df_to_update.loc[index, "Baseline_Params_Millions"] = baseline_params
        current_params = pd.to_numeric(row_series.get("Params_Millions"), errors='coerce')
        if pd.notna(current_params) and pd.notna(baseline_params) and baseline_params != 0:
            df_to_update.loc[index, "Params_Reduction_vs_Baseline_Percent"] = ((baseline_params - current_params) / baseline_params) * 100

        # FLOPs
        baseline_flops = pd.to_numeric(current_baseline.get("flops_gmacs"), errors='coerce')
        df_to_update.loc[index, "Baseline_FLOPs_GMACs"] = baseline_flops
        current_flops = pd.to_numeric(row_series.get("FLOPs_GMACs"), errors='coerce')
        if pd.notna(current_flops) and pd.notna(baseline_flops) and baseline_flops != 0:
            df_to_update.loc[index, "FLOPs_Reduction_vs_Baseline_Percent"] = ((baseline_flops - current_flops) / baseline_flops) * 100

        # CPU Inference Time
        baseline_infer_cpu = pd.to_numeric(current_baseline.get("inference_cpu_ms"), errors='coerce')
        df_to_update.loc[index, "Baseline_Inference_Time_ms_CPU"] = baseline_infer_cpu
        infer_cpu = pd.to_numeric(row_series.get("Inference_Time_ms_CPU (Batch 1)"), errors='coerce')
        if pd.notna(infer_cpu) and pd.notna(baseline_infer_cpu) and infer_cpu != 0:
            df_to_update.loc[index, "Inference_Speedup_vs_Baseline_CPU"] = baseline_infer_cpu / infer_cpu
        
        # GPU Inference Time
        if DEVICE.type == 'cuda' and "Baseline_Inference_Time_ms_GPU" in df_to_update.columns:
            baseline_infer_gpu = pd.to_numeric(current_baseline.get("inference_gpu_ms"), errors='coerce')
            df_to_update.loc[index, "Baseline_Inference_Time_ms_GPU"] = baseline_infer_gpu
            infer_gpu = pd.to_numeric(row_series.get("Inference_Time_ms_GPU (Batch 1)"), errors='coerce')
            if pd.notna(infer_gpu) and pd.notna(baseline_infer_gpu) and infer_gpu != 0:
                df_to_update.loc[index, "Inference_Speedup_vs_Baseline_GPU"] = baseline_infer_gpu / infer_gpu
    
    print("--- Log-based info added and relative metrics calculated. ---")
    display(df_to_update[['Experiment_ID', 'Final_Val_Accuracy', 'Baseline_Val_Accuracy', 'Accuracy_Retention_Percent']].head())


if not results_df.empty:
    # Ensure all desired columns exist before calculating relative metrics
    desired_columns_final = [
        "Experiment_ID", "Base_Model_Arch", "Optimization_Category", "Specific_Technique", "Key_Parameters",
        "Final_Val_Accuracy", "Accuracy_Drop_From_Best_Epoch_pp", "Accuracy_Before_FT", # Accuracy_Drop_From_Best_Epoch_pp is still N/A
        "Model_Size_MB_Disk", "Model_Size_MB_Log",
        "Params_Millions", "FLOPs_GMACs",
        "Achieved_Sparsity_Percent", "FT_Epochs_Run", "FT_Time_seconds",
        "Inference_Time_ms_CPU (Batch 1)", "Inference_Time_ms_GPU (Batch 1)", "Notes_from_Log",
        "Baseline_Val_Accuracy", "Accuracy_Change_vs_Baseline_pp", "Accuracy_Retention_Percent",
        "Baseline_Model_Size_MB_Disk", "Model_Size_Reduction_vs_Baseline_Percent",
        "Baseline_Params_Millions", "Params_Reduction_vs_Baseline_Percent",
        "Baseline_FLOPs_GMACs", "FLOPs_Reduction_vs_Baseline_Percent",
        "Baseline_Inference_Time_ms_CPU", "Inference_Speedup_vs_Baseline_CPU"
    ]
    if DEVICE.type == 'cuda':
        desired_columns_final.extend(["Baseline_Inference_Time_ms_GPU", "Inference_Speedup_vs_Baseline_GPU"])

    for col in desired_columns_final:
        if col not in results_df.columns:
            results_df[col] = pd.NA # Use pandas' NA

    add_log_based_and_relative_metrics(results_df)
    results_df = results_df.reindex(columns=desired_columns_final) # Reorder and ensure all are present
else:
    print("Skipping log/relative metrics as no experiments were discovered.")


--- Adding Log-Based Information and Calculating Relative Metrics ---
  Populated baseline_metrics_nb for relative calculation: {}
--- Log-based info added and relative metrics calculated. ---


Unnamed: 0_level_0,Experiment_ID,Final_Val_Accuracy,Baseline_Val_Accuracy,Accuracy_Retention_Percent
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
resnet18_baseline,resnet18_baseline,50.089217,50.089217,100.0
resnet50_baseline,resnet50_baseline,64.950293,64.950293,100.0
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,53.683406,,
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,50.777466,,
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,53.352027,,


Cell 11: Final Review and Save to CSV

In [11]:
if not results_df.empty:
    print("\n--- Final DataFrame Review (First 5 rows) ---")
    # Convert relevant columns to numeric, coercing errors for display and consistent CSV.
    # This helps if "N/A" strings are present from errors.
    cols_to_numeric = [
        'Final_Val_Accuracy', 'Model_Size_MB_Disk', 'Model_Size_MB_Log', 
        'Params_Millions', 'FLOPs_GMACs', 'Achieved_Sparsity_Percent',
        'Inference_Time_ms_CPU (Batch 1)', 'Inference_Time_ms_GPU (Batch 1)',
        'Baseline_Val_Accuracy', 'Accuracy_Change_vs_Baseline_pp', 'Accuracy_Retention_Percent',
        'Baseline_Model_Size_MB_Disk', 'Model_Size_Reduction_vs_Baseline_Percent',
        'Baseline_Params_Millions', 'Params_Reduction_vs_Baseline_Percent',
        'Baseline_FLOPs_GMACs', 'FLOPs_Reduction_vs_Baseline_Percent',
        'Baseline_Inference_Time_ms_CPU', 'Inference_Speedup_vs_Baseline_CPU',
        'FT_Epochs_Run', 'FT_Time_seconds', 'Accuracy_Before_FT'
    ]
    if DEVICE.type == 'cuda':
        cols_to_numeric.extend(['Baseline_Inference_Time_ms_GPU', 'Inference_Speedup_vs_Baseline_GPU'])

    for col in cols_to_numeric:
        if col in results_df.columns:
            results_df[col] = pd.to_numeric(results_df[col], errors='coerce')


    # Set float_format for to_string and to_csv
    pd.options.display.float_format = '{:.4f}'.format

    display(results_df.head())
    
    # Save to CSV
    try:
        results_df.to_csv(OUTPUT_CSV_NB, index=False, lineterminator='\n', float_format='%.5f')
        print(f"\n--- Summary saved to {OUTPUT_CSV_NB} ---")
        print(f"Total experiments processed: {len(results_df)}")
    except Exception as e_csv:
        print(f"Error saving CSV: {e_csv}")
else:
    print("DataFrame is empty. Nothing to save.")

print("\n--- Notebook processing finished ---")


--- Final DataFrame Review (First 5 rows) ---


Unnamed: 0_level_0,Experiment_ID,Base_Model_Arch,Optimization_Category,Specific_Technique,Key_Parameters,Final_Val_Accuracy,Accuracy_Drop_From_Best_Epoch_pp,Accuracy_Before_FT,Model_Size_MB_Disk,Model_Size_MB_Log,...,Baseline_Model_Size_MB_Disk,Model_Size_Reduction_vs_Baseline_Percent,Baseline_Params_Millions,Params_Reduction_vs_Baseline_Percent,Baseline_FLOPs_GMACs,FLOPs_Reduction_vs_Baseline_Percent,Baseline_Inference_Time_ms_CPU,Inference_Speedup_vs_Baseline_CPU,Baseline_Inference_Time_ms_GPU,Inference_Speedup_vs_Baseline_GPU
Experiment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
resnet18_baseline,resnet18_baseline,ResNet18,Baseline,Baseline,,50.0892,,,44.6696,,...,44.6696,0.0,11.6895,0.0,1.824,0.0,35.3137,1.0,3.5235,1.0
resnet50_baseline,resnet50_baseline,ResNet50,Baseline,Baseline,,64.9503,,,97.7961,,...,97.7961,0.0,25.557,0.0,4.1337,0.0,88.06,1.0,7.8738,1.0
resnet18pretrained_distilled_quant_kmeans_256clusters_post,resnet18pretrained_distilled_quant_kmeans_256c...,ResNet18,Combined,KMeans Quant,Clusters: 256,53.6834,,,44.6673,44.6673,...,,,,,,,,,,
resnet18pretrained_distilled_quant_ptq_int8_perchannel_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,ResNet18,Combined,PTQ INT8 (Per-Channel),,50.7775,,,11.3021,11.3021,...,,,,,,,,,,
resnet18pretrained_distilled_quant_ptq_int8_pertensor_post,resnet18pretrained_distilled_quant_ptq_int8_pe...,ResNet18,Combined,PTQ INT8 (Per-Tensor),,53.352,,,11.3008,11.3008,...,,,,,,,,,,



--- Summary saved to model_optimization_summary_notebook.csv ---
Total experiments processed: 25

--- Notebook processing finished ---
