# Processing Time Analysis by Prompt Strategy

This notebook analyzes the processing time performance across different prompt strategies:
- **base_version**: Basic version with minimal context
- **with_geom**: Version with geospatial features
- **with_geom_time**: Full version with temporal + geospatial analysis

The analysis focuses on the `processing_time` column from prediction CSV files.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import os
import gc
import psutil
from collections import defaultdict

# Silence every Python warning for the rest of the notebook session.
warnings.filterwarnings('ignore')
# Reset matplotlib to its default style, then use seaborn's "husl" colour palette.
plt.style.use('default')
sns.set_palette("husl")

# ============= CONFIGURATION =============
# MODEL SELECTION - Change this to analyze specific model.
# The value is used as a directory name: results_path / <anchor> / MODEL.
MODEL = "qwen2.5_7b"  # Options: qwen2.5_7b, qwen2.5_14b, llama3.1_8b, mixtral_8x7b, deepseek-coder_33b, mistral_7b

# Warning suppression for pandas / matplotlib (these do not change memory behaviour):
# - disable the pandas chained-assignment (SettingWithCopy) warning
# - disable matplotlib's "too many open figures" warning
pd.set_option('mode.chained_assignment', None)
plt.rcParams['figure.max_open_warning'] = 0

# Configuration for complete analysis - NO LIMITS
# NOTE(review): MAX_FILES_PER_STRATEGY and SAMPLE_SIZE are not read by any code
# visible in this notebook section - confirm whether they are still needed.
MAX_FILES_PER_STRATEGY = None  # Load ALL files (9 per strategy = 18 total)
SAMPLE_SIZE = None  # Load ALL records - no sampling
CHUNK_SIZE = 5000  # Rows per chunk passed to pd.read_csv(chunksize=...) when loading prediction CSVs

# Set paths (hard-coded absolute project location; results live in <base_path>/results)
base_path = Path('/leonardo_work/IscrC_LLM-Mob/LLM-Mob-As-Mobility-Interpreter')
results_path = base_path / 'results'

def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in MB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024 / 1024

# Startup banner: echo the selected model, the resolved paths and the
# baseline memory footprint, one item per line.
print(
    f"=== COMPLETE PROCESSING TIME ANALYSIS FOR MODEL: {MODEL} ===",
    "Configuration: Loading ALL files with ALL records (no sampling)",
    "Expected: 9 files per strategy × 2 strategies = 18 total files",
    f"Base path: {base_path}",
    f"Results path: {results_path}",
    f"Results path exists: {results_path.exists()}",
    f"Initial memory usage: {get_memory_usage():.1f} MB",
    sep="\n",
)

In [None]:
def load_all_processing_times_complete():
    """
    Load every processing-time record for ``MODEL`` from the ``middle/`` and
    ``penultimate/`` result directories (no sampling, no file limit).

    For each anchor directory ``results_path / <anchor> / MODEL`` every
    sub-directory is treated as one prompt strategy, and every ``*.csv`` file
    inside it is handed to ``process_csv_file_complete``. CSV files lying
    directly in the anchor directory are loaded as well, under the strategy
    label ``'Unknown'``. A file that fails to load is reported and skipped.
    Directories and files are visited in sorted order so that repeated runs
    produce the records in the same order.

    Returns:
        pandas.DataFrame with one row per record and the columns
        ``strategy``, ``model``, ``anchor``, ``dataset``, ``file`` and
        ``processing_time``. An empty DataFrame is returned when nothing was
        loaded or when the directory traversal itself raised.
    """
    # Map on-disk strategy directory names to display labels; unknown
    # directory names are used verbatim as their own label.
    strategy_names = {
        'base_version': 'Base Version',
        'with_geom': 'With Geometry',
        'with_geom_time': 'With Geometry + Time'
    }

    # Anchor directories to search
    anchor_dirs = ['middle', 'penultimate']

    all_data = []
    files_processed = 0
    file_counts = defaultdict(int)
    record_counts = defaultdict(int)

    print(f"Starting complete data loading for model {MODEL}...")
    print("Loading from both middle/ and penultimate/ directories with NO sampling limits")

    try:
        for anchor_type in anchor_dirs:
            anchor_path = results_path / anchor_type / MODEL

            if not anchor_path.exists():
                print(f"Warning: Directory {anchor_path} does not exist, skipping...")
                continue

            print(f"\n=== Processing {anchor_type.upper()} directory: {anchor_path} ===")

            # Strategy sub-directories (expected layout). Sorted because
            # iterdir() order depends on the filesystem.
            strategy_dirs = sorted(d for d in anchor_path.iterdir() if d.is_dir())
            print(f"Found strategy directories: {[d.name for d in strategy_dirs]}")

            for strategy_dir in strategy_dirs:
                strategy = strategy_dir.name
                strategy_label = strategy_names.get(strategy, strategy)

                print(f"\n--- Processing strategy: {strategy_label} ---")
                print(f"    Directory: {strategy_dir}")

                # The '*.csv' pattern already excludes '*_checkpoint.txt'
                # files, so no additional name filter is needed.
                csv_files = sorted(strategy_dir.glob('*.csv'))
                print(f"    Found {len(csv_files)} CSV files")

                # Process ALL files (no limits)
                for csv_file in csv_files:
                    try:
                        records_loaded = process_csv_file_complete(
                            csv_file, MODEL, strategy_label, anchor_type, all_data)
                    except Exception as e:
                        print(f"    ✗ Error loading {csv_file.name}: {e}")
                        continue

                    files_processed += 1
                    file_counts[f"{strategy_label}_{anchor_type}"] += 1
                    record_counts[strategy_label] += records_loaded

                    print(f"    ✓ {csv_file.name}: {records_loaded:,} records loaded")

                    # Report progress and reclaim memory every third file.
                    if files_processed % 3 == 0:
                        print(f"      Progress: {files_processed} files processed. Memory: {get_memory_usage():.1f} MB")
                        gc.collect()

            # Also check for CSV files placed directly in the anchor directory
            # (backup check); they carry no strategy information.
            direct_csv_files = sorted(anchor_path.glob('*.csv'))
            if direct_csv_files:
                print(f"\nFound {len(direct_csv_files)} direct CSV files in {anchor_type}/")
                for csv_file in direct_csv_files:
                    try:
                        records_loaded = process_csv_file_complete(
                            csv_file, MODEL, 'Unknown', anchor_type, all_data)
                    except Exception as e:
                        print(f"    ✗ Error loading direct file {csv_file.name}: {e}")
                        continue

                    files_processed += 1
                    # Count direct files too, so the per-combination file
                    # counts add up to the reported total.
                    file_counts[f"Unknown_{anchor_type}"] += 1
                    record_counts['Unknown'] += records_loaded
                    print(f"    ✓ Direct file {csv_file.name}: {records_loaded:,} records loaded")

    except Exception as e:
        print(f"Critical error during data loading: {e}")
        return pd.DataFrame()

    print(f"\n=== LOADING SUMMARY FOR {MODEL} ===")
    print(f"Total files processed: {files_processed}")
    print("File count by strategy-anchor combination:")
    for combo, count in file_counts.items():
        print(f"  {combo}: {count} files")
    print("\nRecord count by strategy:")
    for strategy, count in record_counts.items():
        print(f"  {strategy}: {count:,} records")
    print(f"  TOTAL: {sum(record_counts.values()):,} records")

    return pd.DataFrame(all_data)

def process_csv_file_complete(csv_file, model_name, strategy_label, anchor_type, all_data):
    """Extract ALL non-null processing times from one prediction CSV - NO sampling.

    The file is read in chunks of ``CHUNK_SIZE`` rows. Every row whose
    ``processing_time`` is not null becomes one record dict with the keys
    ``strategy``, ``model``, ``anchor``, ``dataset``, ``file`` and
    ``processing_time`` (a Python float). The dataset name is the part of
    the file stem before ``'_pred_'``, or the whole stem when that marker
    is absent.

    Args:
        csv_file: ``pathlib.Path`` of the CSV file to read.
        model_name: Value stored under the ``model`` key of every record.
        strategy_label: Value stored under the ``strategy`` key.
        anchor_type: Value stored under the ``anchor`` key.
        all_data: List that the records are appended to. It is only
            modified when the whole file was processed successfully, so a
            failing file never leaves partial records behind.

    Returns:
        Number of records appended to ``all_data`` (0 when the file has no
        ``processing_time`` column).

    Raises:
        Exception: If reading or converting the file fails; the original
            error is attached as ``__cause__``.
    """
    stem = csv_file.stem
    dataset_name = stem.split('_pred_')[0] if '_pred_' in stem else stem

    # Collected locally and committed to all_data in one step at the end.
    records = []

    try:
        # Chunked read keeps peak memory bounded; the context manager makes
        # sure the underlying file handle is closed even on errors.
        with pd.read_csv(csv_file, chunksize=CHUNK_SIZE,
                         on_bad_lines='skip', engine='python') as reader:
            for chunk in reader:
                # All chunks share the same header, so a missing column in
                # one chunk means it is missing in the whole file.
                if 'processing_time' not in chunk.columns:
                    break

                # Strict conversion: a non-numeric value raises and fails
                # the file instead of being silently dropped.
                times = chunk['processing_time'].dropna().astype(float).tolist()

                records.extend(
                    {
                        'strategy': strategy_label,
                        'model': model_name,
                        'anchor': anchor_type,
                        'dataset': dataset_name,
                        'file': csv_file.name,
                        'processing_time': value,
                    }
                    for value in times
                )

    except Exception as e:
        raise Exception(f"Failed to process {csv_file.name}: {e}") from e

    all_data.extend(records)
    return len(records)

# Driver for the loading step: announce the run, load everything for MODEL
# and report either the loaded volume or the directories that do exist.
print(f"Loading ALL processing times for model: {MODEL}")
print("Searching in middle/ and penultimate/ directories...")
print("NO LIMITS - Complete dataset analysis")

try:
    df_times = load_all_processing_times_complete()

    if len(df_times) > 0:
        print(f"\n🎉 COMPLETE DATA LOADING SUCCESS! 🎉")
        print(f"Loaded {len(df_times):,} processing time records for model {MODEL}")
        print(f"Strategies found: {sorted(df_times['strategy'].unique())}")
        print(f"Anchor types found: {sorted(df_times['anchor'].unique())}")
        if 'dataset' in df_times:
            print(f"Datasets found: {len(df_times['dataset'].unique())}")
        print(f"Memory usage after complete loading: {get_memory_usage():.1f} MB")

        # Release loader temporaries before the analysis cells run.
        gc.collect()
    else:
        # Nothing was loaded: list the model directories that are present
        # under each anchor to help spot a wrong MODEL name.
        print(f"ERROR: No data loaded for model {MODEL}. Check if the model directory exists in middle/ or penultimate/")
        print("Available directories:")
        for anchor in ('middle', 'penultimate'):
            anchor_path = results_path / anchor
            if not anchor_path.exists():
                continue
            print(f"  {anchor}/: {[d.name for d in anchor_path.iterdir() if d.is_dir()]}")

except Exception as e:
    print(f"CRITICAL ERROR: Failed to load data for model {MODEL}: {e}")
    # Fall back to an empty frame so the following cells hit their
    # "no data" guards instead of failing on an undefined name.
    df_times = pd.DataFrame()

In [None]:
# Data overview for the selected model: global figures first, then record
# counts and basic statistics per strategy and per anchor type.
if len(df_times) > 0:
    print(f"=== DATA OVERVIEW FOR MODEL {MODEL} ===")
    print(f"Total records: {len(df_times):,}")
    print(f"Processing time range: {df_times['processing_time'].min():.2f} - {df_times['processing_time'].max():.2f} seconds")
    print(f"Mean processing time: {df_times['processing_time'].mean():.2f} seconds")
    print(f"Median processing time: {df_times['processing_time'].median():.2f} seconds")

    # How many records each strategy contributed (most frequent first).
    print("\n=== RECORDS PER STRATEGY ===")
    strategy_counts = df_times['strategy'].value_counts()
    for strategy, count in strategy_counts.items():
        print(f"{strategy}: {count:,} records")

    # Same breakdown per anchor type.
    print("\n=== RECORDS PER ANCHOR TYPE ===")
    anchor_counts = df_times['anchor'].value_counts()
    for anchor, count in anchor_counts.items():
        print(f"{anchor}: {count:,} records")

    print("\n=== BASIC STATISTICS BY STRATEGY ===")
    strategy_stats = (
        df_times.groupby('strategy')['processing_time']
        .agg(['count', 'mean', 'median', 'std', 'min', 'max'])
        .round(3)
    )
    print(strategy_stats)

    # The anchor comparison is only meaningful with at least two anchor types.
    if df_times['anchor'].unique().size > 1:
        print("\n=== BASIC STATISTICS BY ANCHOR TYPE ===")
        anchor_stats = (
            df_times.groupby('anchor')['processing_time']
            .agg(['count', 'mean', 'median', 'std', 'min', 'max'])
            .round(3)
        )
        print(anchor_stats)

    print(f"\nMemory usage: {get_memory_usage():.1f} MB")
else:
    print(f"ERROR: No data available for model {MODEL}. Please check the data loading step.")

In [None]:
# Processing Time Distribution by Strategy - Optimized for single model
# Draws one figure with six panels (box plots, mean/rate bar charts, record-share
# pie chart and violin plot) from df_times. Skipped entirely when no data was loaded.
if len(df_times) == 0:
    print("No data available for visualization.")
else:
    # Enhanced visualization for single model analysis
    fig = plt.figure(figsize=(15, 12))
    
    try:
        # Create a 3x2 grid (3 rows, 2 columns) for comprehensive analysis
        gs = fig.add_gridspec(3, 2, hspace=0.3, wspace=0.3)
        
        # 1. Box plot by strategy
        ax1 = fig.add_subplot(gs[0, 0])
        sns.boxplot(data=df_times, x='strategy', y='processing_time', ax=ax1)
        ax1.set_title(f'Processing Time Distribution by Strategy\n({MODEL})')
        ax1.set_xlabel('Strategy')
        ax1.set_ylabel('Processing Time (seconds)')
        ax1.tick_params(axis='x', rotation=45)
        
        # 2. Box plot by anchor type (if available)
        ax2 = fig.add_subplot(gs[0, 1])
        if len(df_times['anchor'].unique()) > 1:
            sns.boxplot(data=df_times, x='anchor', y='processing_time', ax=ax2)
            ax2.set_title('Processing Time by Anchor Type')
            ax2.set_xlabel('Anchor Type')
            ax2.set_ylabel('Processing Time (seconds)')
        else:
            # Show histogram if only one anchor type
            df_times['processing_time'].hist(bins=50, ax=ax2)
            ax2.set_title('Processing Time Distribution')
            ax2.set_xlabel('Processing Time (seconds)')
            ax2.set_ylabel('Frequency')

        # 3. Mean processing time bar chart
        # Means are sorted ascending, so bars run from the fastest to the slowest strategy.
        ax3 = fig.add_subplot(gs[1, 0])
        strategy_means = df_times.groupby('strategy')['processing_time'].mean().sort_values()
        bars = ax3.bar(range(len(strategy_means)), strategy_means.values, 
                      color=['#ff9999', '#66b3ff', '#99ff99'][:len(strategy_means)])
        ax3.set_title('Mean Processing Time by Strategy')
        ax3.set_xlabel('Strategy')
        ax3.set_ylabel('Mean Processing Time (seconds)')
        ax3.set_xticks(range(len(strategy_means)))
        ax3.set_xticklabels(strategy_means.index, rotation=45, ha='right')
        
        # Add value labels on bars
        for i, bar in enumerate(bars):
            height = bar.get_height()
            ax3.text(bar.get_x() + bar.get_width()/2., height,
                     f'{height:.2f}s', ha='center', va='bottom', fontsize=9)
        
        # 4. Processing rate (predictions per second)
        # Rate is the reciprocal of the mean processing time; it keeps the bar order of panel 3.
        ax4 = fig.add_subplot(gs[1, 1])
        processing_rates = 1 / strategy_means
        bars4 = ax4.bar(range(len(processing_rates)), processing_rates.values, 
                       color=['#ffcc99', '#c2c2f0', '#ccffcc'][:len(processing_rates)])
        ax4.set_title('Processing Rate by Strategy')
        ax4.set_xlabel('Strategy')
        ax4.set_ylabel('Predictions/Second')
        ax4.set_xticks(range(len(processing_rates)))
        ax4.set_xticklabels(processing_rates.index, rotation=45, ha='right')
        
        # Add value labels
        for i, bar in enumerate(bars4):
            height = bar.get_height()
            ax4.text(bar.get_x() + bar.get_width()/2., height,
                     f'{height:.3f}', ha='center', va='bottom', fontsize=9)
        
        # 5. Strategy comparison pie chart (share of records per strategy)
        ax5 = fig.add_subplot(gs[2, 0])
        strategy_counts = df_times['strategy'].value_counts()
        colors = plt.cm.Set3(np.linspace(0, 1, len(strategy_counts)))
        ax5.pie(strategy_counts.values, labels=strategy_counts.index, autopct='%1.1f%%', 
               startangle=90, colors=colors)
        ax5.set_title('Data Distribution by Strategy')
        
        # 6. Processing time violin plot
        ax6 = fig.add_subplot(gs[2, 1])
        sns.violinplot(data=df_times, x='strategy', y='processing_time', ax=ax6)
        ax6.set_title('Processing Time Distribution (Violin Plot)')
        ax6.set_xlabel('Strategy')
        ax6.set_ylabel('Processing Time (seconds)')
        ax6.tick_params(axis='x', rotation=45)
        
        plt.suptitle(f'Processing Time Analysis - Model: {MODEL}', fontsize=16, y=0.98)
        plt.show()
        
        # Force cleanup: close the current figure and collect garbage
        plt.close()
        gc.collect()
        print(f"Memory usage after visualization: {get_memory_usage():.1f} MB")
        
    except Exception as e:
        # Any plotting failure is reported and every open figure is closed.
        print(f"Error creating visualization: {e}")
        plt.close('all')

In [None]:
# Detailed Statistics Table - Complete Dataset Analysis
# Prints descriptive statistics per strategy / anchor / dataset and, when more
# than one strategy is present, a one-way ANOVA with eta-squared effect size
# and Bonferroni-corrected pairwise t-tests. Requires df_times from the
# loading cell; scipy is optional (tests are skipped when it is missing).
if len(df_times) == 0:
    print("No data available for statistical analysis.")
else:
    print(f"=== COMPLETE PROCESSING TIME STATISTICS FOR {MODEL} ===")
    print(f"Total records analyzed: {len(df_times):,}")

    # Detailed statistics by strategy
    detailed_stats = df_times.groupby('strategy')['processing_time'].agg([
        'count',
        'mean',
        'median',
        'std',
        'min',
        ('q25', lambda x: x.quantile(0.25)),
        ('q75', lambda x: x.quantile(0.75)),
        'max',
        ('range', lambda x: x.max() - x.min()),
        ('cv', lambda x: x.std() / x.mean())  # Coefficient of variation
    ]).round(4)

    print("\n--- Processing Time Statistics by Strategy ---")
    print(detailed_stats)

    # Efficiency metrics derived from the mean time per prediction
    print("\n--- Efficiency Metrics ---")
    efficiency_stats = df_times.groupby('strategy')['processing_time'].agg(['mean', 'count']).round(4)
    efficiency_stats['predictions_per_second'] = (1 / efficiency_stats['mean']).round(4)
    efficiency_stats['records_per_hour'] = (3600 / efficiency_stats['mean']).round(0)
    print(efficiency_stats)

    # Statistics by anchor type if available
    if len(df_times['anchor'].unique()) > 1:
        print("\n--- Processing Time Statistics by Anchor Type ---")
        anchor_stats = df_times.groupby('anchor')['processing_time'].agg([
            'count', 'mean', 'median', 'std', 'min', 'max'
        ]).round(4)
        print(anchor_stats)

        # Cross-tabulation: Strategy × Anchor
        print("\n--- Strategy × Anchor Cross-Analysis ---")
        cross_stats = df_times.groupby(['strategy', 'anchor'])['processing_time'].agg([
            'count', 'mean', 'std'
        ]).round(4)
        print(cross_stats)

    # Dataset-level analysis if available
    if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
        print("\n--- Dataset Performance Analysis ---")
        # Sorted by mean, slowest first: head() = slowest, tail() = fastest.
        dataset_stats = df_times.groupby('dataset')['processing_time'].agg([
            'count', 'mean', 'median', 'std'
        ]).round(4).sort_values('mean', ascending=False)

        print("Top 10 slowest datasets:")
        print(dataset_stats.head(10))

        print("\nTop 10 fastest datasets:")
        print(dataset_stats.tail(10))

    # Statistical significance tests - only if we have multiple strategies
    strategies = df_times['strategy'].unique()

    if len(strategies) > 1:
        try:
            from scipy import stats

            groups = [df_times[df_times['strategy'] == s]['processing_time'].values for s in strategies]

            # Perform one-way ANOVA
            f_stat, p_value = stats.f_oneway(*groups)

            print("\n--- Statistical Significance Test ---")
            print(f"One-way ANOVA F-statistic: {f_stat:.4f}")
            print(f"P-value: {p_value:.2e}")
            print(f"Significant difference between strategies: {'Yes' if p_value < 0.05 else 'No'}")

            # Effect size (eta-squared) = SS_between / SS_total.
            # SS_total is the squared deviation of every observation from
            # the grand mean; SS_between weights each group's squared mean
            # deviation by the group size.
            grand_mean = df_times['processing_time'].mean()
            total_sum_squares = ((df_times['processing_time'] - grand_mean) ** 2).sum()
            between_sum_squares = sum(
                len(group) * (group.mean() - grand_mean) ** 2 for group in groups
            )

            if total_sum_squares > 0:
                eta_squared = between_sum_squares / total_sum_squares
                print(f"Effect size (η²): {eta_squared:.4f}")

                # Cohen's conventional benchmarks for eta-squared:
                # 0.01 small, 0.06 medium, 0.14 large.
                if eta_squared < 0.01:
                    effect_size = "Negligible"
                elif eta_squared < 0.06:
                    effect_size = "Small"
                elif eta_squared < 0.14:
                    effect_size = "Medium"
                else:
                    effect_size = "Large"
                print(f"Effect size interpretation: {effect_size}")
            else:
                # All observations identical: the ratio would be 0/0.
                print("Effect size (η²): undefined (processing times have zero variance)")

            # Pairwise comparisons with Bonferroni correction
            if len(strategies) > 2:
                print("\n--- Pairwise Comparisons (Bonferroni corrected) ---")
                from itertools import combinations

                strategy_pairs = list(combinations(strategies, 2))

                alpha = 0.05
                n_comparisons = len(strategy_pairs)
                bonferroni_alpha = alpha / n_comparisons

                print(f"Bonferroni corrected alpha: {bonferroni_alpha:.4f}")

                for strategy1, strategy2 in strategy_pairs:
                    group1 = df_times[df_times['strategy'] == strategy1]['processing_time']
                    group2 = df_times[df_times['strategy'] == strategy2]['processing_time']

                    t_stat, p_val = stats.ttest_ind(group1, group2)
                    is_significant = p_val < bonferroni_alpha

                    # Cohen's d using the root mean of the two sample variances
                    mean_diff = group1.mean() - group2.mean()
                    cohen_d = mean_diff / np.sqrt((group1.var() + group2.var()) / 2)

                    print(f"{strategy1} vs {strategy2}:")
                    print(f"  Mean difference: {mean_diff:.4f}s")
                    print(f"  t-statistic: {t_stat:.4f}")
                    print(f"  p-value: {p_val:.6f}")
                    print(f"  Significant: {'Yes' if is_significant else 'No'}")
                    print(f"  Cohen's d: {cohen_d:.4f}")
                    print()

        except ImportError:
            print("scipy not available - skipping statistical tests")
        except Exception as e:
            print(f"Could not perform statistical tests: {e}")
    else:
        print("Only one strategy found - no statistical comparison possible.")

    print(f"\nMemory usage: {get_memory_usage():.1f} MB")

In [None]:
# Strategy and Anchor Comparison - Enhanced for single model
# Draws a 2x2 figure comparing strategies and anchor types, then prints the
# per-combination statistics. Each panel falls back to a simpler plot when the
# dimension it needs (several anchors / several datasets) is not present.
if len(df_times) == 0:
    print("No data available for strategy and anchor analysis.")
else:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    
    try:
        # 1. Strategy vs Anchor heatmap (if both dimensions available)
        if len(df_times['anchor'].unique()) > 1 and len(df_times['strategy'].unique()) > 1:
            # unstack() turns the anchor level into columns: rows = strategy, columns = anchor
            strategy_anchor_stats = df_times.groupby(['strategy', 'anchor'])['processing_time'].mean().unstack()
            sns.heatmap(strategy_anchor_stats, annot=True, fmt='.2f', cmap='YlOrRd', ax=axes[0,0])
            axes[0,0].set_title(f'Mean Processing Time by Strategy and Anchor\n({MODEL})')
            axes[0,0].set_xlabel('Anchor Type')
            axes[0,0].set_ylabel('Strategy')
        else:
            # Show just strategy means if only one anchor type
            strategy_means = df_times.groupby('strategy')['processing_time'].mean()
            bars = axes[0,0].bar(range(len(strategy_means)), strategy_means.values)
            axes[0,0].set_title(f'Mean Processing Time by Strategy\n({MODEL})')
            axes[0,0].set_xlabel('Strategy')
            axes[0,0].set_ylabel('Processing Time (seconds)')
            axes[0,0].set_xticks(range(len(strategy_means)))
            axes[0,0].set_xticklabels(strategy_means.index, rotation=45, ha='right')

        # 2. Processing time comparison by strategy and anchor
        if len(df_times['anchor'].unique()) > 1:
            sns.boxplot(data=df_times, x='strategy', y='processing_time', hue='anchor', ax=axes[0,1])
            axes[0,1].set_title('Processing Time by Strategy and Anchor')
            axes[0,1].set_xlabel('Strategy')
            axes[0,1].set_ylabel('Processing Time (seconds)')
            axes[0,1].tick_params(axis='x', rotation=45)
            axes[0,1].legend(title='Anchor')
        else:
            # Show violin plot if only one anchor
            sns.violinplot(data=df_times, x='strategy', y='processing_time', ax=axes[0,1])
            axes[0,1].set_title('Processing Time Distribution by Strategy')
            axes[0,1].set_xlabel('Strategy')
            axes[0,1].set_ylabel('Processing Time (seconds)')
            axes[0,1].tick_params(axis='x', rotation=45)

        # 3. Efficiency comparison: mean per strategy with the standard deviation as error bar
        ax3 = axes[1,0]
        strategy_stats = df_times.groupby('strategy')['processing_time'].agg(['mean', 'std'])
        x_pos = np.arange(len(strategy_stats))
        bars = ax3.bar(x_pos, strategy_stats['mean'], yerr=strategy_stats['std'], capsize=5)
        ax3.set_title('Processing Time with Standard Deviation')
        ax3.set_xlabel('Strategy')
        ax3.set_ylabel('Processing Time (seconds)')
        ax3.set_xticks(x_pos)
        ax3.set_xticklabels(strategy_stats.index, rotation=45, ha='right')
        
        # Add value labels ("mean±std") on top of each bar
        for i, bar in enumerate(bars):
            height = bar.get_height()
            ax3.text(bar.get_x() + bar.get_width()/2., height,
                     f'{height:.2f}±{strategy_stats.iloc[i]["std"]:.2f}', 
                     ha='center', va='bottom', fontsize=8)

        # 4. Dataset analysis (if available)
        ax4 = axes[1,1]
        if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
            # Means are sorted ascending, so tail(10) holds the ten slowest datasets
            dataset_means = df_times.groupby('dataset')['processing_time'].mean().sort_values()
            top_datasets = dataset_means.tail(10)  # Show top 10 slowest datasets
            bars = ax4.bar(range(len(top_datasets)), top_datasets.values)
            ax4.set_title('Top 10 Slowest Datasets')
            ax4.set_xlabel('Dataset')
            ax4.set_ylabel('Mean Processing Time (seconds)')
            ax4.set_xticks(range(len(top_datasets)))
            ax4.set_xticklabels(top_datasets.index, rotation=45, ha='right')
        else:
            # Show cumulative distribution (empirical CDF: i/n against the i-th smallest time)
            sorted_times = np.sort(df_times['processing_time'])
            y_vals = np.arange(len(sorted_times)) / float(len(sorted_times))
            ax4.plot(sorted_times, y_vals)
            ax4.set_title('Cumulative Distribution of Processing Times')
            ax4.set_xlabel('Processing Time (seconds)')
            ax4.set_ylabel('Cumulative Probability')
            ax4.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()
        
        # Print detailed comparison if multiple anchor types
        if len(df_times['anchor'].unique()) > 1:
            print(f"\n=== STRATEGY-ANCHOR COMBINATIONS FOR {MODEL} ===")
            combo_stats = df_times.groupby(['strategy', 'anchor'])['processing_time'].agg(['count', 'mean', 'std']).round(3)
            print(combo_stats)
        
        # Force cleanup: close the current figure and collect garbage
        plt.close()
        gc.collect()
        print(f"\nMemory usage after analysis: {get_memory_usage():.1f} MB")
        
    except Exception as e:
        # Any plotting failure is reported and every open figure is closed.
        print(f"Error creating strategy-anchor analysis: {e}")
        plt.close('all')

In [None]:
# Performance Summary Report
# Ranks the strategies by mean processing time, reports each strategy's
# overhead relative to 'Base Version' (when present) and the resulting
# throughput. Guarded like the other cells so that a failed load does not
# raise a KeyError on the empty DataFrame.
if len(df_times) == 0:
    print(f"No data available for performance summary of model {MODEL}.")
else:
    print("=== PROCESSING TIME PERFORMANCE SUMMARY ===")
    print("\nStrategy Performance Ranking (by mean processing time):")

    # Fastest strategy first.
    strategy_ranking = df_times.groupby('strategy')['processing_time'].agg([
        'mean', 'median', 'count'
    ]).sort_values('mean')

    # The loop variable is named `row` (not `stats`) so it does not overwrite
    # the scipy `stats` module imported into the notebook namespace.
    for i, (strategy, row) in enumerate(strategy_ranking.iterrows(), 1):
        print(f"{i}. {strategy}:")
        print(f"   Mean: {row['mean']:.2f}s")
        print(f"   Median: {row['median']:.2f}s")
        # iterrows() upcasts the row to float; cast back so counts print as integers.
        print(f"   Records: {int(row['count']):,}")
        print()

    # Efficiency metrics
    print("\n=== EFFICIENCY ANALYSIS ===")
    base_mean = strategy_ranking.loc['Base Version', 'mean'] if 'Base Version' in strategy_ranking.index else None

    # Relative overhead needs a strictly positive baseline (division below).
    if base_mean is not None and base_mean > 0:
        print(f"Base Version mean processing time: {base_mean:.2f}s")
        print("\nOverhead compared to Base Version:")

        for strategy, row in strategy_ranking.iterrows():
            if strategy != 'Base Version':
                extra_time = row['mean'] - base_mean
                overhead = (extra_time / base_mean) * 100
                # Explicit sign formatting: a strategy faster than the
                # baseline is shown as a negative overhead.
                print(f"{strategy}: {overhead:+.1f}% ({extra_time:+.2f}s additional)")

    # Processing rate (predictions per second)
    print("\n=== PROCESSING RATE ===")
    for strategy, row in strategy_ranking.iterrows():
        rate = 1 / row['mean'] if row['mean'] > 0 else 0
        print(f"{strategy}: {rate:.3f} predictions/second")

In [None]:
# Export summary statistics to CSV - Optimized for single model
# Writes one row per strategy (or per strategy/anchor pair when several
# anchors are present) to <base_path>/notebook/processing_time_summary_<MODEL>.csv
# and, when several datasets exist, per dataset/strategy statistics to
# dataset_processing_times_<MODEL>.csv in the same directory.
def _summarize_processing_times(times):
    """Return the exported summary statistics for one Series of processing times."""
    mean_time = times.mean()
    return {
        'count': len(times),
        'mean': mean_time,
        'median': times.median(),
        'std': times.std(),
        'min': times.min(),
        'max': times.max(),
        'q25': times.quantile(0.25),
        'q75': times.quantile(0.75),
        # Guard against a zero mean instead of dividing by it.
        'predictions_per_second': 1 / mean_time if mean_time > 0 else 0
    }


if len(df_times) == 0:
    print("No data available for export.")
else:
    try:
        output_path = base_path / 'notebook' / f'processing_time_summary_{MODEL}.csv'
        # Make sure the target directory exists before writing.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Prepare detailed summary for single model
        summary_data = []
        anchors = df_times['anchor'].unique()

        for strategy in df_times['strategy'].unique():
            strategy_rows = df_times[df_times['strategy'] == strategy]

            if len(anchors) > 1:
                # One row per (strategy, anchor). Every statistic in the row,
                # including min/max/quartiles, is computed from that anchor's
                # records only.
                for anchor in anchors:
                    anchor_data = strategy_rows[strategy_rows['anchor'] == anchor]['processing_time']

                    if len(anchor_data) > 0:
                        anchor_stats = {'model': MODEL, 'strategy': strategy}
                        anchor_stats.update(_summarize_processing_times(anchor_data))
                        anchor_stats['anchor'] = anchor
                        summary_data.append(anchor_stats)
            else:
                # Single anchor: one row per strategy, tagged with that anchor.
                base_stats = {'model': MODEL, 'strategy': strategy}
                base_stats.update(_summarize_processing_times(strategy_rows['processing_time']))
                base_stats['anchor'] = df_times['anchor'].iloc[0]
                summary_data.append(base_stats)

        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv(output_path, index=False)

        print(f"Summary statistics exported to: {output_path}")
        print(f"\nSummary table for {MODEL}:")
        display_cols = ['strategy', 'anchor', 'count', 'mean', 'median', 'std', 'predictions_per_second']
        available_cols = [col for col in display_cols if col in summary_df.columns]
        print(summary_df[available_cols].round(3))

        # Export additional dataset-level statistics if available
        if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
            dataset_output_path = base_path / 'notebook' / f'dataset_processing_times_{MODEL}.csv'
            dataset_stats = df_times.groupby(['dataset', 'strategy'])['processing_time'].agg([
                'count', 'mean', 'median', 'std', 'min', 'max'
            ]).round(3).reset_index()
            dataset_stats.to_csv(dataset_output_path, index=False)
            print(f"Dataset-level statistics exported to: {dataset_output_path}")

        # Force cleanup
        del summary_data, summary_df
        gc.collect()
        print(f"\nMemory usage: {get_memory_usage():.1f} MB")

    except Exception as e:
        print(f"Error exporting summary: {e}")

In [None]:
# Final Summary Report - Optimized for single model
#
# Prints, for the model selected by MODEL:
#   1. strategies ranked by mean processing time,
#   2. overhead and throughput of every strategy relative to 'Base Version',
#   3. a per-anchor comparison (only when more than one anchor is present),
#   4. overall descriptive statistics of the `processing_time` column.
# Any failure is reported and swallowed so the notebook keeps running.


def _predictions_per_second(mean_seconds):
    """Return the throughput implied by a mean processing time (1 / mean).

    Returns 0 when the mean is zero, negative or NaN, so a degenerate group
    cannot produce a division by zero or a NaN rate in the report.
    """
    return 1 / mean_seconds if mean_seconds > 0 else 0


if len(df_times) == 0:
    print(f"No data available for final analysis of model {MODEL}.")
else:
    print(f"=== PROCESSING TIME PERFORMANCE SUMMARY FOR {MODEL} ===")
    print(f"\nStrategy Performance Ranking (by mean processing time):")

    try:
        strategy_ranking = df_times.groupby('strategy')['processing_time'].agg([
            'mean', 'median', 'count', 'std'
        ]).sort_values('mean')

        for i, (strategy, stats) in enumerate(strategy_ranking.iterrows(), 1):
            efficiency_ratio = _predictions_per_second(stats['mean'])
            print(f"{i}. {strategy}:")
            print(f"   Mean: {stats['mean']:.2f}s (±{stats['std']:.2f}s)")
            print(f"   Median: {stats['median']:.2f}s")
            # iterrows() upcasts the whole row to float; cast the count back
            # to int so it prints as "1,234" instead of "1,234.0".
            print(f"   Records: {int(stats['count']):,}")
            print(f"   Efficiency: {efficiency_ratio:.3f} predictions/second")
            print()

        # Efficiency analysis
        print(f"\n=== EFFICIENCY ANALYSIS FOR {MODEL} ===")
        base_mean = strategy_ranking.loc['Base Version', 'mean'] if 'Base Version' in strategy_ranking.index else None

        # Explicit check instead of truthiness: the baseline is only usable
        # as a divisor when it is present AND a positive number (NaN fails
        # the comparison as well).
        if base_mean is not None and base_mean > 0:
            print(f"Base Version mean processing time: {base_mean:.2f}s")
            print(f"Overhead compared to Base Version:")

            for strategy, stats in strategy_ranking.iterrows():
                if strategy != 'Base Version':
                    additional_time = stats['mean'] - base_mean
                    overhead = (additional_time / base_mean) * 100
                    # The '+' format flag emits the correct sign, so a
                    # strategy faster than the baseline shows "-5.0%"
                    # rather than "+-5.0%".
                    print(f"  {strategy}: {overhead:+.1f}% ({additional_time:+.2f}s)")

            # Throughput relative to the baseline
            base_rate = 1 / base_mean
            print(f"\nThroughput comparison:")
            print(f"  Base Version: {base_rate:.3f} predictions/second")
            for strategy, stats in strategy_ranking.iterrows():
                if strategy != 'Base Version':
                    strategy_rate = _predictions_per_second(stats['mean'])
                    # Signed change: negative means slower than the baseline.
                    rate_change = ((strategy_rate - base_rate) / base_rate) * 100
                    print(f"  {strategy}: {strategy_rate:.3f} predictions/second ({rate_change:+.1f}%)")
        else:
            # Also reached when the baseline mean is zero or NaN, because no
            # relative comparison can be computed in that case either.
            print("Base Version not found - showing absolute performance:")
            for strategy, stats in strategy_ranking.iterrows():
                rate = _predictions_per_second(stats['mean'])
                print(f"  {strategy}: {rate:.3f} predictions/second")

        # Anchor type analysis if available. The column is optional, and
        # nunique() ignores NaN just like the groupby below does.
        has_anchor = 'anchor' in df_times.columns
        if has_anchor and df_times['anchor'].nunique() > 1:
            print(f"\n=== ANCHOR TYPE COMPARISON FOR {MODEL} ===")
            anchor_ranking = df_times.groupby('anchor')['processing_time'].agg([
                'mean', 'median', 'count'
            ]).sort_values('mean')

            for anchor, stats in anchor_ranking.iterrows():
                rate = _predictions_per_second(stats['mean'])
                print(f"{anchor}: {stats['mean']:.2f}s avg ({rate:.3f} pred/sec, {int(stats['count']):,} records)")

        # Statistical summary
        print(f"\n=== STATISTICAL SUMMARY FOR {MODEL} ===")
        overall_stats = df_times['processing_time'].describe()
        print(f"Overall processing time statistics:")
        print(f"  Count: {overall_stats['count']:,.0f}")
        print(f"  Mean: {overall_stats['mean']:.2f}s")
        print(f"  Std: {overall_stats['std']:.2f}s")
        print(f"  Min: {overall_stats['min']:.2f}s")
        print(f"  25th percentile: {overall_stats['25%']:.2f}s")
        print(f"  Median: {overall_stats['50%']:.2f}s")
        print(f"  75th percentile: {overall_stats['75%']:.2f}s")
        print(f"  Max: {overall_stats['max']:.2f}s")

        print(f"\n=== ANALYSIS COMPLETE FOR {MODEL} ===")
        print(f"Total processing time records analyzed: {len(df_times):,}")
        # dropna() keeps these lists consistent with the groupby-based
        # sections (which skip NaN keys) and avoids sorting NaN against str.
        print(f"Strategies analyzed: {', '.join(sorted(df_times['strategy'].dropna().unique()))}")
        if has_anchor:
            print(f"Anchor types analyzed: {', '.join(sorted(df_times['anchor'].dropna().unique()))}")
        if 'dataset' in df_times.columns:
            print(f"Datasets covered: {len(df_times['dataset'].unique())}")
        print(f"Final memory usage: {get_memory_usage():.1f} MB")

        # Final cleanup
        gc.collect()

    except Exception as e:
        # Top-level notebook boundary: report the problem and keep going.
        print(f"Error in final analysis: {e}")

# Memory cleanup at end
# Best-effort: a failure here must never abort the notebook, but it is
# reported instead of being silently discarded. Catching Exception (rather
# than a bare except) lets KeyboardInterrupt / SystemExit propagate.
try:
    plt.close('all')  # Close any remaining plots
    gc.collect()
    print(f"\nFinal cleanup complete. Memory usage: {get_memory_usage():.1f} MB")
    print(f"\nTo analyze a different model, change the MODEL variable in the first cell and rerun the notebook.")
    print(f"Available models: qwen2.5_7b, qwen2.5_14b, llama3.1_8b, mixtral_8x7b, deepseek-coder_33b, mistral_7b")
except Exception as e:
    print(f"Final cleanup could not be completed: {e}")