# Processing Time Analysis by Prompt Strategy

This notebook analyzes the processing time performance across different prompt strategies:
- **base_version**: Basic version with minimal context
- **with_geom**: Version with geospatial features  
- **base_version_second_try**: An alternative implementation of the base version (found only in `penultimate/`)

The analysis focuses on the `processing_time` column from prediction CSV files.

**Current Status**: The core comparison is between the `base_version` and `with_geom` strategies; `base_version_second_try` is loaded as an optional strategy wherever it exists.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import os
import gc
import psutil
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Tuple, Optional
import pickle
import hashlib
from tqdm import tqdm

# ============= GLOBAL NOTEBOOK SETTINGS =============
# Silence warnings and apply the plotting defaults shared by every cell.
warnings.filterwarnings('ignore')
plt.style.use('default')
sns.set_palette("husl")

# Library housekeeping: no chained-assignment warnings from pandas and no
# "too many open figures" warning from matplotlib.
pd.set_option('mode.chained_assignment', None)
plt.rcParams.update({'figure.max_open_warning': 0})

# ============= ENHANCED CONFIGURATION =============
# Model whose prediction files are analysed. Change this to switch model.
# Options: qwen2.5_7b, qwen2.5_14b, llama3.1_8b, mixtral_8x7b,
#          deepseek-coder_33b, mistral_7b
MODEL = "qwen2.5_7b"

# Core strategies: directory name -> display label.
EXPECTED_STRATEGIES = dict(
    base_version='Base Version',
    with_geom='With Geometry',
)

# Strategies that may exist only under some anchors (here: penultimate/).
OPTIONAL_STRATEGIES = dict(
    base_version_second_try='Base Version (Second Try)',
)

# Anchor sub-directories scanned below the results directory.
ANCHOR_DIRS = ['middle', 'penultimate']

# Performance tuning knobs.
MAX_WORKERS = 4                # threads used for parallel file processing
BATCH_SIZE = 1000              # records buffered before a DataFrame is built
CHUNK_SIZE = 5000              # rows per chunk when reading a CSV
PROGRESS_UPDATE_INTERVAL = 5   # files between progress updates

# On-disk cache of the combined processing-time table.
USE_CACHE = True
CACHE_DIR = Path('cache')

# Filesystem layout.
base_path = Path('/leonardo_work/IscrC_LLM-Mob/LLM-Mob-As-Mobility-Interpreter')
results_path = base_path.joinpath('results')
cache_path = base_path.joinpath('notebook', CACHE_DIR)

# The cache directory is created eagerly so later writes cannot fail on a
# missing parent.
cache_path.mkdir(parents=True, exist_ok=True)

def get_memory_usage():
    """Return the resident set size (RSS) of the current process in MB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    # bytes -> KiB -> MiB
    return rss_bytes / 1024 / 1024

def get_cache_key(model: str, results_dir: Optional[Path] = None) -> str:
    """Generate the cache key for a model's processed data.

    The key is an MD5 hex digest (used purely as a fingerprint, not for
    security) of:

    * the model name and the strategy / anchor configuration, and
    * the path, size and modification time of every ``*.csv`` file found
      under ``<results_dir>/<anchor>/<model>/<strategy>`` for the configured
      anchors and strategies.

    Including the file fingerprints means a cached result is no longer
    reused once any input CSV is added, removed or rewritten.

    Args:
        model: Model directory name.
        results_dir: Root results directory to fingerprint. Defaults to the
            module-level ``results_path``.

    Returns:
        A 32-character hexadecimal digest.
    """
    if results_dir is None:
        results_dir = results_path

    cache_inputs = [model, str(EXPECTED_STRATEGIES), str(OPTIONAL_STRATEGIES), str(ANCHOR_DIRS)]

    known_strategies = {**EXPECTED_STRATEGIES, **OPTIONAL_STRATEGIES}
    for anchor in ANCHOR_DIRS:
        for strategy in known_strategies:
            strategy_dir = Path(results_dir) / anchor / model / strategy
            if not strategy_dir.is_dir():
                continue
            # Sorted so the key does not depend on directory listing order.
            for csv_file in sorted(strategy_dir.glob('*.csv')):
                try:
                    info = csv_file.stat()
                except OSError:
                    # File vanished or is unreadable: leave it out of the key.
                    continue
                cache_inputs.append(f"{csv_file}:{info.st_size}:{info.st_mtime_ns}")

    # A separator keeps adjacent parts from running together ambiguously.
    return hashlib.md5('|'.join(cache_inputs).encode()).hexdigest()

# Startup banner: echo the active configuration and where data is read from.
for banner_line in (
    f"=== ENHANCED PROCESSING TIME ANALYSIS FOR MODEL: {MODEL} ===",
    "Configuration:",
    f"  - Parallel workers: {MAX_WORKERS}",
    f"  - Batch processing: {BATCH_SIZE} records per batch",
    f"  - Cache enabled: {USE_CACHE}",
    f"  - Core strategies: {len(EXPECTED_STRATEGIES)} ({', '.join(EXPECTED_STRATEGIES)})",
    f"  - Optional strategies: {len(OPTIONAL_STRATEGIES)} ({', '.join(OPTIONAL_STRATEGIES)})",
    f"  - Expected anchors: {len(ANCHOR_DIRS)} ({', '.join(ANCHOR_DIRS)})",
    "Directory structure:",
):
    print(banner_line)

# One line per anchor; "MODEL" is printed literally as a placeholder.
for anchor in ANCHOR_DIRS:
    print(f"  - {anchor}/MODEL/{{base_version,with_geom}} + optional strategies")

for banner_line in (
    "Estimated files:",
    "  - middle: 2 core strategies (base_version, with_geom)",
    "  - penultimate: 3 strategies (+ base_version_second_try)",
    "  - Total: ~40+ files (varies by strategy)",
    f"Base path: {base_path}",
    f"Results path: {results_path}",
    f"Cache path: {cache_path}",
    f"Results path exists: {results_path.exists()}",
    "✅ Focus on base_version and with_geom strategies for performance analysis",
):
    print(banner_line)

# Measured last so it reflects the state after imports and configuration.
print(f"Initial memory usage: {get_memory_usage():.1f} MB")

In [None]:
# ============= ENHANCED DATA LOADING WITH OPTIMIZATIONS =============

class ProcessingTimeAnalyzer:
    """Load per-prediction processing times for one model.

    Prediction CSVs are discovered under
    ``<base_path>/results/<anchor>/<model>/<strategy>/*.csv`` for the anchors
    in ``ANCHOR_DIRS`` and the strategies in ``EXPECTED_STRATEGIES`` /
    ``OPTIONAL_STRATEGIES``. Files are parsed in parallel threads, filtered to
    a plausible time range, combined into one DataFrame and optionally cached
    on disk as a pickle.
    """

    # Inclusive bounds (seconds) outside of which a processing time is
    # treated as implausible and dropped.
    MIN_PROCESSING_TIME = 0.1
    MAX_PROCESSING_TIME = 1000

    def __init__(self, model: str, base_path: Path):
        """Store the model name and derive the results / cache locations.

        Args:
            model: Model directory name (e.g. the value of ``MODEL``).
            base_path: Project root containing the ``results`` directory.
        """
        self.model = model
        self.base_path = base_path
        self.results_path = base_path / 'results'
        # The cache location comes from the module-level configuration.
        self.cache_path = cache_path
        # "<strategy label>_<anchor>" -> number of files that yielded records
        self.file_stats = defaultdict(int)
        # "<strategy label>" -> number of records loaded
        self.record_stats = defaultdict(int)

    @staticmethod
    def _known_strategies() -> Dict[str, str]:
        """Return directory name -> label for core and optional strategies."""
        return {**EXPECTED_STRATEGIES, **OPTIONAL_STRATEGIES}

    @staticmethod
    def _list_csv_files(strategy_path: Path) -> List[Path]:
        """Return the CSV files of one strategy directory, sorted by path."""
        # NOTE(review): '*.csv' can never match a name ending in
        # '_checkpoint.txt', so this exclusion is currently a no-op. It is
        # kept as in the original; confirm the real checkpoint file naming.
        return sorted(
            f for f in strategy_path.glob('*.csv')
            if not f.name.endswith('_checkpoint.txt')
        )

    def validate_path_structure(self) -> Tuple[bool, List[str]]:
        """Check that the expected directory layout exists.

        Returns:
            ``(is_valid, errors)`` where ``errors`` lists every problem found
            and ``is_valid`` is True only when that list is empty.
        """
        validation_errors = []

        if not self.results_path.exists():
            validation_errors.append(f"Results directory not found: {self.results_path}")
            return False, validation_errors

        all_strategies = self._known_strategies()

        for anchor_type in ANCHOR_DIRS:
            model_path = self.results_path / anchor_type / self.model

            if not model_path.exists():
                validation_errors.append(f"Model directory not found: {model_path}")
                continue

            existing_strategies = {d.name for d in model_path.iterdir() if d.is_dir()}

            # Core strategies are only required under the 'middle' anchor.
            if anchor_type == 'middle':
                missing_core = set(EXPECTED_STRATEGIES.keys()) - existing_strategies
                if missing_core:
                    validation_errors.append(f"Missing core strategies in {model_path}: {missing_core}")

            # Every known strategy directory that exists must hold a CSV.
            for strategy in existing_strategies:
                if strategy not in all_strategies:
                    continue
                strategy_path = model_path / strategy
                if not self._list_csv_files(strategy_path):
                    validation_errors.append(f"No CSV files found in {strategy_path}")

        return len(validation_errors) == 0, validation_errors

    def get_cache_filepath(self) -> Path:
        """Return the pickle path used to cache the current model's data."""
        cache_key = get_cache_key(self.model)
        return self.cache_path / f"processing_times_{self.model}_{cache_key}.pkl"

    def save_to_cache(self, data: pd.DataFrame) -> None:
        """Pickle ``data`` and the loading statistics to the cache file.

        Does nothing when ``USE_CACHE`` is false. Failures are reported and
        swallowed because caching is best-effort.
        """
        if not USE_CACHE:
            return

        cache_file = self.get_cache_filepath()
        # Write to a sibling temp file and rename it into place, so an
        # interrupted write never leaves a truncated pickle at the final path.
        tmp_file = cache_file.with_name(cache_file.name + '.tmp')
        try:
            with open(tmp_file, 'wb') as f:
                pickle.dump({
                    'data': data,
                    'model': self.model,
                    'timestamp': pd.Timestamp.now(),
                    'file_stats': dict(self.file_stats),
                    'record_stats': dict(self.record_stats)
                }, f)
            tmp_file.replace(cache_file)
            print(f"✅ Data cached to: {cache_file}")
        except Exception as e:
            print(f"⚠️ Failed to save cache: {e}")
            try:
                tmp_file.unlink()
            except OSError:
                # Nothing to clean up, or cleanup itself failed; either way
                # the final cache path was not touched.
                pass

    def load_from_cache(self) -> Optional[pd.DataFrame]:
        """Return the cached DataFrame, or None if unavailable or unreadable.

        On success the cached ``file_stats`` / ``record_stats`` are restored
        on the instance.
        """
        if not USE_CACHE:
            return None

        cache_file = self.get_cache_filepath()
        if not cache_file.exists():
            return None

        try:
            # SECURITY: pickle.load executes arbitrary code from the file.
            # Only load cache files produced by this notebook in a trusted
            # directory.
            with open(cache_file, 'rb') as f:
                cached = pickle.load(f)

            data = cached['data']
            if not isinstance(data, pd.DataFrame):
                raise TypeError(f"unexpected cached payload type: {type(data).__name__}")

            print(f"✅ Loaded from cache: {cache_file}")
            print(f"   Cache timestamp: {cached['timestamp']}")
            print(f"   Cached records: {len(data):,}")

            self.file_stats = defaultdict(int, cached.get('file_stats', {}))
            self.record_stats = defaultdict(int, cached.get('record_stats', {}))

            return data

        except Exception as e:
            print(f"⚠️ Failed to load cache: {e}")
            return None

    def process_single_file(self, file_info: Dict) -> Tuple[List[Dict], int, str]:
        """Extract the valid processing times from one prediction CSV.

        Args:
            file_info: Dict with the keys ``'path'``, ``'model'``,
                ``'strategy'`` (display label) and ``'anchor'``.

        Returns:
            ``(records, record_count, error_msg)``. ``records`` holds one dict
            per kept row with the keys ``strategy``, ``model``, ``anchor``,
            ``dataset``, ``file`` and ``processing_time`` (float seconds).
            ``error_msg`` is empty on success. Records read before an error
            are still returned together with the message.
        """
        csv_file = Path(file_info['path'])
        model_name = file_info['model']
        strategy_label = file_info['strategy']
        anchor_type = file_info['anchor']

        # The dataset name depends only on the file name, so compute it once:
        # it is the part of the stem before '_pred_' when that marker exists.
        stem = csv_file.stem
        dataset_name = stem.split('_pred_')[0] if '_pred_' in stem else stem

        records: List[Dict] = []
        total_records = 0
        error_msg = ""

        try:
            if not csv_file.exists():
                return [], 0, f"File not found: {csv_file}"

            if csv_file.stat().st_size == 0:
                return [], 0, f"Empty file: {csv_file}"

            # The python engine is used together with on_bad_lines='skip' so
            # malformed rows are dropped instead of aborting the file.
            try:
                chunk_iter = pd.read_csv(
                    csv_file,
                    chunksize=CHUNK_SIZE,
                    on_bad_lines='skip',
                    engine='python'
                )
            except pd.errors.EmptyDataError:
                return [], 0, f"Empty data in file: {csv_file.name}"
            except pd.errors.ParserError as e:
                return [], 0, f"Parser error in {csv_file.name}: {str(e)[:100]}"
            except UnicodeDecodeError as e:
                return [], 0, f"Encoding error in {csv_file.name}: {str(e)[:100]}"

            # The chunk reader owns an open file handle; the context manager
            # closes it on every exit path.
            with chunk_iter:
                for chunk_idx, chunk in enumerate(chunk_iter):
                    try:
                        if 'processing_time' not in chunk.columns:
                            # Every chunk shares the same columns, so there
                            # is nothing to gain from reading further.
                            error_msg = f"Missing 'processing_time' column in {csv_file.name}"
                            break

                        try:
                            # Non-numeric values become NaN and are dropped
                            # together with the missing ones.
                            times = pd.to_numeric(
                                chunk['processing_time'], errors='coerce'
                            ).dropna()
                        except Exception as e:
                            if chunk_idx == 0:
                                error_msg = f"Invalid processing_time format in {csv_file.name}: {str(e)[:100]}"
                            continue

                        # Keep plausible durations only (inclusive bounds).
                        times = times[times.between(self.MIN_PROCESSING_TIME,
                                                    self.MAX_PROCESSING_TIME)]
                        if times.empty:
                            continue

                        values = times.astype(float).tolist()
                        records.extend(
                            {
                                'strategy': strategy_label,
                                'model': model_name,
                                'anchor': anchor_type,
                                'dataset': dataset_name,
                                'file': csv_file.name,
                                'processing_time': value
                            }
                            for value in values
                        )
                        total_records += len(values)

                    except Exception as e:
                        error_msg = f"Chunk processing error in {csv_file.name}: {str(e)[:100]}"
                        continue

        except PermissionError:
            error_msg = f"Permission denied: {csv_file}"
        except MemoryError:
            error_msg = f"Memory error processing: {csv_file.name}"
        except Exception as e:
            error_msg = f"Unexpected error in {csv_file.name}: {str(e)[:100]}"

        return records, total_records, error_msg

    def collect_file_list(self) -> List[Dict]:
        """Collect every CSV to process across anchors and known strategies.

        Returns:
            One dict per file with the keys ``path``, ``model``, ``strategy``
            (display label), ``anchor`` and ``filename``.
        """
        file_list = []
        all_strategies = self._known_strategies()

        for anchor_type in ANCHOR_DIRS:
            model_path = self.results_path / anchor_type / self.model

            if not model_path.exists():
                print(f"⚠️ Skipping missing directory: {model_path}")
                continue

            strategy_dirs = [d for d in model_path.iterdir() if d.is_dir()]

            for strategy_dir in strategy_dirs:
                strategy = strategy_dir.name

                if strategy not in all_strategies:
                    print(f"⚠️ Ignoring unknown strategy: {strategy} in {anchor_type}")
                    continue

                strategy_label = all_strategies[strategy]
                csv_files = self._list_csv_files(strategy_dir)

                print(f"📁 Found {len(csv_files)} CSV files in {anchor_type}/{self.model}/{strategy}")

                for csv_file in csv_files:
                    file_list.append({
                        'path': str(csv_file),
                        'model': self.model,
                        'strategy': strategy_label,
                        'anchor': anchor_type,
                        'filename': csv_file.name
                    })

        return file_list

    def load_all_processing_times_optimized(self) -> pd.DataFrame:
        """Load all processing times, using the cache when possible.

        Returns:
            A DataFrame with the columns ``strategy``, ``model``, ``anchor``,
            ``dataset``, ``file`` and ``processing_time``; an empty DataFrame
            when no file or no valid record is found.
        """
        cached_data = self.load_from_cache()
        if cached_data is not None:
            print("🚀 Using cached data")
            return cached_data

        # Validation is advisory: problems are reported, loading continues.
        is_valid, errors = self.validate_path_structure()
        if not is_valid:
            print("❌ Directory structure validation failed:")
            for error in errors:
                print(f"   - {error}")
            print("⚠️ Continuing with available data...")

        file_list = self.collect_file_list()

        if not file_list:
            print("❌ No files found to process")
            return pd.DataFrame()

        print(f"📁 Found {len(file_list)} files to process")
        print(f"🔧 Processing with {MAX_WORKERS} parallel workers...")

        all_dataframes = []
        successful_files = 0
        failed_files = 0

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            future_to_file = {
                executor.submit(self.process_single_file, file_info): file_info
                for file_info in file_list
            }

            with tqdm(total=len(file_list), desc="Processing files", unit="file") as pbar:
                batch_records = []

                for future in as_completed(future_to_file):
                    file_info = future_to_file[future]

                    try:
                        records, record_count, error_msg = future.result()

                        # A file counts as failed whenever it reported an
                        # error, even if some of its records were usable.
                        if error_msg:
                            print(f"⚠️ {error_msg}")
                            failed_files += 1
                        else:
                            successful_files += 1

                        if records:
                            batch_records.extend(records)

                            strategy = file_info['strategy']
                            anchor = file_info['anchor']
                            self.file_stats[f"{strategy}_{anchor}"] += 1
                            self.record_stats[strategy] += record_count

                            # Turn the buffered dicts into a DataFrame once
                            # enough have accumulated, to bound memory use.
                            if len(batch_records) >= BATCH_SIZE:
                                all_dataframes.append(pd.DataFrame(batch_records))
                                batch_records.clear()
                                gc.collect()

                        pbar.set_postfix({
                            'Success': successful_files,
                            'Failed': failed_files,
                            'Memory': f"{get_memory_usage():.0f}MB"
                        })

                    except Exception as e:
                        print(f"❌ Task execution error for {file_info['filename']}: {e}")
                        failed_files += 1

                    pbar.update(1)

                # Flush whatever is left below the batch threshold.
                if batch_records:
                    all_dataframes.append(pd.DataFrame(batch_records))

        if not all_dataframes:
            print("❌ No valid data found")
            return pd.DataFrame()

        print(f"🔄 Combining {len(all_dataframes)} DataFrame batches...")

        try:
            final_df = pd.concat(all_dataframes, ignore_index=True)

            # Final safety net on the combined table.
            final_df = final_df.dropna(subset=['processing_time'])
            final_df = final_df[final_df['processing_time'] > 0]

            print(f"✅ Data loading completed:")
            print(f"   - Successful files: {successful_files}")
            print(f"   - Failed files: {failed_files}")
            print(f"   - Total records: {len(final_df):,}")
            print(f"   - Memory usage: {get_memory_usage():.1f} MB")

            self.save_to_cache(final_df)

            return final_df

        except Exception as e:
            print(f"❌ Error combining DataFrames: {e}")
            return pd.DataFrame()

# Initialize analyzer and load data
# Build the analyzer for the configured model, load every timing record and
# print a short summary. Any failure leaves an empty df_times behind so the
# following cells can detect it.
print(f"🚀 Initializing enhanced analyzer for model: {MODEL}")
analyzer = ProcessingTimeAnalyzer(MODEL, base_path)

try:
    df_times = analyzer.load_all_processing_times_optimized()

    if len(df_times) > 0:
        print(f"\n🎉 SUCCESS! Loaded {len(df_times):,} processing time records")
        print(f"📊 Data overview:")
        print(f"   - Strategies: {sorted(df_times['strategy'].unique())}")
        print(f"   - Anchors: {sorted(df_times['anchor'].unique())}")
        print(f"   - Datasets: {df_times['dataset'].nunique(dropna=False)}")
        print(f"   - Processing time range: {df_times['processing_time'].min():.3f}s - {df_times['processing_time'].max():.3f}s")

        # Record count of each strategy/anchor pair that actually occurs
        combinations = df_times.groupby(['strategy', 'anchor']).size()
        print(f"\n📈 Strategy-Anchor combinations:")
        for pair, n_records in combinations.items():
            strategy, anchor = pair
            print(f"   - {strategy} × {anchor}: {n_records:,} records")

        # Release the intermediate objects created while loading
        gc.collect()
        print(f"💾 Final memory usage: {get_memory_usage():.1f} MB")
    else:
        print(f"❌ No data loaded for model {MODEL}")

except Exception as e:
    print(f"❌ CRITICAL ERROR: {e}")
    df_times = pd.DataFrame()

In [None]:
# Data overview for the single configured model; skipped when loading failed.
if len(df_times) > 0:
    print(f"=== DATA OVERVIEW FOR MODEL {MODEL} ===")
    print(f"Total records: {len(df_times):,}")
    print(f"Processing time range: {df_times['processing_time'].min():.2f} - {df_times['processing_time'].max():.2f} seconds")
    print(f"Mean processing time: {df_times['processing_time'].mean():.2f} seconds")
    print(f"Median processing time: {df_times['processing_time'].median():.2f} seconds")

    # Record counts per strategy, largest first (value_counts order)
    print("\n=== RECORDS PER STRATEGY ===")
    strategy_counts = df_times['strategy'].value_counts()
    for strategy in strategy_counts.index:
        print(f"{strategy}: {strategy_counts[strategy]:,} records")

    # Record counts per anchor type
    print("\n=== RECORDS PER ANCHOR TYPE ===")
    anchor_counts = df_times['anchor'].value_counts()
    for anchor in anchor_counts.index:
        print(f"{anchor}: {anchor_counts[anchor]:,} records")

    # Summary statistics of the processing time within each strategy
    print("\n=== BASIC STATISTICS BY STRATEGY ===")
    strategy_stats = (
        df_times.groupby('strategy')['processing_time']
        .agg(['count', 'mean', 'median', 'std', 'min', 'max'])
        .round(3)
    )
    print(strategy_stats)

    # The same table per anchor, only meaningful with more than one anchor
    if df_times['anchor'].nunique(dropna=False) > 1:
        print("\n=== BASIC STATISTICS BY ANCHOR TYPE ===")
        anchor_stats = (
            df_times.groupby('anchor')['processing_time']
            .agg(['count', 'mean', 'median', 'std', 'min', 'max'])
            .round(3)
        )
        print(anchor_stats)

    print(f"\nMemory usage: {get_memory_usage():.1f} MB")
else:
    print(f"ERROR: No data available for model {MODEL}. Please check the data loading step.")

In [None]:
# Processing-time distribution dashboard for the configured model:
# six panels arranged in a grid of 3 rows x 2 columns.
if len(df_times) == 0:
    print("No data available for visualization.")
else:
    def _label_bars(axis, bar_container, template):
        """Write each bar's height above it, formatted with ``template``."""
        for rect in bar_container:
            top = rect.get_height()
            axis.text(rect.get_x() + rect.get_width()/2., top,
                      template.format(top), ha='center', va='bottom', fontsize=9)

    fig = plt.figure(figsize=(15, 12))

    try:
        gs = fig.add_gridspec(3, 2, hspace=0.3, wspace=0.3)

        # Panel 1 (row 0, col 0): box plot per strategy
        ax1 = fig.add_subplot(gs[0, 0])
        sns.boxplot(data=df_times, x='strategy', y='processing_time', ax=ax1)
        ax1.set_title(f'Processing Time Distribution by Strategy\n({MODEL})')
        ax1.set_xlabel('Strategy')
        ax1.set_ylabel('Processing Time (seconds)')
        ax1.tick_params(axis='x', rotation=45)

        # Panel 2 (row 0, col 1): box plot per anchor, or an overall
        # histogram when only a single anchor type is present
        ax2 = fig.add_subplot(gs[0, 1])
        if df_times['anchor'].nunique(dropna=False) <= 1:
            df_times['processing_time'].hist(bins=50, ax=ax2)
            ax2.set_title('Processing Time Distribution')
            ax2.set_xlabel('Processing Time (seconds)')
            ax2.set_ylabel('Frequency')
        else:
            sns.boxplot(data=df_times, x='anchor', y='processing_time', ax=ax2)
            ax2.set_title('Processing Time by Anchor Type')
            ax2.set_xlabel('Anchor Type')
            ax2.set_ylabel('Processing Time (seconds)')

        # Panel 3 (row 1, col 0): mean time per strategy, ascending
        ax3 = fig.add_subplot(gs[1, 0])
        strategy_means = df_times.groupby('strategy')['processing_time'].mean().sort_values()
        positions = range(len(strategy_means))
        bars = ax3.bar(positions, strategy_means.values,
                       color=['#ff9999', '#66b3ff', '#99ff99'][:len(strategy_means)])
        ax3.set_title('Mean Processing Time by Strategy')
        ax3.set_xlabel('Strategy')
        ax3.set_ylabel('Mean Processing Time (seconds)')
        ax3.set_xticks(positions)
        ax3.set_xticklabels(strategy_means.index, rotation=45, ha='right')
        _label_bars(ax3, bars, '{:.2f}s')

        # Panel 4 (row 1, col 1): throughput, i.e. the reciprocal of the
        # mean time, in the same strategy order as panel 3
        ax4 = fig.add_subplot(gs[1, 1])
        processing_rates = 1 / strategy_means
        rate_positions = range(len(processing_rates))
        bars4 = ax4.bar(rate_positions, processing_rates.values,
                        color=['#ffcc99', '#c2c2f0', '#ccffcc'][:len(processing_rates)])
        ax4.set_title('Processing Rate by Strategy')
        ax4.set_xlabel('Strategy')
        ax4.set_ylabel('Predictions/Second')
        ax4.set_xticks(rate_positions)
        ax4.set_xticklabels(processing_rates.index, rotation=45, ha='right')
        _label_bars(ax4, bars4, '{:.3f}')

        # Panel 5 (row 2, col 0): share of records per strategy
        ax5 = fig.add_subplot(gs[2, 0])
        strategy_counts = df_times['strategy'].value_counts()
        colors = plt.cm.Set3(np.linspace(0, 1, len(strategy_counts)))
        ax5.pie(strategy_counts.values, labels=strategy_counts.index,
                autopct='%1.1f%%', startangle=90, colors=colors)
        ax5.set_title('Data Distribution by Strategy')

        # Panel 6 (row 2, col 1): violin plot per strategy
        ax6 = fig.add_subplot(gs[2, 1])
        sns.violinplot(data=df_times, x='strategy', y='processing_time', ax=ax6)
        ax6.set_title('Processing Time Distribution (Violin Plot)')
        ax6.set_xlabel('Strategy')
        ax6.set_ylabel('Processing Time (seconds)')
        ax6.tick_params(axis='x', rotation=45)

        plt.suptitle(f'Processing Time Analysis - Model: {MODEL}', fontsize=16, y=0.98)
        plt.show()

        # Drop the figure and collect garbage before reporting memory
        plt.close()
        gc.collect()
        print(f"Memory usage after visualization: {get_memory_usage():.1f} MB")

    except Exception as e:
        print(f"Error creating visualization: {e}")
        plt.close('all')

In [None]:
# Detailed statistics for the complete dataset: descriptive tables per
# strategy / anchor / dataset, followed by a one-way ANOVA across strategies
# with its effect size and Bonferroni-corrected pairwise t-tests.
if len(df_times) == 0:
    print("No data available for statistical analysis.")
else:
    print(f"=== COMPLETE PROCESSING TIME STATISTICS FOR {MODEL} ===")
    print(f"Total records analyzed: {len(df_times):,}")

    # Descriptive statistics per strategy, including quartiles, range and
    # the coefficient of variation (std / mean).
    detailed_stats = df_times.groupby('strategy')['processing_time'].agg([
        'count',
        'mean',
        'median',
        'std',
        'min',
        ('q25', lambda x: x.quantile(0.25)),
        ('q75', lambda x: x.quantile(0.75)),
        'max',
        ('range', lambda x: x.max() - x.min()),
        ('cv', lambda x: x.std() / x.mean())
    ]).round(4)

    print("\n--- Processing Time Statistics by Strategy ---")
    print(detailed_stats)

    # Throughput derived from the mean time per prediction.
    print("\n--- Efficiency Metrics ---")
    efficiency_stats = df_times.groupby('strategy')['processing_time'].agg(['mean', 'count']).round(4)
    efficiency_stats['predictions_per_second'] = (1 / efficiency_stats['mean']).round(4)
    efficiency_stats['records_per_hour'] = (3600 / efficiency_stats['mean']).round(0)
    print(efficiency_stats)

    # Anchor-level tables are only meaningful with more than one anchor.
    if len(df_times['anchor'].unique()) > 1:
        print(f"\n--- Processing Time Statistics by Anchor Type ---")
        anchor_stats = df_times.groupby('anchor')['processing_time'].agg([
            'count', 'mean', 'median', 'std', 'min', 'max'
        ]).round(4)
        print(anchor_stats)

        print(f"\n--- Strategy × Anchor Cross-Analysis ---")
        cross_stats = df_times.groupby(['strategy', 'anchor'])['processing_time'].agg([
            'count', 'mean', 'std'
        ]).round(4)
        print(cross_stats)

    # Per-dataset table, sorted from slowest to fastest mean.
    if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
        print(f"\n--- Dataset Performance Analysis ---")
        dataset_stats = df_times.groupby('dataset')['processing_time'].agg([
            'count', 'mean', 'median', 'std'
        ]).round(4).sort_values('mean', ascending=False)

        print("Top 10 slowest datasets:")
        print(dataset_stats.head(10))

        print("\nTop 10 fastest datasets:")
        print(dataset_stats.tail(10))

    # Significance tests need at least two strategies to compare.
    strategies = df_times['strategy'].unique()

    if len(strategies) > 1:
        try:
            from scipy import stats

            groups = [df_times[df_times['strategy'] == s]['processing_time'].values for s in strategies]

            f_stat, p_value = stats.f_oneway(*groups)

            print(f"\n--- Statistical Significance Test ---")
            print(f"One-way ANOVA F-statistic: {f_stat:.4f}")
            print(f"P-value: {p_value:.2e}")
            print(f"Significant difference between strategies: {'Yes' if p_value < 0.05 else 'No'}")

            # Effect size: eta-squared = SS_between / SS_total, where both
            # sums of squares are taken around the grand mean.
            all_times = df_times['processing_time'].to_numpy(dtype=float)
            grand_mean = all_times.mean()
            total_sum_squares = float(((all_times - grand_mean) ** 2).sum())
            between_sum_squares = float(sum(
                len(group) * (group.mean() - grand_mean) ** 2 for group in groups
            ))

            if total_sum_squares > 0:
                eta_squared = between_sum_squares / total_sum_squares
                print(f"Effect size (η²): {eta_squared:.4f}")

                # Cohen's conventional benchmarks for eta-squared:
                # 0.01 small, 0.06 medium, 0.14 large.
                if eta_squared < 0.01:
                    effect_size = "Negligible"
                elif eta_squared < 0.06:
                    effect_size = "Small"
                elif eta_squared < 0.14:
                    effect_size = "Medium"
                else:
                    effect_size = "Large"
                print(f"Effect size interpretation: {effect_size}")
            else:
                # All processing times are identical: there is no variance
                # to explain, so eta-squared is undefined.
                eta_squared = float('nan')
                print("Effect size (η²): undefined (no variance in processing times)")

            # Pairwise comparisons are only needed beyond two strategies;
            # with exactly two the ANOVA above is already the comparison.
            if len(strategies) > 2:
                print(f"\n--- Pairwise Comparisons (Bonferroni corrected) ---")
                from itertools import combinations

                alpha = 0.05
                strategy_pairs = list(combinations(strategies, 2))
                n_comparisons = len(strategy_pairs)
                bonferroni_alpha = alpha / n_comparisons

                print(f"Bonferroni corrected alpha: {bonferroni_alpha:.4f}")

                for strategy1, strategy2 in strategy_pairs:
                    group1 = df_times[df_times['strategy'] == strategy1]['processing_time']
                    group2 = df_times[df_times['strategy'] == strategy2]['processing_time']

                    t_stat, p_val = stats.ttest_ind(group1, group2)
                    is_significant = p_val < bonferroni_alpha

                    mean_diff = group1.mean() - group2.mean()
                    # Cohen's d with the average of the two sample variances
                    # as the pooled variance.
                    cohen_d = mean_diff / np.sqrt((group1.var() + group2.var()) / 2)

                    print(f"{strategy1} vs {strategy2}:")
                    print(f"  Mean difference: {mean_diff:.4f}s")
                    print(f"  t-statistic: {t_stat:.4f}")
                    print(f"  p-value: {p_val:.6f}")
                    print(f"  Significant: {'Yes' if is_significant else 'No'}")
                    print(f"  Cohen's d: {cohen_d:.4f}")
                    print()

        except ImportError:
            print("scipy not available - skipping statistical tests")
        except Exception as e:
            print(f"Could not perform statistical tests: {e}")
    else:
        print("Only one strategy found - no statistical comparison possible.")

    print(f"\nMemory usage: {get_memory_usage():.1f} MB")

In [None]:
# Strategy and Anchor Comparison - Enhanced for single model
# Builds one 2x2 figure for MODEL:
#   top-left     mean time per strategy/anchor (heatmap) or per strategy (bars)
#   top-right    distribution per strategy, split by anchor when several exist
#   bottom-left  mean with standard-deviation error bars per strategy
#   bottom-right slowest datasets, or the empirical CDF when there is no dataset split
if len(df_times) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    try:
        time_axis_label = 'Processing Time (seconds)'
        # df_times is not modified in this cell, so the anchor check is done once.
        has_multiple_anchors = len(df_times['anchor'].unique()) > 1

        # --- Panel 1: strategy x anchor means ---
        panel_means = axes[0, 0]
        if has_multiple_anchors and len(df_times['strategy'].unique()) > 1:
            grouped_means = df_times.groupby(['strategy', 'anchor'])['processing_time'].mean()
            strategy_anchor_stats = grouped_means.unstack()
            sns.heatmap(
                strategy_anchor_stats,
                annot=True,
                fmt='.2f',
                cmap='YlOrRd',
                ax=panel_means,
            )
            panel_means.set_title(f'Mean Processing Time by Strategy and Anchor\n({MODEL})')
            panel_means.set_xlabel('Anchor Type')
            panel_means.set_ylabel('Strategy')
        else:
            # A heatmap needs both dimensions; otherwise plot plain per-strategy means.
            strategy_means = df_times.groupby('strategy')['processing_time'].mean()
            tick_positions = range(len(strategy_means))
            panel_means.bar(tick_positions, strategy_means.values)
            panel_means.set_title(f'Mean Processing Time by Strategy\n({MODEL})')
            panel_means.set_xlabel('Strategy')
            panel_means.set_ylabel(time_axis_label)
            panel_means.set_xticks(tick_positions)
            panel_means.set_xticklabels(strategy_means.index, rotation=45, ha='right')

        # --- Panel 2: distribution per strategy ---
        panel_dist = axes[0, 1]
        if has_multiple_anchors:
            sns.boxplot(data=df_times, x='strategy', y='processing_time', hue='anchor', ax=panel_dist)
            panel_dist.set_title('Processing Time by Strategy and Anchor')
        else:
            # With a single anchor there is nothing to split on; use a violin plot.
            sns.violinplot(data=df_times, x='strategy', y='processing_time', ax=panel_dist)
            panel_dist.set_title('Processing Time Distribution by Strategy')
        panel_dist.set_xlabel('Strategy')
        panel_dist.set_ylabel(time_axis_label)
        panel_dist.tick_params(axis='x', rotation=45)
        if has_multiple_anchors:
            panel_dist.legend(title='Anchor')

        # --- Panel 3: mean with standard-deviation error bars ---
        panel_spread = axes[1, 0]
        strategy_stats = df_times.groupby('strategy')['processing_time'].agg(['mean', 'std'])
        bar_positions = np.arange(len(strategy_stats))
        spread_bars = panel_spread.bar(
            bar_positions,
            strategy_stats['mean'],
            yerr=strategy_stats['std'],
            capsize=5,
        )
        panel_spread.set_title('Processing Time with Standard Deviation')
        panel_spread.set_xlabel('Strategy')
        panel_spread.set_ylabel(time_axis_label)
        panel_spread.set_xticks(bar_positions)
        panel_spread.set_xticklabels(strategy_stats.index, rotation=45, ha='right')

        # Annotate every bar with "mean±std" just above its top edge.
        for rect, std_value in zip(spread_bars, strategy_stats['std']):
            bar_top = rect.get_height()
            panel_spread.text(
                rect.get_x() + rect.get_width() / 2.,
                bar_top,
                f'{bar_top:.2f}±{std_value:.2f}',
                ha='center',
                va='bottom',
                fontsize=8,
            )

        # --- Panel 4: per-dataset view, falling back to the empirical CDF ---
        panel_extra = axes[1, 1]
        if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
            # Ascending sort + tail(10) keeps the ten largest means, slowest last.
            dataset_means = df_times.groupby('dataset')['processing_time'].mean().sort_values()
            top_datasets = dataset_means.tail(10)
            dataset_positions = range(len(top_datasets))
            panel_extra.bar(dataset_positions, top_datasets.values)
            panel_extra.set_title('Top 10 Slowest Datasets')
            panel_extra.set_xlabel('Dataset')
            panel_extra.set_ylabel('Mean Processing Time (seconds)')
            panel_extra.set_xticks(dataset_positions)
            panel_extra.set_xticklabels(top_datasets.index, rotation=45, ha='right')
        else:
            sorted_times = np.sort(df_times['processing_time'])
            n_times = len(sorted_times)
            cumulative = np.arange(n_times) / float(n_times)
            panel_extra.plot(sorted_times, cumulative)
            panel_extra.set_title('Cumulative Distribution of Processing Times')
            panel_extra.set_xlabel('Processing Time (seconds)')
            panel_extra.set_ylabel('Cumulative Probability')
            panel_extra.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Tabular counterpart of the figure, only meaningful with several anchors.
        if has_multiple_anchors:
            print(f"\n=== STRATEGY-ANCHOR COMBINATIONS FOR {MODEL} ===")
            combo_stats = (
                df_times.groupby(['strategy', 'anchor'])['processing_time']
                .agg(['count', 'mean', 'std'])
                .round(3)
            )
            print(combo_stats)

        # Release the figure before reporting memory.
        plt.close()
        gc.collect()
        print(f"\nMemory usage after analysis: {get_memory_usage():.1f} MB")

    except Exception as e:
        print(f"Error creating strategy-anchor analysis: {e}")
        plt.close('all')
else:
    print("No data available for strategy and anchor analysis.")

In [None]:
# Performance Summary Report
# Ranks strategies by mean processing time, then reports overhead relative to
# the 'Base Version' strategy (when present) and the throughput of each strategy.
if len(df_times) == 0:
    print("No data available for performance summary.")
else:
    print("=== PROCESSING TIME PERFORMANCE SUMMARY ===")
    print("\nStrategy Performance Ranking (by mean processing time):")

    strategy_ranking = df_times.groupby('strategy')['processing_time'].agg([
        'mean', 'median', 'count'
    ]).sort_values('mean')

    # The loop variable is `row` rather than `stats` so it does not rebind the
    # `stats` name that the statistical-tests cell uses for `stats.ttest_ind`.
    for rank, (strategy, row) in enumerate(strategy_ranking.iterrows(), 1):
        print(f"{rank}. {strategy}:")
        print(f"   Mean: {row['mean']:.2f}s")
        print(f"   Median: {row['median']:.2f}s")
        # iterrows() yields a float row, so cast back to int to avoid "1,000.0".
        print(f"   Records: {int(row['count']):,}")
        print()

    # Efficiency metrics
    print("\n=== EFFICIENCY ANALYSIS ===")
    base_mean = strategy_ranking.loc['Base Version', 'mean'] if 'Base Version' in strategy_ranking.index else None

    # An explicit check (not truthiness) also rejects a NaN or zero baseline,
    # for which a relative overhead is undefined.
    if base_mean is not None and base_mean > 0:
        print(f"Base Version mean processing time: {base_mean:.2f}s")
        print("\nOverhead compared to Base Version:")

        for strategy, row in strategy_ranking.iterrows():
            if strategy != 'Base Version':
                delta = row['mean'] - base_mean
                overhead = (delta / base_mean) * 100
                # Signed formatting keeps faster strategies readable
                # ("-5.0%" instead of "+-5.0%").
                direction = "additional" if delta >= 0 else "saved"
                print(f"{strategy}: {overhead:+.1f}% ({abs(delta):.2f}s {direction})")

    # Processing rate (predictions per second)
    print("\n=== PROCESSING RATE ===")
    for strategy, row in strategy_ranking.iterrows():
        # Same zero-mean guard as the CSV export cell uses for its rate column.
        rate = 1 / row['mean'] if row['mean'] > 0 else 0
        print(f"{strategy}: {rate:.3f} predictions/second")

In [None]:
# Export summary statistics to CSV - Optimized for single model
def _summarize_times(times):
    """Return the exported descriptive statistics for one group of processing times.

    Every statistic is computed from the same series, so a row can never mix
    figures taken from different subsets of the data.
    """
    mean_time = times.mean()
    return {
        'count': len(times),
        'mean': mean_time,
        'median': times.median(),
        'std': times.std(),
        'min': times.min(),
        'max': times.max(),
        'q25': times.quantile(0.25),
        'q75': times.quantile(0.75),
        # Guard against a zero mean so the rate column stays finite.
        'predictions_per_second': 1 / mean_time if mean_time > 0 else 0,
    }


if len(df_times) == 0:
    print("No data available for export.")
else:
    try:
        output_path = base_path / 'notebook' / f'processing_time_summary_{MODEL}.csv'

        # One row per strategy, or per (strategy, anchor) pair when the data
        # contains more than one anchor.
        summary_data = []
        anchors = df_times['anchor'].unique()

        for strategy in df_times['strategy'].unique():
            strategy_rows = df_times[df_times['strategy'] == strategy]

            if len(anchors) > 1:
                for anchor in anchors:
                    anchor_data = strategy_rows.loc[
                        strategy_rows['anchor'] == anchor, 'processing_time'
                    ]
                    if len(anchor_data) == 0:
                        continue
                    # min/max/q25/q75 previously kept the strategy-wide values
                    # in per-anchor rows; all statistics now come from the
                    # anchor's own slice.
                    summary_data.append({
                        'model': MODEL,
                        'strategy': strategy,
                        **_summarize_times(anchor_data),
                        'anchor': anchor,
                    })
            else:
                summary_data.append({
                    'model': MODEL,
                    'strategy': strategy,
                    **_summarize_times(strategy_rows['processing_time']),
                    'anchor': df_times['anchor'].iloc[0],
                })

        summary_df = pd.DataFrame(summary_data)
        summary_df.to_csv(output_path, index=False)

        print(f"Summary statistics exported to: {output_path}")
        print(f"\nSummary table for {MODEL}:")
        display_cols = ['strategy', 'anchor', 'count', 'mean', 'median', 'std', 'predictions_per_second']
        available_cols = [col for col in display_cols if col in summary_df.columns]
        print(summary_df[available_cols].round(3))

        # Export additional dataset-level statistics if available
        if 'dataset' in df_times.columns and len(df_times['dataset'].unique()) > 1:
            dataset_output_path = base_path / 'notebook' / f'dataset_processing_times_{MODEL}.csv'
            dataset_stats = df_times.groupby(['dataset', 'strategy'])['processing_time'].agg([
                'count', 'mean', 'median', 'std', 'min', 'max'
            ]).round(3).reset_index()
            dataset_stats.to_csv(dataset_output_path, index=False)
            print(f"Dataset-level statistics exported to: {dataset_output_path}")

        # Force cleanup
        del summary_data, summary_df
        gc.collect()
        print(f"\nMemory usage: {get_memory_usage():.1f} MB")

    except Exception as e:
        print(f"Error exporting summary: {e}")

In [None]:
# Final Summary Report - Optimized for single model
def _predictions_per_second(mean_seconds):
    """Return throughput for a mean latency, or 0 when the mean is not positive."""
    return 1 / mean_seconds if mean_seconds > 0 else 0


if len(df_times) == 0:
    print(f"No data available for final analysis of model {MODEL}.")
else:
    print(f"=== PROCESSING TIME PERFORMANCE SUMMARY FOR {MODEL} ===")
    print("\nStrategy Performance Ranking (by mean processing time):")

    try:
        strategy_ranking = df_times.groupby('strategy')['processing_time'].agg([
            'mean', 'median', 'count', 'std'
        ]).sort_values('mean')

        # The loop variable is `row` rather than `stats` so it does not rebind
        # the `stats` name that the statistical-tests cell uses for
        # `stats.ttest_ind`.
        for rank, (strategy, row) in enumerate(strategy_ranking.iterrows(), 1):
            efficiency_ratio = _predictions_per_second(row['mean'])
            print(f"{rank}. {strategy}:")
            print(f"   Mean: {row['mean']:.2f}s (±{row['std']:.2f}s)")
            print(f"   Median: {row['median']:.2f}s")
            # iterrows() yields a float row, so cast back to int to avoid "1,000.0".
            print(f"   Records: {int(row['count']):,}")
            print(f"   Efficiency: {efficiency_ratio:.3f} predictions/second")
            print()

        # Efficiency analysis
        print(f"\n=== EFFICIENCY ANALYSIS FOR {MODEL} ===")
        base_mean = strategy_ranking.loc['Base Version', 'mean'] if 'Base Version' in strategy_ranking.index else None

        # An explicit check (not truthiness) also rejects a NaN or zero
        # baseline, for which relative figures are undefined.
        if base_mean is not None and base_mean > 0:
            print(f"Base Version mean processing time: {base_mean:.2f}s")
            print("Overhead compared to Base Version:")

            for strategy, row in strategy_ranking.iterrows():
                if strategy != 'Base Version':
                    additional_time = row['mean'] - base_mean
                    overhead = (additional_time / base_mean) * 100
                    # Signed formatting: a faster strategy prints "-5.0% (-0.12s)"
                    # instead of "+-5.0% (+-0.12s)".
                    print(f"  {strategy}: {overhead:+.1f}% ({additional_time:+.2f}s)")

            # Throughput relative to the baseline
            base_rate = 1 / base_mean
            print("\nThroughput comparison:")
            print(f"  Base Version: {base_rate:.3f} predictions/second")
            for strategy, row in strategy_ranking.iterrows():
                if strategy != 'Base Version':
                    strategy_rate = _predictions_per_second(row['mean'])
                    # Negative means lower throughput than the baseline.
                    rate_change = ((strategy_rate - base_rate) / base_rate) * 100
                    print(f"  {strategy}: {strategy_rate:.3f} predictions/second ({rate_change:+.1f}%)")
        else:
            print("Base Version not found - showing absolute performance:")
            for strategy, row in strategy_ranking.iterrows():
                rate = _predictions_per_second(row['mean'])
                print(f"  {strategy}: {rate:.3f} predictions/second")

        # Anchor type analysis if available
        if len(df_times['anchor'].unique()) > 1:
            print(f"\n=== ANCHOR TYPE COMPARISON FOR {MODEL} ===")
            anchor_ranking = df_times.groupby('anchor')['processing_time'].agg([
                'mean', 'median', 'count'
            ]).sort_values('mean')

            for anchor, row in anchor_ranking.iterrows():
                rate = _predictions_per_second(row['mean'])
                print(f"{anchor}: {row['mean']:.2f}s avg ({rate:.3f} pred/sec, {int(row['count']):,} records)")

        # Statistical summary
        print(f"\n=== STATISTICAL SUMMARY FOR {MODEL} ===")
        overall_stats = df_times['processing_time'].describe()
        print("Overall processing time statistics:")
        print(f"  Count: {overall_stats['count']:,.0f}")
        print(f"  Mean: {overall_stats['mean']:.2f}s")
        print(f"  Std: {overall_stats['std']:.2f}s")
        print(f"  Min: {overall_stats['min']:.2f}s")
        print(f"  25th percentile: {overall_stats['25%']:.2f}s")
        print(f"  Median: {overall_stats['50%']:.2f}s")
        print(f"  75th percentile: {overall_stats['75%']:.2f}s")
        print(f"  Max: {overall_stats['max']:.2f}s")

        print(f"\n=== ANALYSIS COMPLETE FOR {MODEL} ===")
        print(f"Total processing time records analyzed: {len(df_times):,}")
        print(f"Strategies analyzed: {', '.join(sorted(df_times['strategy'].unique()))}")
        print(f"Anchor types analyzed: {', '.join(sorted(df_times['anchor'].unique()))}")
        if 'dataset' in df_times.columns:
            print(f"Datasets covered: {len(df_times['dataset'].unique())}")
        print(f"Final memory usage: {get_memory_usage():.1f} MB")

        # Final cleanup
        gc.collect()

    except Exception as e:
        print(f"Error in final analysis: {e}")

# Memory cleanup at end
# Best-effort: a failure here must not abort the notebook, but it is reported
# rather than silently swallowed. `except Exception` (not a bare `except`) lets
# KeyboardInterrupt and SystemExit propagate.
try:
    plt.close('all')  # Close any remaining plots
    gc.collect()
    print(f"\nFinal cleanup complete. Memory usage: {get_memory_usage():.1f} MB")
except Exception as e:
    print(f"\nFinal cleanup could not be completed: {e}")

# Static usage hints; kept outside the try block so they are shown even when
# the cleanup above fails.
print("\nTo analyze a different model, change the MODEL variable in the first cell and rerun the notebook.")
print("Available models: qwen2.5_7b, qwen2.5_14b, llama3.1_8b, mixtral_8x7b, deepseek-coder_33b, mistral_7b")