In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from matplotlib.gridspec import GridSpec

In [17]:
# Set plot style and colors
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from matplotlib.gridspec import GridSpec
from scipy.signal import savgol_filter

plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("Set2")
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 100

def smooth_data(data, method='savgol', window_size=5):
    """Apply smoothing to a data series using specified method"""
    if len(data) < window_size:
        return data  # Not enough data points to smooth
        
    if method == 'moving_avg':
        # Simple moving average
        return data.rolling(window=window_size, center=True).mean().fillna(data)
    elif method == 'exponential':
        # Exponential moving average
        return data.ewm(span=window_size, adjust=True).mean()
    elif method == 'savgol':
        # Savitzky-Golay filter (good for maintaining peaks)
        # For savgol_filter, window_length must be odd and > polyorder
        if window_size % 2 == 0:
            window_size += 1  # Ensure window size is odd
        polyorder = min(2, window_size - 1)  # Ensure polyorder < window_length
        return pd.Series(savgol_filter(data, window_length=window_size, polyorder=polyorder), index=data.index)
    else:
        return data

def load_model_csvs(base_directory='runs', smooth=False, smooth_method='savgol', window_size=5):
    """Load CSV files with option to generate smoothed versions"""
    # Dictionary to store dataframes by model
    model_dfs = {}
    model_dfs_smooth = {}  # For smoothed versions
    
    # Get all model directories
    model_dirs = [d for d in os.listdir(base_directory) 
                 if os.path.isdir(os.path.join(base_directory, d))]
    
    # Process each model directory
    for model_name in model_dirs:
        # Path to the CSV file
        csv_path = os.path.join(base_directory, model_name, 'pruning_results.csv')
        
        # Check if the CSV file exists
        if os.path.exists(csv_path):
            # Read the CSV
            df = pd.read_csv(csv_path)

            # Rename the overall_accuracy column to accuracy
            if 'overall_accuracy' in df.columns:
                df.rename(columns={'overall_accuracy': 'accuracy'}, inplace=True)
            
            # Add a column to identify the model
            df['model'] = model_name
            
            # Store in dictionary
            model_dfs[model_name] = df
            
            # Create smoothed version if requested
            if smooth:
                df_smooth = df.copy()
                
                # Apply smoothing to relevant metrics
                metrics_to_smooth = ['accuracy', 'overall_f1', 'ham_accuracy', 'ham_f1', 
                                     'spam_accuracy', 'spam_f1']
                
                for metric in metrics_to_smooth:
                    if metric in df_smooth.columns:
                        # Sort by threshold before smoothing for better results
                        df_sorted = df_smooth.sort_values('threshold')
                        smoothed_values = smooth_data(df_sorted[metric], method=smooth_method, window_size=window_size)
                        # Create new column with smooth_ prefix
                        df_smooth[f'smooth_{metric}'] = smoothed_values.values
                
                # Store the smoothed dataframe
                model_dfs_smooth[model_name] = df_smooth
                
                # Save smoothed version to CSV
                smooth_dir = os.path.join(base_directory, model_name, 'smoothed')
                os.makedirs(smooth_dir, exist_ok=True)
                smooth_csv_path = os.path.join(smooth_dir, f'pruning_results_smooth_{smooth_method}_w{window_size}.csv')
                df_smooth.to_csv(smooth_csv_path, index=False)
                
            print(f"Loaded data for model: {model_name}")
        else:
            print(f"Warning: No CSV file found for model {model_name} at {csv_path}")
    
    # Combine all for overall analysis
    if model_dfs:
        all_data = pd.concat(list(model_dfs.values()), ignore_index=True)
        if smooth:
            all_data_smooth = pd.concat(list(model_dfs_smooth.values()), ignore_index=True)
            return model_dfs, all_data, model_dfs_smooth, all_data_smooth
        return model_dfs, all_data
    else:
        print("No CSV files were found. Check your directory structure.")
        if smooth:
            return {}, pd.DataFrame(), {}, pd.DataFrame()
        return {}, pd.DataFrame()

def create_comparative_plots(model_dfs, all_data, output_dir='comparison_plots', 
                            use_smoothed=False, model_dfs_smooth=None, all_data_smooth=None, 
                            smooth_method='savgol', window_size=5):
    """Create comparative plots to visualize model behaviors with optional smoothed data"""
    os.makedirs(output_dir, exist_ok=True)
    
    # Determine if we need to add smoothed suffix to output files
    smooth_suffix = f"_smooth_{smooth_method}_w{window_size}" if use_smoothed else ""
    
    # Choose which dataframes to use based on smoothed preference
    if use_smoothed and model_dfs_smooth:
        plot_dfs = model_dfs_smooth
        accuracy_col = 'smooth_accuracy'  # Use smoothed version
    else:
        plot_dfs = model_dfs
        accuracy_col = 'accuracy'  # Use original version
    
    # Get list of models
    models = list(plot_dfs.keys())
    
    # Define colors for consistent plotting - expanded for 17 models
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
              '#aec7e8', '#ffbb78', '#98df8a', '#ff9896', '#c5b0d5',
              '#c49c94', '#f7b6d3']
    
    # 1. PLOT: FLOPS and Parameters Reduction by Model
    plt.figure(figsize=(14, 10))
    
    # Create a summary DataFrame
    reduction_summary = []
    for model, df in plot_dfs.items():
        # For each model, get the min/max threshold data points
        min_threshold_row = df.loc[df['threshold'].idxmin()]
        max_threshold_row = df.loc[df['threshold'].idxmax()]
        
        reduction_summary.append({
            'Model': model,
            'Initial Params Reduction (%)': min_threshold_row['params_reduction_pct'],
            'Final Params Reduction (%)': max_threshold_row['params_reduction_pct'],
            'Initial FLOPS Reduction (%)': min_threshold_row['flops_reduction_pct'],
            'Final FLOPS Reduction (%)': max_threshold_row['flops_reduction_pct'],
            'Initial Accuracy': min_threshold_row[accuracy_col],
            'Final Accuracy': max_threshold_row[accuracy_col],
            'Accuracy Change': max_threshold_row[accuracy_col] - min_threshold_row[accuracy_col]
        })
    
    red_df = pd.DataFrame(reduction_summary)
    
    # Sort by final params reduction
    red_df = red_df.sort_values('Final Params Reduction (%)', ascending=False)
    
    # Create subplot grid
    fig = plt.figure(figsize=(18, 12))
    gs = GridSpec(2, 2, figure=fig, height_ratios=[1, 1])
    
    # 1. Params Reduction Plot
    ax1 = fig.add_subplot(gs[0, 0])
    
    for i, model in enumerate(red_df['Model']):
        ax1.plot([0, 1], 
                [red_df.loc[red_df['Model']==model, 'Initial Params Reduction (%)'].values[0],
                 red_df.loc[red_df['Model']==model, 'Final Params Reduction (%)'].values[0]],
                'o-', linewidth=2, label=model, color=colors[i % len(colors)])
    
    ax1.set_xlim(-0.1, 1.1)
    ax1.set_xticks([0, 1])
    ax1.set_xticklabels(['Initial', 'Final'])
    ax1.set_ylabel('Parameters Reduction (%)')
    ax1.set_title('Parameters Reduction: Initial vs Final')
    ax1.grid(True, linestyle='--', alpha=0.7)
    
    # 2. FLOPS Reduction Plot
    ax2 = fig.add_subplot(gs[0, 1])
    
    for i, model in enumerate(red_df['Model']):
        ax2.plot([0, 1], 
                [red_df.loc[red_df['Model']==model, 'Initial FLOPS Reduction (%)'].values[0],
                 red_df.loc[red_df['Model']==model, 'Final FLOPS Reduction (%)'].values[0]],
                'o-', linewidth=2, label=model, color=colors[i % len(colors)])
    
    ax2.set_xlim(-0.1, 1.1)
    ax2.set_xticks([0, 1])
    ax2.set_xticklabels(['Initial', 'Final'])
    ax2.set_ylabel('FLOPS Reduction (%)')
    ax2.set_title('FLOPS Reduction: Initial vs Final')
    ax2.grid(True, linestyle='--', alpha=0.7)
    
    # 3. Accuracy Change Plot
    ax3 = fig.add_subplot(gs[1, 0])
    
    for i, model in enumerate(red_df['Model']):
        ax3.plot([0, 1], 
                [red_df.loc[red_df['Model']==model, 'Initial Accuracy'].values[0],
                 red_df.loc[red_df['Model']==model, 'Final Accuracy'].values[0]],
                'o-', linewidth=2, label=model, color=colors[i % len(colors)])
    
    ax3.set_xlim(-0.1, 1.1)
    ax3.set_xticks([0, 1])
    ax3.set_xticklabels(['Initial', 'Final'])
    ax3.set_ylabel('Accuracy')
    ax3.set_title('Accuracy: Initial vs Final')
    ax3.grid(True, linestyle='--', alpha=0.7)
    
    # 4. Tradeoff: Accuracy Change vs Params Reduction
    ax4 = fig.add_subplot(gs[1, 1])
    
    scatter = ax4.scatter(
        red_df['Final Params Reduction (%)'] - red_df['Initial Params Reduction (%)'],
        red_df['Accuracy Change'],
        s=100, c=np.arange(len(red_df)), cmap='viridis', alpha=0.8
    )
    
    # Add model names as annotations
    for i, row in red_df.iterrows():
        ax4.annotate(row['Model'], 
                   (row['Final Params Reduction (%)'] - row['Initial Params Reduction (%)'], 
                    row['Accuracy Change']),
                   xytext=(5, 5), textcoords='offset points')
    
    ax4.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    ax4.set_xlabel('Params Reduction Change (%)')
    ax4.set_ylabel('Accuracy Change')
    ax4.set_title('Tradeoff: Accuracy Change vs Params Reduction Change')
    ax4.grid(True, linestyle='--', alpha=0.7)
    
    # Add a single legend for all subplots - FIXED POSITIONING
    handles, labels = ax1.get_legend_handles_labels()
    fig.legend(handles, labels, loc='center left', bbox_to_anchor=(1.02, 0.5), 
               ncol=1, frameon=True, fontsize=9)
    
    # Add smoothed title if applicable
    smooth_title = f" (Smoothed: {smooth_method}, window={window_size})" if use_smoothed else ""
    fig.suptitle(f"Reduction Comparison{smooth_title}", fontsize=16, y=0.98)
    
    plt.tight_layout()
    plt.subplots_adjust(right=0.75)  # Make room for legend on the right
    plt.savefig(f"{output_dir}/reduction_comparison_grid{smooth_suffix}.png", bbox_inches='tight', dpi=150)
    plt.close()
    
    # 3. PLOT: Threshold vs. Accuracy for all models - FIXED LEGEND
    plt.figure(figsize=(16, 10))
    
    for i, (model, df) in enumerate(plot_dfs.items()):
        # Sort by threshold for a smooth line
        df_sorted = df.sort_values('threshold')
        plt.plot(df_sorted['threshold'], df_sorted[accuracy_col], 'o-', 
                linewidth=2, label=model, color=colors[i % len(colors)])
    
    # Improve x-axis readability
    max_threshold = max([df['threshold'].max() for df in plot_dfs.values()])
    x_ticks = np.arange(0, max_threshold + 0.5, 0.5)
    plt.xticks(x_ticks)
    
    plt.xlabel('Threshold')
    plt.ylabel('Accuracy')
    title = f'Threshold vs Accuracy (All models){smooth_title}'
    plt.title(title)
    plt.grid(True, linestyle='--', alpha=0.7)
    
    # FIXED LEGEND POSITIONING - Use plt.legend instead of fig.legend
    plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), 
               frameon=True, fontsize=9)
    
    plt.tight_layout()
    plt.subplots_adjust(right=0.75)  # Make room for legend
    plt.savefig(f"{output_dir}/threshold_vs_accuracy_all_models{smooth_suffix}.png", 
                bbox_inches='tight', dpi=150)
    plt.close()
    
    # 4. PLOT: Efficiency Frontier (Params Reduction vs Accuracy) - FIXED LEGEND
    plt.figure(figsize=(16, 10))
    
    for i, (model, df) in enumerate(plot_dfs.items()):
        plt.plot(df['params_reduction_pct'], df[accuracy_col], 'o-', 
                linewidth=2, label=model, color=colors[i % len(colors)])
    
    plt.xlabel('Parameters Reduction (%)')
    plt.ylabel('Accuracy')
    title = f'Efficiency Frontier: Params Reduction vs Accuracy{smooth_title}'
    plt.title(title)
    plt.grid(True, linestyle='--', alpha=0.7)
    
    # FIXED LEGEND POSITIONING
    plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), 
               frameon=True, fontsize=9)
    
    plt.tight_layout()
    plt.subplots_adjust(right=0.75)
    plt.savefig(f"{output_dir}/params_reduction_vs_accuracy_all_models{smooth_suffix}.png", 
                bbox_inches='tight', dpi=150)
    plt.close()
    
    # 5. PLOT: FLOPS Reduction vs Accuracy - FIXED LEGEND
    plt.figure(figsize=(16, 10))
    
    for i, (model, df) in enumerate(plot_dfs.items()):
        plt.plot(df['flops_reduction_pct'], df[accuracy_col], 'o-', 
                linewidth=2, label=model, color=colors[i % len(colors)])
    
    plt.xlabel('FLOPS Reduction (%)')
    plt.ylabel('Accuracy')
    title = f'Efficiency Frontier: FLOPS Reduction vs Accuracy{smooth_title}'
    plt.title(title)
    plt.grid(True, linestyle='--', alpha=0.7)
    
    # FIXED LEGEND POSITIONING
    plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), 
               frameon=True, fontsize=9)
    
    plt.tight_layout()
    plt.subplots_adjust(right=0.75)
    plt.savefig(f"{output_dir}/flops_reduction_vs_accuracy_all_models{smooth_suffix}.png", 
                bbox_inches='tight', dpi=150)
    plt.close()
    
    # 6. PLOT: Direct Model Comparison Table (as image)
    # Create a summary table for quick comparison
    summary_data = []
    for model, df in plot_dfs.items():
        min_threshold_row = df.loc[df['threshold'].idxmin()]
        max_threshold_row = df.loc[df['threshold'].idxmax()]
        best_acc_row = df.loc[df[accuracy_col].idxmax()]
        
        summary_data.append({
            'Model': model,
            'Best Acc.': f"{df[accuracy_col].max():.4f}",
            'At Thresh.': f"{best_acc_row['threshold']:.3f}",
            'Final Acc.': f"{max_threshold_row[accuracy_col]:.4f}",
            'Acc. Drop': f"{df[accuracy_col].max() - max_threshold_row[accuracy_col]:.4f}",
            'Params Red.': f"{max_threshold_row['params_reduction_pct']:.1f}%",
            'FLOPS Red.': f"{max_threshold_row['flops_reduction_pct']:.1f}%",
        })
    
    # Convert to DataFrame and sort by best accuracy
    summary_df = pd.DataFrame(summary_data)
    summary_df = summary_df.sort_values('Best Acc.', ascending=False)
    
    # Create figure and axis
    fig, ax = plt.subplots(figsize=(12, 2 + 0.5*len(plot_dfs)))
    
    # Turn off axis
    ax.axis('off')
    ax.axis('tight')
    
    # Create table
    table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns, 
                    loc='center', cellLoc='center')
    
    # Style the table
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    
    # Color the header row
    for j in range(len(summary_df.columns)):
        table[(0, j)].set_facecolor('#4472C4')
        table[(0, j)].set_text_props(color='white', fontweight='bold')
    
    title = f'Model Performance Summary{smooth_title}'
    plt.title(title, pad=20, fontsize=14)
    plt.tight_layout()
    plt.savefig(f"{output_dir}/model_summary_table{smooth_suffix}.png", bbox_inches='tight', dpi=150)
    plt.close()
    
    # If we have both original and smoothed data, create comparison plots
    if use_smoothed and 'model_dfs' in locals():
        # Create original vs smoothed accuracy comparison
        plt.figure(figsize=(16, 10))
        
        for i, model in enumerate(models):
            if model in model_dfs and model in model_dfs_smooth:
                df_orig = model_dfs[model].sort_values('threshold')
                df_smooth = model_dfs_smooth[model].sort_values('threshold')
                
                # Plot original data with transparency
                plt.plot(df_orig['threshold'], df_orig['accuracy'], 
                         '--', alpha=0.3, linewidth=1, label=f"{model} (Original)",
                         color=colors[i % len(colors)])
                
                # Plot smoothed data with solid line
                plt.plot(df_smooth['threshold'], df_smooth['smooth_accuracy'], 
                         '-', linewidth=2, label=f"{model} (Smoothed)",
                         color=colors[i % len(colors)])
        
        # Improve x-axis readability
        max_threshold = max([df['threshold'].max() for df in plot_dfs.values()])
        x_ticks = np.arange(0, max_threshold + 0.5, 0.5)
        plt.xticks(x_ticks)
        
        plt.xlabel('Threshold')
        plt.ylabel('Accuracy')
        plt.title(f'Original vs. Smoothed Accuracy ({smooth_method}, window={window_size})')
        plt.grid(True, linestyle='--', alpha=0.7)
        
        # FIXED LEGEND POSITIONING
        plt.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), 
                   frameon=True, fontsize=8)
        
        plt.tight_layout()
        plt.subplots_adjust(right=0.75)
        plt.savefig(f"{output_dir}/original_vs_smoothed_accuracy.png", bbox_inches='tight', dpi=150)
        plt.close()

    print(f"All comparative plots saved to '{output_dir}' directory")

def run_comparison_analysis(base_directory='datasets', smooth=True, smooth_method='savgol', window_size=5):
    """Run the comparative analysis with option for smoothed data"""
    print("Loading CSV files from model directories...")
    
    if smooth:
        model_dfs, all_data, model_dfs_smooth, all_data_smooth = load_model_csvs(
            base_directory, smooth=True, smooth_method=smooth_method, window_size=window_size)
    else:
        model_dfs, all_data = load_model_csvs(base_directory)
    
    if not model_dfs:
        print("No data to analyze. Exiting.")
        return
    
    # Create summary table for original data
    model_summary = []
    for model, df in model_dfs.items():
        model_summary.append({
            'Model': model,
            'Data Points': len(df),
            'Min Threshold': df['threshold'].min(),
            'Max Threshold': df['threshold'].max(),
            'Min Accuracy': df['accuracy'].min(),
            'Max Accuracy': df['accuracy'].max(),
            'Max Params Reduction': f"{df['params_reduction_pct'].max():.2f}%",
            'Max FLOPS Reduction': f"{df['flops_reduction_pct'].max():.2f}%"
        })
    
    # Print summary
    print("\nOriginal Data Summary:")
    summary_df = pd.DataFrame(model_summary)
    print(summary_df.to_string(index=False))
    
    print("\nCreating plots with original data...")
    create_comparative_plots(model_dfs, all_data)
    
    # If smoothing is enabled, create smoothed plots
    if smooth and model_dfs_smooth:
        print(f"\nCreating plots with smoothed data ({smooth_method}, window={window_size})...")
        
        # Create summary table for smoothed data
        smooth_summary = []
        for model, df in model_dfs_smooth.items():
            smooth_summary.append({
                'Model': model,
                'Data Points': len(df),
                'Min Threshold': df['threshold'].min(),
                'Max Threshold': df['threshold'].max(),
                'Min Smooth Acc': df['smooth_accuracy'].min(),
                'Max Smooth Acc': df['smooth_accuracy'].max(),
                'Max Params Reduction': f"{df['params_reduction_pct'].max():.2f}%",
                'Max FLOPS Reduction': f"{df['flops_reduction_pct'].max():.2f}%"
            })
        
        # Print smoothed summary
        print("\nSmoothed Data Summary:")
        smooth_df = pd.DataFrame(smooth_summary)
        print(smooth_df.to_string(index=False))
        
        # Create smoothed plots
        smooth_dir = f"comparison_plots_smooth_{smooth_method}_w{window_size}"
        create_comparative_plots(model_dfs, all_data, 
                                output_dir=smooth_dir,
                                use_smoothed=True, 
                                model_dfs_smooth=model_dfs_smooth, 
                                all_data_smooth=all_data_smooth,
                                smooth_method=smooth_method,
                                window_size=window_size)
        
        # Also create original vs smoothed comparison plots
        print("\nCreating original vs smoothed comparison plots...")
        comparison_dir = "comparison_plots_orig_vs_smooth"
        os.makedirs(comparison_dir, exist_ok=True)
        
        # For each model, create original vs smoothed plots
        for i, model in enumerate(model_dfs.keys()):
            if model in model_dfs_smooth:
                plt.figure(figsize=(16, 8))
                
                df_orig = model_dfs[model].sort_values('threshold')
                df_smooth = model_dfs_smooth[model].sort_values('threshold')
                
                # Plot original data
                plt.plot(df_orig['threshold'], df_orig['accuracy'], 
                         'o--', alpha=0.5, label='Original')
                
                # Plot smoothed data
                plt.plot(df_smooth['threshold'], df_smooth['smooth_accuracy'], 
                         '-', linewidth=2.5, label=f'Smoothed ({smooth_method}, window={window_size})')
                
                # Improve x-axis readability
                max_threshold = df_orig['threshold'].max()
                x_ticks = np.arange(0, max_threshold + 0.5, 0.5)
                plt.xticks(x_ticks)
                
                plt.xlabel('Threshold')
                plt.ylabel('Accuracy')
                plt.title(f'Original vs. Smoothed Accuracy: {model}')
                plt.grid(True, linestyle='--', alpha=0.7)
                plt.legend()
                plt.tight_layout()
                plt.savefig(f"{comparison_dir}/{model}_original_vs_smooth.png", bbox_inches='tight', dpi=150)
                plt.close()
    
    print("\nAnalysis complete!")

# Run the analysis with smoothing
if __name__ == "__main__":
    # Configure smoothing parameters
    SMOOTH_METHOD = 'moving_avg'  # Options: 'moving_avg', 'exponential', 'savgol'
    WINDOW_SIZE = 0  # Adjust based on how much smoothing you want
    
    run_comparison_analysis('runs', smooth=False, smooth_method=SMOOTH_METHOD, window_size=WINDOW_SIZE)

Loading CSV files from model directories...
Loaded data for model: WonderfulAnalytics_distilbert-base-uncased-finetuned-spam
Loaded data for model: fzn0x_bert-spam-classification-model
Loaded data for model: skandavivek2_spam-classifier
Loaded data for model: SalehAhmad_roberta-base-finetuned-sms-spam-ham-detection
Loaded data for model: sureshs_distilbert-large-sms-spam
Loaded data for model: Ngadou_bert-sms-spam-dectector
Loaded data for model: cesullivan99_sms-spam-weighted
Loaded data for model: dungnt_sms_spam_detection
Loaded data for model: mrm8488_bert-tiny-finetuned-enron-spam-detection
Loaded data for model: nelsi_test_spam
Loaded data for model: AntiSpamInstitute_spam-detector-bert-MoE-v2.2
Loaded data for model: HJOK_task2_deberta_spamMLM_v1
Loaded data for model: satish860_sms_spam_detection-manning
Loaded data for model: wesleyacheng_sms-spam-classification-with-bert
Loaded data for model: mshenoda_roberta-spam
Loaded data for model: mrm8488_bert-tiny-finetuned-sms-spam-d

<Figure size 1400x1000 with 0 Axes>