In [9]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path

# Simple, reliable data loading function
def load_experimental_data(results_dir='../data/results'):
    """Read the most recently modified ``*.csv`` in *results_dir*.

    Progress and diagnostics are printed rather than raised, so the caller
    only has to check for ``None``.

    Args:
        results_dir: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the directory does
        not exist, contains no CSV files, or reading/summarising the chosen
        file raises any exception.
    """
    folder = Path(results_dir)
    if not folder.exists():
        print(f"⚠️  Results directory not found: {folder}")
        return None

    candidates = list(folder.glob('*.csv'))
    if not candidates:
        print(f"⚠️  No CSV files found in {folder}")
        return None

    def modified_at(path):
        # Sort key: filesystem modification timestamp of the candidate.
        return path.stat().st_mtime

    latest_file = max(candidates, key=modified_at)

    try:
        frame = pd.read_csv(latest_file)
        print(f"✅ Loaded experimental data: {latest_file.name}")
        print(f"   Shape: {frame.shape}")

        # Column overview is only printed when the file holds at least one row
        # and one column (DataFrame.empty is False).
        if not frame.empty:
            column_names = frame.columns
            print(f"   Columns: {list(column_names)}")
            if 'strategy' in column_names:
                print(f"   Strategies: {frame['strategy'].value_counts().to_dict()}")
            if 'dataset' in column_names:
                print(f"   Datasets: {frame['dataset'].value_counts().to_dict()}")
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"❌ Error loading {latest_file}: {e}")
        return None

    return frame

# Simple bias analysis function
def _bias_stats_by_group(results_df, column):
    """Summarise ``bias_score`` for every distinct value of *column*.

    Returns a dict mapping each value from ``results_df[column].unique()`` to
    ``{'mean_bias', 'std_bias', 'n_examples'}``, in first-appearance order.
    """
    grouped = {}
    for value in results_df[column].unique():
        scores = results_df.loc[results_df[column] == value, 'bias_score'].tolist()
        grouped[value] = {
            'mean_bias': np.mean(scores),
            # np.std defaults to ddof=0 (population standard deviation).
            'std_bias': np.std(scores),
            'n_examples': len(scores),
        }
    return grouped


def simple_bias_analysis(results_df):
    """Perform basic bias analysis on a results DataFrame.

    Args:
        results_df: DataFrame with a ``bias_score`` column and, optionally,
            ``strategy`` and/or ``dataset`` columns. May be ``None``.

    Returns:
        ``None`` when *results_df* is ``None`` or empty. Otherwise a dict with:

        * ``'summary_stats'`` - total row count plus mean, std, median, min
          and max of ``bias_score`` computed with the pandas Series methods.
        * ``'by_strategy'`` - per-strategy mean/std/count (only when a
          ``strategy`` column is present).
        * ``'by_dataset'`` - per-dataset mean/std/count (only when a
          ``dataset`` column is present).

    Raises:
        KeyError: If *results_df* is non-empty but lacks ``bias_score``.

    NOTE(review): ``std_bias`` in ``summary_stats`` comes from
    ``Series.std`` (ddof=1 by default) while the per-group ``std_bias`` comes
    from ``np.std`` (ddof=0 by default), so the two are different estimators.
    Kept as-is to avoid changing reported numbers; confirm which is intended.
    """
    if results_df is None or results_df.empty:
        return None

    bias = results_df['bias_score']
    analysis = {
        'summary_stats': {
            'total_evaluations': len(results_df),
            'mean_bias': bias.mean(),
            'std_bias': bias.std(),
            'median_bias': bias.median(),
            'min_bias': bias.min(),
            'max_bias': bias.max(),
        }
    }

    # Optional breakdowns share one helper instead of two copy-pasted loops.
    if 'strategy' in results_df.columns:
        analysis['by_strategy'] = _bias_stats_by_group(results_df, 'strategy')

    if 'dataset' in results_df.columns:
        analysis['by_dataset'] = _bias_stats_by_group(results_df, 'dataset')

    return analysis

# Plot defaults, applied in this order: matplotlib style sheet, seaborn
# colour palette, then the default figure size.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Startup banner: three lines confirming the cell finished running.
print(
    "🎨 Visualization Module Loaded!",
    "📊 Ready to create advanced plots for expanded datasets",
    "🔧 Using standalone functions to avoid import issues",
    sep="\n",
)


ImportError: cannot import name 'load_experimental_data' from 'evaluate' (/Users/mohsinkhawaja/Desktop/LLM-Sensitivity-Eval-to-Politics/notebooks/../src/evaluate.py)

In [7]:
# 📊 Load Expanded Datasets and Results
# Reads the two stimulus CSVs, then attempts (best-effort) to load the latest
# experimental results via load_experimental_data(). `results_df` stays None
# when no results could be loaded.
print("📂 Loading expanded datasets for visualization...")

# Load stimulus datasets
conflict_df = pd.read_csv('../data/stimuli/political_conflict_pairs_50.csv')
ideology_df = pd.read_csv('../data/stimuli/ideology_pairs.csv')

print("✅ Stimulus datasets loaded:")
print(f"   Political Conflict: {len(conflict_df)} Gaza framing pairs")
print(f"   Cultural-Ideological: {len(ideology_df)} religious vs secular pairs")
print(f"   Total: {len(conflict_df) + len(ideology_df)} stimulus pairs")

# Try to load experimental results
results_df = None
try:
    results_df = load_experimental_data()
    if results_df is not None:
        print("\n📈 Results loaded successfully:")
        print(f"   Total evaluations: {len(results_df)}")

        # Dataset breakdown
        if 'dataset' in results_df.columns:
            dataset_counts = results_df['dataset'].value_counts()
            print(f"   By dataset: {dataset_counts.to_dict()}")

        # Strategy breakdown
        if 'strategy' in results_df.columns:
            strategy_counts = results_df['strategy'].value_counts()
            print(f"   By strategy: {strategy_counts.to_dict()}")

        print("🎨 Ready for advanced visualizations!")
    else:
        print("\n⚠️  No experimental results found.")
        print("   Run notebook 01_bias_probe.ipynb to generate data first.")

except NameError as e:
    # The loader is missing from the kernel namespace (e.g. the cell that
    # defines it did not run). That is a setup problem, not missing data, so
    # point the user at the right fix instead of the data-generation notebook.
    print(f"\n⚠️  Could not load results: {e}")
    print("   Run the setup cell that defines load_experimental_data() first.")
except Exception as e:
    # Keep the cell best-effort: report and continue with results_df = None.
    print(f"\n⚠️  Could not load results: {e}")
    print("   Run notebook 01_bias_probe.ipynb to generate data first.")


📂 Loading expanded datasets for visualization...
✅ Stimulus datasets loaded:
   Political Conflict: 90 Gaza framing pairs
   Cultural-Ideological: 95 religious vs secular pairs
   Total: 185 stimulus pairs

⚠️  Could not load results: name 'load_experimental_data' is not defined
   Run notebook 01_bias_probe.ipynb to generate data first.
