In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy import stats
import warnings
# NOTE(review): this installs a blanket 'ignore' filter for the rest of the
# session, which can also hide deprecation or data-quality warnings raised by
# later cells - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Plot styling: apply the matplotlib style sheet, then the seaborn palette,
# then the default figure size.
# NOTE(review): presumably the order matters, since a style sheet can overwrite
# rcParams that were set before it - confirm before reordering these lines.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

# Banner confirming that the setup cell ran to completion.
print("Standalone Analysis Notebook Loaded!")
print("All functions included inline - no import issues!")


📊 Standalone Analysis Notebook Loaded!
 All functions included inline - no import issues!


In [2]:
# Load datasets and experimental results
def load_datasets(conflict_path='../data/stimuli/political_conflict_pairs_50.csv',
                  ideology_path='../data/stimuli/ideology_pairs.csv'):
    """Load the two stimulus datasets from CSV.

    Parameters
    ----------
    conflict_path : str or path-like, optional
        CSV file holding the political-conflict stimulus pairs. Defaults to
        the location used by this notebook.
    ideology_path : str or path-like, optional
        CSV file holding the cultural-ideological stimulus pairs. Defaults to
        the location used by this notebook.

    Returns
    -------
    tuple
        ``(conflict_df, ideology_df)`` as two DataFrames on success.
        ``(None, None)`` if either file cannot be read; the error is printed
        rather than raised, so callers must check for ``None``.
    """
    # Only the reads can fail; keep the try block tight so an unrelated bug in
    # the reporting code below is not misreported as a loading error.
    try:
        conflict_df = pd.read_csv(conflict_path)
        ideology_df = pd.read_csv(ideology_path)
    except Exception as e:
        # Best-effort by design: report and hand back a sentinel pair.
        print(f"Error loading datasets: {e}")
        return None, None

    n_conflict = len(conflict_df)
    n_ideology = len(ideology_df)
    print("Datasets loaded:")
    print(f"   Political Conflict: {n_conflict} items")
    print(f"   Cultural-Ideological: {n_ideology} items")
    print(f"   Total: {n_conflict + n_ideology} stimulus pairs")

    return conflict_df, ideology_df

def load_experimental_data(results_dir='../data/results'):
    """Load the most recently modified CSV file from a results directory.

    Parameters
    ----------
    results_dir : str or path-like, optional
        Directory that is searched (non-recursively) for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame or None
        The contents of the newest CSV file, or ``None`` when the directory
        does not exist, contains no CSV files, or the chosen file cannot be
        read. Problems are printed rather than raised, so callers must check
        for ``None``.
    """
    results_path = Path(results_dir)
    if not results_path.exists():
        print(f"Results directory not found: {results_path}")
        return None

    # glob() also matches directories whose name ends in ".csv"; keep regular
    # files only so such a directory can never be picked as the "latest file".
    csv_files = [p for p in results_path.glob('*.csv') if p.is_file()]
    if not csv_files:
        print(f"No CSV files found in {results_path}")
        return None

    # Newest by modification time; ties are broken by file name so the choice
    # does not depend on the order in which glob() happens to yield entries.
    latest_file = max(csv_files, key=lambda p: (p.stat().st_mtime, p.name))

    try:
        df = pd.read_csv(latest_file)
        print(f"Loaded experimental data: {latest_file.name}")
        print(f"   Shape: {df.shape}")

        if not df.empty:
            print(f"   Columns: {list(df.columns)}")
            # These summaries are optional: only shown when the column exists.
            if 'strategy' in df.columns:
                print(f"   Strategies: {df['strategy'].value_counts().to_dict()}")
            if 'dataset' in df.columns:
                print(f"   Datasets: {df['dataset'].value_counts().to_dict()}")

        return df
    except Exception as e:
        # Best-effort by design: report the failure and return the sentinel.
        print(f"Error loading {latest_file}: {e}")
        return None

# Load the stimulus datasets and the latest results file when this cell runs.
# Both loaders report failures by printing and returning None instead of
# raising, so any of these three names may be None in later cells.
conflict_df, ideology_df = load_datasets()
results_df = load_experimental_data()


 Datasets loaded:
   Political Conflict: 90 items
   Cultural-Ideological: 95 items
   Total: 185 stimulus pairs
 Loaded experimental data: comprehensive_metrics_20250609_233155.csv
   Shape: (3, 6)
   Columns: ['strategy', 'mean_bias', 'std_bias', 'n_examples', 'ci_lower', 'ci_upper']
   Strategies: {'zero_shot': 1, 'chain_of_thought': 1, 'few_shot': 1}
