# 📊 Experiment C: Comprehensive Analysis

## 🎯 Objective
Process all experimental logs to compute paper-ready metrics across all 3 Pillars.

### Key Metrics to Compute

**Pillar 1 (Robustness):**
1. **Trembling Robustness Score (R)**: dC/dε - slope of cooperation decline (cooperation rate C as a function of noise level ε)
2. **Coalition Entropy (H)**: Stability of CCC state under noise
3. **Forgiveness Rate**: Recovery after accidental defections

**Pillar 2 (Collectivism):**
4. **Punishment Rate (P)**: % of punishment actions
5. **Contribution Decline**: With vs. without punishment
6. **Welfare Gap**: Social optimum - Individual optimum

**Pillar 3 (Safety):**
7. **Disaster Rate (D)**: % of no-volunteer rounds
8. **Volunteer Distribution**: Who sacrifices most?
9. **Bystander Effect**: Correlation with model size

### Expected Figures for Paper
- Figure 1: Cooperation vs. Noise (Pillar 1)
- Figure 2: Punishment Impact on Contributions (Pillar 2)
- Figure 3: Volunteer Distribution & Disaster Rate (Pillar 3)
- Figure 4: Cross-Pillar Model Comparison
- Figure 5: Reasoning Pattern Analysis

In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Do not truncate long cell values (e.g. experiment names) when printing DataFrames.
pd.set_option('display.max_colwidth', None)
# Apply the 'seaborn-v0_8-darkgrid' matplotlib style to figures created after this point.
plt.style.use('seaborn-v0_8-darkgrid')

print("üî¨ Loading and analyzing all experimental results...")
print("=" * 80)

# Load all result files
files = [f for f in os.listdir('.') if f.endswith('.json') and 'experiment_results' in f]
print(f"üìÅ Found {len(files)} result files\n")

if not files:
    print("‚ö†Ô∏è No results yet! Run Exp A and Exp B first.")
else:
    all_data = []
    
    for file in files:
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        for key, val in data.items():
            if 'ERROR' in key:
                continue
            
            # Parse metadata
            parts = key.split('_')
            game_type = parts[0]
            
            # Extract noise if present
            noise = 0.0
            if 'Noise' in key:
                try:
                    noise = float(key.split('Noise')[1].split('_')[0])
                except:
                    pass
            
            history = val['history']
            total_rounds = len(history)
            
            # Compute metrics
            noise_events = 0
            punishment_events = 0
            strategies = []
            reasoning_samples = []
            
            for round_key, round_data in history.values():
                for agent in round_data:
                    strategies.append(agent.get('strategy', 'Unknown'))
                    
                    if agent.get('is_noise', False):
                        noise_events += 1
                    
                    if 'punished' in agent:
                        punishment_events += 1
                    
                    # Collect reasoning samples
                    if agent.get('reasoning') and agent['reasoning'] != 'Reasoning extraction disabled':
                        reasoning_samples.append(agent['reasoning'])
            
            # Calculate cooperation/contribution rate
            coop_keywords = ['Coop', 'Contri', 'Volun']
            coop_count = sum(1 for s in strategies if any(kw in s for kw in coop_keywords))
            coop_rate = coop_count / len(strategies) if strategies else 0
            
            all_data.append({
                "File": file,
                "Experiment": key[:55],
                "Game": game_type,
                "Noise": f"{noise:.0%}",
                "Rounds": total_rounds,
                "Coop/Contrib Rate": f"{coop_rate:.1%}",
                "Noise Events": noise_events,
                "Punishment Acts": punishment_events,
                "Reasoning Samples": len(reasoning_samples)
            })
    
    df = pd.DataFrame(all_data)
    print("\nüìä SUMMARY: All Experiments")
    print("=" * 80)
    print(df.to_string(index=False))
    
    print("\n\n‚úÖ Data loaded successfully!")
    print(f"Total experiments: {len(df)}")
    print(f"Total rounds analyzed: {df['Rounds'].sum()}")
    print("\nProceed to metric calculations below...")

---

## 📈 Metrics 1-3: Pillar 1 (Robustness) Analysis

Compute Trembling Robustness Score, Coalition Entropy, and Forgiveness Rate


In [None]:
# Pillar 1 robustness analysis: cooperation rate per PD experiment and the
# slope of cooperation against noise (Trembling Robustness Score R).
# Relies on `files` (list of result file names) from the loading cell above.

# Filter PD experiments
# NOTE(review): this selects by *file name*; the loading cell derives the game
# type from the experiment key instead - confirm PD result files really carry
# 'PD_' in their file name.
pd_data = [f for f in files if 'PD_' in f]

if pd_data:
    print("üéØ PILLAR 1: ROBUSTNESS TEST (3-IPD)")
    print("=" * 80)

    noise_col = 'Noise (Œµ)'
    robustness_summary = []

    for file in pd_data:
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for exp_name, exp_data in data.items():
            if 'ERROR' in exp_name:
                continue

            # Extract noise level: the text between 'Noise' and the next
            # underscore, so keys with a suffix after the value still parse.
            noise = 0.0
            if 'Noise' in exp_name:
                try:
                    noise = float(exp_name.split('Noise')[1].split('_')[0])
                except ValueError:
                    # A guessed noise level would bias the fit below, so the
                    # experiment is left out instead of defaulting to 0.
                    print(f"Skipping {exp_name[:50]}: cannot parse noise level")
                    continue

            history = exp_data['history']

            # Calculate cooperation rate over every agent action in every round
            strategies = [
                agent.get('strategy')
                for round_data in history.values()
                for agent in round_data
            ]
            coops = sum(1 for s in strategies if s == 'Cooperate')
            coop_rate = coops / len(strategies) if strategies else 0

            robustness_summary.append({
                noise_col: noise,
                'Cooperation Rate': coop_rate,
                'Experiment': exp_name[:50]
            })

    # Explicit columns keep the frame usable when no experiment was collected
    df_rob = pd.DataFrame(
        robustness_summary,
        columns=[noise_col, 'Cooperation Rate', 'Experiment'],
    )

    # Calculate Trembling Robustness Score (R)
    # A line needs at least two distinct x values; fitting on a single noise
    # level is rank-deficient and yields a meaningless slope.
    if df_rob[noise_col].nunique() >= 2:
        # Linear fit: C(eps) = R * eps + b
        slope, intercept = np.polyfit(df_rob[noise_col], df_rob['Cooperation Rate'], 1)
        print(f"\nüìê Trembling Robustness Score (R): {slope:.3f}")
        print(f"   (Slope of cooperation curve - more negative = more fragile)")
        print(f"   Intercept (baseline cooperation): {intercept:.3f}")
    else:
        print("\nTrembling Robustness Score (R) not computed: need at least two distinct noise levels.")

    # Display summary
    print("\nüìä Cooperation vs. Noise:")
    print(df_rob.sort_values(noise_col).to_string(index=False))

    print("\nüí° Interpretation:")
    print("- R close to 0: Robust coalition (forgives accidents)")
    print("- R < -3: Fragile coalition (collapses quickly)")
    print("- Check if cooperation rate stays >80% at Œµ=5%")

else:
    print("‚ö†Ô∏è No PD results found. Run Exp_A first!")
