# Cooperation & Defection Analysis

This notebook analyzes negotiation dynamics, defection rates, and cooperation patterns across different LLM models.

**Metrics covered:**
1. Negotiation length (turns)
2. Contract acceptance rates
3. Strict contract defection rates
4. P4P (Pay4Partner) promise-keeping vs. promise-breaking
5. Cooperation without enforcement (contract=none)
6. Advantaged player exploitation
7. Trade rejection patterns

**To add a new model**, update the `MODEL_PATHS` dictionary in the next cell.

In [None]:
import json
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global look-and-feel applied to every figure drawn below
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# =============================================================================
# MODEL REGISTRY - maps a display name to the directory holding that model's
# experiment logs. Add a new entry here to include another model.
# =============================================================================
MODEL_PATHS = {
    'Sonnet 4.5': 'logs/experiments/per_grid/2026_01_11_18/SONNET_4_5-SONNET_4_5',
    'GPT 4.1': 'logs/experiments/per_grid/2026_01_08_17/FOUR_1-FOUR_1',
    # Add more models here:
    # 'Haiku 4.5': 'logs/experiments/per_grid/2026_01_13_17/HAIKU_4_5-HAIKU_4_5',
}

# Echo the registry so a misconfigured run is obvious from the first output
model_names = list(MODEL_PATHS)
print(f"Analyzing {len(model_names)} models: {model_names}")

## Data Loading

Load all experiment logs into a single DataFrame with cooperation metrics.

In [None]:
def load_experiment_data(model_paths):
    """Collect every parsable event log under the given model directories.

    Args:
        model_paths: Mapping of model display name -> root directory of that
            model's experiment logs.

    Returns:
        A DataFrame with one row per log for which ``parse_event_log``
        returned a truthy result. Regular-trading logs (directories matching
        ``*p4pfalse*``) are tagged ``'Regular'``; Pay4Partner logs
        (``*p4ptrue*``) are tagged ``'P4P'``.
    """
    # (directory tag, label used in the progress line, value of the Mode column)
    mode_specs = (
        ('p4pfalse', 'Regular Trading', 'Regular'),
        ('p4ptrue', 'P4P Mode', 'P4P'),
    )

    records = []
    for model_name, model_path in model_paths.items():
        for dir_tag, label, mode in mode_specs:
            pattern = f'{model_path}/**/*{dir_tag}*/**/event_log*.json'
            log_files = glob.glob(pattern, recursive=True)
            print(f"{model_name} - {label}: {len(log_files)} logs")

            parsed = (parse_event_log(path, model_name, mode) for path in log_files)
            # Logs that could not be parsed come back falsy and are dropped
            records.extend(row for row in parsed if row)

    return pd.DataFrame(records)


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def parse_event_log(log_path, model_name, mode):
    """Parse a single event log and extract cooperation metrics.

    Args:
        log_path: Path to an ``event_log*.json`` file. The bucket name is taken
            from the first path component containing ``Needy``, ``Mutual`` or
            ``Independent`` (underscores become spaces).
        model_name: Display name stored in the ``Model`` column.
        mode: Value stored in the ``Mode`` column (e.g. ``'Regular'``/``'P4P'``).

    Returns:
        A flat dict of metrics for one game, or ``None`` when the file cannot
        be read, is not valid JSON, or does not contain a JSON object at the
        top level. Missing or null sections/metrics fall back to defaults
        (0, ``False``, ``'unknown'``) instead of raising.
    """
    try:
        with open(log_path, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Skipping is deliberate (best-effort load) but no longer silent.
        print(f"Skipping unreadable log {log_path}: {exc}")
        return None

    if not isinstance(data, dict):
        print(f"Skipping log with unexpected top-level JSON type: {log_path}")
        return None

    config = _as_dict(data.get('config'))
    contract_type = config.get('contract_type', 'unknown')

    # Extract bucket from path; accept both '/' and '\\' separators
    bucket_tags = ('Needy', 'Mutual', 'Independent')
    parts = log_path.replace('\\', '/').split('/')
    bucket = next(
        (p.replace('_', ' ') for p in parts if any(tag in p for tag in bucket_tags)),
        'Unknown',
    )

    # Any section may be absent or null in the log; treat both as empty
    final = _as_dict(_as_dict(data.get('game')).get('final_state'))
    metrics = _as_dict(final.get('metrics'))
    players = _as_dict(final.get('players'))
    scores = _as_dict(final.get('scores'))

    def metric(name):
        """Read a numeric metric, mapping missing/null/falsy values to 0."""
        return metrics.get(name, 0) or 0

    tiles_in_contract = metric('num_tiles_in_contract')
    unfulfilled = metric('num_unfulfilled_contract_moves')
    promises_kept = metric('total_p4p_promises_kept')
    promises_broken = metric('total_p4p_promises_broken')
    total_promises = promises_kept + promises_broken
    p0_promised = metric('num_tiles_promised_to_receive_from_contract_0')
    p1_promised = metric('num_tiles_promised_to_receive_from_contract_1')

    p0_reached = bool(_as_dict(players.get('0')).get('reached_goal', False))
    p1_reached = bool(_as_dict(players.get('1')).get('reached_goal', False))

    return {
        'Model': model_name,
        'Bucket': bucket,
        'Contract': contract_type,
        'Mode': mode,
        'Log_Path': log_path,
        # Negotiation metrics
        # NOTE(review): 'contract_negotiaion_length' is misspelled; presumably
        # this matches the key the logger writes - confirm before renaming.
        'Negotiation_Length': metric('contract_negotiaion_length'),
        'Contract_Accepted': metric('contract_accepted'),
        # Defection metrics
        'Tiles_in_Contract': tiles_in_contract,
        'Unfulfilled_Moves': unfulfilled,
        'Defection_Rate': unfulfilled / tiles_in_contract if tiles_in_contract > 0 else 0,
        # P4P metrics (rate is NaN when no promises were made)
        'Promises_Kept': promises_kept,
        'Promises_Broken': promises_broken,
        'P4P_Defection_Rate': promises_broken / total_promises if total_promises > 0 else np.nan,
        # Trade metrics
        'Trades_Proposed': metric('total_trades_proposed'),
        'Trades_Accepted': metric('total_trades_accepted'),
        'Trades_Rejected': metric('total_trades_rejected'),
        # Outcome metrics
        'P0_Reached_Goal': p0_reached,
        'P1_Reached_Goal': p1_reached,
        'Both_Reached': p0_reached and p1_reached,
        'Score_P0': scores.get('Player 0', 0),
        'Score_P1': scores.get('Player 1', 0),
        # Contract balance (who got better deal)
        'P0_Promised_Tiles': p0_promised,
        'P1_Promised_Tiles': p1_promised,
        'Contract_Balance': p0_promised - p1_promised,  # Positive = P0 favored
    }


# Build the combined table (one row per parsed event log) and preview it
df = load_experiment_data(MODEL_PATHS)
print(f"\nTotal experiments loaded: {df.shape[0]}")
df.head()

In [None]:
# =============================================================================
# 1. NEGOTIATION LENGTH ANALYSIS
# =============================================================================
# Keep regular-mode games whose contract type is anything other than 'none'
has_contract = df['Contract'] != 'none'
is_regular = df['Mode'] == 'Regular'
contract_df = df[has_contract & is_regular]

# Same three summary statistics are reported at both grouping levels
summary_stats = ['mean', 'std', 'count']

print("=== NEGOTIATION LENGTH BY MODEL ===")
neg_len = contract_df.groupby('Model')['Negotiation_Length'].agg(summary_stats)
print(neg_len.round(2))
print()

print("=== NEGOTIATION LENGTH BY MODEL AND BUCKET ===")
neg_len_detail = (
    contract_df
    .groupby(['Model', 'Bucket'])['Negotiation_Length']
    .agg(summary_stats)
)
print(neg_len_detail.round(2))
print()

# Flag games whose negotiation ran longer than 2 turns (counter-proposals);
# assign() returns a new frame, so contract_df itself is left untouched
contract_df_copy = contract_df.assign(
    Extended_Negotiation=contract_df['Negotiation_Length'] > 2
)
print("=== % OF GAMES WITH EXTENDED NEGOTIATION (>2 turns) ===")
extended = (
    contract_df_copy
    .groupby(['Model', 'Bucket'])['Extended_Negotiation']
    .mean()
    * 100
)
print(extended.round(1))

In [None]:
# Two side-by-side box plots of negotiation length
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_by_model, ax_by_bucket = axes

# Left panel: one box per model
sns.boxplot(data=contract_df, x='Model', y='Negotiation_Length', ax=ax_by_model)
ax_by_model.set_title('Negotiation Length by Model')

# Right panel: one box per bucket, split by model
sns.boxplot(
    data=contract_df,
    x='Bucket',
    y='Negotiation_Length',
    hue='Model',
    ax=ax_by_bucket,
)
ax_by_bucket.set_title('Negotiation Length by Bucket and Model')
ax_by_bucket.tick_params(axis='x', rotation=15)
ax_by_bucket.legend(title='Model')

# Both panels share the same y-axis label
for panel in axes:
    panel.set_ylabel('Number of Turns')

plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# 2. CONTRACT ACCEPTANCE RATES
# =============================================================================
print("=== CONTRACT ACCEPTANCE RATES ===")
# Mean of Contract_Accepted per (model, contract type), expressed in percent
acceptance = (
    contract_df
    .groupby(['Model', 'Contract'])['Contract_Accepted']
    .mean()
    * 100
)
print(acceptance.round(1))

# Wide layout for plotting: one row per model, one column per contract type
acceptance_pivot = acceptance.unstack()

fig, ax = plt.subplots(figsize=(10, 6))
acceptance_pivot.plot.bar(ax=ax)
ax.set(
    title='Contract Acceptance Rate by Model and Contract Type',
    ylabel='Acceptance Rate (%)',
    xlabel='Model',
    ylim=(0, 105),
)
ax.legend(title='Contract Type')
ax.tick_params(axis='x', rotation=0)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# 3. STRICT CONTRACT DEFECTION RATES
# Defection rate = unfulfilled contract moves / tiles in contract, pooled over
# all games in a group (ratio of sums, not mean of per-game ratios)
# =============================================================================
is_strict = df['Contract'] == 'strict'
has_tiles = df['Tiles_in_Contract'] > 0
in_regular_mode = df['Mode'] == 'Regular'
strict_df = df[is_strict & has_tiles & in_regular_mode]

defection_cols = ['Unfulfilled_Moves', 'Tiles_in_Contract']

print("=== STRICT CONTRACT DEFECTION RATES ===")
defection = strict_df.groupby('Model')[defection_cols].sum()
defection['Defection_Rate_%'] = (
    defection['Unfulfilled_Moves']
    .div(defection['Tiles_in_Contract'])
    .mul(100)
    .round(2)
)
print(defection)
print()

# Same pooled rate, broken down further by bucket
print("=== DEFECTION RATES BY MODEL AND BUCKET ===")
defection_detail = strict_df.groupby(['Model', 'Bucket'])[defection_cols].sum()
defection_detail['Defection_%'] = (
    defection_detail['Unfulfilled_Moves']
    .div(defection_detail['Tiles_in_Contract'])
    .mul(100)
    .round(2)
)
print(defection_detail)

# Grouped bar chart of the per-bucket rates
fig, ax = plt.subplots(figsize=(10, 6))
defection_plot = defection_detail.reset_index()
sns.barplot(data=defection_plot, x='Bucket', y='Defection_%', hue='Model', ax=ax)
ax.set(
    title='Strict Contract Defection Rate by Bucket',
    ylabel='Defection Rate (%)',
)
ax.tick_params(axis='x', rotation=15)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# 4. P4P PROMISE-KEEPING VS BREAKING
# In Pay4Partner mode, players promise to pay for each other's moves
# Defection = breaking these promises
# =============================================================================
p4p_df = df[df['Mode'] == 'P4P']

print("=== P4P PROMISE KEEPING VS DEFECTION ===")
p4p_summary = (
    p4p_df
    .groupby(['Model', 'Bucket'])[['Promises_Kept', 'Promises_Broken']]
    .sum()
)
p4p_summary['Total_Promises'] = p4p_summary['Promises_Kept'].add(
    p4p_summary['Promises_Broken']
)
# Groups with zero promises yield NaN here; they are excluded from the plot
p4p_summary['Defection_Rate_%'] = (
    p4p_summary['Promises_Broken']
    .div(p4p_summary['Total_Promises'])
    .mul(100)
    .round(1)
)
print(p4p_summary)

# Bar chart restricted to groups where at least one promise was made
fig, ax = plt.subplots(figsize=(10, 6))
p4p_plot = p4p_summary.reset_index()
p4p_plot = p4p_plot.loc[p4p_plot['Total_Promises'] > 0]
sns.barplot(data=p4p_plot, x='Bucket', y='Defection_Rate_%', hue='Model', ax=ax)
ax.set(
    title='P4P Promise Defection Rate by Bucket',
    ylabel='Defection Rate (%)',
)
ax.tick_params(axis='x', rotation=15)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# 5. COOPERATION WITHOUT ENFORCEMENT (contract = none)
# With no contract available, how often do both players still reach their goal?
# =============================================================================
no_contract = df['Contract'] == 'none'
regular_mode = df['Mode'] == 'Regular'
none_df = df[no_contract & regular_mode]

print("=== COOPERATION WITHOUT CONTRACTS (All Buckets) ===")
print("% of games where BOTH players reached their goal:\n")

# Per (model, bucket): number of games where both reached, and games played
coop_all = (
    none_df
    .groupby(['Model', 'Bucket'])['Both_Reached']
    .agg(Both_Reached='sum', Total='count')
)
coop_all['Coop_Rate_%'] = (
    coop_all['Both_Reached'].div(coop_all['Total']).mul(100).round(1)
)
print(coop_all)

# Grouped bar chart of the cooperation rate
fig, ax = plt.subplots(figsize=(10, 6))
coop_plot = coop_all.reset_index()
sns.barplot(data=coop_plot, x='Bucket', y='Coop_Rate_%', hue='Model', ax=ax)
ax.set(
    title='Cooperation Rate Without Contracts (Both Players Reach Goal)',
    ylabel='Success Rate (%)',
    ylim=(0, 105),
)
ax.tick_params(axis='x', rotation=15)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# 6. ADVANTAGED PLAYER EXPLOITATION
# In Needy Player Blue: P0 has power (can finish alone), P1 is needy
# Does P0 exploit this advantage in contract negotiations?
# Contract_Balance = P0 promised tiles - P1 promised tiles (positive favors P0)
# NOTE(review): no Tiles_in_Contract > 0 filter is applied here, so games
# without an agreed contract presumably have balance 0 and count as "Equal" -
# confirm this is intended.
# =============================================================================
needy_strict = df[
    (df['Bucket'] == 'Needy Player Blue')
    & (df['Contract'] == 'strict')
    & (df['Mode'] == 'Regular')
]

print("=== ADVANTAGED PLAYER EXPLOITATION (Needy Player Blue, Strict) ===")
print()

# sort=False keeps models in order of first appearance
for model, model_data in needy_strict.groupby('Model', sort=False):
    balance = model_data['Contract_Balance']
    total = len(model_data)

    p0_better = (balance > 0).sum()
    p1_better = (balance < 0).sum()
    equal = (balance == 0).sum()

    outcomes = (
        ("P0 (advantaged) gets better deal", p0_better),
        ("P1 (needy) gets better deal", p1_better),
        ("Equal", equal),
    )

    print(f"{model}:")
    for label, n_games in outcomes:
        print(f"  {label}: {n_games}/{total} = {n_games/total*100:.1f}%")
    print(f"  Average contract balance (P0-P1 tiles): {balance.mean():.2f}")
    print()

# Mean tiles promised to each player, and mean contract size, per model
print("=== AVERAGE TILES PROMISED BY PLAYER ===")
promised_cols = ['P0_Promised_Tiles', 'P1_Promised_Tiles', 'Tiles_in_Contract']
tiles_promised = needy_strict.groupby('Model')[promised_cols].mean().round(2)
print(tiles_promised)

In [None]:
# =============================================================================
# 7. TRADE REJECTION PATTERNS
# Trade rejection can signal non-cooperation or strategic behavior
# =============================================================================
regular_df = df[df['Mode'] == 'Regular']

print("=== TRADE ACCEPTANCE RATES BY CONTRACT TYPE ===")
trade_cols = ['Trades_Proposed', 'Trades_Accepted', 'Trades_Rejected']
trade_agg = regular_df.groupby(['Model', 'Contract'])[trade_cols].sum()
trade_agg['Accept_Rate_%'] = (
    trade_agg['Trades_Accepted']
    .div(trade_agg['Trades_Proposed'])
    .mul(100)
    .round(1)
)
# Drop groups in which no trade was ever proposed (their rate is undefined)
trade_agg = trade_agg[trade_agg['Trades_Proposed'] > 0]
print(trade_agg)

# Grouped bar chart of acceptance rate per contract type
fig, ax = plt.subplots(figsize=(12, 6))
trade_plot = trade_agg.reset_index()
sns.barplot(data=trade_plot, x='Contract', y='Accept_Rate_%', hue='Model', ax=ax)
ax.set(
    title='Trade Acceptance Rate by Contract Type',
    ylabel='Acceptance Rate (%)',
)
ax.tick_params(axis='x', rotation=15)
plt.tight_layout()
plt.show()

In [None]:
# =============================================================================
# SUMMARY STATISTICS
# One headline number per metric for each model
# =============================================================================
banner = "=" * 70
print(banner)
print("COOPERATION/DEFECTION SUMMARY")
print(banner)
print()

for model in df['Model'].unique():
    model_data = df[df['Model'] == model]
    regular = model_data[model_data['Mode'] == 'Regular']
    p4p = model_data[model_data['Mode'] == 'P4P']

    # Strict contract defection: pooled unfulfilled moves / pooled contract tiles
    strict = regular[(regular['Contract'] == 'strict') & (regular['Tiles_in_Contract'] > 0)]
    strict_tiles = strict['Tiles_in_Contract'].sum()
    if strict_tiles > 0:
        strict_defection = strict['Unfulfilled_Moves'].sum() / strict_tiles * 100
    else:
        strict_defection = 0

    # P4P defection: broken promises as a share of all promises
    p4p_kept = p4p['Promises_Kept'].sum()
    p4p_broken = p4p['Promises_Broken'].sum()
    p4p_total = p4p_kept + p4p_broken
    if p4p_total > 0:
        p4p_defection = p4p_broken / p4p_total * 100
    else:
        p4p_defection = 0

    # No-contract cooperation, restricted to the Mutual Dependency bucket
    none_mutual = regular[(regular['Contract'] == 'none') & (regular['Bucket'] == 'Mutual Dependency')]
    n_none_mutual = len(none_mutual)
    if n_none_mutual > 0:
        none_coop = none_mutual['Both_Reached'].sum() / n_none_mutual * 100
    else:
        none_coop = 0

    # Average negotiation length over regular-mode games that had a contract
    contract_modes = regular[regular['Contract'] != 'none']
    avg_neg = contract_modes['Negotiation_Length'].mean()

    print(f"{model}:")
    print(f"  Average negotiation turns: {avg_neg:.2f}")
    print(f"  Strict contract defection: {strict_defection:.2f}%")
    print(f"  P4P promise defection: {p4p_defection:.1f}% ({int(p4p_broken)}/{int(p4p_total)})")
    print(f"  No-contract cooperation (Mutual Dep.): {none_coop:.1f}%")
    print()

In [None]:
# =============================================================================
# EXPORT DATA FOR FURTHER ANALYSIS
# Writes the full per-game table to CSV and lists its columns
# =============================================================================
import os

output_path = 'results/final/cooperation_metrics.csv'
# Create the destination directory on first run; no-op when it already exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

df.to_csv(output_path, index=False)
print(f"Saved {len(df)} rows to {output_path}")
print("\nColumns available for custom analysis:")
for col in df.columns:
    print(f"  - {col}")