"""
Results analysis and visualization tools for Monte Carlo simulations.

This module provides tools to analyze simulation results and create visualizations
to understand the performance of different Player10 configurations.
"""

# In [1]:
from __future__ import annotations

import argparse
import os
from pathlib import Path
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from ..sim.monte_carlo import MonteCarloSimulator, SimulationResult

# [cell output] Matplotlib is building the font cache; this may take a moment.
#
# [cell output] ImportError: attempted relative import with no known parent package

# In [None]:
# ----------------------------
# Analyzer
# ----------------------------

class ResultsAnalyzer:
    """Analyzer for Monte Carlo simulation results (enhanced)."""

    def __init__(self, results_file: str | None = None):
        """
        Set up an analyzer, optionally pre-loading a results file.

        Args:
            results_file: Path to a results JSON file. When falsy (``None`` or
                an empty string) nothing is loaded and ``results`` stays empty.
        """
        self.simulator = MonteCarloSimulator()
        self.results: list[SimulationResult] = []
        self.metadata: dict[str, Any] = {}

        # Nothing to load: leave the analyzer empty.
        if not results_file:
            return
        self.load_results(results_file)

    # ---------- IO ----------

    def load_results(self, filename: str):
        """
        Read simulation results from ``filename`` through the simulator.

        Replaces ``self.results`` with whatever the simulator's ``load_results``
        returns, copies the simulator's ``last_metadata`` into ``self.metadata``
        and prints how many results were loaded. The file is expected to be the
        JSON written by ``MonteCarloSimulator.save_results``.
        """
        loaded = self.simulator.load_results(filename)
        self.results = loaded
        self.metadata = self.simulator.last_metadata
        print(f'Loaded {len(loaded)} simulation results')

    # ---------- DataFrames ----------

    def create_dataframe(self) -> pd.DataFrame:
        """
        Convert results to a pandas DataFrame for run-level analysis.

        Includes:
        - All important config knobs (altruism prob, tau, epsilons, weights)
        - Run outcomes (scores, lengths, pauses, etc.)
        - Shared component breakdown (prefixed with shared_)
        - Derived feature: length_utilization
        """
        data: list[dict[str, Any]] = []

        for r in self.results:
            row = {
                # core config knobs
                'altruism_prob': r.config.altruism_prob,
                'tau_margin': r.config.tau_margin,
                'epsilon_fresh': r.config.epsilon_fresh,
                'epsilon_mono': r.config.epsilon_mono,
                'subjects': r.config.subjects,
                'memory_size': r.config.memory_size,
                'conversation_length_cfg': r.config.conversation_length,
                'seed': r.config.seed,

                # weights / algo params
                'min_samples_pid': r.config.min_samples_pid,
                'ewma_alpha': r.config.ewma_alpha,
                'importance_weight': r.config.importance_weight,
                'coherence_weight': r.config.coherence_weight,
                'freshness_weight': r.config.freshness_weight,
                'monotony_weight': r.config.monotony_weight,

                # run-level outcomes
                'total_score': r.total_score,
                'player10_score': r.player10_total_mean,
                'player10_individual': r.player10_individual_mean,
                'player10_rank': r.player10_rank_mean,
                'player10_gap_to_best': r.player10_gap_to_best,
                'player10_instances': r.player10_instances,
                'best_total_score': r.best_total_score,
                'conversation_length': r.conversation_length,
                'early_termination': float(r.early_termination),
                'pause_count': r.pause_count,
                'unique_items_used': r.unique_items_used,
                'execution_time': r.execution_time,
            }

            # Include shared score components (flatten)
            for comp, val in (r.score_breakdown or {}).items():
                if comp == 'total':
                    continue
                row[f'shared_{comp}'] = val

            data.append(row)

        df = pd.DataFrame(data)

        # Derived features
        if 'conversation_length_cfg' in df and 'conversation_length' in df:
            with np.errstate(divide='ignore', invalid='ignore'):
                df['length_utilization'] = df['conversation_length'] / df['conversation_length_cfg']

        return df

    def create_player_long(self) -> pd.DataFrame:
        """
        Explode SimulationResult.player_metrics into a long-form dataframe.

        Columns: seed, config knobs, label, class_name, alias, total, shared, individual, rank
        Useful for rank distributions and per-player analyses.
        """
        rows: list[dict[str, Any]] = []
        for r in self.results:
            cfg = {
                'altruism_prob': r.config.altruism_prob,
                'tau_margin': r.config.tau_margin,
                'epsilon_fresh': r.config.epsilon_fresh,
                'epsilon_mono': r.config.epsilon_mono,
                'seed': r.config.seed,
            }
            for label, m in (r.player_metrics or {}).items():
                rows.append({
                    **cfg,
                    'label': label,
                    'class_name': m.get('class_name'),
                    'alias': m.get('alias'),
                    'total': m.get('total'),
                    'shared': m.get('shared'),
                    'individual': m.get('individual'),
                    'rank': m.get('rank'),
                })
        return pd.DataFrame(rows)

    # ---------- Statistics helpers ----------

    def bootstrap_ci(
        self,
        df: pd.DataFrame,
        group_cols: list[str],
        metric: str,
        B: int = 1000,
        ci: float = 0.95
    ) -> pd.DataFrame:
        """
        Bootstrapped mean and percentile confidence interval of ``metric`` per group.

        Args:
            df: Run-level data containing ``group_cols`` and ``metric``.
            group_cols: Columns to group by.
            metric: Column whose mean is estimated; NaNs are dropped per group.
            B: Number of bootstrap resamples per group.
            ci: Central coverage of the interval (0.95 -> 2.5% / 97.5% quantiles).

        Returns:
            DataFrame with columns ``group_cols..., mean, ci_low, ci_high, n``.
            Groups with no non-NaN values are skipped. If no group has data the
            result is an empty frame that still carries those columns.

        Note:
            Resampling draws from NumPy's global random state, so results are
            only reproducible if the caller seeds ``np.random`` beforehand.
        """
        columns = [*group_cols, 'mean', 'ci_low', 'ci_high', 'n']
        tail = (1 - ci) / 2
        quantiles = [tail, 1 - tail]

        rows = []
        for key, group in df.groupby(group_cols):
            values = group[metric].dropna().to_numpy()
            if len(values) == 0:
                continue
            boot_means = [
                np.random.choice(values, size=len(values), replace=True).mean()
                for _ in range(B)
            ]
            lo, hi = np.quantile(boot_means, quantiles)

            # groupby yields a scalar or a tuple key depending on the pandas
            # version and the number of group columns; normalise to a tuple.
            key_values = key if isinstance(key, tuple) else (key,)
            row = dict(zip(group_cols, key_values))
            row.update(mean=values.mean(), ci_low=float(lo), ci_high=float(hi), n=len(values))
            rows.append(row)

        # Passing columns= keeps the documented schema even when rows is empty
        # (indexing an empty frame by column name would raise KeyError).
        return pd.DataFrame(rows, columns=columns)

    def pairwise_altruism_deltas(self, metric: str = 'total_score') -> pd.DataFrame:
        """
        Pairwise mean delta & Cohen's d between altruism levels for a metric.
        Returns: a, b, delta_mean, cohens_d, nx, ny
        """
        df = self.create_dataframe()
        levels = sorted(df['altruism_prob'].unique())
        rows = []
        for i, a in enumerate(levels):
            for b in levels[i + 1:]:
                x = df[df.altruism_prob == a][metric].dropna()
                y = df[df.altruism_prob == b][metric].dropna()
                if len(x) and len(y):
                    delta = y.mean() - x.mean()
                    pooled = np.sqrt(
                        ((x.var(ddof=1) * (len(x) - 1)) + (y.var(ddof=1) * (len(y) - 1)))
                        / (len(x) + len(y) - 2)
                    )
                    d = delta / pooled if pooled > 0 else np.nan
                    rows.append({
                        'a': a, 'b': b,
                        'delta_mean': float(delta),
                        'cohens_d': float(d),
                        'nx': int(len(x)), 'ny': int(len(y)),
                    })
        return pd.DataFrame(rows)

    # ---------- Plots (existing + new) ----------

    def plot_altruism_comparison(self, save_path: str | None = None):
        """Create plots comparing different altruism probabilities (existing, kept)."""
        if not self.results:
            print('No results loaded. Please load results first.')
            return

        df = self.create_dataframe()

        altruism_groups = (
            df.groupby('altruism_prob')
            .agg(
                {
                    'total_score': ['mean', 'std', 'count'],
                    'player10_score': ['mean', 'std'],
                    'conversation_length': 'mean',
                    'early_termination': 'mean',
                    'pause_count': 'mean',
                }
            )
            .round(3)
        )

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('Player10 Altruism Probability Comparison', fontsize=16)

        # Plot 1: Total Score vs Altruism Probability
        ax1 = axes[0, 0]
        altruism_probs = altruism_groups.index
        mean_scores = altruism_groups[('total_score', 'mean')]
        std_scores = altruism_groups[('total_score', 'std')]
        ax1.errorbar(altruism_probs, mean_scores, yerr=std_scores, marker='o', capsize=5, capthick=2)
        ax1.set_xlabel('Altruism Probability')
        ax1.set_ylabel('Total Score')
        ax1.set_title('Total Score vs Altruism Probability')
        ax1.grid(True, alpha=0.3)

        # Plot 2: Player10 Score vs Altruism Probability
        ax2 = axes[0, 1]
        mean_p10_scores = altruism_groups[('player10_score', 'mean')]
        std_p10_scores = altruism_groups[('player10_score', 'std')]
        ax2.errorbar(
            altruism_probs, mean_p10_scores, yerr=std_p10_scores,
            marker='s', capsize=5, capthick=2, color='orange'
        )
        ax2.set_xlabel('Altruism Probability')
        ax2.set_ylabel('Player10 Score')
        ax2.set_title('Player10 Individual Score vs Altruism Probability')
        ax2.grid(True, alpha=0.3)

        # Plot 3: Conversation Length vs Altruism Probability
        ax3 = axes[1, 0]
        conv_lengths = altruism_groups[('conversation_length', 'mean')]
        ax3.plot(altruism_probs, conv_lengths, marker='^')
        ax3.set_xlabel('Altruism Probability')
        ax3.set_ylabel('Average Conversation Length')
        ax3.set_title('Conversation Length vs Altruism Probability')
        ax3.grid(True, alpha=0.3)

        # Plot 4: Early Termination Rate vs Altruism Probability
        ax4 = axes[1, 1]
        early_term_rates = altruism_groups[('early_termination', 'mean')]
        ax4.plot(altruism_probs, early_term_rates, marker='d')
        ax4.set_xlabel('Altruism Probability')
        ax4.set_ylabel('Early Termination Rate')
        ax4.set_title('Early Termination Rate vs Altruism Probability')
        ax4.grid(True, alpha=0.3)

        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Plot saved to: {save_path}')
        plt.show()

    def plot_parameter_heatmap(
        self, param1: str, param2: str, metric: str = 'total_score', save_path: str | None = None
    ):
        """Create a heatmap showing the interaction between two parameters."""
        if not self.results:
            print('No results loaded. Please load results first.')
            return

        df = self.create_dataframe()
        pivot = df.groupby([param1, param2])[metric].mean().unstack()

        plt.figure(figsize=(10, 8))
        sns.heatmap(pivot, annot=True, fmt='.2f', cmap='viridis')
        plt.title(f'{metric.title()} Heatmap: {param1} vs {param2}')
        plt.xlabel(param2.replace('_', ' ').title())
        plt.ylabel(param1.replace('_', ' ').title())

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Heatmap saved to: {save_path}')
        plt.show()

    def plot_score_distributions(self, save_path: str | None = None):
        """Plot score distributions for different altruism probabilities."""
        if not self.results:
            print('No results loaded. Please load results first.')
            return

        df = self.create_dataframe()
        altruism_probs = sorted(df['altruism_prob'].unique())

        fig, axes = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle('Score Distributions by Altruism Probability', fontsize=16)

        # Plot 1: Total Score Distributions
        ax1 = axes[0]
        for prob in altruism_probs:
            scores = df[df['altruism_prob'] == prob]['total_score']
            ax1.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)
        ax1.set_xlabel('Total Score'); ax1.set_ylabel('Frequency')
        ax1.set_title('Total Score Distributions'); ax1.legend(); ax1.grid(True, alpha=0.3)

        # Plot 2: Player10 Score Distributions
        ax2 = axes[1]
        for prob in altruism_probs:
            scores = df[df['altruism_prob'] == prob]['player10_score']
            ax2.hist(scores, alpha=0.6, label=f'Altruism: {prob:.1f}', bins=20)
        ax2.set_xlabel('Player10 Score'); ax2.set_ylabel('Frequency')
        ax2.set_title('Player10 Individual Score Distributions'); ax2.legend(); ax2.grid(True, alpha=0.3)

        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Distributions plot saved to: {save_path}')
        plt.show()



# In [None]:
# ----- NEW PLOTS -----

    def plot_component_stack(self, save_path: str | None = None):
        """Stacked bars of shared component means vs altruism_prob."""
        df = self.create_dataframe()
        keep = ['shared_importance', 'shared_coherence', 'shared_freshness', 'shared_nonmonotonousness']
        have = [c for c in keep if c in df.columns]
        if not have:
            print('No shared component breakdown in results.')
            return
        g = df.groupby('altruism_prob')[have].mean().reset_index().sort_values('altruism_prob')
        ax = g.set_index('altruism_prob')[have].plot(kind='bar', stacked=True, figsize=(12, 6))
        ax.set_ylabel('Mean shared component score'); ax.set_xlabel('Altruism probability')
        ax.set_title('Shared score component breakdown vs altruism'); ax.legend(title='Component')
        ax.grid(True, axis='y', alpha=0.3); plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()

    def plot_pareto_tradeoff(self, save_path: str | None = None):
        """Scatter of mean Player10 individual vs mean total score, colored by altruism."""
        df = self.create_dataframe()
        agg = (df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])
               .agg({'total_score': 'mean', 'player10_individual': 'mean', 'early_termination': 'mean'})
               .reset_index())
        plt.figure(figsize=(9, 7))
        s = plt.scatter(
            agg['player10_individual'], agg['total_score'],
            c=agg['altruism_prob'], cmap='viridis', s=60, alpha=0.85
        )
        plt.colorbar(s, label='altruism_prob')
        # Annotate "risky" configs
        for _, r in agg.iterrows():
            if r['early_termination'] > 0.30:
                plt.annotate('ET>0.3', (r['player10_individual'], r['total_score']), fontsize=8)
        plt.xlabel('Player10 individual (mean)')
        plt.ylabel('Total score (mean)')
        plt.title('Pareto trade-off: Player10 individual vs Total')
        plt.grid(True, alpha=0.3); plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()

    def plot_rank_distribution(self, save_path: str | None = None):
        """Violin plots of Player10 rank across seeds vs altruism."""
        dfp = self.create_player_long()
        if dfp.empty or 'rank' not in dfp:
            print('No per-player metrics available.')
            return
        dfp_p10 = dfp[dfp['class_name'] == 'Player10']
        if dfp_p10.empty:
            print('No Player10 entries in per-player metrics.')
            return
        plt.figure(figsize=(10, 5))
        sns.violinplot(data=dfp_p10, x='altruism_prob', y='rank', inner='quartile', cut=0)
        plt.gca().invert_yaxis()  # rank 1 is best
        plt.title('Player10 rank distribution across seeds'); plt.xlabel('Altruism probability')
        plt.ylabel('Rank (lower is better)'); plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()

    def plot_seed_stability(self, metric: str = 'total_score', save_path: str | None = None):
        """Cumulative mean vs number of simulations (sorted by seed) to show stabilization."""
        df = self.create_dataframe().sort_values('seed')
        curves: list[tuple[float, np.ndarray]] = []
        for p, g in df.groupby('altruism_prob'):
            means = g[metric].expanding().mean().values
            curves.append((p, means))

        plt.figure(figsize=(10, 6))
        for p, means in curves:
            plt.plot(range(1, len(means) + 1), means, label=f'p={p}')
        plt.xlabel('Number of simulations (cumulative)'); plt.ylabel(f'Cumulative mean {metric}')
        plt.title('Seed stability of the estimate'); plt.legend(title='altruism_prob')
        plt.grid(True, alpha=0.3); plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()

    def plot_correlation_heatmap(self, save_path: str | None = None):
        """Correlation among knobs and outcomes."""
        df = self.create_dataframe()
        cols = [
            'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',
            'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight',
            'total_score', 'player10_score', 'early_termination', 'pause_count',
            'unique_items_used', 'length_utilization'
        ]
        cols = [c for c in cols if c in df.columns]
        corr = df[cols].corr(numeric_only=True)
        plt.figure(figsize=(10, 8))
        sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', center=0)
        plt.title('Correlation matrix: knobs vs outcomes'); plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()

    def plot_multi_heatmaps(
        self,
        fixed: str = 'altruism_prob',
        metric: str = 'total_score',
        cols: tuple[str, str] = ('tau_margin', 'epsilon_fresh'),
        save_path: str | None = None
    ):
        """Small-multiple heatmaps for metric by two parameters, faceted by a fixed param."""
        df = self.create_dataframe()
        vals = sorted(df[fixed].unique())
        n = len(vals)
        fig, axes = plt.subplots(1, n, figsize=(6 * n, 5), sharey=True)
        if n == 1:
            axes = [axes]
        for ax, v in zip(axes, vals, strict=False):
            sub = df[df[fixed] == v]
            if sub.empty:
                ax.set_visible(False)
                continue
            pivot = sub.groupby(list(cols))[metric].mean().unstack()
            sns.heatmap(pivot, ax=ax, annot=True, fmt='.2f', cmap='viridis')
            ax.set_title(f'{metric} | {fixed}={v}')
            ax.set_xlabel(cols[1]); ax.set_ylabel(cols[0])
        plt.tight_layout()
        if save_path: plt.savefig(save_path, dpi=300, bbox_inches='tight'); print(f'Saved: {save_path}')
        plt.show()


# In [None]:
# ---------- Modeling (optional) ----------

    def run_ols(self, metric: str = 'total_score'):
        """
        OLS regression of metric on config knobs (robust SE). Requires statsmodels.
        Returns the fitted model.
        """
        try:
            import statsmodels.api as sm
        except ImportError:
            print("statsmodels not installed. `pip install statsmodels` to use run_ols().")
            return None

        df = self.create_dataframe().dropna(subset=[metric])
        X_cols = [
            'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',
            'importance_weight', 'coherence_weight', 'freshness_weight', 'monotony_weight'
        ]
        X_cols = [c for c in X_cols if c in df.columns]
        X = df[X_cols].copy()
        X = sm.add_constant(X)
        y = df[metric]
        model = sm.OLS(y, X).fit(cov_type='HC3')  # robust SE
        print(model.summary())
        return model

    def run_logistic_early_term(self):
        """
        Logistic regression predicting early termination. Requires statsmodels.
        Returns the fitted model.
        """
        try:
            import statsmodels.api as sm
        except ImportError:
            print("statsmodels not installed. `pip install statsmodels` to use run_logistic_early_term().")
            return None

        df = self.create_dataframe().dropna(subset=['early_termination'])
        X_cols = [
            'altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono',
            'pause_count', 'unique_items_used', 'conversation_length_cfg'
        ]
        X_cols = [c for c in X_cols if c in df.columns]
        X = df[X_cols].copy()
        X = sm.add_constant(X)
        y = df['early_termination'].astype(int)
        model = sm.Logit(y, X).fit(disp=False)
        print(model.summary())
        return model

    # ---------- Config search ----------

    def best_configs(
        self,
        objective: str = 'total_score',
        constraints: dict[str, tuple[float | None, float | None]] | None = None,
        top_k: int = 10
    ) -> pd.DataFrame:
        """
        Find top configs by objective subject to optional constraints.
        constraints example: {'early_termination': (None, 0.2)}  # <= 0.2
        """
        df = self.create_dataframe()
        agg = (
            df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])
            .agg({
                objective: 'mean',
                'early_termination': 'mean',
                'player10_individual': 'mean',
                'total_score': 'mean'
            })
            .reset_index()
        )
        if constraints:
            mask = pd.Series(True, index=agg.index)
            for col, (lo, hi) in constraints.items():
                if lo is not None:
                    mask &= agg[col] >= lo
                if hi is not None:
                    mask &= agg[col] <= hi
            agg = agg[mask]
        return agg.sort_values(objective, ascending=False).head(top_k)

    # ---------- Quick report ----------

    def save_quick_report(self, out_dir: str = 'report_out'):
        """
        Write the standard figure set and a short markdown summary to ``out_dir``.

        The directory is created if needed. Each plot method is called with a
        save path inside it (the plots are also shown, as those methods always
        call ``plt.show()``), then ``SUMMARY.md`` is written with the run
        count, number of unique configs, overall total-score mean ± std and the
        early-termination rate.
        """
        os.makedirs(out_dir, exist_ok=True)

        # (plot method, output file name) -- rendered in this order
        figures = (
            (self.plot_altruism_comparison, 'altruism_comparison.png'),
            (self.plot_component_stack, 'component_stack.png'),
            (self.plot_pareto_tradeoff, 'pareto.png'),
            (self.plot_rank_distribution, 'rank_violin.png'),
            (self.plot_seed_stability, 'seed_stability.png'),
            (self.plot_correlation_heatmap, 'corr.png'),
            (self.plot_multi_heatmaps, 'multi_heatmaps.png'),
        )
        for render, file_name in figures:
            render(save_path=f'{out_dir}/{file_name}')

        # analysis text
        df = self.create_dataframe()
        n_configs = df.groupby(["altruism_prob", "tau_margin", "epsilon_fresh", "epsilon_mono"]).ngroups
        totals = df["total_score"]
        summary = '\n'.join([
            '# Simulation Summary',
            '',
            f'- Total sims: {len(df)}',
            f'- Unique configs: {n_configs}',
            f'- Overall total mean ± std: {totals.mean():.2f} ± {totals.std():.2f}',
            f'- Early termination rate: {df["early_termination"].mean():.2f}',
        ])
        summary_file = Path(out_dir) / 'SUMMARY.md'
        summary_file.write_text(summary, encoding='utf-8')
        print(f'Report written to {out_dir}/')

# In [None]:
 # ---------- Text summary (existing, kept with minor tweaks) ----------

    def print_detailed_analysis(self):
        """Print detailed analysis of the results."""
        if not self.results:
            print('No results loaded. Please load results first.')
            return

        df = self.create_dataframe()

        print('=== DETAILED ANALYSIS ===')
        print(f'Total simulations: {len(df)}')
        print(
            f'Unique configurations: {df.groupby(["altruism_prob", "tau_margin", "epsilon_fresh", "epsilon_mono"]).ngroups}'
        )

        # Overall statistics
        print('\n=== OVERALL STATISTICS ===')
        print(f'Total Score - Mean: {df["total_score"].mean():.2f}, Std: {df["total_score"].std():.2f}')
        print(f'Player10 Score - Mean: {df["player10_score"].mean():.2f}, Std: {df["player10_score"].std():.2f}')
        if 'player10_individual' in df:
            print(
                f'Player10 Individual - Mean: {df["player10_individual"].mean():.2f}, '
                f'Std: {df["player10_individual"].std():.2f}'
            )
        if 'player10_rank' in df:
            print(
                f'Player10 Rank - Mean: {df["player10_rank"].mean():.2f}, '
                f'Std: {df["player10_rank"].std():.2f}'
            )
        print(
            f'Conversation Length - Mean: {df["conversation_length"].mean():.1f}, '
            f'Std: {df["conversation_length"].std():.1f}'
        )
        print(f'Early Termination Rate: {df["early_termination"].mean():.2f}')

        # Best configurations
        print('\n=== TOP 10 CONFIGURATIONS ===')
        agg_map = {'total_score': ['mean', 'std', 'count'], 'player10_score': 'mean'}
        if 'player10_rank' in df:
            agg_map['player10_rank'] = 'mean'
        if 'player10_individual' in df:
            agg_map['player10_individual'] = 'mean'

        top_configs = (
            df.groupby(['altruism_prob', 'tau_margin', 'epsilon_fresh', 'epsilon_mono'])
            .agg(agg_map)
            .round(3)
        )

        new_columns = ['total_mean', 'total_std', 'count', 'p10_mean']
        if 'player10_rank' in agg_map:
            new_columns.append('p10_rank_mean')
        if 'player10_individual' in agg_map:
            new_columns.append('p10_individual_mean')
        top_configs.columns = new_columns
        top_configs = top_configs.sort_values('total_mean', ascending=False).head(10)

        for i, (config, row) in enumerate(top_configs.iterrows(), 1):
            altruism, tau, fresh, mono = config
            parts = [
                f'{i:2d}. Altruism: {altruism:.1f}',
                f'Tau: {tau:.2f}',
                f'Fresh: {fresh:.2f}',
                f'Mono: {mono:.2f}',
                f'Total: {row["total_mean"]:.2f}±{row["total_std"]:.2f}',
                f'P10: {row["p10_mean"]:.2f}',
            ]
            if 'p10_rank_mean' in row:
                parts.append(f'P10 Rank: {row["p10_rank_mean"]:.2f}')
            if 'p10_individual_mean' in row:
                parts.append(f'P10 Individual: {row["p10_individual_mean"]:.2f}')
            print(' -> '.join(parts))

        # Altruism analysis
        print('\n=== ALTRUISM ANALYSIS ===')
        agg_map = {
            'total_score': ['mean', 'std'],
            'player10_score': ['mean', 'std'],
            'conversation_length': 'mean',
            'early_termination': 'mean',
        }
        if 'player10_rank' in df:
            agg_map['player10_rank'] = ['mean', 'std']
        if 'player10_individual' in df:
            agg_map['player10_individual'] = ['mean', 'std']

        altruism_stats = df.groupby('altruism_prob').agg(agg_map).round(3)
        for prob in sorted(df['altruism_prob'].unique()):
            stats = altruism_stats.loc[prob]
            parts = [
                f'Altruism {prob:.1f}:',
                f'Total={stats[("total_score", "mean")]:.2f}±{stats[("total_score", "std")]:.2f}',
                f'P10={stats[("player10_score", "mean")]:.2f}±{stats[("player10_score", "std")]:.2f}',
                f'Length={stats[("conversation_length", "mean")]:.1f}',
                f'EarlyTerm={stats[("early_termination", "mean")]:.2f}',
            ]
            if ('player10_rank', 'mean') in stats:
                parts.append(
                    f'P10 Rank={stats[("player10_rank", "mean")]:.2f}±{stats[("player10_rank", "std")]:.2f}'
                )
            if ('player10_individual', 'mean') in stats:
                parts.append(
                    f'P10 Ind={stats[("player10_individual", "mean")]:.2f}±{stats[("player10_individual", "std")]:.2f}'
                )
            print(' '.join(parts))


# In [None]:
# ---------- Convenience: expose bootstrap/effect sizes quickly ----------

    def print_ci_and_effects(self, metric: str = 'total_score'):
        """Print bootstrapped CIs per altruism level and pairwise effect sizes."""
        df = self.create_dataframe()
        ci = self.bootstrap_ci(df, ['altruism_prob'], metric)
        print('\n=== BOOTSTRAP CI (by altruism_prob) ===')
        print(ci.sort_values('altruism_prob').to_string(index=False))
        deltas = self.pairwise_altruism_deltas(metric=metric)
        print('\n=== PAIRWISE DELTAS (a->b) ===')
        print(deltas.to_string(index=False))

# In [None]:
# ----------------------------
# CLI
# ----------------------------

def main():
    """
    Command-line entry point: load a results file, then report and plot.

    Optional text outputs (--analysis, --ci, --report, --ols, --logit) run
    first, in that order; afterwards exactly one plot is drawn, selected by
    --plot (default: 'altruism').
    """
    parser = argparse.ArgumentParser(description='Analyze Monte Carlo simulation results')
    parser.add_argument('results_file', help='Path to results JSON file')

    plot_kinds = [
        'altruism', 'heatmap', 'distributions',
        'components', 'pareto', 'rank', 'seed', 'corr', 'multi-heatmap'
    ]
    parser.add_argument('--plot', choices=plot_kinds, default='altruism', help='Type of plot to create')
    parser.add_argument('--param1', default='altruism_prob', help='Param for heatmap / multi-heatmap (rows)')
    parser.add_argument('--param2', default='tau_margin', help='Param for heatmap / multi-heatmap (cols)')
    parser.add_argument('--metric', default='total_score', help='Metric for heatmaps / stability')
    parser.add_argument('--fixed', default='altruism_prob', help='Facet for multi-heatmap')
    parser.add_argument('--save', help='Save plot to file')
    parser.add_argument('--analysis', action='store_true', help='Print detailed analysis')
    parser.add_argument('--ci', action='store_true', help='Print bootstrapped CIs and effect sizes')
    parser.add_argument('--report', help='Save a quick report to a directory (path)')
    parser.add_argument('--ols', action='store_true', help='Run OLS on total_score with knobs')
    parser.add_argument('--logit', action='store_true', help='Run logistic regression for early termination')

    args = parser.parse_args()

    # Load results
    analyzer = ResultsAnalyzer(args.results_file)

    # Analysis tables, report and optional modeling, each run only if requested.
    requested_steps = (
        (args.analysis, analyzer.print_detailed_analysis),
        (args.ci, lambda: analyzer.print_ci_and_effects(metric=args.metric)),
        (args.report, lambda: analyzer.save_quick_report(args.report)),
        (args.ols, lambda: analyzer.run_ols(metric='total_score')),
        (args.logit, analyzer.run_logistic_early_term),
    )
    for requested, step in requested_steps:
        if requested:
            step()

    # Create the selected plot
    plotters = {
        'altruism': lambda: analyzer.plot_altruism_comparison(args.save),
        'heatmap': lambda: analyzer.plot_parameter_heatmap(
            args.param1, args.param2, metric=args.metric, save_path=args.save
        ),
        'distributions': lambda: analyzer.plot_score_distributions(args.save),
        'components': lambda: analyzer.plot_component_stack(args.save),
        'pareto': lambda: analyzer.plot_pareto_tradeoff(args.save),
        'rank': lambda: analyzer.plot_rank_distribution(args.save),
        'seed': lambda: analyzer.plot_seed_stability(metric=args.metric, save_path=args.save),
        'corr': lambda: analyzer.plot_correlation_heatmap(args.save),
        'multi-heatmap': lambda: analyzer.plot_multi_heatmaps(
            fixed=args.fixed, metric=args.metric, save_path=args.save
        ),
    }
    draw = plotters.get(args.plot)
    if draw is not None:
        draw()
    # Done


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()