# Pareto Frontier Analysis: Eval Performance vs Jailbreak Harm Reduction

This notebook analyzes the tradeoff between:
- **Eval Performance**: Percentage change in benchmark scores (MMLU-Pro, IFEval, EQ-Bench)
- **Harm Reduction**: Percentage change in jailbreak harmful response rate

The Pareto frontier identifies configurations that achieve optimal tradeoffs between these objectives.

In [None]:
import json
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import shared plotting utilities
from plots import (
    parse_experiment_id, parse_layer_sort_key, cap_sort_key,
    format_layer_range, format_cap_label,
    CONFIG_COLORS, CONFIG_DISPLAY_NAMES, CONFIG_ORDER
)

In [None]:
# ============================================================================
# Configuration
# ============================================================================

# Model under analysis, plus the input/output locations derived from its name.
model = "qwen-3-32b"
total_layers = 64
subtitle = "{}, Single-Shot & No Thinking".format(model.replace('-', ' ').title())
base_dir = "/workspace/" + model
out_dir = "/root/git/plots/" + model + "/capped/results/pareto"

# The HTML plots are written here, so create the directory up front.
os.makedirs(out_dir, exist_ok=True)

# Configs to analyze ("baseline" is the reference the others are compared against)
config_names = ["baseline", "role_trait", "jailbreak", "lmsys_10000"]

# Per-eval settings: the key to read from results.json, the label shown on
# plots, and whether a larger value means better performance.
EVAL_METRICS = {
    'ifeval': dict(
        metric='inst_level_strict_acc,none',
        display_name='IFEval Instruction-level Accuracy',
        higher_is_better=True,
    ),
    'mmlu_pro': dict(
        metric='exact_match,custom-extract',
        display_name='MMLU Pro Exact Match Accuracy',
        higher_is_better=True,
    ),
    'eq_bench': dict(
        metric='eqbench,none',
        display_name='EQ-Bench Score',
        higher_is_better=True,
    ),
}

print("Configuration loaded")

## Data Loading Functions

In [None]:
def _read_run_manifest(manifest_path):
    """Return (thinking, apply_chat_template, vllm) for one run.

    All three flags default to False when manifest.json does not exist. A
    missing or null "thinking" entry is normalized to False as well.
    """
    if not os.path.exists(manifest_path):
        return False, False, False

    with open(manifest_path, "r") as f:
        manifest = json.load(f)

    thinking = manifest.get("thinking")
    if thinking is None:
        thinking = False

    return thinking, manifest.get("apply_chat_template", False), manifest.get("vllm", False)


def _collect_run_rows(runs_dir, task, config_name, experiment_id, run_prefix):
    """Build one row per run directory under runs_dir that has results for task."""
    rows = []
    run_dirs = sorted(d for d in os.listdir(runs_dir) if d.startswith(run_prefix))

    for run_dir in run_dirs:
        results_path = os.path.join(runs_dir, run_dir, "results.json")
        if not os.path.exists(results_path):
            continue

        with open(results_path, "r") as f:
            data = json.load(f)

        thinking, apply_chat_template, vllm = _read_run_manifest(
            os.path.join(runs_dir, run_dir, "manifest.json")
        )

        if "results" not in data or task not in data["results"]:
            continue

        row = {
            "task_name": task,
            "config_name": config_name,
            "experiment_id": experiment_id,
            "run_dir": run_dir,
            "thinking": thinking,
            "apply_chat_template": apply_chat_template,
            "vllm": vllm
        }
        # Add all metrics from the task results
        row.update(data["results"][task])

        # Drop alias field if present
        row.pop("alias", None)

        rows.append(row)

    return rows


def load_experiment_data(tasks, config_names, base_dir, run_prefix="2025-"):
    """
    Load experiment data for specified tasks and configs.

    Expected layout under ``{base_dir}/capped/benchmarks/{task}/``:
    - ``baseline/{run_dir}/results.json``
    - ``{config_name}/{experiment_id}/{run_dir}/results.json``
    Each run directory may also hold a ``manifest.json`` with the
    ``thinking``, ``apply_chat_template`` and ``vllm`` flags.

    Args:
        tasks: List of task names (e.g., ['ifeval', 'mmlu_pro', 'eq_bench'])
        config_names: List of config names (e.g., ['baseline', 'role_trait', 'jailbreak'])
        base_dir: Base directory containing benchmarks folder
        run_prefix: Only run directories whose name starts with this prefix
            are loaded. Defaults to "2025-" (the timestamped runs).

    Returns:
        DataFrame with columns: task_name, config_name, experiment_id, run_dir,
        thinking, apply_chat_template, vllm, and all metrics from results.
        When nothing is found the frame is empty but still has the metadata
        columns, so column lookups such as df['config_name'] keep working.
    """
    metadata_columns = [
        "task_name", "config_name", "experiment_id", "run_dir",
        "thinking", "apply_chat_template", "vllm",
    ]
    all_rows = []
    bench_dir = f"{base_dir}/capped/benchmarks"

    for task in tasks:
        task_dir = f"{bench_dir}/{task}"

        if not os.path.exists(task_dir):
            print(f"Warning: Task directory not found: {task_dir}")
            continue

        for config_name in config_names:
            config_dir = f"{task_dir}/{config_name}"

            if not os.path.exists(config_dir):
                print(f"Warning: Config directory not found: {config_dir}")
                continue

            if config_name == "baseline":
                # Baseline: run directories sit directly under the config directory
                all_rows.extend(
                    _collect_run_rows(config_dir, task, config_name, "baseline", run_prefix)
                )
                continue

            # Other configs: one sub-directory per experiment_id. Sorted so the
            # row order does not depend on the filesystem's listing order.
            for experiment_id in sorted(os.listdir(config_dir)):
                exp_dir = os.path.join(config_dir, experiment_id)

                if not os.path.isdir(exp_dir):
                    continue

                all_rows.extend(
                    _collect_run_rows(exp_dir, task, config_name, experiment_id, run_prefix)
                )

    if not all_rows:
        return pd.DataFrame(columns=metadata_columns)

    return pd.DataFrame(all_rows)

print("Benchmark loading function defined")

In [None]:
def load_jailbreak_scores(config_name, base_dir):
    """
    Load jailbreak scores and calculate combined harm rate.

    Reads ``{base_dir}/capped/results/{config_name}_jailbreak_1100_scores.jsonl``
    (one JSON object per line with at least ``experiment_id`` and ``score``).
    A response counts as harmful when its score is "enough_info" or
    "enough_info_and_follow_perfectly".

    Args:
        config_name: Name of config (e.g., 'jailbreak', 'lmsys_10000')
        base_dir: Base directory containing results folder

    Returns:
        DataFrame with one row per experiment and columns: experiment_id,
        config_name, harm_rate (percentage), n_harmful, n_total. An empty
        DataFrame is returned when the file is missing or has no records.
    """
    scores_path = f"{base_dir}/capped/results/{config_name}_jailbreak_1100_scores.jsonl"

    if not os.path.exists(scores_path):
        print(f"Warning: Jailbreak scores not found: {scores_path}")
        return pd.DataFrame()

    # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads
    with open(scores_path, "r") as f:
        scores = [json.loads(line) for line in f if line.strip()]

    if not scores:
        # Without records there is no experiment_id column to group by
        print(f"Warning: Jailbreak scores file is empty: {scores_path}")
        return pd.DataFrame()

    df = pd.DataFrame(scores)

    # Calculate harm rate per experiment
    harm_labels = ["enough_info", "enough_info_and_follow_perfectly"]

    results = []
    for exp_id, group in df.groupby("experiment_id"):
        n_total = len(group)
        n_harmful = int(group["score"].isin(harm_labels).sum())

        results.append({
            "experiment_id": exp_id,
            "config_name": config_name,
            "harm_rate": n_harmful / n_total * 100,
            "n_harmful": n_harmful,
            "n_total": n_total
        })

    return pd.DataFrame(results)

def load_baseline_jailbreak(base_dir, baseline_type="prompted"):
    """
    Load baseline (unsteered) jailbreak scores.

    Reads ``{base_dir}/evals/unsteered/unsteered_scores.jsonl`` when
    baseline_type is 'prompted', otherwise
    ``{base_dir}/evals/unsteered/unsteered_default_scores.jsonl``.
    A response counts as harmful when its score is "enough_info" or
    "enough_info_and_follow_perfectly".

    Args:
        base_dir: Base directory
        baseline_type: 'prompted' or 'default' (any value other than
            'prompted' selects the default scores file)

    Returns:
        Harm rate as percentage, None when the scores file does not exist,
        or 0.0 when the file contains no records. Callers that divide by the
        result need to handle both None and 0.0.
    """
    if baseline_type == "prompted":
        scores_path = f"{base_dir}/evals/unsteered/unsteered_scores.jsonl"
    else:
        scores_path = f"{base_dir}/evals/unsteered/unsteered_default_scores.jsonl"

    if not os.path.exists(scores_path):
        print(f"Warning: Baseline scores not found: {scores_path}")
        return None

    # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads
    with open(scores_path, "r") as f:
        scores = [json.loads(line) for line in f if line.strip()]

    if not scores:
        # An empty frame has no "score" column, so answer before touching it
        return 0.0

    df = pd.DataFrame(scores)

    harm_labels = ["enough_info", "enough_info_and_follow_perfectly"]
    n_total = len(df)
    n_harmful = int(df["score"].isin(harm_labels).sum())

    return n_harmful / n_total * 100

print("Jailbreak loading functions defined")

## Pareto Frontier Calculation

In [None]:
def calculate_pareto_frontier(df, x_col='eval_pct_change', y_col='harm_pct_change'):
    """
    Identify Pareto-optimal points.

    For our case:
    - X-axis (eval_pct_change): Higher is better (less performance degradation)
    - Y-axis (harm_pct_change): Lower is better (more harm reduction, i.e., more negative)

    A point is Pareto-optimal if no other point has both:
    - x >= x_i (better or equal eval performance)
    - y <= y_i (better or equal harm reduction)
    with at least one strict inequality.

    Exact duplicates do not dominate each other, so all copies of an
    otherwise non-dominated point are flagged. Rows with NaN in either column
    cannot be compared and are never flagged (nor do they dominate others).

    Args:
        df: DataFrame with x_col and y_col columns
        x_col: Column name for x-axis (eval percentage change)
        y_col: Column name for y-axis (harm percentage change)

    Returns:
        Copy of df with an additional 'is_pareto' boolean column. The input
        frame is not modified.
    """
    df = df.copy()

    xs = df[x_col].to_numpy(dtype=float, na_value=np.nan)
    ys = df[y_col].to_numpy(dtype=float, na_value=np.nan)

    comparable = ~(np.isnan(xs) | np.isnan(ys))
    is_pareto = np.zeros(len(df), dtype=bool)

    for i in np.flatnonzero(comparable):
        # Point j dominates point i if: j.x >= i.x AND j.y <= i.y (with at
        # least one strict). Point i fails the strict test against itself and
        # NaN comparisons are False, so neither needs special-casing here.
        dominates_i = (
            (xs >= xs[i]) & (ys <= ys[i])
            & ((xs > xs[i]) | (ys < ys[i]))
        )
        is_pareto[i] = not dominates_i.any()

    # Assign by position: label-based assignment would flag every row that
    # shares an index label when the frame has a duplicated index.
    df['is_pareto'] = is_pareto

    return df

print("Pareto calculation function defined")

## Plotting Function

In [None]:
def plot_pareto_frontier(df, eval_name, eval_display_name, title_suffix="", subtitle=""):
    """
    Plot Pareto frontier for eval performance vs jailbreak harm reduction.

    The figure contains one scatter trace per config, a text label next to
    every Pareto-optimal point, a dashed line joining the Pareto-optimal
    points in order of eval_pct_change, dotted reference lines through the
    origin, and "Ideal" / "Worst" corner annotations.

    Args:
        df: DataFrame with columns: config_name, experiment_id, eval_pct_change, 
            harm_pct_change, is_pareto, display_name
        eval_name: Short eval name (e.g., 'ifeval'). Not used in the body;
            only eval_display_name appears in the figure.
        eval_display_name: Display name for eval (e.g., 'IFEval Instruction-level Accuracy')
        title_suffix: Additional text for title
        subtitle: Subtitle text
    
    Returns:
        Plotly figure object
    """
    fig = go.Figure()
    
    # Plot all points by config (both Pareto and non-Pareto with same style).
    # Only the configs listed here get a scatter trace; a config_name in df
    # that is not in this list is not drawn as points.
    for config in ['baseline', 'jailbreak', 'role_trait', 'lmsys_10000']:
        if config not in df['config_name'].values:
            continue
        
        config_df = df[df['config_name'] == config]
        
        # Plot all points for this config
        fig.add_trace(go.Scatter(
            x=config_df['eval_pct_change'],
            y=config_df['harm_pct_change'],
            mode='markers',
            name=CONFIG_DISPLAY_NAMES[config],
            marker=dict(
                size=8,
                color=CONFIG_COLORS[config],
                opacity=0.6
            ),
            hovertemplate=(
                "<b>%{customdata[0]}</b><br>"
                "Config: %{customdata[1]}<br>"
                "Eval Change: %{x:.1f}%<br>"
                "Harm Change: %{y:.1f}%<br>"
                "%{customdata[2]}"
                "<extra></extra>"
            ),
            # customdata columns: [0] display name, [1] config name,
            # [2] Pareto marker text (empty string for non-Pareto points)
            customdata=np.column_stack([
                config_df['display_name'],
                config_df['config_name'],
                config_df['is_pareto'].apply(lambda x: '<b>PARETO OPTIMAL</b>' if x else '')
            ]),
            legendgroup=config,
            showlegend=True
        ))
    
    # Add text labels for Pareto-optimal points using parsed experiment info
    pareto_points = df[df['is_pareto']]
    for _, row in pareto_points.iterrows():
        # Parse experiment_id to get layer and cap info (the middle element of
        # the parsed tuple is not used here)
        layer_spec, _, cap_value = parse_experiment_id(row['experiment_id'])
        
        # Format the label
        layer_label = format_layer_range(layer_spec)
        cap_label = format_cap_label(cap_value)
        label_text = f"{layer_label}, {cap_label} %ile"
        
        # Get the color for this config
        point_color = CONFIG_COLORS[row['config_name']]
        
        fig.add_annotation(
            x=row['eval_pct_change'],
            y=row['harm_pct_change'],
            text=label_text,
            showarrow=False,
            xshift=10,  # Position label to the right of the point
            font=dict(size=9, color=point_color),
            xanchor='left',
            yanchor='middle',
            align='left'
        )
    
    # Draw Pareto frontier line (only when there are at least two points to join)
    pareto_sorted = pareto_points.sort_values('eval_pct_change')
    if len(pareto_sorted) > 1:
        fig.add_trace(go.Scatter(
            x=pareto_sorted['eval_pct_change'],
            y=pareto_sorted['harm_pct_change'],
            mode='lines',
            name='Pareto Frontier',
            line=dict(color='grey', width=1, dash='dash'),
            showlegend=True,
            hoverinfo='skip'
        ))
    
    # Add reference lines at origin
    fig.add_hline(y=0, line_dash="dot", line_color="gray", line_width=1, opacity=0.5)
    fig.add_vline(x=0, line_dash="dot", line_color="gray", line_width=1, opacity=0.5)
    
    # Data extents used to place the corner annotations
    max_x = df['eval_pct_change'].max()
    min_x = df['eval_pct_change'].min()
    max_y = df['harm_pct_change'].max()
    min_y = df['harm_pct_change'].min()
    
    # Ideal corner: largest eval change with the smallest (most negative) harm
    # change. The y-axis is not reversed in the layout below, so this sits at
    # the lower right of the plot. The 0.9 factor scales each coordinate
    # toward zero.
    fig.add_annotation(
        x=max_x * 0.9, y=min_y * 0.9,
        text="<b>Ideal</b><br>Unchanged eval<br>Reduced harm",
        showarrow=False,
        font=dict(size=10, color="green")
    )
    
    # Worst corner: smallest eval change with the largest harm change, i.e.
    # the upper left of the plot.
    fig.add_annotation(
        x=min_x * 0.9, y=max_y * 0.9,
        text="<b>Worst</b><br>Worse eval<br>Unchanged harm",
        showarrow=False,
        font=dict(size=10, color="red")
    )
    
    # NOTE(review): the y-axis title says "Reduced", but the plotted values are
    # harm_pct_change, where a reduction shows up as a negative number; confirm
    # the intended wording.
    fig.update_layout(
        title=dict(
            text=f"Pareto Frontier: {eval_display_name} vs. Harmful Response Rate{title_suffix}",
            subtitle=dict(text=subtitle)
        ),
        xaxis=dict(
            title="Eval Performance Change (%)",
            zeroline=True,
            showgrid=True,
            gridcolor='lightgray'
        ),
        yaxis=dict(
            title="Harmful Response Rate Reduced (%)",
            zeroline=True,
            showgrid=True,
            gridcolor='lightgray'
        ),
        width=1000,
        height=700,
        # Legend sits just outside the right edge of the plotting area
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        ),
        hovermode='closest'
    )
    
    return fig

print("Plotting function defined")

## Analysis: Generate Pareto Frontiers

For each eval, we'll:
1. Load benchmark and jailbreak data
2. Calculate percentage changes from baseline
3. Identify Pareto frontier
4. Visualize the tradeoff

## Load Jailbreak Data

Load jailbreak harm rates for all configs once.

In [None]:
# Get baseline harm rate (using prompted baseline)
baseline_harm = load_baseline_jailbreak(base_dir, baseline_type="prompted")
if baseline_harm is None:
    # load_baseline_jailbreak returns None when the scores file is missing.
    # Every harm percentage change is computed relative to this value, so stop
    # here with a clear message rather than failing later on a None value.
    raise FileNotFoundError(
        f"Baseline jailbreak scores not found under {base_dir}/evals/unsteered; "
        "cannot compute harm percentage changes"
    )
print(f"Baseline jailbreak harm rate: {baseline_harm:.2f}%")

# Load jailbreak data for each steered config (baseline has no per-experiment file)
jailbreak_data = {}
for config_name in config_names:
    if config_name == 'baseline':
        continue

    df_jb = load_jailbreak_scores(config_name, base_dir)
    if len(df_jb) > 0:
        jailbreak_data[config_name] = df_jb
        print(f"Loaded {len(df_jb)} experiments for {config_name}")
    else:
        print(f"Warning: No jailbreak data found for {config_name}")

print(f"\nLoaded jailbreak data for {len(jailbreak_data)} configs")

In [None]:
def prepare_pareto_data(df_eval, df_jailbreak_dict, eval_name, baseline_eval_value, baseline_harm_rate):
    """
    Prepare data for Pareto analysis by combining eval and jailbreak metrics.

    For every config in df_jailbreak_dict (except 'baseline') the eval rows
    are inner-joined with the jailbreak harm rates on experiment_id, and two
    relative changes are computed:
    - eval_pct_change = (eval_value - baseline_eval_value) / baseline_eval_value * 100
    - harm_pct_change = (harm_rate - baseline_harm_rate) / baseline_harm_rate * 100

    Args:
        df_eval: Pre-loaded eval DataFrame (filtered as desired)
        df_jailbreak_dict: Dict mapping config_name to jailbreak DataFrames
        eval_name: Name of eval task (for metric lookup)
        baseline_eval_value: Baseline eval metric value
        baseline_harm_rate: Baseline jailbreak harm rate (percentage)

    Returns:
        DataFrame ready for Pareto analysis with Pareto frontier calculated.
        An empty DataFrame is returned when no config has usable data, or when
        either baseline is missing, NaN or zero (the percentage changes would
        be undefined).
    """
    # Both baselines are used as divisors, so reject values that cannot be one
    for label, value in (("eval value", baseline_eval_value), ("harm rate", baseline_harm_rate)):
        if value is None or pd.isna(value) or value == 0:
            print(f"Skipping {eval_name}: baseline {label} is missing or zero ({value!r})")
            return pd.DataFrame()

    eval_config = EVAL_METRICS[eval_name]
    metric_col = eval_config['metric']

    print(f"\n{eval_name.upper()}:")
    print(f"Baseline eval value: {baseline_eval_value:.4f}")
    print(f"Baseline harm rate: {baseline_harm_rate:.2f}%")

    # Combine eval and jailbreak data for each config
    all_data = []

    for config_name, df_jailbreak in df_jailbreak_dict.items():
        if config_name == 'baseline':
            continue

        # Get eval data for this config
        config_eval = df_eval[df_eval['config_name'] == config_name]

        if len(config_eval) == 0:
            print(f"Skipping {config_name}: no eval data")
            continue

        # Merge eval and jailbreak data; the inner join keeps only experiments
        # that have both an eval result and a jailbreak harm rate
        merged = config_eval.merge(
            df_jailbreak[['experiment_id', 'harm_rate']],
            on='experiment_id',
            how='inner'
        )

        if len(merged) == 0:
            print(f"Skipping {config_name}: no matching experiments")
            continue

        if metric_col not in merged.columns:
            print(f"Skipping {config_name}: metric '{metric_col}' not found in eval data")
            continue

        # Calculate percentage changes
        merged['eval_value'] = merged[metric_col]
        merged['eval_pct_change'] = ((merged['eval_value'] - baseline_eval_value) / baseline_eval_value * 100)
        merged['harm_pct_change'] = ((merged['harm_rate'] - baseline_harm_rate) / baseline_harm_rate * 100)

        # Add parsed columns for display (elements 0 and 2 of the parsed tuple)
        parsed = merged['experiment_id'].apply(parse_experiment_id)
        merged['layer_spec'] = parsed.apply(lambda x: x[0])
        merged['cap_value'] = parsed.apply(lambda x: x[2])
        merged['layer_label'] = merged['layer_spec'].apply(format_layer_range)
        merged['cap_label'] = merged['cap_value'].apply(format_cap_label)
        merged['display_name'] = [
            f"{layer_label}, {cap_label}"
            for layer_label, cap_label in zip(merged['layer_label'], merged['cap_label'])
        ]

        all_data.append(merged)
        print(f"Loaded {len(merged)} experiments for {config_name}")

    if not all_data:
        print("No data available for Pareto analysis")
        return pd.DataFrame()

    # Combine all configs
    df_combined = pd.concat(all_data, ignore_index=True)

    # Calculate Pareto frontier
    df_combined = calculate_pareto_frontier(df_combined)

    print(f"\nTotal experiments: {len(df_combined)}")
    print(f"Pareto-optimal points: {df_combined['is_pareto'].sum()}")

    return df_combined

print("Data preparation helper defined")

### IFEval vs Jailbreak Harm

In [None]:
# Read every IFEval run for the selected configs, then report how many runs
# each config contributed.
df_ifeval = load_experiment_data(tasks=['ifeval'], config_names=config_names, base_dir=base_dir)
print(f"Loaded {len(df_ifeval)} IFEval experiment runs")
print("\nConfig breakdown:")
for config in df_ifeval['config_name'].unique():
    n_runs = (df_ifeval['config_name'] == config).sum()
    print(f"  {config}: {n_runs} runs")

In [None]:
# IFEval runs are used as loaded: no thinking / chat-template filter is
# applied in this cell, so the "filtered" frame is the full frame.
df_ifeval_filtered = df_ifeval

print(f"After filtering: {len(df_ifeval_filtered)} runs")

# The first baseline run supplies the reference value for percentage changes
metric_col = EVAL_METRICS['ifeval']['metric']
baseline_ifeval = df_ifeval_filtered.loc[df_ifeval_filtered['config_name'] == 'baseline']
if len(baseline_ifeval) == 0:
    print("Warning: No baseline found!")
    baseline_ifeval_value = None
else:
    baseline_ifeval_value = baseline_ifeval[metric_col].iloc[0]
    print(f"Baseline IFEval value: {baseline_ifeval_value:.4f}")

In [None]:
# Build the IFEval Pareto table; without a baseline value there is nothing to
# compare against, so fall back to an empty frame.
if baseline_ifeval_value is None:
    df_ifeval_pareto = pd.DataFrame()
    print("Skipping Pareto analysis due to missing baseline")
else:
    df_ifeval_pareto = prepare_pareto_data(
        df_eval=df_ifeval_filtered,
        df_jailbreak_dict=jailbreak_data,
        eval_name='ifeval',
        baseline_eval_value=baseline_ifeval_value,
        baseline_harm_rate=baseline_harm,
    )

In [None]:
# Render the IFEval Pareto plot inline and save it as HTML in out_dir
if len(df_ifeval_pareto) == 0:
    print("No data available for IFEval Pareto plot")
else:
    fig_ifeval = plot_pareto_frontier(
        df_ifeval_pareto,
        'ifeval',
        EVAL_METRICS['ifeval']['display_name'],
        subtitle=subtitle,
    )
    fig_ifeval.show()
    fig_ifeval.write_html(out_dir + "/ifeval.html")

### MMLU Pro vs Jailbreak Harm

In [None]:
# Read every MMLU Pro run for the selected configs, then report how many runs
# each config contributed.
df_mmlu = load_experiment_data(tasks=['mmlu_pro'], config_names=config_names, base_dir=base_dir)
print(f"Loaded {len(df_mmlu)} MMLU Pro experiment runs")
print("\nConfig breakdown:")
for config in df_mmlu['config_name'].unique():
    n_runs = (df_mmlu['config_name'] == config).sum()
    print(f"  {config}: {n_runs} runs")

In [None]:
# Keep only runs with thinking off, the chat template applied and the vllm
# flag off. The same condition is used for baseline and steered configs, so a
# single mask covers both.
mmlu_keep = (
    (~df_mmlu['thinking'])
    & (df_mmlu['apply_chat_template'])
    & (~df_mmlu['vllm'])
)
df_mmlu_filtered = df_mmlu[mmlu_keep]

print(f"After filtering: {len(df_mmlu_filtered)} runs")
for config in df_mmlu['config_name'].unique():
    n_kept = (df_mmlu_filtered['config_name'] == config).sum()
    print(f"  {config}: {n_kept} runs")

# The first remaining baseline run supplies the reference value
metric_col = EVAL_METRICS['mmlu_pro']['metric']
baseline_mmlu = df_mmlu_filtered.loc[df_mmlu_filtered['config_name'] == 'baseline']
if len(baseline_mmlu) == 0:
    print("Warning: No baseline found!")
    baseline_mmlu_value = None
else:
    baseline_mmlu_value = baseline_mmlu[metric_col].iloc[0]
    print(f"Baseline MMLU Pro value: {baseline_mmlu_value:.4f}")

In [None]:
# Build the MMLU Pro Pareto table; without a baseline value there is nothing
# to compare against, so fall back to an empty frame.
if baseline_mmlu_value is None:
    df_mmlu_pareto = pd.DataFrame()
    print("Skipping Pareto analysis due to missing baseline")
else:
    df_mmlu_pareto = prepare_pareto_data(
        df_eval=df_mmlu_filtered,
        df_jailbreak_dict=jailbreak_data,
        eval_name='mmlu_pro',
        baseline_eval_value=baseline_mmlu_value,
        baseline_harm_rate=baseline_harm,
    )

In [None]:
# Render the MMLU Pro Pareto plot inline and save it as HTML in out_dir
if len(df_mmlu_pareto) == 0:
    print("No data available for MMLU Pro Pareto plot")
else:
    fig_mmlu = plot_pareto_frontier(
        df_mmlu_pareto,
        'mmlu_pro',
        EVAL_METRICS['mmlu_pro']['display_name'],
        subtitle=subtitle,
    )
    fig_mmlu.show()
    fig_mmlu.write_html(out_dir + "/mmlu_pro.html")

### EQ-Bench vs Jailbreak Harm

In [None]:
# Read every EQ-Bench run for the selected configs, then report how many runs
# each config contributed.
df_eq = load_experiment_data(tasks=['eq_bench'], config_names=config_names, base_dir=base_dir)
print(f"Loaded {len(df_eq)} EQ-Bench experiment runs")
print("\nConfig breakdown:")
for config in df_eq['config_name'].unique():
    n_runs = (df_eq['config_name'] == config).sum()
    print(f"  {config}: {n_runs} runs")

In [None]:
# EQ-Bench runs are used as loaded: no filter is applied in this cell
df_eq_filtered = df_eq

print(f"After filtering: {len(df_eq_filtered)} runs")

# The first baseline run supplies the reference value for percentage changes
metric_col = EVAL_METRICS['eq_bench']['metric']
baseline_eq = df_eq_filtered.loc[df_eq_filtered['config_name'] == 'baseline']
if len(baseline_eq) == 0:
    print("Warning: No baseline found!")
    baseline_eq_value = None
else:
    baseline_eq_value = baseline_eq[metric_col].iloc[0]
    print(f"Baseline EQ-Bench value: {baseline_eq_value:.4f}")

In [None]:
# Build the EQ-Bench Pareto table; without a baseline value there is nothing
# to compare against, so fall back to an empty frame.
if baseline_eq_value is None:
    df_eq_pareto = pd.DataFrame()
    print("Skipping Pareto analysis due to missing baseline")
else:
    df_eq_pareto = prepare_pareto_data(
        df_eval=df_eq_filtered,
        df_jailbreak_dict=jailbreak_data,
        eval_name='eq_bench',
        baseline_eval_value=baseline_eq_value,
        baseline_harm_rate=baseline_harm,
    )

In [None]:
# Render the EQ-Bench Pareto plot inline and save it as HTML in out_dir
if len(df_eq_pareto) == 0:
    print("No data available for EQ-Bench Pareto plot")
else:
    fig_eq = plot_pareto_frontier(
        df_eq_pareto,
        'eq_bench',
        EVAL_METRICS['eq_bench']['display_name'],
        subtitle=subtitle,
    )
    fig_eq.show()
    fig_eq.write_html(out_dir + "/eq_bench.html")

## Summary of Pareto-Optimal Configurations

Display the Pareto-optimal experiments across all evals.

In [None]:
# Gather the Pareto-optimal rows of each eval into one summary table
summary_columns = [
    'eval_name', 'config_name', 'experiment_id', 'display_name',
    'eval_pct_change', 'harm_pct_change'
]
pareto_frames_by_eval = {
    'ifeval': df_ifeval_pareto,
    'mmlu_pro': df_mmlu_pareto,
    'eq_bench': df_eq_pareto,
}

pareto_summary = []
for eval_name, df_pareto in pareto_frames_by_eval.items():
    # Evals whose Pareto table is empty contribute nothing
    if len(df_pareto) == 0:
        continue

    pareto_points = df_pareto[df_pareto['is_pareto']].copy()
    pareto_points['eval_name'] = eval_name
    pareto_summary.append(pareto_points[summary_columns])

if pareto_summary:
    # Within each eval, list the best eval performance first
    df_summary = pd.concat(pareto_summary, ignore_index=True).sort_values(
        ['eval_name', 'eval_pct_change'], ascending=[True, False]
    )

    print("\n=== PARETO-OPTIMAL CONFIGURATIONS ===")
    print(f"\nTotal Pareto points across all evals: {len(df_summary)}")
    print("\nBy eval:")
    print(df_summary.groupby('eval_name').size())
    print("\nBy config:")
    print(df_summary.groupby('config_name').size())

    display(df_summary)
else:
    print("No Pareto-optimal points found")