# Sweep Explorer - Interactive Parameter Visualization

Interactive explorer for experimental sweep results. Map sweep parameters
(entry_max, label_max, dataset, summarizer, k) onto chart axes, facets,
color, and filters.

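**Prerequisites:** Run `scripts/run_experimental_sweep.py` or `pca_kllmeans_sweep.ipynb`
to generate `experimental_sweep_*.pkl` files. The loader below reads them from `../experimental_results` by default; pass a different `data_dir` if your files are elsewhere (e.g. the repo root).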

In [1]:
# Install hvplot if needed (safe to re-run)
%pip install -q hvplot

import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

import hvplot.pandas  # registers .hvplot accessor on DataFrames
import panel as pn

# Enable Panel widgets in notebook: request the 'tabulator' extension and set
# the 'material' design and 'stretch_width' sizing mode for this session.
pn.extension('tabulator', design='material', sizing_mode='stretch_width')

# Suppress FutureWarning messages (the filter applies process-wide).
warnings.filterwarnings('ignore', category=FutureWarning)


[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


In [2]:
def load_sweep_dataframe(data_dir: str = "../experimental_results") -> pd.DataFrame:
    """
    Load all experimental_sweep_*.pkl files and flatten into one row per (file, k).

    Each pickle is expected to hold a dict with a "metadata" mapping and a
    "result" mapping whose "by_k" entry maps k to per-k results. The per-k
    "stability" value may be nested (``{'silhouette': {'mean': x, 'std': y}}``),
    flat (``{'silhouette_mean': x, 'silhouette_std': y}``), or an object whose
    attributes carry either layout.

    Files that cannot be unpickled, or whose payload is not a dict, are
    reported and skipped instead of aborting the whole load. A short summary
    of what was loaded is printed.

    Args:
        data_dir: Directory containing pickle files (default: ../experimental_results)

    Returns:
        DataFrame with sweep parameters and metrics as columns. Metric columns
        that hold no values at all are dropped; the identifying columns
        (entry_max, label_max, dataset, summarizer, k, source_file) are always
        kept. The frame is empty (no columns) when no rows could be loaded.
    """
    # Columns coerced to a numeric dtype (None -> NaN).
    numeric_cols = [
        'ari_mean', 'ari_std', 'silhouette_mean', 'silhouette_std',
        'coverage_mean', 'coverage_std', 'inertia_mean', 'inertia_std',
        'ari_vs_ground_truth', 'objective', 'actual_entry_count',
        'actual_label_count', 'entry_max', 'label_max'
    ]
    # Identifier columns are kept even when entirely empty: the summary below
    # and the notebook's filter cells index them unconditionally.
    key_cols = {'entry_max', 'label_max', 'dataset', 'summarizer', 'k', 'source_file'}

    def _as_mapping(obj):
        """Return obj as a dict: dicts pass through, objects expose their attributes."""
        if not obj:
            return {}
        if isinstance(obj, dict):
            return obj
        if hasattr(obj, "__dict__"):
            return vars(obj)
        return {}

    def _stat(stab, key, stat, *flat_names):
        """Read stab[key][stat] when nested, else the first flat name present in stab."""
        nested = stab.get(key)
        if isinstance(nested, dict):
            return nested.get(stat)
        for name in flat_names:
            if name in stab:
                return stab[name]
        return None

    rows = []
    errors = []
    pkl_paths = sorted(Path(data_dir).glob("experimental_sweep_*.pkl"))

    for pkl_path in pkl_paths:
        try:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # point data_dir at files produced by your own sweep runs.
            with open(pkl_path, "rb") as f:
                data = pickle.load(f)
        except Exception as e:
            errors.append((pkl_path.name, str(e)))
            continue

        if not isinstance(data, dict):
            errors.append((pkl_path.name, f"unexpected payload type {type(data).__name__}"))
            continue

        meta = _as_mapping(data.get("metadata"))
        result = _as_mapping(data.get("result"))
        by_k = _as_mapping(result.get("by_k"))

        for k_str, k_data in by_k.items():
            k_data = _as_mapping(k_data)
            # Handles plain dicts as well as StabilityMetrics-style instances
            stab = _as_mapping(k_data.get("stability"))

            rows.append({
                "entry_max": meta.get("entry_max"),
                "dataset": meta.get("benchmark_source", data.get("dataset_name", "unknown")),
                "label_max": meta.get("label_max"),
                "summarizer": meta.get("summarizer", "unknown"),
                "k": int(k_str),
                # Handle both nested and flat structures
                "ari_mean": _stat(stab, 'stability_ari', 'mean', 'stability_ari_mean'),
                "ari_std": _stat(stab, 'stability_ari', 'std', 'stability_ari_std'),
                "silhouette_mean": _stat(stab, 'silhouette', 'mean', 'silhouette_mean'),
                "silhouette_std": _stat(stab, 'silhouette', 'std', 'silhouette_std'),
                "coverage_mean": _stat(stab, 'coverage', 'mean', 'coverage_mean'),
                "coverage_std": _stat(stab, 'coverage', 'std', 'coverage_std'),
                # Inertia is stored under 'dispersion' when nested; flat data may use either name
                "inertia_mean": _stat(stab, 'dispersion', 'mean', 'inertia_mean', 'dispersion_mean'),
                "inertia_std": _stat(stab, 'dispersion', 'std', 'inertia_std', 'dispersion_std'),
                "ari_vs_ground_truth": stab.get("ari_vs_ground_truth"),
                "objective": k_data.get("objective"),
                "actual_entry_count": meta.get("actual_entry_count"),
                "actual_label_count": meta.get("actual_label_count"),
                "source_file": pkl_path.name,
            })

    if errors:
        print(f"[WARN] Failed to load {len(errors)} file(s):")
        for name, err in errors[:5]:
            print(f"  {name}: {err}")
        if len(errors) > 5:
            print(f"  ... and {len(errors) - 5} more")

    df = pd.DataFrame(rows)

    if df.empty:
        if pkl_paths:
            # Files exist but none yielded rows: say so instead of claiming none were found
            print(f"[WARN] Found {len(pkl_paths)} pickle file(s) in {data_dir!r} but none produced any rows.")
        else:
            print("[WARN] No pickle files found. Run sweep script first to generate data.")
        return df

    # Convert None to NaN for numeric columns (fixes hvplot/numpy comparison errors)
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Drop metric columns that are completely empty (identifier columns stay)
    empty_cols = [c for c in df.columns if c not in key_cols and df[c].isna().all()]
    df = df.drop(columns=empty_cols)

    print(f"[OK] Loaded {len(df)} rows from {df['source_file'].nunique()} pickle files.")
    print(f"     Datasets: {sorted(df['dataset'].unique())}")
    print(f"     Summarizers: {sorted(df['summarizer'].unique())}")
    print(f"     entry_max: {sorted(df['entry_max'].dropna().unique())}")
    print(f"     label_max: {sorted(df['label_max'].dropna().unique())}")
    print(f"     k range: {df['k'].min()} - {df['k'].max()}")
    print(f"     Available metric columns: {[c for c in df.columns if c in numeric_cols]}")
    print(f"     DataFrame shape: {df.shape}")
    return df


# Load every sweep result, print a quick sanity check, and preview the first rows
df = load_sweep_dataframe()
print(
    f"\n[DEBUG] df.empty = {df.empty}",
    f"[DEBUG] len(df) = {len(df)}",
    sep="\n",
)
df.head()

[OK] Loaded 3141 rows from 349 pickle files.
     Datasets: ['20newsgroups_10cat', '20newsgroups_6cat', 'dbpedia', 'yahoo_answers']
     Summarizers: ['None', 'gpt-4o', 'gpt-4o-mini', 'gpt-5-chat']
     entry_max: [np.int64(100), np.int64(200), np.int64(300), np.int64(400), np.int64(500)]
     label_max: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6)]
     k range: 2 - 10
     Available metric columns: ['entry_max', 'label_max', 'ari_mean', 'ari_std', 'silhouette_mean', 'silhouette_std', 'coverage_mean', 'coverage_std', 'inertia_mean', 'inertia_std', 'objective', 'actual_entry_count', 'actual_label_count']
     DataFrame shape: (3141, 17)

[DEBUG] df.empty = False
[DEBUG] len(df) = 3141


Unnamed: 0,entry_max,dataset,label_max,summarizer,k,ari_mean,ari_std,silhouette_mean,silhouette_std,coverage_mean,coverage_std,inertia_mean,inertia_std,objective,actual_entry_count,actual_label_count,source_file
0,100,20newsgroups_10cat,1,gpt-4o,2,0.221651,0.234452,0.065419,0.004809,0.0,0.0,0.789241,0.006044,78.003882,100,1,experimental_sweep_entry100_20newsgroups_10cat...
1,100,20newsgroups_10cat,1,gpt-4o,3,0.273558,0.136619,0.061819,0.006334,0.0,0.0,0.721322,0.007871,71.174634,100,1,experimental_sweep_entry100_20newsgroups_10cat...
2,100,20newsgroups_10cat,1,gpt-4o,4,0.265482,0.095696,0.058911,0.009215,0.0005,0.002179,0.681253,0.011009,66.949057,100,1,experimental_sweep_entry100_20newsgroups_10cat...
3,100,20newsgroups_10cat,1,gpt-4o,5,0.208497,0.069513,0.05059,0.008673,0.001,0.003,0.657712,0.01215,64.152566,100,1,experimental_sweep_entry100_20newsgroups_10cat...
4,100,20newsgroups_10cat,1,gpt-4o,6,0.187624,0.06219,0.048295,0.008808,0.001,0.003,0.633295,0.010585,62.162543,100,1,experimental_sweep_entry100_20newsgroups_10cat...


## Data Filtering

Select specific parameter combinations to analyze:

In [3]:
if not df.empty:
    # Distinct values of each sweep parameter, used as widget options
    datasets = sorted(df['dataset'].unique())
    summarizers = sorted(df['summarizer'].unique())
    entry_maxes = sorted(df['entry_max'].dropna().unique())
    label_maxes = sorted(df['label_max'].dropna().unique())

    # Plain ints so the widget options do not show numpy scalar types
    entry_max_options = [int(x) for x in entry_maxes]
    label_max_options = [int(x) for x in label_maxes]

    # Create filter widgets. Each `value` is a separate list (a slice/copy), so
    # changing the selection can never alias the `options` list.
    dataset_filter = pn.widgets.MultiChoice(
        name='Datasets',
        options=datasets,
        value=datasets[:3],
        solid=False
    )
    summarizer_filter = pn.widgets.MultiChoice(
        name='Summarizers',
        options=summarizers,
        value=list(summarizers),
        solid=False
    )
    entry_max_filter = pn.widgets.MultiChoice(
        name='Entry Max',
        options=entry_max_options,
        value=entry_max_options[:2],
        solid=False
    )
    label_max_filter = pn.widgets.MultiChoice(
        name='Label Max',
        options=label_max_options,
        value=label_max_options[:3],
        solid=False
    )

    # Function to apply filters
    @pn.depends(dataset_filter, summarizer_filter, entry_max_filter, label_max_filter)
    def get_filtered_df(datasets, summarizers, entry_maxes, label_maxes):
        """
        Return the rows of `df` matching the given selections.

        An empty selection for a parameter means "do not filter on it".
        Prints how many rows survived and returns a new DataFrame.
        """
        keep = pd.Series(True, index=df.index)
        selections = (
            ('dataset', datasets),
            ('summarizer', summarizers),
            ('entry_max', entry_maxes),
            ('label_max', label_maxes),
        )
        for column, selected in selections:
            if selected:
                keep &= df[column].isin(selected)
        filtered = df[keep]

        print(f"Filtered to {len(filtered)} rows (from {len(df)} total)")
        return filtered

    # Display filter controls
    filter_panel = pn.Column(
        "### Filter Data",
        pn.Row(dataset_filter, summarizer_filter),
        pn.Row(entry_max_filter, label_max_filter),
        pn.pane.Markdown("**Note:** Select values above, then run cells below to see filtered results."),
    )
    # A bare expression inside an `if` block is not auto-rendered by the
    # notebook (only a cell's last top-level expression is), so show it explicitly.
    display(filter_panel)
else:
    print("[WARN] No data to filter")

In [4]:
# Turn the current widget selections into a concrete filtered DataFrame
if df.empty:
    # Nothing loaded: pass the empty frame through so later cells can still test .empty
    df_filtered = df
else:
    df_filtered = get_filtered_df(
        dataset_filter.value,
        summarizer_filter.value,
        entry_max_filter.value,
        label_max_filter.value,
    )

    # Summarize which parameter values survived the filter
    kept_datasets = sorted(df_filtered['dataset'].unique())
    kept_summarizers = sorted(df_filtered['summarizer'].unique())
    kept_entry_max = sorted(df_filtered['entry_max'].dropna().unique())
    kept_label_max = sorted(df_filtered['label_max'].dropna().unique())
    k_values = df_filtered['k']

    print(f"\nFiltered data summary:")
    print(f"  Datasets: {kept_datasets}")
    print(f"  Summarizers: {kept_summarizers}")
    print(f"  Entry max: {kept_entry_max}")
    print(f"  Label max: {kept_label_max}")
    print(f"  K range: {k_values.min()} - {k_values.max()}")

Filtered to 639 rows (from 3141 total)

Filtered data summary:
  Datasets: ['20newsgroups_10cat', '20newsgroups_6cat', 'dbpedia']
  Summarizers: ['None', 'gpt-4o', 'gpt-4o-mini', 'gpt-5-chat']
  Entry max: [np.int64(100), np.int64(200)]
  Label max: [np.int64(1), np.int64(2), np.int64(3)]
  K range: 2 - 10


## Interactive Explorer

The explorer below lets you change:
- **Kind**: Chart type (line, scatter, bar, box, etc.)
- **X / Y**: Which column on each axis
- **by**: Color grouping
- **groupby**: Creates a widget (slider/dropdown) to step through values
- Additional options in the sidebar (aggregation, faceting, etc.)

If `hvDataFrameExplorer` is not available in your hvplot version, the cell after the
built-in explorer (In [6]) provides a manual widget-based fallback.

In [5]:
# Built-in explorer on the FILTERED data. The class is looked up in two module
# locations in turn; hvDataFrameExplorer ends up None if neither import works.
try:
    from hvplot.ui import hvDataFrameExplorer
except ImportError:
    try:
        from hvplot.explorer import hvDataFrameExplorer
    except ImportError:
        hvDataFrameExplorer = None

print(f"[DEBUG] hvDataFrameExplorer available: {hvDataFrameExplorer is not None}")
print(f"[DEBUG] df_filtered.empty: {df_filtered.empty}")

if df_filtered.empty:
    print("[WARN] No data after filtering. Adjust filters above.")
elif hvDataFrameExplorer is None:
    print("[INFO] hvDataFrameExplorer not available. Use the manual explorer in the next cell.")
else:
    # Candidate y-axis columns: numeric, at least one value, not a sweep parameter
    numeric_cols = df_filtered.select_dtypes(include=['number']).columns
    metric_priority = ['ari_mean', 'silhouette_mean', 'coverage_mean', 'inertia_mean']
    metadata_cols = ['k', 'entry_max', 'label_max', 'actual_entry_count', 'actual_label_count']

    available_metrics = [
        col
        for col in numeric_cols
        if df_filtered[col].notna().any() and col not in metadata_cols
    ]

    # Prefer the first priority metric that has data, else the first available one
    default_y = next((name for name in metric_priority if name in available_metrics), None)
    if default_y is None and available_metrics:
        default_y = available_metrics[0]

    if not default_y:
        print("[WARN] No metric columns with data available for plotting")
    else:
        print(f"[INFO] Using '{default_y}' as default y-axis. Available metrics: {available_metrics}")
        print(f"[DEBUG] Creating explorer with {len(df_filtered)} rows...")

        try:
            explorer = hvDataFrameExplorer(
                df_filtered,
                kind="line",
                x="k",
                y=default_y,
                by=["summarizer"],
            )
            print(f"[DEBUG] Explorer created successfully: {type(explorer)}")
            print(f"[DEBUG] Displaying explorer...")
            display(explorer)  # explicit: nested expressions are not auto-rendered
        except Exception as e:
            print(f"[ERROR] Failed to create/display explorer: {e}")
            import traceback
            traceback.print_exc()

[DEBUG] hvDataFrameExplorer available: True
[DEBUG] df_filtered.empty: False
[INFO] Using 'ari_mean' as default y-axis. Available metrics: ['ari_mean', 'ari_std', 'silhouette_mean', 'silhouette_std', 'coverage_mean', 'coverage_std', 'inertia_mean', 'inertia_std', 'objective']
[DEBUG] Creating explorer with 639 rows...
[DEBUG] Explorer created successfully: <class 'hvplot.ui.hvDataFrameExplorer'>
[DEBUG] Displaying explorer...


In [6]:
# Manual explorer: Panel widgets + hvplot (works in any hvplot version)
# Run this cell if hvDataFrameExplorer is not available, or if you want more control.

if df.empty:
    print("[WARN] No data loaded.")
else:
    numeric_cols = list(df.select_dtypes(include="number").columns)
    categorical_cols = [c for c in df.columns if c not in numeric_cols]
    all_cols = numeric_cols + categorical_cols

    def _pick_default(preferred, options):
        """Return `preferred` when it is selectable, otherwise the first option."""
        return preferred if preferred in options else options[0]

    # --- Chart controls ---
    # Defaults are validated against the options: the loader drops metric
    # columns that hold no data, so "ari_mean" is not guaranteed to exist.
    kind_select = pn.widgets.Select(name="Chart Type", options=["line", "scatter", "bar", "box", "area", "hist"], value="line", width=140)
    x_select = pn.widgets.Select(name="X Axis", options=numeric_cols, value=_pick_default("k", numeric_cols), width=180)
    y_select = pn.widgets.Select(name="Y Axis", options=numeric_cols, value=_pick_default("ari_mean", numeric_cols), width=180)
    by_select = pn.widgets.Select(name="Color (by)", options=["None"] + all_cols, value=_pick_default("summarizer", ["None"] + all_cols), width=180)
    facet_col = pn.widgets.Select(name="Facet Column", options=["None"] + categorical_cols, value="None", width=180)
    facet_row = pn.widgets.Select(name="Facet Row", options=["None"] + categorical_cols, value="None", width=180)

    # --- Filter widgets for sweep parameters ---
    # Values are compared as strings so numeric and text columns share one widget type.
    filter_widgets = {}
    for col in ["entry_max", "label_max", "dataset", "summarizer"]:
        if col in df.columns:
            unique_vals = sorted(df[col].dropna().unique(), key=str)
            if len(unique_vals) <= 30:
                filter_widgets[col] = pn.widgets.MultiChoice(
                    name=f"Filter: {col}",
                    options=[str(v) for v in unique_vals],
                    value=[str(v) for v in unique_vals],
                    width=300,
                )

    plot_pane = pn.pane.HoloViews(None, sizing_mode="stretch_width", min_height=450)

    def update_plot(*events):
        """Rebuild the plot from the current widget state; clears the pane on failure."""
        filtered = df

        # Apply filters (an empty selection leaves that column unfiltered)
        for col, widget in filter_widgets.items():
            if widget.value:
                filtered = filtered[filtered[col].astype(str).isin(widget.value)]

        if filtered.empty:
            plot_pane.object = None
            return

        kwargs = {
            "kind": kind_select.value,
            "x": x_select.value,
            "y": y_select.value,
            "responsive": True,
            "height": 400,
        }
        if by_select.value != "None":
            kwargs["by"] = by_select.value
        if facet_col.value != "None":
            kwargs["col"] = facet_col.value
        if facet_row.value != "None":
            kwargs["row"] = facet_row.value

        try:
            plot_pane.object = filtered.hvplot(**kwargs)
        except Exception as e:
            # Some option combinations are invalid for a given chart type;
            # report and blank the pane rather than break the widget callbacks.
            print(f"Plot error: {e}")
            plot_pane.object = None

    # Wire widgets
    for w in [kind_select, x_select, y_select, by_select, facet_col, facet_row]:
        w.param.watch(update_plot, "value")
    for w in filter_widgets.values():
        w.param.watch(update_plot, "value")

    # Initial plot
    update_plot()

    # Layout
    controls = pn.Column(
        pn.pane.Markdown("### Chart Controls"),
        pn.Row(kind_select, x_select, y_select, by_select),
        pn.Row(facet_col, facet_row),
        pn.pane.Markdown("### Filters"),
        *list(filter_widgets.values()),
    )

    # A bare expression inside `else:` is not auto-rendered by the notebook,
    # so the layout has to be displayed explicitly.
    display(pn.Column(controls, plot_pane))

## Simple Plot Test

Basic hvplot to verify plotting works:

In [7]:
# Smoke test on the FILTERED data: one static line plot of ari_mean against k
can_plot = (not df_filtered.empty) and ('ari_mean' in df_filtered.columns)

if not can_plot:
    # The column check is only reported when there are rows to check
    has_metric = 'ari_mean' in df_filtered.columns if not df_filtered.empty else 'N/A'
    print(f"[WARN] Cannot create plot. df_filtered.empty={df_filtered.empty}, has ari_mean={has_metric}")
else:
    print(f"[DEBUG] Creating simple line plot with {len(df_filtered)} rows...")
    try:
        line_options = dict(
            x='k',
            y='ari_mean',
            by='summarizer',
            width=800,
            height=400,
            title="ARI Mean vs K (Filtered Data)",
        )
        plot = df_filtered.hvplot.line(**line_options)
        print(f"[DEBUG] Plot created: {type(plot)}")
        display(plot)
    except Exception as e:
        print(f"[ERROR] Simple plot failed: {e}")
        import traceback
        traceback.print_exc()

[DEBUG] Creating simple line plot with 639 rows...
[DEBUG] Plot created: <class 'holoviews.core.overlay.NdOverlay'>


## Quick Preset Plots

The cells below provide one-liner hvplot calls for common views.
Modify them as needed.

In [8]:
# Preset: chosen metric vs k, one line per summarizer, with selector widgets
# for dataset / entry_max / label_max.
if not df.empty:
    # Check which metrics are available. Sweep parameters are excluded so the
    # fallback can never pick e.g. entry_max (also a groupby key) as the y-axis.
    metadata_cols = ['k', 'entry_max', 'label_max', 'actual_entry_count', 'actual_label_count']
    numeric_cols = df.select_dtypes(include=['number']).columns
    available_metrics = [
        col for col in numeric_cols
        if df[col].notna().any() and col not in metadata_cols
    ]

    if 'ari_mean' in df.columns and df['ari_mean'].notna().any():
        y_col = 'ari_mean'
        title = "ARI vs K"
    elif available_metrics:
        y_col = available_metrics[0]
        title = f"{y_col} vs K"
    else:
        print("[WARN] No numeric columns available for plotting")
        y_col = None

    if y_col:
        # Explicit display: an expression nested inside `if` is not
        # auto-rendered by the notebook.
        display(
            df.hvplot.line(
                x="k", y=y_col, by="summarizer",
                groupby=["dataset", "entry_max", "label_max"],
                title=title,
                height=400, responsive=True,
            )
        )
else:
    print("[WARN] No data to plot")

In [9]:
# Preset: silhouette_mean heatmap over (k, label_max), with selector widgets
# for dataset / summarizer / entry_max.
if df.empty:
    print("[WARN] No data to plot")
elif 'silhouette_mean' not in df.columns or df['silhouette_mean'].isna().all():
    # The loader drops metric columns that hold no data, so check before plotting
    print("[WARN] 'silhouette_mean' has no data; skipping heatmap")
else:
    # Explicit display: an expression nested inside a conditional is not
    # auto-rendered by the notebook.
    display(
        df.hvplot.heatmap(
            x="k", y="label_max", C="silhouette_mean",
            groupby=["dataset", "summarizer", "entry_max"],
            cmap="viridis", title="Silhouette Mean",
            height=400, responsive=True,
        )
    )

In [10]:
# Preset: distribution of the chosen metric per dataset, with selector widgets
# for entry_max / label_max.
if not df.empty:
    # Check which metrics are available. Sweep parameters are excluded so the
    # fallback can never pick e.g. entry_max (also a groupby key) as the y-axis.
    metadata_cols = ['k', 'entry_max', 'label_max', 'actual_entry_count', 'actual_label_count']
    numeric_cols = df.select_dtypes(include=['number']).columns
    available_metrics = [
        col for col in numeric_cols
        if df[col].notna().any() and col not in metadata_cols
    ]

    if 'ari_mean' in df.columns and df['ari_mean'].notna().any():
        y_col = 'ari_mean'
        title = "ARI Distribution by Dataset"
    elif available_metrics:
        y_col = available_metrics[0]
        title = f"{y_col} Distribution by Dataset"
    else:
        print("[WARN] No numeric columns available for plotting")
        y_col = None

    if y_col:
        # Explicit display: an expression nested inside `if` is not
        # auto-rendered by the notebook.
        display(
            df.hvplot.box(
                y=y_col, by="dataset",
                groupby=["entry_max", "label_max"],
                title=title,
                height=400, responsive=True,
            )
        )
else:
    print("[WARN] No data to plot")

## Subplot Grid: Metric vs K across Label Max and Summarizers

Matplotlib subplot grid where:
- **Pre-filter**: Select entry_max, dataset, and metric
- **Rows**: Summarizers
- **Columns**: Label max values
- **Each subplot**: K (x-axis) vs metric value (y-axis)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

if not df.empty:
    # Get available options
    datasets_avail = sorted(df['dataset'].unique())
    entry_maxes_avail = sorted(df['entry_max'].dropna().unique())

    # Metric options (display label -> DataFrame column)
    metric_options = {
        'ARI Mean': 'ari_mean',
        'Silhouette Mean': 'silhouette_mean',
        'Coverage Mean': 'coverage_mean',
        'Inertia Mean': 'inertia_mean',
        'ARI vs Ground Truth': 'ari_vs_ground_truth',
        'Objective': 'objective'
    }

    # Create selection widgets
    dataset_select = pn.widgets.Select(
        name='Dataset',
        options=datasets_avail,
        value=datasets_avail[0] if datasets_avail else None
    )

    entry_max_select = pn.widgets.Select(
        name='Entry Max',
        options=[int(x) for x in entry_maxes_avail],
        value=int(entry_maxes_avail[0]) if entry_maxes_avail else None
    )

    metric_select = pn.widgets.Select(
        name='Metric',
        options=metric_options,
        value='ari_mean'
    )

    # Function to create the subplot grid
    @pn.depends(dataset_select, entry_max_select, metric_select)
    def create_subplot_grid(dataset, entry_max, metric):
        """
        Build a summarizer x label_max grid of "metric vs k" line plots.

        Args:
            dataset: Dataset name to keep.
            entry_max: entry_max value to keep.
            metric: DataFrame column to plot on the y-axis.

        Returns:
            A Matplotlib pane holding the grid, or a Markdown pane explaining
            why nothing could be plotted.
        """
        if dataset is None or entry_max is None:
            return pn.pane.Markdown("**Please select dataset and entry_max**")

        # Filter data
        subset = df[
            (df['dataset'] == dataset) &
            (df['entry_max'] == entry_max)
        ]

        if subset.empty:
            return pn.pane.Markdown(f"**No data for dataset={dataset}, entry_max={entry_max}**")

        # Check if metric has data
        if metric not in subset.columns or subset[metric].isna().all():
            return pn.pane.Markdown(f"**Metric '{metric}' has no data for this selection**")

        # Get unique values for subplot dimensions
        summarizers = sorted(subset['summarizer'].unique())
        label_maxes = sorted(subset['label_max'].dropna().unique())

        if not summarizers or not label_maxes:
            return pn.pane.Markdown("**No data available for plotting**")

        # Only "*_mean" metrics have a companion "*_std" column. For the others
        # (objective, ari_vs_ground_truth) a plain str.replace would return the
        # metric itself and shade a meaningless 0..2x band.
        if metric.endswith('_mean'):
            std_col = metric[:-len('_mean')] + '_std'
        else:
            std_col = None

        # Create subplot grid
        n_rows = len(summarizers)
        n_cols = len(label_maxes)
        y_label = metric.replace('_', ' ').title()

        fig, axes = plt.subplots(
            nrows=n_rows,
            ncols=n_cols,
            figsize=(n_cols * 3.5, n_rows * 2.5),
            squeeze=False
        )

        fig.suptitle(f'{y_label}\nDataset: {dataset}, Entry Max: {entry_max}',
                     fontsize=14, fontweight='bold')

        # Plot each combination
        for i, summarizer in enumerate(summarizers):
            for j, label_max in enumerate(label_maxes):
                ax = axes[i, j]

                # Get data for this specific combination
                data = subset[
                    (subset['summarizer'] == summarizer) &
                    (subset['label_max'] == label_max)
                ].sort_values('k')

                if not data.empty:
                    # Plot line
                    ax.plot(data['k'], data[metric], marker='o', linewidth=2, markersize=4)

                    # Shade +/- one std when a matching std column has data
                    if (
                        std_col is not None
                        and std_col in data.columns
                        and not data[std_col].isna().all()
                    ):
                        ax.fill_between(
                            data['k'],
                            data[metric] - data[std_col],
                            data[metric] + data[std_col],
                            alpha=0.2
                        )
                else:
                    ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                            transform=ax.transAxes, fontsize=10, color='gray')

                # Styling
                ax.set_title(f"{summarizer}\nlabel_max={int(label_max)}", fontsize=9)
                ax.grid(True, alpha=0.3)

                # Labels only on edges
                if j == 0:
                    ax.set_ylabel(y_label, fontsize=9)
                if i == n_rows - 1:
                    ax.set_xlabel('k (# clusters)', fontsize=9)

        fig.tight_layout()
        pane = pn.pane.Matplotlib(fig, dpi=100)
        # pyplot keeps every figure it creates alive; close it so repeated
        # widget changes do not accumulate open figures.
        plt.close(fig)
        return pane

    # Layout
    controls = pn.Column(
        "### Controls",
        pn.Row(dataset_select, entry_max_select, metric_select),
        pn.pane.Markdown("**Note:** Change selections above and the plot will update automatically.")
    )

    # Explicit display: an expression nested inside `if` is not auto-rendered
    # by the notebook. .servable() returns the layout, so it still works when served.
    display(pn.Column(controls, create_subplot_grid).servable())
else:
    print("[WARN] No data available")