diff --git a/docs/plot_dataset/__init__.py b/docs/plot_dataset/__init__.py new file mode 100644 index 00000000..85942823 --- /dev/null +++ b/docs/plot_dataset/__init__.py @@ -0,0 +1,13 @@ +"""Plot generation utilities for EEGDash documentation.""" + +from .bubble import generate_dataset_bubble # noqa: F401 +from .colours import ( # noqa: F401 + CANONICAL_MAP, + COLUMN_COLOR_MAPS, + MODALITY_COLOR_MAP, + PATHOLOGY_COLOR_MAP, + TYPE_COLOR_MAP, + hex_to_rgba, +) +from .plot_sankey import generate_dataset_sankey # noqa: F401 +from .ridgeline import generate_modality_ridgeline # noqa: F401 diff --git a/docs/plot_dataset/bubble.py b/docs/plot_dataset/bubble.py new file mode 100644 index 00000000..3e5c3ae5 --- /dev/null +++ b/docs/plot_dataset/bubble.py @@ -0,0 +1,404 @@ +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go + +try: # Allow execution as a script or module + from .colours import MODALITY_COLOR_MAP + from .utils import get_dataset_url, human_readable_size, primary_modality, safe_int +except ImportError: # pragma: no cover - fallback for direct script execution + from colours import MODALITY_COLOR_MAP # type: ignore + from utils import ( # type: ignore + get_dataset_url, + human_readable_size, + primary_modality, + safe_int, + ) + +__all__ = ["generate_dataset_bubble"] + + +def _to_numeric_median_list(val) -> float | None: + if pd.isna(val): + return None + try: + return float(val) + except Exception: + pass + + s = str(val).strip().strip("[]") + if not s: + return None + + try: + nums = [float(x) for x in s.split(",") if str(x).strip()] + if not nums: + return None + return float(np.median(nums)) + except Exception: + return None + + +def _format_int(value) -> str: + if value is None or pd.isna(value): + return "" + try: + return str(int(round(float(value)))) + except Exception: + return str(value) + + +def _build_hover_template(x_field: str, y_field: str) -> tuple[str, str]: + x_map = { + "duration_h": "Duration (x): %{x:.2f} h", + "size_gb": "Size (x): %{x:.2f} GB", + "tasks": "Tasks (x): %{x:,}", + "subjects": "Subjects (x): %{x:,}", + } + y_map = { + "subjects": "Subjects (y): %{y:,}", + } + x_hover = x_map.get(x_field, "Records (x): %{x:,}") + y_hover = y_map.get(y_field, "Records (y): %{y:,}") + return x_hover, y_hover + + +def generate_dataset_bubble( + df: pd.DataFrame, + out_html: str | Path, + *, + x_var: str = "records", + max_width: int = 1280, + height: int = 720, +) -> Path: + """Generate the dataset landscape bubble chart.""" + data = df.copy() + data = data[data["dataset"].str.lower() != "test"] + + data["duration_h"] = pd.to_numeric( + data.get("duration_hours_total"), errors="coerce" + ) + data["subjects"] = pd.to_numeric(data.get("n_subjects"), errors="coerce") + data["records"] = pd.to_numeric(data.get("n_records"), errors="coerce") + data["tasks"] = pd.to_numeric(data.get("n_tasks"), errors="coerce") + data["size_bytes"] = pd.to_numeric(data.get("size_bytes"), errors="coerce") + + data["sfreq"] = data["sampling_freqs"].map(_to_numeric_median_list) + data["nchans"] = data["nchans_set"].map(_to_numeric_median_list) + + data["modality_label"] = data.get("modality of exp").apply(primary_modality) + + GB = 1024**3 + data["size_gb"] = data["size_bytes"] / GB + + x_field = ( + x_var + if x_var in {"records", "duration_h", "size_gb", "tasks", "subjects"} + else "records" + ) + axis_labels = { + "records": "#Records", + "duration_h": "Duration (hours)", + 
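+        # Keys mirror the --x-axis choices exposed by main() below.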
"size_gb": "Size (GB)", + "tasks": "#Tasks", + "subjects": "#Subjects", + } + x_label = f"{axis_labels[x_field]} (log scale)" + y_field = "subjects" if x_field != "subjects" else "records" + y_label = f"{axis_labels[y_field]} (log scale)" + x_hover, y_hover = _build_hover_template(x_field, y_field) + + required_columns = {x_field, y_field, "size_gb"} + data = data.replace([np.inf, -np.inf], np.nan) + data = data.dropna(subset=list(required_columns)) + data = data[(data[x_field] > 0) & (data[y_field] > 0)] + + data["dataset_url"] = data["dataset"].apply(get_dataset_url) + + out_path = Path(out_html) + out_path.parent.mkdir(parents=True, exist_ok=True) + + if data.empty: + empty_html = """ +
+<div>No dataset records available for plotting.</div>
+""" + out_path.write_text(empty_html, encoding="utf-8") + return out_path + + size_max = data["size_gb"].max() + if not np.isfinite(size_max) or size_max <= 0: + size_max = 1.0 + sizeref = (2.0 * size_max) / (40.0**2) + + sfreq_str = data["sfreq"].map(_format_int) + nchans_str = data["nchans"].map(_format_int) + + fig = px.scatter( + data, + x=x_field, + y=y_field, + size="size_gb", + color="modality_label", + hover_name="dataset", + custom_data=[ + data["dataset"], + data["subjects"], + data["records"], + data["tasks"], + nchans_str, + sfreq_str, + data["size_bytes"].map( + lambda bytes_: human_readable_size(safe_int(bytes_, 0)) + ), + data["modality_label"], + data["dataset_url"], + ], + size_max=40, + labels={ + y_field: y_label, + "modality_label": "Modality", + x_field: x_label, + }, + color_discrete_map=MODALITY_COLOR_MAP, + title="", + category_orders={ + "modality_label": [ + label + for label in MODALITY_COLOR_MAP.keys() + if label in data["modality_label"].unique() + ] + }, + log_x=True, + log_y=True, + ) + + numeric_x = pd.to_numeric(data[x_field], errors="coerce") + numeric_y = pd.to_numeric(data[y_field], errors="coerce") + mask = ( + np.isfinite(numeric_x) + & np.isfinite(numeric_y) + & (numeric_x > 0) + & (numeric_y > 0) + ) + + fit_annotation_text = None + if mask.sum() >= 2: + log_x = np.log10(numeric_x[mask]) + log_y = np.log10(numeric_y[mask]) + ss_tot = np.sum((log_y - log_y.mean()) ** 2) + if np.ptp(log_x) > 0 and np.ptp(log_y) > 0 and ss_tot > 0: + slope, intercept = np.polyfit(log_x, log_y, 1) + line_log_x = np.linspace(log_x.min(), log_x.max(), 200) + line_x = 10**line_log_x + line_y = 10 ** (slope * line_log_x + intercept) + fig.add_trace( + go.Scatter( + x=line_x, + y=line_y, + mode="lines", + name="log-log fit", + line=dict(color="#111827", width=2, dash="dot"), + hoverinfo="skip", + showlegend=False, + ) + ) + residuals = log_y - (slope * log_x + intercept) + r_squared = 1 - np.sum(residuals**2) / ss_tot + fit_annotation_text = f"log-log OLS fit R² = {r_squared:.3f}" + + hover_template = ( + "%{customdata[0]}" + f"
{x_hover}" + f"
{y_hover}" + "
Subjects (total): %{customdata[1]:,}" + "
Records (total): %{customdata[2]:,}" + "
Tasks: %{customdata[3]:,}" + "
Channels: %{customdata[4]}" + "
Sampling: %{customdata[5]} Hz" + "
Size: %{customdata[6]}" + "
Modality: %{customdata[7]}" + "
Click bubble to open dataset page" + "" + ) + + for trace in fig.data: + mode = getattr(trace, "mode", "") or "" + if "markers" not in mode: + continue + trace.marker.update( + sizemin=6, + sizemode="area", + sizeref=sizeref, + line=dict(width=0.6, color="rgba(0,0,0,0.3)"), + opacity=0.75, + ) + trace.hovertemplate = hover_template + + fig.update_layout( + height=height, + width=max_width, + margin=dict(l=60, r=40, t=80, b=60), + template="plotly_white", + legend=dict( + title="Modality", + orientation="h", + yanchor="bottom", + y=1.02, + xanchor="right", + x=0.99, + ), + font=dict( + family="Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif", + size=14, + ), + title=dict(text="", x=0.01, xanchor="left", y=0.98, yanchor="top"), + autosize=True, + ) + + if fit_annotation_text: + fig.add_annotation( + xref="paper", + yref="paper", + x=0.02, + y=0.98, + text=fit_annotation_text, + showarrow=False, + font=dict(size=15, color="#111827"), + bgcolor="rgba(255,255,255,0.75)", + bordercolor="rgba(17,24,39,0.25)", + borderwidth=1, + borderpad=6, + ) + + fig.update_xaxes( + showgrid=True, + gridcolor="rgba(0,0,0,0.12)", + zeroline=False, + type="log", + dtick=1, + ) + fig.update_yaxes( + showgrid=True, + gridcolor="rgba(0,0,0,0.12)", + zeroline=False, + type="log", + dtick=1, + ) + + html_content = fig.to_html( + full_html=False, + include_plotlyjs=False, + div_id="dataset-bubble", + config={ + "responsive": True, + "displaylogo": False, + "modeBarButtonsToRemove": ["lasso2d", "select2d"], + "toImageButtonOptions": { + "format": "png", + "filename": "dataset_landscape", + "height": height, + "width": max_width, + "scale": 2, + }, + }, + ) + + styled_html = f""" + +
+<div>Loading dataset landscape...</div>
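+<!-- Figure markup produced by fig.to_html(include_plotlyjs=False); the docs page is expected to load plotly.js itself -->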
+{html_content} + +""" + + out_path.write_text(styled_html, encoding="utf-8") + return out_path + + +def _read_dataset(path: Path) -> pd.DataFrame: + return pd.read_csv(path, index_col=False, header=0, skipinitialspace=True) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser(description="Generate the dataset bubble chart.") + parser.add_argument("source", type=Path, help="Path to dataset summary CSV") + parser.add_argument( + "--output", + type=Path, + default=Path("dataset_bubble.html"), + help="Output HTML file", + ) + parser.add_argument( + "--x-axis", + choices=["records", "duration_h", "size_gb", "tasks", "subjects"], + default="records", + help="Field for the bubble chart x-axis", + ) + args = parser.parse_args() + + df = _read_dataset(args.source) + output_path = generate_dataset_bubble(df, args.output, x_var=args.x_axis) + print(f"Bubble chart saved to {output_path.resolve()}") + + +if __name__ == "__main__": + main() diff --git a/docs/plot_dataset/colours.py b/docs/plot_dataset/colours.py new file mode 100644 index 00000000..7d2d50ae --- /dev/null +++ b/docs/plot_dataset/colours.py @@ -0,0 +1,98 @@ +"""Helpers for Sankey diagram generation.""" + +# Color mappings consistent with prepare_summary_tables.py and custom.css +PATHOLOGY_COLOR_MAP = { + "Healthy": "#22c55e", # green + "Clinical": "#f87171", # Lighter red to match table + "Unknown": "#94a3b8", # grey +} + +MODALITY_COLOR_MAP = { + "Visual": "#2563eb", + "Auditory": "#0ea5e9", + "Tactile": "#10b981", + "Somatosensory": "#10b981", + "Multisensory": "#ec4899", + "Motor": "#f59e0b", + "Resting State": "#6366f1", + "Rest": "#6366f1", + "Sleep": "#7c3aed", + "Other": "#14b8a6", + "Unknown": "#94a3b8", +} + +TYPE_COLOR_MAP = { + "Perception": "#3b82f6", + "Decision-making": "#eab308", + "Rest": "#16a34a", + "Resting-state": "#16a34a", + "Sleep": "#8b5cf6", + "Cognitive": "#6366f1", + "Clinical": "#f87171", # Lighter red to match table + "Memory": "#c4b5fd", # Lighter purple to match table + "Attention": "#c4b5fd", # Lighter purple to match table + "Intervention": "#c4b5fd", # Lighter purple to match table + "Learning": "#c4b5fd", # Lighter purple to match table + "Other": "#c4b5fd", # Lighter purple to match table + "Unknown": "#94a3b8", +} + +# Canonical mappings to normalize values +CANONICAL_MAP = { + "Type Subject": { + "healthy controls": "Healthy", + "healthy": "Healthy", + "control": "Healthy", + "clinical": "Clinical", + "patient": "Clinical", + }, + "modality of exp": { + "visual": "Visual", + "auditory": "Auditory", + "tactile": "Tactile", + "somatosensory": "Tactile", + "multisensory": "Multisensory", + "motor": "Motor", + "rest": "Resting State", + "resting state": "Resting State", + "resting-state": "Resting State", + "sleep": "Sleep", + "other": "Other", + }, + "type of exp": { + "perception": "Perception", + "decision making": "Decision-making", + "decision-making": "Decision-making", + "rest": "Rest", + "resting state": "Resting-state", + "resting-state": "Resting-state", + "sleep": "Sleep", + "cognitive": "Cognitive", + "clinical": "Clinical", + "other": "Other", + }, +} + +# Map column names to their color maps +COLUMN_COLOR_MAPS = { + "Type Subject": PATHOLOGY_COLOR_MAP, + "modality of exp": MODALITY_COLOR_MAP, + "type of exp": TYPE_COLOR_MAP, +} + + +def hex_to_rgba(hex_color: str, alpha: float = 0.2) -> str: + """Convert hex color to rgba with given alpha.""" + if not isinstance(hex_color, str) or not hex_color.startswith("#"): + # This is not a valid hex color, return a default 
color + return "rgba(148, 163, 184, 0.2)" # Default grey + hex_color = hex_color.lstrip("#") + if len(hex_color) != 6: + return "rgba(148, 163, 184, 0.2)" # Default grey for invalid length + try: + r = int(hex_color[0:2], 16) + g = int(hex_color[2:4], 16) + b = int(hex_color[4:6], 16) + except ValueError: + return "rgba(148, 163, 184, 0.2)" # Default grey for conversion error + return f"rgba({r}, {g}, {b}, {alpha})" diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py new file mode 100644 index 00000000..fb41a3a8 --- /dev/null +++ b/docs/plot_dataset/plot_sankey.py @@ -0,0 +1,352 @@ +from __future__ import annotations + +"""Generate a Sankey diagram from the EEG-Dash dataset summary. + +The script loads ``eegdash/dataset/dataset_summary.csv`` (by default) and builds +an interactive Plotly Sankey diagram connecting three categorical columns. This +mirrors how the documentation summarises datasets across subject type, modality, +and experiment type, but can be reused with any trio of categorical columns via +CLI arguments. +""" + +import argparse +from pathlib import Path +from typing import Sequence + +import pandas as pd +import plotly.graph_objects as go + +try: # Support execution as a script or as a package module + from .colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba +except ImportError: # pragma: no cover - fallback for direct script execution + from colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba + +DEFAULT_COLUMNS = ["Type Subject", "modality of exp", "type of exp"] +__all__ = ["generate_dataset_sankey", "build_sankey"] + + +def _prepare_dataframe(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame: + all_columns = list(columns) + if "n_subjects" not in all_columns: + all_columns.append("n_subjects") + + missing = [col for col in all_columns if col not in df.columns] + if missing: + msg = f"Columns not found in dataframe: {missing}" + raise ValueError(msg) + + cleaned = df.copy() + + # Fill missing n_subjects with 1 (to count as at least one dataset) + # and ensure the column is numeric integer type. + cleaned["n_subjects"] = ( + pd.to_numeric(cleaned["n_subjects"], errors="coerce").fillna(1).astype(int) + ) + + # Process each column for cleaning and normalization + for col in columns: + # 1. Fill original NaN values with the string 'Unknown' + cleaned[col] = cleaned[col].fillna("Unknown") + + # 2. Split multi-valued cells + cleaned[col] = cleaned[col].astype(str).str.split(r"/|;|,", regex=True) + cleaned = cleaned.explode(col) + + # 3. Clean up whitespace and any empty strings created by splitting + cleaned[col] = cleaned[col].str.strip() + cleaned[col] = cleaned[col].replace(["", "nan"], "Unknown") + + # 4. Apply canonical mapping to standardize terms + if col in CANONICAL_MAP: + mapping = CANONICAL_MAP[col] + # Use .str.lower() for case-insensitive mapping + cleaned[col] = cleaned[col].str.lower().map(mapping).fillna(cleaned[col]) + + # 5. Apply special rule for 'Type Subject' after all other processing + if "Type Subject" in columns: + # The user wants to preserve original labels but color them as 'Clinical'. + # The relabeling to 'Clinical' is now removed. The coloring logic will handle this. 
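+        # Intentional no-op: labels keep their original names, and
+        # _build_sankey_data assigns the "Clinical" color to any value
+        # other than "Healthy" or "Unknown".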
+ pass + + return cleaned[all_columns] + + +def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame: + df = pd.read_csv( + path, + index_col=False, + header=0, + skipinitialspace=True, + ) + return _prepare_dataframe(df, columns) + + +def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]): + node_labels: list[str] = [] + node_colors: list[str] = [] + node_index: dict[tuple[str, str], int] = {} + + for col in columns: + color_map = COLUMN_COLOR_MAPS.get(col, {}) + + # Sort unique values to ensure "Unknown" appears at the bottom + all_unique = df[col].unique() + # Separate "Unknown" and sort the rest alphabetically + known_values = sorted([v for v in all_unique if v != "Unknown"]) + unique_values = known_values + # Add "Unknown" to the end if it exists + if "Unknown" in all_unique: + unique_values.append("Unknown") + + for val in unique_values: + if (col, val) not in node_index: + node_index[(col, val)] = len(node_labels) + node_labels.append(val) + + # Use "Clinical" color for specific pathologies + node_color = color_map.get(val, "#94a3b8") + if col == "Type Subject" and val not in ["Healthy", "Unknown"]: + node_color = color_map.get("Clinical", "#94a3b8") + node_colors.append(node_color) + + sources: list[int] = [] + targets: list[int] = [] + values: list[int] = [] + link_colors: list[str] = [] + link_hover_labels: list[str] = [] + + for idx in range(len(columns) - 1): + col_from, col_to = columns[idx], columns[idx + 1] + + # Use the color from the source node for the link + source_color_map = COLUMN_COLOR_MAPS.get(col_from, {}) + + # Group by source and target, getting both sum of subjects and count of datasets + grouped = ( + df.groupby([col_from, col_to]) + .agg( + subject_sum=("n_subjects", "sum"), + dataset_count=("n_subjects", "size"), + ) + .reset_index() + ) + + for _, row in grouped.iterrows(): + source_val, target_val, subject_sum, dataset_count = ( + row[col_from], + row[col_to], + row["subject_sum"], + row["dataset_count"], + ) + + source_node_idx = node_index.get((col_from, source_val)) + target_node_idx = node_index.get((col_to, target_val)) + + if source_node_idx is not None and target_node_idx is not None: + sources.append(source_node_idx) + targets.append(target_node_idx) + values.append(subject_sum) # Weight links by sum of subjects + link_hover_labels.append( + f"{source_val} → {target_val}:
" + f"{subject_sum} subjects in {dataset_count} datasets" + ) + + # Assign color to the link based on the source node + source_color = source_color_map.get(source_val, "#94a3b8") + if col_from == "Type Subject" and source_val not in [ + "Healthy", + "Unknown", + ]: + source_color = source_color_map.get("Clinical", "#94a3b8") + link_colors.append(hex_to_rgba(source_color)) + + # Add counts (subjects and datasets) and percentages to the first column labels + first_col_name = columns[0] + first_col_stats = df.groupby(first_col_name).agg( + subject_sum=("n_subjects", "sum"), + dataset_count=("n_subjects", "size"), + ) + total_subjects = first_col_stats["subject_sum"].sum() + + for i, label in enumerate(node_labels): + col, val = next((k for k, v in node_index.items() if v == i), (None, None)) + if col == first_col_name and val in first_col_stats.index: + stats = first_col_stats.loc[val] + subject_sum = stats["subject_sum"] + dataset_count = stats["dataset_count"] + percentage = ( + (subject_sum / total_subjects) * 100 if total_subjects > 0 else 0 + ) + node_labels[i] = ( + f"{label}
({subject_sum} subjects, {dataset_count} datasets, {percentage:.1f}%)" + ) + + return ( + node_labels, + node_colors, + sources, + targets, + values, + link_colors, + link_hover_labels, + ) + + +def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure: + ( + labels, + colors, + sources, + targets, + values, + link_colors, + link_hover_labels, + ) = _build_sankey_data(df, columns) + + sankey = go.Sankey( + arrangement="snap", + node=dict( + pad=30, + thickness=18, + label=labels, + color=colors, + align="left", # Align all labels to the left of the node bars + ), + link=dict( + source=sources, + target=targets, + value=values, + color=link_colors, + hovertemplate="%{customdata}", + customdata=link_hover_labels, + ), + ) + + fig = go.Figure(sankey) + + fig.update_layout( + font=dict(size=14), + height=900, + width=None, + autosize=True, + margin=dict(t=40, b=40, l=40, r=40), + annotations=[ + dict( + x=0, + y=1.05, + xref="paper", + yref="paper", + text="Population Type", + showarrow=False, + font=dict(size=16, color="black"), + ), + dict( + x=0.5, + y=1.05, + xref="paper", + yref="paper", + text="Experimental Modality", + showarrow=False, + font=dict(size=16, color="black"), + ), + dict( + x=1, + y=1.05, + xref="paper", + yref="paper", + text="Cognitive Domain", + showarrow=False, + font=dict(size=16, color="black"), + ), + dict( + x=0, + y=-0.15, # Position the note below the chart + xref="paper", + yref="paper", + text='Note on "Unknown" category: This large portion represents datasets that are still pending categorization.', + showarrow=False, + align="left", + xanchor="left", + font=dict(size=12, color="dimgray"), + ), + ], + ) + return fig + + +def generate_dataset_sankey( + df: pd.DataFrame, + out_html: str | Path, + *, + columns: Sequence[str] | None = None, +) -> Path: + """Generate the dataset Sankey diagram and write it to *out_html*.""" + selected_columns = list(columns) if columns is not None else list(DEFAULT_COLUMNS) + prepared = _prepare_dataframe(df, selected_columns) + fig = build_sankey(prepared, selected_columns) + + out_path = Path(out_html) + out_path.parent.mkdir(parents=True, exist_ok=True) + + html_content = fig.to_html( + full_html=False, + include_plotlyjs=False, + div_id="dataset-sankey", + config={ + "responsive": True, + "displaylogo": False, + "modeBarButtonsToRemove": ["lasso2d", "select2d"], + }, + ) + + out_path.write_text(html_content, encoding="utf-8") + return out_path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Generate a Sankey diagram from the dataset summary CSV." 
+ ) + parser.add_argument( + "--source", + type=Path, + default=Path("eegdash/dataset/dataset_summary.csv"), + help="Path to the dataset summary CSV file.", + ) + parser.add_argument( + "--columns", + nargs=3, + metavar=("FIRST", "SECOND", "THIRD"), + default=DEFAULT_COLUMNS, + help="Three categorical columns to connect in the Sankey plot.", + ) + parser.add_argument( + "--output", + type=Path, + default=Path("dataset_summary_sankey.html"), + help="Output HTML file for the interactive Sankey diagram.", + ) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + if not args.source.exists(): + raise FileNotFoundError(f"Dataset summary CSV not found at {args.source}") + + columns = list(args.columns) + df = _load_dataframe(args.source, columns) + fig = build_sankey(df, columns) + + args.output.parent.mkdir(parents=True, exist_ok=True) + fig.write_html( + str(args.output), + include_plotlyjs="cdn", + full_html=True, + auto_open=False, + ) + print(f"Sankey diagram saved to {args.output.resolve()}") + + +if __name__ == "__main__": + main() diff --git a/docs/plot_dataset/ridgeline.py b/docs/plot_dataset/ridgeline.py new file mode 100644 index 00000000..34d5a83f --- /dev/null +++ b/docs/plot_dataset/ridgeline.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import json +from datetime import datetime +from pathlib import Path + +import numpy as np +import pandas as pd +import plotly.graph_objects as go +from plotly.utils import PlotlyJSONEncoder +from scipy.stats import gaussian_kde + +try: # Allow execution as a script or module + from .colours import MODALITY_COLOR_MAP, hex_to_rgba + from .utils import get_dataset_url, primary_modality +except ImportError: # pragma: no cover - fallback for direct script execution + from colours import MODALITY_COLOR_MAP, hex_to_rgba # type: ignore + from utils import get_dataset_url, primary_modality # type: ignore + +__all__ = ["generate_modality_ridgeline"] + + +def generate_modality_ridgeline( + df: pd.DataFrame, + out_html: str | Path, + *, + rng_seed: int = 42, +) -> Path | None: + """Generate a ridgeline (KDE) plot showing participants per modality.""" + data = df[df["dataset"].str.lower() != "test"].copy() + data["modality_label"] = data["modality of exp"].apply(primary_modality) + data["n_subjects"] = pd.to_numeric(data["n_subjects"], errors="coerce") + data = data.dropna(subset=["n_subjects"]) + data = data[data["modality_label"] != "Other"] + + if data.empty: + return None + + median_participants = ( + data.groupby("modality_label")["n_subjects"].median().sort_values() + ) + order = [ + label + for label in median_participants.index + if label in data["modality_label"].unique() + ] + if not order: + return None + + fig = go.Figure() + rng = np.random.default_rng(rng_seed) + amplitude = 0.6 + row_spacing = 0.95 + + for idx, label in enumerate(order): + subset = data[data["modality_label"] == label].copy() + values = subset["n_subjects"].astype(float).dropna() + if len(values) < 3: + continue + + subset["dataset_url"] = subset["dataset"].apply(get_dataset_url) + log_vals = np.log10(values) + grid = np.linspace(log_vals.min() - 0.25, log_vals.max() + 0.25, 240) + kde = gaussian_kde(log_vals) + density = kde(grid) + if density.max() <= 0: + continue + + density_norm = density / density.max() + baseline = idx * row_spacing + y_curve = baseline + density_norm * amplitude + x_curve = 10**grid + + color = MODALITY_COLOR_MAP.get(label, "#6b7280") + fill = hex_to_rgba(color, 0.28) + + fig.add_trace( + go.Scatter( + 
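+                # Upper KDE outline followed by the reversed flat baseline,
+                # forming the closed polygon that fill="toself" shades below.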
x=np.concatenate([x_curve, x_curve[::-1]]), + y=np.concatenate([y_curve, np.full_like(y_curve, baseline)]), + name=label, + fill="toself", + fillcolor=fill, + line=dict(color="rgba(0,0,0,0)"), + hoverinfo="skip", + showlegend=False, + ) + ) + + fig.add_trace( + go.Scatter( + x=x_curve, + y=y_curve, + mode="lines", + name=label, + line=dict(color=color, width=2), + hovertemplate=f"{label}
#Participants: %{{x:.0f}}", + showlegend=False, + ) + ) + + jitter = rng.uniform(0.02, amplitude * 0.5, size=len(values)) + median_val = float(median_participants.get(label, np.nan)) + custom_data = np.column_stack( + [subset["dataset"].to_numpy(), subset["dataset_url"].to_numpy()] + ) + fig.add_trace( + go.Scatter( + x=values, + y=np.full_like(values, baseline) + jitter, + mode="markers", + name=label, + marker=dict(color=color, size=8, opacity=0.6), + customdata=custom_data, + hovertemplate="%{customdata[0]}
#Participants: %{x}
Click to view dataset details", + showlegend=False, + ) + ) + + if np.isfinite(median_val) and median_val > 0: + fig.add_trace( + go.Scatter( + x=[median_val, median_val], + y=[baseline, baseline + amplitude], + mode="lines", + line=dict(color=color, width=2, dash="dash"), + hovertemplate=( + f"{label}
Median participants: {median_val:.0f}" + ), + showlegend=False, + ) + ) + + if not fig.data: + return None + + kde_height = max(650, 150 * len(order)) + date_stamp = datetime.now().strftime("%d/%m/%Y") + fig.update_layout( + height=kde_height, + width=1200, + template="plotly_white", + xaxis=dict( + type="log", + title=dict(text="Number of Participants (Log Scale)", font=dict(size=18)), + showgrid=True, + gridcolor="rgba(0,0,0,0.08)", + zeroline=False, + dtick=1, + minor=dict(showgrid=True, gridcolor="rgba(0,0,0,0.04)"), + tickfont=dict(size=14), + ), + yaxis=dict( + title=dict(text="Modality", font=dict(size=18)), + tickmode="array", + tickvals=[idx * row_spacing for idx in range(len(order))], + ticktext=order, + showgrid=False, + range=[-0.25, max(0.35, (len(order) - 1) * row_spacing + amplitude + 0.25)], + tickfont=dict(size=14), + ), + showlegend=False, + margin=dict(l=120, r=40, t=108, b=80), + title=dict( + text=f"
Based on EEG-Dash datasets available at {date_stamp}.", + x=0.5, + xanchor="center", + y=0.98, + yanchor="top", + font=dict(size=20), + ), + autosize=True, + font=dict(size=16), + ) + + fig.add_annotation( + xref="paper", + yref="paper", + x=0.98, + y=0.02, + text="Visual studies consistently use the
largest sample sizes, typically 20-30 participants", + showarrow=False, + font=dict(size=14, color="#111827"), + bgcolor="rgba(255,255,255,0.9)", + bordercolor="rgba(17,24,39,0.3)", + borderwidth=1, + borderpad=8, + xanchor="right", + yanchor="bottom", + ) + + plot_config = { + "responsive": True, + "displaylogo": False, + "modeBarButtonsToRemove": ["lasso2d", "select2d"], + "toImageButtonOptions": { + "format": "png", + "filename": "participant_kde", + "height": kde_height, + "width": 1200, + "scale": 2, + }, + } + + fig_spec = fig.to_plotly_json() + data_json = json.dumps(fig_spec.get("data", []), cls=PlotlyJSONEncoder) + layout_json = json.dumps(fig_spec.get("layout", {}), cls=PlotlyJSONEncoder) + config_json = json.dumps(plot_config, cls=PlotlyJSONEncoder) + + styled_html = f""" + +
+<div>Loading participant distribution...</div>
+<div id="modality-ridgeline"></div>
+<script>Plotly.newPlot("modality-ridgeline", {data_json}, {layout_json}, {config_json});</script>
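+<!-- data/layout/config are serialized with PlotlyJSONEncoder so numpy values survive the round trip to JSON -->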
+ +""" + + out_path = Path(out_html) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(styled_html, encoding="utf-8") + return out_path + + +def _read_dataset(path: Path) -> pd.DataFrame: + return pd.read_csv(path, index_col=False, header=0, skipinitialspace=True) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser( + description="Generate the modality ridgeline plot from a dataset summary CSV." + ) + parser.add_argument("source", type=Path, help="Path to dataset summary CSV") + parser.add_argument( + "--output", + type=Path, + default=Path("dataset_kde_modalities.html"), + help="Output HTML file", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Random seed controlling jitter placement", + ) + args = parser.parse_args() + + df = _read_dataset(args.source) + output_path = generate_modality_ridgeline(df, args.output, rng_seed=args.seed) + if output_path is None: + print("Ridgeline plot could not be generated (insufficient data).") + else: + print(f"Ridgeline plot saved to {output_path.resolve()}") + + +if __name__ == "__main__": + main() diff --git a/docs/plot_dataset/utils.py b/docs/plot_dataset/utils.py new file mode 100644 index 00000000..2a518d69 --- /dev/null +++ b/docs/plot_dataset/utils.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import pandas as pd + +try: # Allow import both as package and script + from .colours import CANONICAL_MAP, MODALITY_COLOR_MAP +except ImportError: # pragma: no cover - fallback for direct script execution + from colours import CANONICAL_MAP, MODALITY_COLOR_MAP # type: ignore + +__all__ = [ + "get_dataset_url", + "human_readable_size", + "primary_modality", + "safe_int", +] + +_SEPARATORS = ("/", "|", ";") + + +def primary_modality(value: Any) -> str: + """Return the canonical modality label for a record.""" + if value is None: + return "Unknown" + if isinstance(value, float) and pd.isna(value): + return "Unknown" + + text = str(value).strip() + if not text: + return "Unknown" + + # normalise separators, keep order of appearance + for sep in _SEPARATORS: + text = text.replace(sep, ",") + tokens = [tok.strip() for tok in text.split(",") if tok.strip()] + if not tokens: + return "Unknown" + + first = tokens[0] + canonical_map = CANONICAL_MAP.get("modality of exp", {}) + lowered = first.lower() + canonical = canonical_map.get(lowered) + if canonical: + return canonical + + if first in MODALITY_COLOR_MAP: + return first + + title_variant = first.title() + if title_variant in MODALITY_COLOR_MAP: + return title_variant + + return "Other" + + +def safe_int(value: Any, default: int | None = None) -> int | None: + """Convert *value* to ``int`` when possible; otherwise return *default*.""" + try: + if value is None or (isinstance(value, float) and pd.isna(value)): + return default + return int(round(float(value))) + except Exception: + return default + + +def human_readable_size(num_bytes: int | float | None) -> str: + """Format bytes using the closest unit among MB, GB, TB (fallback to KB/B).""" + if num_bytes is None: + return "0 B" + + try: + size = float(num_bytes) + except Exception: + return "0 B" + + units = [ + (1024**4, "TB"), + (1024**3, "GB"), + (1024**2, "MB"), + (1024**1, "KB"), + (1, "B"), + ] + + for factor, unit in units: + if size >= factor: + value = size / factor + if unit in {"B", "KB"}: + return f"{int(round(value))} {unit}" + return f"{value:.2f} {unit}" + return "0 B" + + +def get_dataset_url(name: str) 
-> str: + """Generate dataset URL for plots (relative to dataset summary page).""" + if name is None or (isinstance(name, float) and pd.isna(name)): + return "" + text = str(name).strip() + if not text: + return "" + return f"api/dataset/eegdash.dataset.{text.upper()}.html" + + +def ensure_directory(path: str | Path) -> Path: + """Create *path* directory if required and return ``Path`` instance.""" + dest = Path(path) + dest.mkdir(parents=True, exist_ok=True) + return dest diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py index fd904296..033824f3 100644 --- a/docs/prepare_summary_tables.py +++ b/docs/prepare_summary_tables.py @@ -1,384 +1,32 @@ import glob -import json +import textwrap from argparse import ArgumentParser +from datetime import datetime from pathlib import Path from shutil import copyfile import numpy as np import pandas as pd -import plotly.express as px -import plotly.graph_objects as go -from plotly.utils import PlotlyJSONEncoder -from scipy.stats import gaussian_kde +from plot_dataset import ( + generate_dataset_bubble, + generate_dataset_sankey, + generate_modality_ridgeline, +) +from plot_dataset.utils import get_dataset_url, human_readable_size from table_tag_utils import wrap_tags DOCS_DIR = Path(__file__).resolve().parent STATIC_DATASET_DIR = DOCS_DIR / "source" / "_static" / "dataset_generated" -MODALITY_CANONICAL = { - "visual": "Visual", - "auditory": "Auditory", - "tactile": "Tactile", - "somatosensory": "Tactile", - "multisensory": "Multisensory", - "motor": "Motor", - "rest": "Resting State", - "resting state": "Resting State", - "resting-state": "Resting State", - "sleep": "Sleep", - "other": "Other", -} - -MODALITY_COLOR_MAP = { - "Visual": "#2563eb", - "Auditory": "#0ea5e9", - "Tactile": "#10b981", - "Multisensory": "#ec4899", - "Motor": "#f59e0b", - "Resting State": "#6366f1", - "Sleep": "#7c3aed", - "Other": "#14b8a6", - "Unknown": "#94a3b8", -} - - -def _hex_to_rgba(hex_color: str, alpha: float = 0.4) -> str: - hex_color = hex_color.lstrip("#") - if len(hex_color) != 6: - return f"rgba(99, 102, 241, {alpha})" - r = int(hex_color[0:2], 16) - g = int(hex_color[2:4], 16) - b = int(hex_color[4:6], 16) - return f"rgba({r}, {g}, {b}, {alpha})" - - -def _primary_modality(value: object) -> str: - if value is None: - return "Unknown" - if isinstance(value, float) and pd.isna(value): - return "Unknown" - text = str(value).strip() - if not text: - return "Unknown" - for sep in ("/", "|", ";"): - text = text.replace(sep, ",") - tokens = [tok.strip() for tok in text.split(",") if tok.strip()] - if not tokens: - return "Unknown" - raw = tokens[0].lower() - canonical = MODALITY_CANONICAL.get(raw) - if canonical: - return canonical - candidate = tokens[0].strip() - title_candidate = candidate.title() - if title_candidate in MODALITY_COLOR_MAP: - return title_candidate - return "Other" - - -def _to_numeric_median_list(val) -> float | None: - """Return a numeric value from possible list-like strings. 
- - Examples - -------- - - "64" -> 64 - - "6,129" -> median -> 67.5 -> 68 - - "128, 512" -> 320 - - 500.0 -> 500 - - """ - if pd.isna(val): - return None - try: - # already numeric - return float(val) - except Exception: - pass - s = str(val).strip().strip("[]") - if not s: - return None - try: - nums = [float(x) for x in s.split(",") if str(x).strip()] - if not nums: - return None - return float(np.median(nums)) - except Exception: - return None - - -def _safe_int(x, default=None): - try: - if x is None or pd.isna(x): - return default - return int(round(float(x))) - except Exception: - return default - - -def gen_datasets_bubble( - df: pd.DataFrame, - out_html: str = "_static/dataset/dataset_bubble.html", - x_var: str = "records", # one of: 'records', 'duration_h', 'size_gb', 'tasks' -): - """Generate an interactive bubble chart for datasets. - - - x: total duration (hours) - - y: number of subjects - - size: on-disk size (GB) - - color: dataset modality - """ - d = df.copy() - d = d[d["dataset"].str.lower() != "test"] - - # numeric columns - d["duration_h"] = pd.to_numeric(d.get("duration_hours_total"), errors="coerce") - d["subjects"] = pd.to_numeric(d.get("n_subjects"), errors="coerce") - d["records"] = pd.to_numeric(d.get("n_records"), errors="coerce") - d["tasks"] = pd.to_numeric(d.get("n_tasks"), errors="coerce") - d["size_bytes"] = pd.to_numeric(d.get("size_bytes"), errors="coerce") - - # parse sampling and channels into representative numeric values - d["sfreq"] = d["sampling_freqs"].map(_to_numeric_median_list) - d["nchans"] = d["nchans_set"].map(_to_numeric_median_list) - - d["modality_label"] = d.get("modality of exp").apply(_primary_modality) - - # disk size in GB for sizing - GB = 1024**3 - d["size_gb"] = d["size_bytes"] / GB - - # hover content - def _fmt_size(bytes_): - return human_readable_size(_safe_int(bytes_, 0)) - - # choose x axis field and labels - x_field = ( - x_var if x_var in {"records", "duration_h", "size_gb", "tasks"} else "records" - ) - x_label = { - "records": "#Records", - "duration_h": "Duration (hours)", - "size_gb": "Size (GB)", - "tasks": "#Tasks", - }[x_field] - - # hover text adapts to x - if x_field == "duration_h": - x_hover = "Duration: %{x:.2f} h" - elif x_field == "size_gb": - x_hover = "Size: %{x:.2f} GB" - elif x_field == "tasks": - x_hover = "Tasks: %{x:,}" - else: - x_hover = "Records (x): %{x:,}" - - hover = ( - "%{customdata[0]}" # dataset id - "
Subjects: %{y:,}" - f"
{x_hover}" - "
Records: %{customdata[1]:,}" - "
Tasks: %{customdata[2]:,}" - "
Channels: %{customdata[3]}" - "
Sampling: %{customdata[4]} Hz" - "
Size: %{customdata[5]}" - "
Modality: %{customdata[6]}" - "" - ) - - d = d.dropna(subset=["duration_h", "subjects", "size_gb"]) # need these - - # Marker sizing: scale into a good visual range - max_size = max(d["size_gb"].max(), 1) - sizeref = (2.0 * max_size) / (40.0**2) # target ~40px max marker - - # Prepare prettified strings for hover - def _fmt_int(v): - if v is None or pd.isna(v): - return "" - try: - return str(int(round(float(v)))) - except Exception: - return str(v) - - sfreq_str = d["sfreq"].map(_fmt_int) - nchans_str = d["nchans"].map(_fmt_int) - - fig = px.scatter( - d, - x=x_field, - y="subjects", - size="size_gb", - color="modality_label", - hover_name="dataset", - custom_data=[ - d["dataset"], - d["records"], - d["tasks"], - nchans_str, - sfreq_str, - d["size_bytes"].map(_fmt_size), - d["modality_label"], - ], - size_max=40, - labels={ - "subjects": "#Subjects", - "modality_label": "Modality", - x_field: x_label, - }, - color_discrete_map=MODALITY_COLOR_MAP, - title="", - category_orders={ - "modality_label": [ - label - for label in MODALITY_COLOR_MAP.keys() - if label in d["modality_label"].unique() - ] - }, - ) - - # tune marker sizing explicitly for better control - for tr in fig.data: - tr.marker.update( - sizemin=6, - sizemode="area", - sizeref=sizeref, - line=dict(width=0.6, color="rgba(0,0,0,0.3)"), - ) - tr.hovertemplate = hover - - fig.update_layout( - height=750, - width=1200, # Set explicit width for consistent sizing - margin=dict(l=60, r=40, t=80, b=60), - template="plotly_white", - legend=dict( - title="Modality", - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=0.99, - ), - font=dict( - family="Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif", - size=14, - ), - title=dict( - text="", - x=0.01, - xanchor="left", - y=0.98, - yanchor="top", - pad=dict(t=10, b=8), - ), - autosize=True, # Enable auto-sizing to fill container - ) - - fig.update_xaxes(showgrid=True, gridcolor="rgba(0,0,0,0.12)", zeroline=False) - fig.update_yaxes(showgrid=True, gridcolor="rgba(0,0,0,0.12)", zeroline=False) - - out_path = Path(out_html) - out_path.parent.mkdir(parents=True, exist_ok=True) - # Add CSS and loading indicator for immediate proper sizing - html_content = fig.to_html( - full_html=False, - include_plotlyjs=False, - div_id="dataset-bubble", - config={ - "responsive": True, - "displaylogo": False, - "modeBarButtonsToRemove": ["lasso2d", "select2d"], - "toImageButtonOptions": { - "format": "png", - "filename": "dataset_landscape", - "height": 750, - "width": 1200, - "scale": 2, - }, - }, - ) - - # Wrap with styling to ensure proper initial sizing - styled_html = f""" - -
-<div>Loading dataset landscape...</div>
-{html_content} - -""" - - with open(str(out_path), "w", encoding="utf-8") as f: - f.write(styled_html) - return str(out_path) - - -def human_readable_size(num_bytes: int) -> str: - """Format bytes using the closest unit among MB, GB, TB (fallback to KB/B). - - Chooses the largest unit such that the value is >= 1. Uses base 1024. - """ - if num_bytes is None: - return "0 B" - size = float(num_bytes) - units = [ - (1024**4, "TB"), - (1024**3, "GB"), - (1024**2, "MB"), - (1024**1, "KB"), - (1, "B"), - ] - for factor, unit in units: - if size >= factor: - value = size / factor - # Use no decimals for B/KB; two decimals otherwise - if unit in ("B", "KB"): - return f"{int(round(value))} {unit}" - return f"{value:.2f} {unit}" - return "0 B" - - -def get_dataset_url(name: str) -> str: - """Generate dataset URL for plots (relative to dataset summary page).""" - name = name.strip() - return f"api/dataset/eegdash.dataset.{name.upper()}.html" - - def wrap_dataset_name(name: str): # Remove any surrounding whitespace name = name.strip() # Link to the individual dataset API page # Updated structure: api/dataset/eegdash.dataset..html - url = f"api/dataset/eegdash.dataset.{name.upper()}.html" + url = get_dataset_url(name) + if not url: + return name.upper() return f'{name.upper()}' @@ -406,6 +54,163 @@ def wrap_dataset_name(name: str): }, } +DATA_TABLE_TEMPLATE = textwrap.dedent( + r""" + + + + + + + + + + + + + + + + + + +""" +) + def _tag_normalizer(kind: str): canonical = {k.lower(): v for k, v in DATASET_CANONICAL_MAP.get(kind, {}).items()} @@ -517,10 +322,21 @@ def main(source_dir: str, target_dir: str): f, index_col=False, header=0, skipinitialspace=True ) # , sep=";") # Generate bubble chart from the raw data to have access to size_bytes - # Use x-axis as number of records for better spread bubble_path = target_dir / "dataset_bubble.html" - gen_datasets_bubble(df_raw, str(bubble_path), x_var="records") - copyfile(bubble_path, STATIC_DATASET_DIR / bubble_path.name) + bubble_output = generate_dataset_bubble( + df_raw, + bubble_path, + x_var="subjects", + ) + copyfile(bubble_output, STATIC_DATASET_DIR / bubble_output.name) + + # Generate Sankey diagram showing dataset flow across categories + try: + sankey_path = target_dir / "dataset_sankey.html" + sankey_output = generate_dataset_sankey(df_raw, sankey_path) + copyfile(sankey_output, STATIC_DATASET_DIR / sankey_output.name) + except Exception as exc: + print(f"[dataset Sankey] Skipped due to error: {exc}") df = prepare_table(df_raw) # preserve int values @@ -568,239 +384,18 @@ def main(source_dir: str, target_dir: str): escape=False, table_id="datasets-table", ) + html_table = DATA_TABLE_TEMPLATE.replace("", html_table) table_path = target_dir / "dataset_summary_table.html" - with open(table_path, "+w", encoding="utf-8") as f: + with open(table_path, "w", encoding="utf-8") as f: f.write(html_table) copyfile(table_path, STATIC_DATASET_DIR / table_path.name) # Generate KDE ridgeline plot for modality participant distributions try: - d_modal = df_raw[df_raw["dataset"].str.lower() != "test"].copy() - d_modal["modality_label"] = d_modal["modality of exp"].apply( - _primary_modality - ) - d_modal["n_subjects"] = pd.to_numeric( - d_modal["n_subjects"], errors="coerce" - ) - d_modal = d_modal.dropna(subset=["n_subjects"]) - - fig_kde = go.Figure() - order = [ - label - for label in MODALITY_COLOR_MAP - if label in d_modal["modality_label"].unique() - ] - rng = np.random.default_rng(42) - - for idx, label in enumerate(order): - subset = 
d_modal[d_modal["modality_label"] == label].copy() - vals = subset["n_subjects"].astype(float).dropna() - if len(vals) < 3: - continue - # Generate URLs for datasets in this modality - subset["dataset_url"] = subset["dataset"].apply(get_dataset_url) - log_vals = np.log10(vals) - grid = np.linspace(log_vals.min() - 0.25, log_vals.max() + 0.25, 240) - kde = gaussian_kde(log_vals) - density = kde(grid) - if density.max() <= 0: - continue - density_norm = density / density.max() - amplitude = 0.6 - baseline = idx * 1.1 - y_curve = baseline + density_norm * amplitude - x_curve = 10**grid - - color = MODALITY_COLOR_MAP.get(label, "#6b7280") - fill = _hex_to_rgba(color, 0.28) - - fig_kde.add_trace( - go.Scatter( - x=np.concatenate([x_curve, x_curve[::-1]]), - y=np.concatenate([y_curve, np.full_like(y_curve, baseline)]), - name=label, - fill="toself", - fillcolor=fill, - line=dict(color="rgba(0,0,0,0)"), - hoverinfo="skip", - showlegend=False, - ) - ) - - fig_kde.add_trace( - go.Scatter( - x=x_curve, - y=y_curve, - mode="lines", - name=label, - line=dict(color=color, width=2), - hovertemplate=f"{label}
#Participants: %{{x:.0f}}", - ) - ) - - jitter = rng.uniform(0.02, amplitude * 0.5, size=len(vals)) - # Prepare custom data with dataset names and URLs - custom_data = np.column_stack( - [subset["dataset"].to_numpy(), subset["dataset_url"].to_numpy()] - ) - fig_kde.add_trace( - go.Scatter( - x=vals, - y=np.full_like(vals, baseline) + jitter, - mode="markers", - name=label, - marker=dict(color=color, size=5, opacity=0.6), - customdata=custom_data, - hovertemplate="%{customdata[0]}
#Participants: %{x}
Click to view dataset details", - showlegend=False, - ) - ) - - if fig_kde.data: - fig_kde.update_layout( - height=max(650, 150 * len(order)), - width=1200, # Set explicit width for consistent sizing - template="plotly_white", - xaxis=dict( - type="log", - title="#Participants", - showgrid=True, - gridcolor="rgba(0,0,0,0.12)", - zeroline=False, - ), - yaxis=dict( - title="Modality", - tickmode="array", - tickvals=[idx * 1.1 for idx in range(len(order))], - ticktext=order, - showgrid=False, - range=[-0.3, max(0.3, (len(order) - 1) * 1.1 + 0.9)], - ), - legend=dict( - title="Modality", - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=0.99, - ), - margin=dict(l=120, r=40, t=80, b=80), - title=dict( - text="", - x=0.01, - xanchor="left", - y=0.98, - yanchor="top", - ), - autosize=True, # Enable auto-sizing to fill container - ) - # Add CSS and loading indicator for immediate proper sizing - kde_height = max(650, 150 * len(order)) - plot_config = { - "responsive": True, - "displaylogo": False, - "modeBarButtonsToRemove": ["lasso2d", "select2d"], - "toImageButtonOptions": { - "format": "png", - "filename": "participant_kde", - "height": kde_height, - "width": 1200, - "scale": 2, - }, - } - fig_spec = fig_kde.to_plotly_json() - data_json = json.dumps(fig_spec.get("data", []), cls=PlotlyJSONEncoder) - layout_json = json.dumps( - fig_spec.get("layout", {}), cls=PlotlyJSONEncoder - ) - config_json = json.dumps(plot_config, cls=PlotlyJSONEncoder) - - # Wrap with styling to ensure proper initial sizing and defer Plotly rendering - styled_html = f""" - -
-<div>Loading participant distribution...</div>
-<div id="modality-ridgeline"></div>
-<script>Plotly.newPlot("modality-ridgeline", {data_json}, {layout_json}, {config_json});</script>
- -""" - - kde_path = Path(target_dir) / "dataset_kde_modalities.html" - with open(kde_path, "w", encoding="utf-8") as f: - f.write(styled_html) - copyfile(kde_path, STATIC_DATASET_DIR / kde_path.name) + kde_path = target_dir / "dataset_kde_modalities.html" + kde_output = generate_modality_ridgeline(df_raw, kde_path) + if kde_output: + copyfile(kde_output, STATIC_DATASET_DIR / kde_output.name) except Exception as exc: print(f"[dataset KDE] Skipped due to error: {exc}") diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst index b4b607ff..c3ef7a33 100644 --- a/docs/source/dataset_summary.rst +++ b/docs/source/dataset_summary.rst @@ -10,13 +10,11 @@ .. rst-class:: dataset-summary-article -Datasets -========= +Datasets Catalog +================ To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications. -The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. - .. raw:: html @@ -27,10 +25,16 @@ The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. .. include:: dataset_summary/table.rst - .. tab-item:: Participant KDE + .. tab-item:: Participant Distribution .. include:: dataset_summary/kde.rst - .. tab-item:: Landscape + .. tab-item:: Dataset Flow + + .. include:: dataset_summary/sankey.rst + + .. tab-item:: Scatter of Sample Size vs. Recording Duration .. include:: dataset_summary/bubble.rst + +The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. diff --git a/docs/source/dataset_summary/bubble.rst b/docs/source/dataset_summary/bubble.rst index 83e83179..6de57ebf 100644 --- a/docs/source/dataset_summary/bubble.rst +++ b/docs/source/dataset_summary/bubble.rst @@ -1,3 +1,5 @@ +.. title:: Dataset landscape + .. rubric:: Dataset landscape .. raw:: html diff --git a/docs/source/dataset_summary/kde.rst b/docs/source/dataset_summary/kde.rst index 906a4c6f..e2f66ddf 100644 --- a/docs/source/dataset_summary/kde.rst +++ b/docs/source/dataset_summary/kde.rst @@ -1,4 +1,6 @@ -.. rubric:: Participant Distribution by Modality +.. title:: Participant Distribution by Modality + +.. rubric:: Distribution of Sample Sizes Varies by Experimental Modality .. raw:: html diff --git a/docs/source/dataset_summary/sankey.rst b/docs/source/dataset_summary/sankey.rst new file mode 100644 index 00000000..66304778 --- /dev/null +++ b/docs/source/dataset_summary/sankey.rst @@ -0,0 +1,20 @@ +.. title:: Dataset flow + +.. rubric:: Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain + +.. raw:: html + +
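+   <!-- Wrapper for the Sankey markup included from _static/dataset_generated/dataset_sankey.html -->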
+   <div>
+
+.. raw:: html
+   :file: ../_static/dataset_generated/dataset_sankey.html
+
+.. raw:: html
+
+   <div>
+   Figure: Dataset flow across population, modality, and cognitive domain.
+   Link thickness is proportional to the total number of subjects, and the
+   tooltip reports both subject and dataset counts. Hover over nodes and
+   links to explore specific segments.
+   </div>
+   </div>
diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst index 3891d906..b409b575 100644 --- a/docs/source/dataset_summary/table.rst +++ b/docs/source/dataset_summary/table.rst @@ -1,3 +1,5 @@ +.. title:: EEG Datasets Table + .. rubric:: EEG Datasets Table The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes M-EEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, @@ -22,153 +24,4 @@ In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMA -Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance and reuse the same visual language as the model catalog. - -.. raw:: html - - - - - - - - - - - - - - - - - +Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance. diff --git a/docs/source/index.rst b/docs/source/index.rst index 01b8e41d..632007c6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,11 +20,14 @@ EEG Dash Homepage .. rst-class:: h4 text-center font-weight-light my-4 - The EEG-DaSh data archive will establish a data-sharing resource for MEEG (EEG, MEG) data, enabling large-scale computational advancements to preserve and share scientific data from publicly funded research for machine learning and deep learning applications. +.. rst-class:: text-center + +**Note:** The "DaSh" in EEG-DaSh stands for **Data Share**. + The EEG-DaSh data archive is a collaborative effort led by the University of California, San Diego (UCSD) and Ben-Gurion University of the Negev (BGU) and partially funded by the National Science Foundation (NSF). All are welcome to contribute to the https://github.com/sccn/EEGDash project. The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. diff --git a/eegdash/dataset/dataset_summary.csv b/eegdash/dataset/dataset_summary.csv index 609fa2d3..74781587 100644 --- a/eegdash/dataset/dataset_summary.csv +++ b/eegdash/dataset/dataset_summary.csv @@ -198,7 +198,7 @@ 197,ds003751,38,38,1,128,250,19.95,4.71 GB,5057922307,0,ds003751,Healthy,other,Multisensory,Affect 198,ds003421,80,20,1,257,1000,11.604,76.77 GB,82433418198,0,ds003421,Healthy,10-20,Multisensory,Decision-making 199,ds002158,117,20,1,,,0.0,428.59 GB,460190030981,0,ds002158,Healthy,10-20,Visual,Affect -200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,?,,Tactile,Learning +200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,,,Tactile,Learning 201,ds004802,38,38,1,65,"2048,512",0.0,29.34 GB,31504070800,0,ds004802,Other,,Visual,Affect 202,ds004816,20,20,1,63,1000,0.0,23.31 GB,25028989553,0,ds004816,Healthy,,Visual,Attention 203,ds005873,2850,125,1,2,256,11935.09,117.21 GB,125851664268,0,,,,,