From cfdeb138972fa5fed125c2f4d8e3a3d665ff7687 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 17:17:40 +0200
Subject: [PATCH 01/30] putting the log scale

---
 docs/prepare_summary_tables.py        | 110 +++++++++++++++++++++-----
 docs/source/dataset_summary/table.rst |   2 +
 2 files changed, 91 insertions(+), 21 deletions(-)
diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index fd904296..c4dda9cd 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -162,6 +162,7 @@ def _fmt_size(bytes_):
         "size_gb": "Size (GB)",
         "tasks": "#Tasks",
     }[x_field]
+    x_label = f"{x_label} (log scale)"
 
     # hover text adapts to x
     if x_field == "duration_h":
@@ -183,14 +184,32 @@ def _fmt_size(bytes_):
         "<br>Sampling: %{customdata[4]} Hz"
         "<br>Size: %{customdata[5]}"
         "<br>Modality: %{customdata[6]}"
+        "<br><i>Click bubble to open dataset page</i>"
         "<extra></extra>"
     )
 
-    d = d.dropna(subset=["duration_h", "subjects", "size_gb"])  # need these
+    required_columns = {"subjects", "size_gb", x_field}
+    d = d.replace([np.inf, -np.inf], np.nan)
+    d = d.dropna(subset=list(required_columns))
+    d = d[(d["subjects"] > 0) & (d[x_field] > 0)]
+
+    d["dataset_url"] = d["dataset"].apply(get_dataset_url)
+
+    if d.empty:
+        out_path = Path(out_html)
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        no_data_html = """
+<div class="dataset-loading" id="dataset-loading">No dataset records available for plotting.</div>
+"""
+        with open(str(out_path), "w", encoding="utf-8") as f:
+            f.write(no_data_html)
+        return str(out_path)
 
     # Marker sizing: scale into a good visual range
-    max_size = max(d["size_gb"].max(), 1)
-    sizeref = (2.0 * max_size) / (40.0**2)  # target ~40px max marker
+    size_max = d["size_gb"].max()
+    if not np.isfinite(size_max) or size_max <= 0:
+        size_max = 1.0
+    sizeref = (2.0 * size_max) / (40.0**2)  # target ~40px max marker
 
     # Prepare prettified strings for hover
     def _fmt_int(v):
@@ -219,10 +238,11 @@ def _fmt_int(v):
             sfreq_str,
             d["size_bytes"].map(_fmt_size),
             d["modality_label"],
+            d["dataset_url"],
         ],
         size_max=40,
         labels={
-            "subjects": "#Subjects",
+            "subjects": "#Subjects (log scale)",
             "modality_label": "Modality",
             x_field: x_label,
         },
@@ -235,6 +255,8 @@ def _fmt_int(v):
                 if label in d["modality_label"].unique()
             ]
         },
+        log_x=True,
+        log_y=True,
     )
 
     # tune marker sizing explicitly for better control
@@ -247,9 +269,12 @@ def _fmt_int(v):
         )
         tr.hovertemplate = hover
 
+    plot_width = 1280
+    plot_height = 720
+
     fig.update_layout(
-        height=750,
-        width=1200,  # Set explicit width for consistent sizing
+        height=plot_height,
+        width=plot_width,  # Landscape orientation
         margin=dict(l=60, r=40, t=80, b=60),
         template="plotly_white",
         legend=dict(
@@ -275,8 +300,20 @@ def _fmt_int(v):
         autosize=True,  # Enable auto-sizing to fill container
     )
 
-    fig.update_xaxes(showgrid=True, gridcolor="rgba(0,0,0,0.12)", zeroline=False)
-    fig.update_yaxes(showgrid=True, gridcolor="rgba(0,0,0,0.12)", zeroline=False)
+    fig.update_xaxes(
+        showgrid=True,
+        gridcolor="rgba(0,0,0,0.12)",
+        zeroline=False,
+        type="log",
+        dtick=1,
+    )
+    fig.update_yaxes(
+        showgrid=True,
+        gridcolor="rgba(0,0,0,0.12)",
+        zeroline=False,
+        type="log",
+        dtick=1,
+    )
 
     out_path = Path(out_html)
     out_path.parent.mkdir(parents=True, exist_ok=True)
@@ -292,8 +329,8 @@ def _fmt_int(v):
             "toImageButtonOptions": {
                 "format": "png",
                 "filename": "dataset_landscape",
-                "height": 750,
-                "width": 1200,
+                "height": plot_height,
+                "width": plot_width,
                 "scale": 2,
             },
         },
@@ -304,9 +341,9 @@ def _fmt_int(v):
 <style>
 #dataset-bubble {{
     width: 100% !important;
-    max-width: 1200px;
-    height: 750px !important;
-    min-height: 750px;
+    max-width: {plot_width}px;
+    height: {plot_height}px !important;
+    min-height: {plot_height}px;
     margin: 0 auto;
 }}
 #dataset-bubble .plotly-graph-div {{
@@ -317,7 +354,7 @@ def _fmt_int(v):
     display: flex;
     justify-content: center;
     align-items: center;
-    height: 750px;
+    height: {plot_height}px;
     font-family: Inter, system-ui, sans-serif;
     color: #6b7280;
 }}
@@ -325,14 +362,39 @@ def _fmt_int(v):
 <div class="dataset-loading" id="dataset-loading">Loading dataset landscape...</div>
 {html_content}
 <script>
-// Hide loading indicator once plot is rendered
+// Hide loading indicator once plot is rendered and make bubbles clickable
 document.addEventListener('DOMContentLoaded', function() {{
     const loading = document.getElementById('dataset-loading');
     const plot = document.getElementById('dataset-bubble');
-    if (loading && plot) {{
-        loading.style.display = 'none';
-        plot.style.display = 'block';
+
+    function showPlot() {{
+        if (loading) {{
+            loading.style.display = 'none';
+        }}
+        if (plot) {{
+            plot.style.display = 'block';
+        }}
     }}
+
+    function hookPlotlyClick(attempts) {{
+        if (!plot || typeof plot.on !== 'function') {{
+            if (attempts < 40) {{
+                window.setTimeout(function() {{ hookPlotlyClick(attempts + 1); }}, 60);
+            }}
+            return;
+        }}
+        plot.on('plotly_click', function(evt) {{
+            const point = evt && evt.points && evt.points[0];
+            const url = point && point.customdata && point.customdata[7];
+            if (url) {{
+                window.open(url, '_blank', 'noopener');
+            }}
+        }});
+        showPlot();
+    }}
+
+    hookPlotlyClick(0);
+    showPlot();
 }});
 </script>
 """
@@ -369,8 +431,12 @@ def human_readable_size(num_bytes: int) -> str:
 
 def get_dataset_url(name: str) -> str:
     """Generate dataset URL for plots (relative to dataset summary page)."""
-    name = name.strip()
-    return f"api/dataset/eegdash.dataset.{name.upper()}.html"
+    if name is None or (isinstance(name, float) and pd.isna(name)):
+        return ""
+    text = str(name).strip()
+    if not text:
+        return ""
+    return f"../../api/dataset/eegdash.dataset.{text.upper()}.html"
 
 
 def wrap_dataset_name(name: str):
@@ -378,7 +444,9 @@ def wrap_dataset_name(name: str):
     name = name.strip()
     # Link to the individual dataset API page
     # Updated structure: api/dataset/eegdash.dataset.<CLASS>.html
-    url = f"api/dataset/eegdash.dataset.{name.upper()}.html"
+    url = get_dataset_url(name)
+    if not url:
+        return name.upper()
     return f'<a href="{url}">{name.upper()}</a>'
 
 
diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst
index 3891d906..487e846f 100644
--- a/docs/source/dataset_summary/table.rst
+++ b/docs/source/dataset_summary/table.rst
@@ -1,3 +1,5 @@
+.. title:: EEG Dataset Catalogue
+
 .. rubric:: EEG Datasets Table
 
 The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes M-EEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs,

From 0f01b5e81b9d3e1a7dc3eae755689f935a06093a Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 17:19:15 +0200
Subject: [PATCH 02/30] including title

---
 docs/source/dataset_summary/bubble.rst | 2 ++
 docs/source/dataset_summary/kde.rst    | 2 ++
 docs/source/dataset_summary/table.rst  | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/source/dataset_summary/bubble.rst b/docs/source/dataset_summary/bubble.rst
index 83e83179..6de57ebf 100644
--- a/docs/source/dataset_summary/bubble.rst
+++ b/docs/source/dataset_summary/bubble.rst
@@ -1,3 +1,5 @@
+.. title:: Dataset landscape
+
 .. rubric:: Dataset landscape
 
 .. raw:: html
diff --git a/docs/source/dataset_summary/kde.rst b/docs/source/dataset_summary/kde.rst
index 906a4c6f..92150024 100644
--- a/docs/source/dataset_summary/kde.rst
+++ b/docs/source/dataset_summary/kde.rst
@@ -1,3 +1,5 @@
+.. title:: Participant Distribution by Modality
+
 .. rubric:: Participant Distribution by Modality
 
 .. raw:: html
diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst
index 487e846f..2090c6d8 100644
--- a/docs/source/dataset_summary/table.rst
+++ b/docs/source/dataset_summary/table.rst
@@ -1,4 +1,4 @@
-.. title:: EEG Dataset Catalogue
+.. title:: EEG Datasets Table
 
 .. rubric:: EEG Datasets Table
 

From 4e5c14b593fb60ab969e674e9caeac3195a0306d Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 17:29:02 +0200
Subject: [PATCH 03/30] updating the prepare table

---
 docs/prepare_summary_tables.py | 62 ++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index c4dda9cd..720d711d 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -154,44 +154,61 @@ def _fmt_size(bytes_):
 
     # choose x axis field and labels
     x_field = (
-        x_var if x_var in {"records", "duration_h", "size_gb", "tasks"} else "records"
+        x_var
+        if x_var in {"records", "duration_h", "size_gb", "tasks", "subjects"}
+        else "records"
     )
-    x_label = {
+
+    axis_base_labels = {
         "records": "#Records",
         "duration_h": "Duration (hours)",
         "size_gb": "Size (GB)",
         "tasks": "#Tasks",
-    }[x_field]
-    x_label = f"{x_label} (log scale)"
+        "subjects": "#Subjects",
+    }
+
+    x_label = f"{axis_base_labels[x_field]} (log scale)"
+    y_field = "subjects"
+    if x_field == "subjects":
+        y_field = "records"
+    y_label = f"{axis_base_labels[y_field]} (log scale)"
 
-    # hover text adapts to x
+    # hover text adapts to axis choices
     if x_field == "duration_h":
-        x_hover = "Duration: %{x:.2f} h"
+        x_hover = "Duration (x): %{x:.2f} h"
     elif x_field == "size_gb":
-        x_hover = "Size: %{x:.2f} GB"
+        x_hover = "Size (x): %{x:.2f} GB"
     elif x_field == "tasks":
-        x_hover = "Tasks: %{x:,}"
+        x_hover = "Tasks (x): %{x:,}"
+    elif x_field == "subjects":
+        x_hover = "Subjects (x): %{x:,}"
     else:
         x_hover = "Records (x): %{x:,}"
 
+    if y_field == "subjects":
+        y_hover = "Subjects (y): %{y:,}"
+    else:
+        y_hover = "Records (y): %{y:,}"
+
     hover = (
         "<b>%{customdata[0]}</b>"  # dataset id
-        "<br>Subjects: %{y:,}"
         f"<br>{x_hover}"
-        "<br>Records: %{customdata[1]:,}"
-        "<br>Tasks: %{customdata[2]:,}"
-        "<br>Channels: %{customdata[3]}"
-        "<br>Sampling: %{customdata[4]} Hz"
-        "<br>Size: %{customdata[5]}"
-        "<br>Modality: %{customdata[6]}"
+        f"<br>{y_hover}"
+        "<br>Subjects (total): %{customdata[1]:,}"
+        "<br>Records (total): %{customdata[2]:,}"
+        "<br>Tasks: %{customdata[3]:,}"
+        "<br>Channels: %{customdata[4]}"
+        "<br>Sampling: %{customdata[5]} Hz"
+        "<br>Size: %{customdata[6]}"
+        "<br>Modality: %{customdata[7]}"
         "<br><i>Click bubble to open dataset page</i>"
         "<extra></extra>"
     )
 
-    required_columns = {"subjects", "size_gb", x_field}
+    required_columns = {x_field, y_field, "size_gb"}
     d = d.replace([np.inf, -np.inf], np.nan)
     d = d.dropna(subset=list(required_columns))
-    d = d[(d["subjects"] > 0) & (d[x_field] > 0)]
+    d = d[(d[x_field] > 0) & (d[y_field] > 0)]
 
     d["dataset_url"] = d["dataset"].apply(get_dataset_url)
 
@@ -226,12 +243,13 @@ def _fmt_int(v):
     fig = px.scatter(
         d,
         x=x_field,
-        y="subjects",
+        y=y_field,
         size="size_gb",
         color="modality_label",
         hover_name="dataset",
         custom_data=[
             d["dataset"],
+            d["subjects"],
             d["records"],
             d["tasks"],
             nchans_str,
@@ -242,7 +260,7 @@ def _fmt_int(v):
         ],
         size_max=40,
         labels={
-            "subjects": "#Subjects (log scale)",
+            y_field: y_label,
             "modality_label": "Modality",
             x_field: x_label,
         },
@@ -385,7 +403,7 @@ def _fmt_int(v):
         }}
         plot.on('plotly_click', function(evt) {{
             const point = evt && evt.points && evt.points[0];
-            const url = point && point.customdata && point.customdata[7];
+            const url = point && point.customdata && point.customdata[8];
             if (url) {{
                 window.open(url, '_blank', 'noopener');
             }}
@@ -585,9 +603,9 @@ def main(source_dir: str, target_dir: str):
             f, index_col=False, header=0, skipinitialspace=True
         )  # , sep=";")
         # Generate bubble chart from the raw data to have access to size_bytes
-        # Use x-axis as number of records for better spread
+        # Use x-axis as number of subjects so participant counts lead the story
         bubble_path = target_dir / "dataset_bubble.html"
-        gen_datasets_bubble(df_raw, str(bubble_path), x_var="records")
+        gen_datasets_bubble(df_raw, str(bubble_path), x_var="subjects")
         copyfile(bubble_path, STATIC_DATASET_DIR / bubble_path.name)
 
         df = prepare_table(df_raw)

From ea747184045972c1565c29f48a48d3e23fd8ac7e Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 17:38:39 +0200
Subject: [PATCH 04/30] fix the line plot

---
 docs/prepare_summary_tables.py | 54 ++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index 720d711d..c9fcb447 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -277,13 +277,52 @@ def _fmt_int(v):
         log_y=True,
     )
 
+    # Add a log-log regression fit line and R² annotation when data permits
+    fit_annotation_text = None
+    numeric_x = pd.to_numeric(d[x_field], errors="coerce")
+    numeric_y = pd.to_numeric(d[y_field], errors="coerce")
+    mask = (
+        np.isfinite(numeric_x)
+        & np.isfinite(numeric_y)
+        & (numeric_x > 0)
+        & (numeric_y > 0)
+    )
+
+    if mask.sum() >= 2:
+        log_x = np.log10(numeric_x[mask])
+        log_y = np.log10(numeric_y[mask])
+        ss_tot = np.sum((log_y - log_y.mean()) ** 2)
+        if np.ptp(log_x) > 0 and np.ptp(log_y) > 0 and ss_tot > 0:
+            slope, intercept = np.polyfit(log_x, log_y, 1)
+            line_log_x = np.linspace(log_x.min(), log_x.max(), 200)
+            line_x = 10**line_log_x
+            line_y = 10 ** (slope * line_log_x + intercept)
+            fig.add_trace(
+                go.Scatter(
+                    x=line_x,
+                    y=line_y,
+                    mode="lines",
+                    name="log-log fit",
+                    line=dict(color="#111827", width=2, dash="dot"),
+                    hoverinfo="skip",
+                    showlegend=False,
+                )
+            )
+            residuals = log_y - (slope * log_x + intercept)
+            r_squared = 1 - np.sum(residuals**2) / ss_tot
+            fit_annotation_text = f"log-log OLS fit R² = {r_squared:.3f}"
+
     # tune marker sizing explicitly for better control
     for tr in fig.data:
+        mode = getattr(tr, "mode", "") or ""
+        if "markers" not in mode:
+            continue
         tr.marker.update(
             sizemin=6,
             sizemode="area",
             sizeref=sizeref,
             line=dict(width=0.6, color="rgba(0,0,0,0.3)"),
+            opacity=0.75,
         )
         tr.hovertemplate = hover
 
@@ -318,6 +357,21 @@ def _fmt_int(v):
         autosize=True,  # Enable auto-sizing to fill container
     )
 
+    if fit_annotation_text:
+        fig.add_annotation(
+            xref="paper",
+            yref="paper",
+            x=0.02,
+            y=0.98,
+            text=fit_annotation_text,
+            showarrow=False,
+            font=dict(size=15, color="#111827"),
+            bgcolor="rgba(255,255,255,0.75)",
+            bordercolor="rgba(17,24,39,0.25)",
+            borderwidth=1,
+            borderpad=6,
+        )
+
     fig.update_xaxes(
         showgrid=True,
         gridcolor="rgba(0,0,0,0.12)",

From 6823dc9ed42ff56c1db82fe0a863c960aa4897d2 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 17:58:07 +0200
Subject: [PATCH 05/30] updating

---
 docs/prepare_summary_tables.py | 56 +++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index c9fcb447..217a5c00 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -1,6 +1,7 @@
 import glob
 import json
 from argparse import ArgumentParser
+from datetime import datetime
 from pathlib import Path
 from shutil import copyfile
 
@@ -724,12 +725,20 @@ def main(source_dir: str, target_dir: str):
             )
             d_modal = d_modal.dropna(subset=["n_subjects"])
 
-            fig_kde = go.Figure()
+            # Filter out "Other" modality
+            d_modal = d_modal[d_modal["modality_label"] != "Other"]
+
+            # Calculate median participants per modality and reorder ascending
+            median_participants = (
+                d_modal.groupby("modality_label")["n_subjects"].median().sort_values()
+            )
             order = [
                 label
-                for label in MODALITY_COLOR_MAP
+                for label in median_participants.index
                 if label in d_modal["modality_label"].unique()
             ]
+
+            fig_kde = go.Figure()
             rng = np.random.default_rng(42)
 
             for idx, label in enumerate(order):
@@ -775,6 +784,7 @@ def main(source_dir: str, target_dir: str):
                         name=label,
                         line=dict(color=color, width=2),
                         hovertemplate=f"<b>{label}</b><br>#Participants: %{{x:.0f}}<extra></extra>",
+                        showlegend=False,
                     )
                 )
 
@@ -789,7 +799,7 @@ def main(source_dir: str, target_dir: str):
                         y=np.full_like(vals, baseline) + jitter,
                         mode="markers",
                         name=label,
-                        marker=dict(color=color, size=5, opacity=0.6),
+                        marker=dict(color=color, size=8, opacity=0.6),
                         customdata=custom_data,
                         hovertemplate="<b><a href='%{customdata[1]}' target='_parent'>%{customdata[0]}</a></b><br>#Participants: %{x}<br><i>Click to view dataset details</i><extra></extra>",
                         showlegend=False,
@@ -803,10 +813,12 @@ def main(source_dir: str, target_dir: str):
                     template="plotly_white",
                     xaxis=dict(
                         type="log",
-                        title="#Participants",
+                        title="Number of Participants (Log Scale)",
                         showgrid=True,
-                        gridcolor="rgba(0,0,0,0.12)",
+                        gridcolor="rgba(0,0,0,0.08)",
                         zeroline=False,
+                        dtick=1,
+                        minor=dict(showgrid=True, gridcolor="rgba(0,0,0,0.04)"),
                     ),
                     yaxis=dict(
                         title="Modality",
@@ -816,24 +828,34 @@ def main(source_dir: str, target_dir: str):
                         showgrid=False,
                         range=[-0.3, max(0.3, (len(order) - 1) * 1.1 + 0.9)],
                     ),
-                    legend=dict(
-                        title="Modality",
-                        orientation="h",
-                        yanchor="bottom",
-                        y=1.02,
-                        xanchor="right",
-                        x=0.99,
-                    ),
-                    margin=dict(l=120, r=40, t=80, b=80),
+                    showlegend=False,
+                    margin=dict(l=120, r=40, t=100, b=80),
                     title=dict(
-                        text="",
-                        x=0.01,
-                        xanchor="left",
+                        text=f"<br><sub>Based on a EEG-Dash Datasets avaliables at {datetime.now().strftime('%d/%m/%Y')}.</sub>",
+                        x=0.5,
+                        xanchor="center",
                         y=0.98,
                         yanchor="top",
                     ),
                     autosize=True,  # Enable auto-sizing to fill container
                 )
+
+                # Add annotation highlighting Visual distribution
+                fig_kde.add_annotation(
+                    xref="paper",
+                    yref="paper",
+                    x=0.98,
+                    y=0.02,
+                    text="Visual studies consistently use the<br>largest sample sizes, typically 20-30 participants",
+                    showarrow=False,
+                    font=dict(size=12, color="#111827"),
+                    bgcolor="rgba(255,255,255,0.9)",
+                    bordercolor="rgba(17,24,39,0.3)",
+                    borderwidth=1,
+                    borderpad=8,
+                    xanchor="right",
+                    yanchor="bottom",
+                )
                 # Add CSS and loading indicator for immediate proper sizing
                 kde_height = max(650, 150 * len(order))
                 plot_config = {

From 33bfd07e2c6a49ca9f2e55ab1201e35020707576 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 18:06:21 +0200
Subject: [PATCH 06/30] improving the kde

---
 docs/prepare_summary_tables.py      | 16 ++++++++++++++++
 docs/source/dataset_summary/kde.rst |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index 217a5c00..8e63d983 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -789,6 +789,8 @@ def main(source_dir: str, target_dir: str):
                 )
 
                 jitter = rng.uniform(0.02, amplitude * 0.5, size=len(vals))
+                median_val = float(median_participants.get(label, np.nan))
+
                 # Prepare custom data with dataset names and URLs
                 custom_data = np.column_stack(
                     [subset["dataset"].to_numpy(), subset["dataset_url"].to_numpy()]
@@ -806,6 +808,20 @@ def main(source_dir: str, target_dir: str):
                     )
                 )
 
+                if np.isfinite(median_val) and median_val > 0:
+                    fig_kde.add_trace(
+                        go.Scatter(
+                            x=[median_val, median_val],
+                            y=[baseline, baseline + amplitude],
+                            mode="lines",
+                            line=dict(color=color, width=2, dash="dash"),
+                            hovertemplate=(
+                                f"<b>{label}</b><br>Median participants: {median_val:.0f}<extra></extra>"
+                            ),
+                            showlegend=False,
+                        )
+                    )
+
             if fig_kde.data:
                 fig_kde.update_layout(
                     height=max(650, 150 * len(order)),
diff --git a/docs/source/dataset_summary/kde.rst b/docs/source/dataset_summary/kde.rst
index 92150024..e2f66ddf 100644
--- a/docs/source/dataset_summary/kde.rst
+++ b/docs/source/dataset_summary/kde.rst
@@ -1,6 +1,6 @@
 .. title:: Participant Distribution by Modality
 
-.. rubric:: Participant Distribution by Modality
+.. rubric:: Distribution of Sample Sizes Varies by Experimental Modality
 
 .. raw:: html
 

From 9270f92a43eaee5383a7017755dec210e1e3388c Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 18:15:34 +0200
Subject: [PATCH 07/30] updating the kde

---
 docs/prepare_summary_tables.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index 8e63d983..f0151bcd 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -740,6 +740,8 @@ def main(source_dir: str, target_dir: str):
 
             fig_kde = go.Figure()
             rng = np.random.default_rng(42)
+            amplitude = 0.6
+            row_spacing = 0.95
 
             for idx, label in enumerate(order):
                 subset = d_modal[d_modal["modality_label"] == label].copy()
@@ -755,8 +757,7 @@ def main(source_dir: str, target_dir: str):
                 if density.max() <= 0:
                     continue
                 density_norm = density / density.max()
-                amplitude = 0.6
-                baseline = idx * 1.1
+                baseline = idx * row_spacing
                 y_curve = baseline + density_norm * amplitude
                 x_curve = 10**grid
 
@@ -824,36 +825,48 @@ def main(source_dir: str, target_dir: str):
 
             if fig_kde.data:
                 fig_kde.update_layout(
-                    height=max(650, 150 * len(order)),
+                    height=max(650, 140 * len(order)),
                     width=1200,  # Set explicit width for consistent sizing
                     template="plotly_white",
                     xaxis=dict(
                         type="log",
-                        title="Number of Participants (Log Scale)",
+                        title=dict(
+                            text="Number of Participants (Log Scale)",
+                            font=dict(size=18),
+                        ),
                         showgrid=True,
                         gridcolor="rgba(0,0,0,0.08)",
                         zeroline=False,
                         dtick=1,
                         minor=dict(showgrid=True, gridcolor="rgba(0,0,0,0.04)"),
+                        tickfont=dict(size=14),
                     ),
                     yaxis=dict(
-                        title="Modality",
+                        title=dict(text="Modality", font=dict(size=18)),
                         tickmode="array",
-                        tickvals=[idx * 1.1 for idx in range(len(order))],
+                        tickvals=[idx * row_spacing for idx in range(len(order))],
                         ticktext=order,
                         showgrid=False,
-                        range=[-0.3, max(0.3, (len(order) - 1) * 1.1 + 0.9)],
+                        range=[
+                            -0.25,
+                            max(
+                                0.35, (len(order) - 1) * row_spacing + amplitude + 0.25
+                            ),
+                        ],
+                        tickfont=dict(size=14),
                     ),
                     showlegend=False,
-                    margin=dict(l=120, r=40, t=100, b=80),
+                    margin=dict(l=120, r=40, t=108, b=80),
                     title=dict(
                         text=f"<br><sub>Based on a EEG-Dash Datasets avaliables at {datetime.now().strftime('%d/%m/%Y')}.</sub>",
                         x=0.5,
                         xanchor="center",
                         y=0.98,
                         yanchor="top",
+                        font=dict(size=20),
                     ),
                     autosize=True,  # Enable auto-sizing to fill container
+                    font=dict(size=16),
                 )
 
                 # Add annotation highlighting Visual distribution
@@ -864,7 +877,7 @@ def main(source_dir: str, target_dir: str):
                     y=0.02,
                     text="Visual studies consistently use the<br>largest sample sizes, typically 20-30 participants",
                     showarrow=False,
-                    font=dict(size=12, color="#111827"),
+                    font=dict(size=14, color="#111827"),
                     bgcolor="rgba(255,255,255,0.9)",
                     bordercolor="rgba(17,24,39,0.3)",
                     borderwidth=1,

From fda2ac5d5423d371d2cd16ef3335fbd3df0d24af Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:16:21 +0200
Subject: [PATCH 08/30] first iteration

---
 eegdash/sankey_helpers.py |  86 ++++++++++++++++++
 plot.py                   | 184 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 270 insertions(+)
 create mode 100644 eegdash/sankey_helpers.py
 create mode 100644 plot.py

diff --git a/eegdash/sankey_helpers.py b/eegdash/sankey_helpers.py
new file mode 100644
index 00000000..d1e112a1
--- /dev/null
+++ b/eegdash/sankey_helpers.py
@@ -0,0 +1,86 @@
+"""Helpers for Sankey diagram generation."""
+
+# Color mappings consistent with prepare_summary_tables.py and custom.css
+PATHOLOGY_COLOR_MAP = {
+    "Healthy": "#16a34a",  # green (from border-color in CSS)
+    "Clinical": "#dc2626",  # red (from border-color in CSS)
+    "Unknown": "#94a3b8",
+}
+
+MODALITY_COLOR_MAP = {
+    "Visual": "#2563eb",
+    "Auditory": "#0ea5e9",
+    "Tactile": "#10b981",
+    "Somatosensory": "#10b981",  # same as Tactile
+    "Multisensory": "#ec4899",
+    "Motor": "#f59e0b",
+    "Resting State": "#6366f1",
+    "Rest": "#6366f1",  # alias for Resting State
+    "Sleep": "#7c3aed",
+    "Other": "#14b8a6",
+    "Unknown": "#94a3b8",
+}
+
+TYPE_COLOR_MAP = {
+    "Perception": "#3b82f6",  # blue
+    "Decision-making": "#eab308",  # yellow
+    "Rest": "#16a34a",  # green
+    "Resting-state": "#16a34a",  # green (alias)
+    "Sleep": "#8b5cf6",  # purple
+    "Cognitive": "#6366f1",  # indigo
+    "Clinical": "#dc2626",  # red
+    "Unknown": "#94a3b8",
+}
+
+# Canonical mappings to normalize values
+CANONICAL_MAP = {
+    "Type Subject": {
+        "healthy controls": "Healthy",
+        "healthy": "Healthy",
+        "control": "Healthy",
+        "clinical": "Clinical",
+        "patient": "Clinical",
+    },
+    "modality of exp": {
+        "visual": "Visual",
+        "auditory": "Auditory",
+        "tactile": "Tactile",
+        "somatosensory": "Tactile",
+        "multisensory": "Multisensory",
+        "motor": "Motor",
+        "rest": "Resting State",
+        "resting state": "Resting State",
+        "resting-state": "Resting State",
+        "sleep": "Sleep",
+        "other": "Other",
+    },
+    "type of exp": {
+        "perception": "Perception",
+        "decision making": "Decision-making",
+        "decision-making": "Decision-making",
+        "rest": "Rest",
+        "resting state": "Resting-state",
+        "resting-state": "Resting-state",
+        "sleep": "Sleep",
+        "cognitive": "Cognitive",
+        "clinical": "Clinical",
+    },
+}
+
+# Map column names to their color maps
+COLUMN_COLOR_MAPS = {
+    "Type Subject": PATHOLOGY_COLOR_MAP,
+    "modality of exp": MODALITY_COLOR_MAP,
+    "type of exp": TYPE_COLOR_MAP,
+}
+
+
+def hex_to_rgba(hex_color: str, alpha: float = 0.2) -> str:
+    """Convert hex color to rgba with given alpha."""
+    hex_color = hex_color.lstrip("#")
+    if len(hex_color) != 6:
+        raise ValueError("Invalid hex color format")
+    r = int(hex_color[0:2], 16)
+    g = int(hex_color[2:4], 16)
+    b = int(hex_color[4:6], 16)
+    return f"rgba({r}, {g}, {b}, {alpha})"
diff --git a/plot.py b/plot.py
new file mode 100644
index 00000000..c99a060c
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+"""Generate a Sankey diagram from the EEG-Dash dataset summary.
+
+The script loads ``eegdash/dataset/dataset_summary.csv`` (by default) and builds
+an interactive Plotly Sankey diagram connecting three categorical columns. This
+mirrors how the documentation summarises datasets across subject type, modality,
+and experiment type, but can be reused with any trio of categorical columns via
+CLI arguments.
+"""
+
+import argparse
+from pathlib import Path
+from typing import Sequence
+
+import pandas as pd
+import plotly.graph_objects as go
+
+from eegdash.sankey_helpers import (
+    CANONICAL_MAP,
+    COLUMN_COLOR_MAPS,
+    hex_to_rgba,
+)
+
+DEFAULT_COLUMNS = ["Type Subject", "modality of exp", "type of exp"]
+
+
+def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
+    df = pd.read_csv(
+        path,
+        index_col=False,
+        header=0,
+        skipinitialspace=True,
+    )
+    missing = [col for col in columns if col not in df.columns]
+    if missing:
+        msg = f"Columns not found in dataframe: {missing}"
+        raise ValueError(msg)
+
+    cleaned = df.copy()
+    for col in columns:
+        # drop rows with missing values in the specified columns
+        cleaned = cleaned.dropna(subset=[col])
+
+        # Split multi-valued cells into separate rows
+        cleaned[col] = cleaned[col].str.split("/|;|,")
+        cleaned = cleaned.explode(col)
+        cleaned[col] = cleaned[col].str.strip()
+
+        # normalize values to canonical forms
+        if col in CANONICAL_MAP:
+            mapping = CANONICAL_MAP[col]
+            cleaned[col] = cleaned[col].str.lower().map(mapping).fillna(cleaned[col])
+
+    return cleaned[columns]
+
+
+def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
+    node_labels: list[str] = []
+    node_colors: list[str] = []
+    node_index: dict[tuple[str, str], int] = {}
+
+    for col in columns:
+        color_map = COLUMN_COLOR_MAPS.get(col, {})
+        unique_values = df[col].unique()
+        for val in unique_values:
+            if (col, val) not in node_index:
+                node_index[(col, val)] = len(node_labels)
+                node_labels.append(val)
+                node_colors.append(color_map.get(val, "#94a3b8"))
+
+    sources: list[int] = []
+    targets: list[int] = []
+    values: list[int] = []
+    link_colors: list[str] = []
+
+    for idx in range(len(columns) - 1):
+        col_from, col_to = columns[idx], columns[idx + 1]
+
+        # Use the color from the source node for the link
+        source_color_map = COLUMN_COLOR_MAPS.get(col_from, {})
+
+        # Group by source and target columns and count occurrences
+        grouped = df.groupby([col_from, col_to]).size().reset_index(name="count")
+
+        for _, row in grouped.iterrows():
+            source_val, target_val, count = row[col_from], row[col_to], row["count"]
+
+            source_node_idx = node_index.get((col_from, source_val))
+            target_node_idx = node_index.get((col_to, target_val))
+
+            if source_node_idx is not None and target_node_idx is not None:
+                sources.append(source_node_idx)
+                targets.append(target_node_idx)
+                values.append(count)
+
+                # Assign color to the link based on the source node
+                source_color = source_color_map.get(source_val, "#94a3b8")
+                link_colors.append(hex_to_rgba(source_color))
+
+    return node_labels, node_colors, sources, targets, values, link_colors
+
+
+def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
+    (
+        labels,
+        colors,
+        sources,
+        targets,
+        values,
+        link_colors,
+    ) = _build_sankey_data(df, columns)
+
+    sankey = go.Sankey(
+        arrangement="snap",
+        node=dict(
+            pad=18,
+            thickness=18,
+            label=labels,
+            color=colors,
+        ),
+        link=dict(
+            source=sources,
+            target=targets,
+            value=values,
+            color=link_colors,
+        ),
+    )
+
+    fig = go.Figure(sankey)
+
+    fig.update_layout(
+        font=dict(size=12),
+    )
+    return fig
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Generate a Sankey diagram from the dataset summary CSV."
+    )
+    parser.add_argument(
+        "--source",
+        type=Path,
+        default=Path("eegdash/dataset/dataset_summary.csv"),
+        help="Path to the dataset summary CSV file.",
+    )
+    parser.add_argument(
+        "--columns",
+        nargs=3,
+        metavar=("FIRST", "SECOND", "THIRD"),
+        default=DEFAULT_COLUMNS,
+        help="Three categorical columns to connect in the Sankey plot.",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=Path("dataset_summary_sankey.html"),
+        help="Output HTML file for the interactive Sankey diagram.",
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    args = parse_args()
+    if not args.source.exists():
+        raise FileNotFoundError(f"Dataset summary CSV not found at {args.source}")
+
+    columns = list(args.columns)
+    df = _load_dataframe(args.source, columns)
+    fig = build_sankey(df, columns)
+
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+    fig.write_html(
+        str(args.output),
+        include_plotlyjs="cdn",
+        full_html=True,
+        auto_open=False,
+    )
+    print(f"Sankey diagram saved to {args.output.resolve()}")
+
+
+if __name__ == "__main__":
+    main()

From 6504b7198824d0b32ce0658281435103bec7b849 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:26:51 +0200
Subject: [PATCH 09/30] updating one label

---
 eegdash/dataset/dataset_summary.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eegdash/dataset/dataset_summary.csv b/eegdash/dataset/dataset_summary.csv
index 609fa2d3..2660e4d2 100644
--- a/eegdash/dataset/dataset_summary.csv
+++ b/eegdash/dataset/dataset_summary.csv
@@ -198,7 +198,7 @@
 197,ds003751,38,38,1,128,250,19.95,4.71 GB,5057922307,0,ds003751,Healthy,other,Multisensory,Affect
 198,ds003421,80,20,1,257,1000,11.604,76.77 GB,82433418198,0,ds003421,Healthy,10-20,Multisensory,Decision-making
 199,ds002158,117,20,1,,,0.0,428.59 GB,460190030981,0,ds002158,Healthy,10-20,Visual,Affect
-200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,?,,Tactile,Learning
+200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,Other,,Tactile,Learning
 201,ds004802,38,38,1,65,"2048,512",0.0,29.34 GB,31504070800,0,ds004802,Other,,Visual,Affect
 202,ds004816,20,20,1,63,1000,0.0,23.31 GB,25028989553,0,ds004816,Healthy,,Visual,Attention
 203,ds005873,2850,125,1,2,256,11935.09,117.21 GB,125851664268,0,,,,,

From 4deda6c895ba9661588dfeb8fc17320b510dcb00 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:27:07 +0200
Subject: [PATCH 10/30] updating the helper

---
 eegdash/sankey_helpers.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/eegdash/sankey_helpers.py b/eegdash/sankey_helpers.py
index d1e112a1..571b2471 100644
--- a/eegdash/sankey_helpers.py
+++ b/eegdash/sankey_helpers.py
@@ -2,8 +2,8 @@
 
 # Color mappings consistent with prepare_summary_tables.py and custom.css
 PATHOLOGY_COLOR_MAP = {
-    "Healthy": "#16a34a",  # green (from border-color in CSS)
-    "Clinical": "#dc2626",  # red (from border-color in CSS)
+    "Healthy": "#16a34a",
+    "Clinical": "#ef4444",  # Use a red consistent with the table styles
     "Unknown": "#94a3b8",
 }
 
@@ -11,24 +11,29 @@
     "Visual": "#2563eb",
     "Auditory": "#0ea5e9",
     "Tactile": "#10b981",
-    "Somatosensory": "#10b981",  # same as Tactile
+    "Somatosensory": "#10b981",
     "Multisensory": "#ec4899",
     "Motor": "#f59e0b",
     "Resting State": "#6366f1",
-    "Rest": "#6366f1",  # alias for Resting State
+    "Rest": "#6366f1",
     "Sleep": "#7c3aed",
     "Other": "#14b8a6",
     "Unknown": "#94a3b8",
 }
 
 TYPE_COLOR_MAP = {
-    "Perception": "#3b82f6",  # blue
-    "Decision-making": "#eab308",  # yellow
-    "Rest": "#16a34a",  # green
-    "Resting-state": "#16a34a",  # green (alias)
-    "Sleep": "#8b5cf6",  # purple
-    "Cognitive": "#6366f1",  # indigo
-    "Clinical": "#dc2626",  # red
+    "Perception": "#3b82f6",
+    "Decision-making": "#eab308",
+    "Rest": "#16a34a",
+    "Resting-state": "#16a34a",
+    "Sleep": "#8b5cf6",
+    "Cognitive": "#6366f1",
+    "Clinical": "#f87171",  # Lighter red to match table
+    "Memory": "#c4b5fd",  # Lighter purple to match table
+    "Attention": "#c4b5fd",  # Lighter purple to match table
+    "Intervention": "#c4b5fd",  # Lighter purple to match table
+    "Learning": "#c4b5fd",  # Lighter purple to match table
+    "Other": "#c4b5fd",  # Lighter purple to match table
     "Unknown": "#94a3b8",
 }
 

From 7728067f3f3534762629f739e95b33a410b98399 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:45:07 +0200
Subject: [PATCH 11/30] unknown instead of ?

---
 eegdash/dataset/dataset_summary.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eegdash/dataset/dataset_summary.csv b/eegdash/dataset/dataset_summary.csv
index 2660e4d2..74781587 100644
--- a/eegdash/dataset/dataset_summary.csv
+++ b/eegdash/dataset/dataset_summary.csv
@@ -198,7 +198,7 @@
 197,ds003751,38,38,1,128,250,19.95,4.71 GB,5057922307,0,ds003751,Healthy,other,Multisensory,Affect
 198,ds003421,80,20,1,257,1000,11.604,76.77 GB,82433418198,0,ds003421,Healthy,10-20,Multisensory,Decision-making
 199,ds002158,117,20,1,,,0.0,428.59 GB,460190030981,0,ds002158,Healthy,10-20,Visual,Affect
-200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,Other,,Tactile,Learning
+200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,,,Tactile,Learning
 201,ds004802,38,38,1,65,"2048,512",0.0,29.34 GB,31504070800,0,ds004802,Other,,Visual,Affect
 202,ds004816,20,20,1,63,1000,0.0,23.31 GB,25028989553,0,ds004816,Healthy,,Visual,Attention
 203,ds005873,2850,125,1,2,256,11935.09,117.21 GB,125851664268,0,,,,,

From a614c1801249130d797200b16e7c095f747c5e12 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:51:25 +0200
Subject: [PATCH 12/30] updating with some stable version

---
 eegdash/sankey_helpers.py | 21 ++++++++++++++-------
 plot.py                   | 35 +++++++++++++++++++++++++++++------
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/eegdash/sankey_helpers.py b/eegdash/sankey_helpers.py
index 571b2471..7d2d50ae 100644
--- a/eegdash/sankey_helpers.py
+++ b/eegdash/sankey_helpers.py
@@ -2,9 +2,9 @@
 
 # Color mappings consistent with prepare_summary_tables.py and custom.css
 PATHOLOGY_COLOR_MAP = {
-    "Healthy": "#16a34a",
-    "Clinical": "#ef4444",  # Use a red consistent with the table styles
-    "Unknown": "#94a3b8",
+    "Healthy": "#22c55e",  # green
+    "Clinical": "#f87171",  # Lighter red to match table
+    "Unknown": "#94a3b8",  # grey
 }
 
 MODALITY_COLOR_MAP = {
@@ -69,6 +69,7 @@
         "sleep": "Sleep",
         "cognitive": "Cognitive",
         "clinical": "Clinical",
+        "other": "Other",
     },
 }
 
@@ -82,10 +83,16 @@
 
 def hex_to_rgba(hex_color: str, alpha: float = 0.2) -> str:
     """Convert hex color to rgba with given alpha."""
+    if not isinstance(hex_color, str) or not hex_color.startswith("#"):
+        # This is not a valid hex color, return a default color
+        return "rgba(148, 163, 184, 0.2)"  # Default grey
     hex_color = hex_color.lstrip("#")
     if len(hex_color) != 6:
-        raise ValueError("Invalid hex color format")
-    r = int(hex_color[0:2], 16)
-    g = int(hex_color[2:4], 16)
-    b = int(hex_color[4:6], 16)
+        return "rgba(148, 163, 184, 0.2)"  # Default grey for invalid length
+    try:
+        r = int(hex_color[0:2], 16)
+        g = int(hex_color[2:4], 16)
+        b = int(hex_color[4:6], 16)
+    except ValueError:
+        return "rgba(148, 163, 184, 0.2)"  # Default grey for conversion error
     return f"rgba({r}, {g}, {b}, {alpha})"
diff --git a/plot.py b/plot.py
index c99a060c..0122cda0 100644
--- a/plot.py
+++ b/plot.py
@@ -38,20 +38,34 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
         raise ValueError(msg)
 
     cleaned = df.copy()
+
+    # Process each column for cleaning and normalization
     for col in columns:
-        # drop rows with missing values in the specified columns
-        cleaned = cleaned.dropna(subset=[col])
+        # 1. Fill original NaN values with the string 'Unknown'
+        cleaned[col] = cleaned[col].fillna("Unknown")
 
-        # Split multi-valued cells into separate rows
-        cleaned[col] = cleaned[col].str.split("/|;|,")
+        # 2. Split multi-valued cells
+        cleaned[col] = cleaned[col].astype(str).str.split("/|;|,")
         cleaned = cleaned.explode(col)
+
+        # 3. Clean up whitespace and any empty strings created by splitting
         cleaned[col] = cleaned[col].str.strip()
+        cleaned[col] = cleaned[col].replace(["", "nan"], "Unknown")
 
-        # normalize values to canonical forms
+        # 4. Apply canonical mapping to standardize terms
         if col in CANONICAL_MAP:
             mapping = CANONICAL_MAP[col]
+            # Use .str.lower() for case-insensitive mapping
             cleaned[col] = cleaned[col].str.lower().map(mapping).fillna(cleaned[col])
 
+    # 5. Apply special rule for 'Type Subject' after all other processing
+    if "Type Subject" in columns:
+        # Identify values that are NOT 'Healthy' or 'Unknown'
+        is_healthy = cleaned["Type Subject"] == "Healthy"
+        is_unknown = cleaned["Type Subject"] == "Unknown"
+        # Set all other values to 'Clinical'
+        cleaned.loc[~is_healthy & ~is_unknown, "Type Subject"] = "Clinical"
+
     return cleaned[columns]
 
 
@@ -62,7 +76,16 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
 
     for col in columns:
         color_map = COLUMN_COLOR_MAPS.get(col, {})
-        unique_values = df[col].unique()
+
+        # Sort unique values to ensure "Unknown" appears at the bottom
+        all_unique = df[col].unique()
+        # Separate "Unknown" and sort the rest alphabetically
+        known_values = sorted([v for v in all_unique if v != "Unknown"])
+        unique_values = known_values
+        # Add "Unknown" to the end if it exists
+        if "Unknown" in all_unique:
+            unique_values.append("Unknown")
+
         for val in unique_values:
             if (col, val) not in node_index:
                 node_index[(col, val)] = len(node_labels)

From 1fa1e3cb1b420c400aa64eb6d8866dfe57b77797 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:55:57 +0200
Subject: [PATCH 13/30] fixing the colours encoding

---
 plot.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/plot.py b/plot.py
index 0122cda0..fbbd7bbb 100644
--- a/plot.py
+++ b/plot.py
@@ -60,11 +60,9 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
 
     # 5. Apply special rule for 'Type Subject' after all other processing
     if "Type Subject" in columns:
-        # Identify values that are NOT 'Healthy' or 'Unknown'
-        is_healthy = cleaned["Type Subject"] == "Healthy"
-        is_unknown = cleaned["Type Subject"] == "Unknown"
-        # Set all other values to 'Clinical'
-        cleaned.loc[~is_healthy & ~is_unknown, "Type Subject"] = "Clinical"
+        # The user wants to preserve original labels but color them as 'Clinical'.
+        # The relabeling to 'Clinical' is now removed. The coloring logic will handle this.
+        pass
 
     return cleaned[columns]
 
@@ -90,7 +88,12 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
             if (col, val) not in node_index:
                 node_index[(col, val)] = len(node_labels)
                 node_labels.append(val)
-                node_colors.append(color_map.get(val, "#94a3b8"))
+
+                # Use "Clinical" color for specific pathologies
+                node_color = color_map.get(val, "#94a3b8")
+                if col == "Type Subject" and val not in ["Healthy", "Unknown"]:
+                    node_color = color_map.get("Clinical", "#94a3b8")
+                node_colors.append(node_color)
 
     sources: list[int] = []
     targets: list[int] = []
@@ -119,6 +122,11 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
 
                 # Assign color to the link based on the source node
                 source_color = source_color_map.get(source_val, "#94a3b8")
+                if col_from == "Type Subject" and source_val not in [
+                    "Healthy",
+                    "Unknown",
+                ]:
+                    source_color = source_color_map.get("Clinical", "#94a3b8")
                 link_colors.append(hex_to_rgba(source_color))
 
     return node_labels, node_colors, sources, targets, values, link_colors

From 2863f9aae432bce2498c3fb5c89163d6ad24a1a3 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 20:58:40 +0200
Subject: [PATCH 14/30] implement more details

---
 plot.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/plot.py b/plot.py
index fbbd7bbb..e0fe22bf 100644
--- a/plot.py
+++ b/plot.py
@@ -99,6 +99,7 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
     targets: list[int] = []
     values: list[int] = []
     link_colors: list[str] = []
+    link_hover_labels: list[str] = []
 
     for idx in range(len(columns) - 1):
         col_from, col_to = columns[idx], columns[idx + 1]
@@ -119,6 +120,7 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
                 sources.append(source_node_idx)
                 targets.append(target_node_idx)
                 values.append(count)
+                link_hover_labels.append(f"{source_val} → {target_val}: {count}")
 
                 # Assign color to the link based on the source node
                 source_color = source_color_map.get(source_val, "#94a3b8")
@@ -129,7 +131,27 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
                     source_color = source_color_map.get("Clinical", "#94a3b8")
                 link_colors.append(hex_to_rgba(source_color))
 
-    return node_labels, node_colors, sources, targets, values, link_colors
+    # Add counts and percentages to the first column labels
+    first_col_name = columns[0]
+    first_col_counts = df[first_col_name].value_counts()
+    total_count = first_col_counts.sum()
+
+    for i, label in enumerate(node_labels):
+        col, val = next((k for k, v in node_index.items() if v == i), (None, None))
+        if col == first_col_name:
+            count = first_col_counts.get(val, 0)
+            percentage = (count / total_count) * 100 if total_count > 0 else 0
+            node_labels[i] = f"{label} ({count}, {percentage:.1f}%)"
+
+    return (
+        node_labels,
+        node_colors,
+        sources,
+        targets,
+        values,
+        link_colors,
+        link_hover_labels,
+    )
 
 
 def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
@@ -140,6 +162,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
         targets,
         values,
         link_colors,
+        link_hover_labels,
     ) = _build_sankey_data(df, columns)
 
     sankey = go.Sankey(
@@ -155,6 +178,8 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
             target=targets,
             value=values,
             color=link_colors,
+            hovertemplate="%{customdata}<extra></extra>",
+            customdata=link_hover_labels,
         ),
     )
 

From b5d915116a366c16d070acf7bec156e87dbecf8a Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:01:50 +0200
Subject: [PATCH 15/30] updating the title

---
 plot.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/plot.py b/plot.py
index e0fe22bf..c110661b 100644
--- a/plot.py
+++ b/plot.py
@@ -186,7 +186,37 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
     fig = go.Figure(sankey)
 
     fig.update_layout(
+        title_text="Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain",
         font=dict(size=12),
+        annotations=[
+            dict(
+                x=0,
+                y=1.05,
+                xref="paper",
+                yref="paper",
+                text="Population Type",
+                showarrow=False,
+                font=dict(size=14, color="black"),
+            ),
+            dict(
+                x=0.5,
+                y=1.05,
+                xref="paper",
+                yref="paper",
+                text="Experimental Modality",
+                showarrow=False,
+                font=dict(size=14, color="black"),
+            ),
+            dict(
+                x=1,
+                y=1.05,
+                xref="paper",
+                yref="paper",
+                text="Cognitive Domain",
+                showarrow=False,
+                font=dict(size=14, color="black"),
+            ),
+        ],
     )
     return fig
 

From 3fa8f83eb4e42f8bd182f752d653c4ed37588c16 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:14:48 +0200
Subject: [PATCH 16/30] more improve

---
 plot.py | 73 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/plot.py b/plot.py
index c110661b..e9eadbfb 100644
--- a/plot.py
+++ b/plot.py
@@ -32,13 +32,24 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
         header=0,
         skipinitialspace=True,
     )
-    missing = [col for col in columns if col not in df.columns]
+    # Ensure n_subjects is read, as it's needed for weighting
+    all_columns = list(columns)
+    if "n_subjects" not in all_columns:
+        all_columns.append("n_subjects")
+
+    missing = [col for col in all_columns if col not in df.columns]
     if missing:
         msg = f"Columns not found in dataframe: {missing}"
         raise ValueError(msg)
 
     cleaned = df.copy()
 
+    # Fill missing n_subjects with 1 (to count as at least one dataset)
+    # and ensure the column is numeric integer type.
+    cleaned["n_subjects"] = (
+        pd.to_numeric(cleaned["n_subjects"], errors="coerce").fillna(1).astype(int)
+    )
+
     # Process each column for cleaning and normalization
     for col in columns:
         # 1. Fill original NaN values with the string 'Unknown'
@@ -64,7 +75,7 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
         # The relabeling to 'Clinical' is now removed. The coloring logic will handle this.
         pass
 
-    return cleaned[columns]
+    return cleaned[all_columns]
 
 
 def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
@@ -107,11 +118,23 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
         # Use the color from the source node for the link
         source_color_map = COLUMN_COLOR_MAPS.get(col_from, {})
 
-        # Group by source and target columns and count occurrences
-        grouped = df.groupby([col_from, col_to]).size().reset_index(name="count")
+        # Group by source and target, getting both sum of subjects and count of datasets
+        grouped = (
+            df.groupby([col_from, col_to])
+            .agg(
+                subject_sum=("n_subjects", "sum"),
+                dataset_count=("n_subjects", "size"),
+            )
+            .reset_index()
+        )
 
         for _, row in grouped.iterrows():
-            source_val, target_val, count = row[col_from], row[col_to], row["count"]
+            source_val, target_val, subject_sum, dataset_count = (
+                row[col_from],
+                row[col_to],
+                row["subject_sum"],
+                row["dataset_count"],
+            )
 
             source_node_idx = node_index.get((col_from, source_val))
             target_node_idx = node_index.get((col_to, target_val))
@@ -119,8 +142,11 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
             if source_node_idx is not None and target_node_idx is not None:
                 sources.append(source_node_idx)
                 targets.append(target_node_idx)
-                values.append(count)
-                link_hover_labels.append(f"{source_val} → {target_val}: {count}")
+                values.append(subject_sum)  # Weight links by sum of subjects
+                link_hover_labels.append(
+                    f"{source_val} → {target_val}:<br>"
+                    f"{subject_sum} subjects in {dataset_count} datasets"
+                )
 
                 # Assign color to the link based on the source node
                 source_color = source_color_map.get(source_val, "#94a3b8")
@@ -131,17 +157,26 @@ def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
                     source_color = source_color_map.get("Clinical", "#94a3b8")
                 link_colors.append(hex_to_rgba(source_color))
 
-    # Add counts and percentages to the first column labels
+    # Add counts (subjects and datasets) and percentages to the first column labels
     first_col_name = columns[0]
-    first_col_counts = df[first_col_name].value_counts()
-    total_count = first_col_counts.sum()
+    first_col_stats = df.groupby(first_col_name).agg(
+        subject_sum=("n_subjects", "sum"),
+        dataset_count=("n_subjects", "size"),
+    )
+    total_subjects = first_col_stats["subject_sum"].sum()
 
     for i, label in enumerate(node_labels):
         col, val = next((k for k, v in node_index.items() if v == i), (None, None))
-        if col == first_col_name:
-            count = first_col_counts.get(val, 0)
-            percentage = (count / total_count) * 100 if total_count > 0 else 0
-            node_labels[i] = f"{label} ({count}, {percentage:.1f}%)"
+        if col == first_col_name and val in first_col_stats.index:
+            stats = first_col_stats.loc[val]
+            subject_sum = stats["subject_sum"]
+            dataset_count = stats["dataset_count"]
+            percentage = (
+                (subject_sum / total_subjects) * 100 if total_subjects > 0 else 0
+            )
+            node_labels[i] = (
+                f"{label}<br>({subject_sum} subjects, {dataset_count} datasets, {percentage:.1f}%)"
+            )
 
     return (
         node_labels,
@@ -168,7 +203,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
     sankey = go.Sankey(
         arrangement="snap",
         node=dict(
-            pad=18,
+            pad=30,
             thickness=18,
             label=labels,
             color=colors,
@@ -187,7 +222,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
 
     fig.update_layout(
         title_text="Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain",
-        font=dict(size=12),
+        font=dict(size=14),
         annotations=[
             dict(
                 x=0,
@@ -196,7 +231,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
                 yref="paper",
                 text="Population Type",
                 showarrow=False,
-                font=dict(size=14, color="black"),
+                font=dict(size=16, color="black"),
             ),
             dict(
                 x=0.5,
@@ -205,7 +240,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
                 yref="paper",
                 text="Experimental Modality",
                 showarrow=False,
-                font=dict(size=14, color="black"),
+                font=dict(size=16, color="black"),
             ),
             dict(
                 x=1,
@@ -214,7 +249,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
                 yref="paper",
                 text="Cognitive Domain",
                 showarrow=False,
-                font=dict(size=14, color="black"),
+                font=dict(size=16, color="black"),
             ),
         ],
     )

From 980ee687adf5c9daf332cc6832ff10f834300132 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:17:17 +0200
Subject: [PATCH 17/30] updating label side

---
 plot.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/plot.py b/plot.py
index e9eadbfb..65d6a83d 100644
--- a/plot.py
+++ b/plot.py
@@ -207,6 +207,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
             thickness=18,
             label=labels,
             color=colors,
+            align="left",  # Align all labels to the left of the node bars
         ),
         link=dict(
             source=sources,
@@ -223,6 +224,7 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
     fig.update_layout(
         title_text="Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain",
         font=dict(size=14),
+        margin=dict(b=100),  # Add bottom margin to make space for the note
         annotations=[
             dict(
                 x=0,
@@ -251,6 +253,17 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
                 showarrow=False,
                 font=dict(size=16, color="black"),
             ),
+            dict(
+                x=0,
+                y=-0.15,  # Position the note below the chart
+                xref="paper",
+                yref="paper",
+                text='<b>Note on "Unknown" category:</b> This large portion represents datasets that are still pending categorization.',
+                showarrow=False,
+                align="left",
+                xanchor="left",
+                font=dict(size=12, color="dimgray"),
+            ),
         ],
     )
     return fig

From da80138210a79ee97cc05ae713e00f1c424d7a09 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:25:58 +0200
Subject: [PATCH 18/30] commit the files

---
 eegdash/sankey_helpers.py => docs/plot_dataset/colours.py | 0
 plot.py => docs/plot_dataset/plot_sankey.py               | 7 +------
 2 files changed, 1 insertion(+), 6 deletions(-)
 rename eegdash/sankey_helpers.py => docs/plot_dataset/colours.py (100%)
 rename plot.py => docs/plot_dataset/plot_sankey.py (99%)

diff --git a/eegdash/sankey_helpers.py b/docs/plot_dataset/colours.py
similarity index 100%
rename from eegdash/sankey_helpers.py
rename to docs/plot_dataset/colours.py
diff --git a/plot.py b/docs/plot_dataset/plot_sankey.py
similarity index 99%
rename from plot.py
rename to docs/plot_dataset/plot_sankey.py
index 65d6a83d..e201e4ee 100644
--- a/plot.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -15,12 +15,7 @@
 
 import pandas as pd
 import plotly.graph_objects as go
-
-from eegdash.sankey_helpers import (
-    CANONICAL_MAP,
-    COLUMN_COLOR_MAPS,
-    hex_to_rgba,
-)
+from colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba
 
 DEFAULT_COLUMNS = ["Type Subject", "modality of exp", "type of exp"]
 

From 9a9ec14a070d2f31624a7996929b57c52964cd7d Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:28:10 +0200
Subject: [PATCH 19/30] fixing issue

---
 docs/source/dataset_summary/table.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst
index 2090c6d8..3975b97f 100644
--- a/docs/source/dataset_summary/table.rst
+++ b/docs/source/dataset_summary/table.rst
@@ -24,8 +24,7 @@ In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMA
    </figcaption>
    </figure>
 
-Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance and reuse the same visual language as the model catalog.
-
+Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance.
 .. raw:: html
 
   <!-- jQuery + DataTables core -->

From 4b36cb604df64a3895a034926a9c9aeab5bc1a17 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 21:29:53 +0200
Subject: [PATCH 20/30] usa english

---
 docs/source/dataset_summary.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst
index b4b607ff..642fbb2e 100644
--- a/docs/source/dataset_summary.rst
+++ b/docs/source/dataset_summary.rst
@@ -10,13 +10,11 @@
 
 .. rst-class:: dataset-summary-article
 
-Datasets 
-=========
+Datasets Catalog
+================
 
 To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
 
-The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. 
-
 .. raw:: html
 
    <script src="https://cdn.plot.ly/plotly-3.1.0.min.js"></script>
@@ -34,3 +32,5 @@ The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
    .. tab-item:: Landscape
 
       .. include:: dataset_summary/bubble.rst
+
+The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. 

From 559348c8c7534949bfe77a507513423cbbd5606c Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 22:00:18 +0200
Subject: [PATCH 21/30] refactoring the plot

---
 docs/plot_dataset/__init__.py    |  12 +
 docs/plot_dataset/bubble.py      | 404 ++++++++++++++++
 docs/plot_dataset/plot_sankey.py |   6 +-
 docs/plot_dataset/ridgeline.py   | 331 +++++++++++++
 docs/plot_dataset/utils.py       | 109 +++++
 docs/prepare_summary_tables.py   | 791 +------------------------------
 6 files changed, 873 insertions(+), 780 deletions(-)
 create mode 100644 docs/plot_dataset/__init__.py
 create mode 100644 docs/plot_dataset/bubble.py
 create mode 100644 docs/plot_dataset/ridgeline.py
 create mode 100644 docs/plot_dataset/utils.py

diff --git a/docs/plot_dataset/__init__.py b/docs/plot_dataset/__init__.py
new file mode 100644
index 00000000..a258d27f
--- /dev/null
+++ b/docs/plot_dataset/__init__.py
@@ -0,0 +1,12 @@
+"""Plot generation utilities for EEGDash documentation."""
+
+from .bubble import generate_dataset_bubble  # noqa: F401
+from .colours import (  # noqa: F401
+    CANONICAL_MAP,
+    COLUMN_COLOR_MAPS,
+    MODALITY_COLOR_MAP,
+    PATHOLOGY_COLOR_MAP,
+    TYPE_COLOR_MAP,
+    hex_to_rgba,
+)
+from .ridgeline import generate_modality_ridgeline  # noqa: F401
diff --git a/docs/plot_dataset/bubble.py b/docs/plot_dataset/bubble.py
new file mode 100644
index 00000000..3e5c3ae5
--- /dev/null
+++ b/docs/plot_dataset/bubble.py
@@ -0,0 +1,404 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+
+try:  # Allow execution as a script or module
+    from .colours import MODALITY_COLOR_MAP
+    from .utils import get_dataset_url, human_readable_size, primary_modality, safe_int
+except ImportError:  # pragma: no cover - fallback for direct script execution
+    from colours import MODALITY_COLOR_MAP  # type: ignore
+    from utils import (  # type: ignore
+        get_dataset_url,
+        human_readable_size,
+        primary_modality,
+        safe_int,
+    )
+
+__all__ = ["generate_dataset_bubble"]
+
+
+def _to_numeric_median_list(val) -> float | None:
+    """Return *val* as a float, or the median of a list-like / "[a, b]" string.
+
+    Returns ``None`` when *val* is missing, empty, or cannot be parsed.
+    """
+    if isinstance(val, (list, tuple)):
+        parts = list(val)  # pd.isna() on a sequence is element-wise, so branch first
+    else:
+        if val is None or pd.isna(val):
+            return None
+        try:
+            return float(val)
+        except (TypeError, ValueError, OverflowError):
+            parts = str(val).strip().strip("[]").split(",")
+    try:
+        nums = [float(x) for x in parts if str(x).strip()]
+    except (TypeError, ValueError):
+        return None
+    return float(np.median(nums)) if nums else None
+
+
+def _format_int(value) -> str:
+    """Render *value* as a rounded integer string; missing values become ""."""
+    missing = value is None or pd.isna(value)
+    try:
+        return "" if missing else str(int(round(float(value))))
+    except Exception:
+        return str(value)
+
+
+def _build_hover_template(x_field: str, y_field: str) -> tuple[str, str]:
+    """Return the hover-text fragments describing the x and y axis values."""
+    x_templates = {
+        "duration_h": "Duration (x): %{x:.2f} h",
+        "size_gb": "Size (x): %{x:.2f} GB",
+        "tasks": "Tasks (x): %{x:,}",
+        "subjects": "Subjects (x): %{x:,}",
+    }
+    if y_field == "subjects":
+        y_text = "Subjects (y): %{y:,}"
+    else:
+        y_text = "Records (y): %{y:,}"
+    return x_templates.get(x_field, "Records (x): %{x:,}"), y_text
+
+
+def generate_dataset_bubble(
+    df: pd.DataFrame,
+    out_html: str | Path,
+    *,
+    x_var: str = "records",
+    max_width: int = 1280,
+    height: int = 720,
+) -> Path:
+    """Write an interactive log-log bubble chart of the datasets to *out_html*."""
+    data = df.copy()
+    data = data[data["dataset"].str.lower() != "test"]  # drop the "test" entry
+
+    data["duration_h"] = pd.to_numeric(
+        data.get("duration_hours_total"), errors="coerce"
+    )
+    data["subjects"] = pd.to_numeric(data.get("n_subjects"), errors="coerce")
+    data["records"] = pd.to_numeric(data.get("n_records"), errors="coerce")
+    data["tasks"] = pd.to_numeric(data.get("n_tasks"), errors="coerce")
+    data["size_bytes"] = pd.to_numeric(data.get("size_bytes"), errors="coerce")
+
+    data["sfreq"] = data["sampling_freqs"].map(_to_numeric_median_list)
+    data["nchans"] = data["nchans_set"].map(_to_numeric_median_list)
+
+    data["modality_label"] = data.get("modality of exp").apply(primary_modality)
+
+    GB = 1024**3  # bytes per (binary) gigabyte
+    data["size_gb"] = data["size_bytes"] / GB
+
+    x_field = (
+        x_var
+        if x_var in {"records", "duration_h", "size_gb", "tasks", "subjects"}
+        else "records"
+    )
+    axis_labels = {
+        "records": "#Records",
+        "duration_h": "Duration (hours)",
+        "size_gb": "Size (GB)",
+        "tasks": "#Tasks",
+        "subjects": "#Subjects",
+    }
+    x_label = f"{axis_labels[x_field]} (log scale)"
+    y_field = "subjects" if x_field != "subjects" else "records"  # never x == y
+    y_label = f"{axis_labels[y_field]} (log scale)"
+    x_hover, y_hover = _build_hover_template(x_field, y_field)
+
+    required_columns = {x_field, y_field, "size_gb"}
+    data = data.replace([np.inf, -np.inf], np.nan)  # treat +/-inf as missing
+    data = data.dropna(subset=list(required_columns))
+    data = data[(data[x_field] > 0) & (data[y_field] > 0)]  # log axes need > 0
+
+    data["dataset_url"] = data["dataset"].apply(get_dataset_url)
+
+    out_path = Path(out_html)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if data.empty:
+        empty_html = """
+<div class="dataset-loading" id="dataset-loading">No dataset records available for plotting.</div>
+"""
+        out_path.write_text(empty_html, encoding="utf-8")
+        return out_path
+
+    size_max = data["size_gb"].max()
+    if not np.isfinite(size_max) or size_max <= 0:
+        size_max = 1.0  # fallback keeps sizeref finite and positive
+    sizeref = (2.0 * size_max) / (40.0**2)  # target ~40px max marker
+
+    sfreq_str = data["sfreq"].map(_format_int)
+    nchans_str = data["nchans"].map(_format_int)
+
+    fig = px.scatter(
+        data,
+        x=x_field,
+        y=y_field,
+        size="size_gb",
+        color="modality_label",
+        hover_name="dataset",
+        custom_data=[
+            data["dataset"],
+            data["subjects"],
+            data["records"],
+            data["tasks"],
+            nchans_str,
+            sfreq_str,
+            data["size_bytes"].map(
+                lambda bytes_: human_readable_size(safe_int(bytes_, 0))
+            ),
+            data["modality_label"],
+            data["dataset_url"],  # index 8: read by the click handler below
+        ],
+        size_max=40,
+        labels={
+            y_field: y_label,
+            "modality_label": "Modality",
+            x_field: x_label,
+        },
+        color_discrete_map=MODALITY_COLOR_MAP,
+        title="",
+        category_orders={
+            "modality_label": [
+                label
+                for label in MODALITY_COLOR_MAP.keys()
+                if label in data["modality_label"].unique()
+            ]
+        },
+        log_x=True,
+        log_y=True,
+    )
+
+    numeric_x = pd.to_numeric(data[x_field], errors="coerce")
+    numeric_y = pd.to_numeric(data[y_field], errors="coerce")
+    mask = (
+        np.isfinite(numeric_x)
+        & np.isfinite(numeric_y)
+        & (numeric_x > 0)
+        & (numeric_y > 0)
+    )
+
+    fit_annotation_text = None
+    if mask.sum() >= 2:  # a line fit needs at least two points
+        log_x = np.log10(numeric_x[mask])
+        log_y = np.log10(numeric_y[mask])
+        ss_tot = np.sum((log_y - log_y.mean()) ** 2)
+        if np.ptp(log_x) > 0 and np.ptp(log_y) > 0 and ss_tot > 0:  # non-degenerate
+            slope, intercept = np.polyfit(log_x, log_y, 1)  # fit in log10 space
+            line_log_x = np.linspace(log_x.min(), log_x.max(), 200)
+            line_x = 10**line_log_x
+            line_y = 10 ** (slope * line_log_x + intercept)
+            fig.add_trace(
+                go.Scatter(
+                    x=line_x,
+                    y=line_y,
+                    mode="lines",
+                    name="log-log fit",
+                    line=dict(color="#111827", width=2, dash="dot"),
+                    hoverinfo="skip",
+                    showlegend=False,
+                )
+            )
+            residuals = log_y - (slope * log_x + intercept)
+            r_squared = 1 - np.sum(residuals**2) / ss_tot
+            fit_annotation_text = f"log-log OLS fit R² = {r_squared:.3f}"
+
+    hover_template = (
+        "<b>%{customdata[0]}</b>"
+        f"<br>{x_hover}"
+        f"<br>{y_hover}"
+        "<br>Subjects (total): %{customdata[1]:,}"
+        "<br>Records (total): %{customdata[2]:,}"
+        "<br>Tasks: %{customdata[3]:,}"
+        "<br>Channels: %{customdata[4]}"
+        "<br>Sampling: %{customdata[5]} Hz"
+        "<br>Size: %{customdata[6]}"
+        "<br>Modality: %{customdata[7]}"
+        "<br><i>Click bubble to open dataset page</i>"
+        "<extra></extra>"
+    )
+
+    for trace in fig.data:
+        mode = getattr(trace, "mode", "") or ""
+        if "markers" not in mode:
+            continue  # leave the fit line untouched
+        trace.marker.update(
+            sizemin=6,
+            sizemode="area",
+            sizeref=sizeref,
+            line=dict(width=0.6, color="rgba(0,0,0,0.3)"),
+            opacity=0.75,
+        )
+        trace.hovertemplate = hover_template
+
+    fig.update_layout(
+        height=height,
+        width=max_width,
+        margin=dict(l=60, r=40, t=80, b=60),
+        template="plotly_white",
+        legend=dict(
+            title="Modality",
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=0.99,
+        ),
+        font=dict(
+            family="Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif",
+            size=14,
+        ),
+        title=dict(text="", x=0.01, xanchor="left", y=0.98, yanchor="top"),
+        autosize=True,
+    )
+
+    if fit_annotation_text:
+        fig.add_annotation(
+            xref="paper",
+            yref="paper",
+            x=0.02,
+            y=0.98,
+            text=fit_annotation_text,
+            showarrow=False,
+            font=dict(size=15, color="#111827"),
+            bgcolor="rgba(255,255,255,0.75)",
+            bordercolor="rgba(17,24,39,0.25)",
+            borderwidth=1,
+            borderpad=6,
+        )
+
+    fig.update_xaxes(
+        showgrid=True,
+        gridcolor="rgba(0,0,0,0.12)",
+        zeroline=False,
+        type="log",
+        dtick=1,
+    )
+    fig.update_yaxes(
+        showgrid=True,
+        gridcolor="rgba(0,0,0,0.12)",
+        zeroline=False,
+        type="log",
+        dtick=1,
+    )
+
+    html_content = fig.to_html(
+        full_html=False,
+        include_plotlyjs=False,
+        div_id="dataset-bubble",
+        config={
+            "responsive": True,
+            "displaylogo": False,
+            "modeBarButtonsToRemove": ["lasso2d", "select2d"],
+            "toImageButtonOptions": {
+                "format": "png",
+                "filename": "dataset_landscape",
+                "height": height,
+                "width": max_width,
+                "scale": 2,
+            },
+        },
+    )
+
+    styled_html = f"""
+<style>
+#dataset-bubble {{
+    width: 100% !important;
+    max-width: {max_width}px;
+    height: {height}px !important;
+    min-height: {height}px;
+    margin: 0 auto;
+}}
+#dataset-bubble .plotly-graph-div {{
+    width: 100% !important;
+    height: 100% !important;
+}}
+.dataset-loading {{
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    height: {height}px;
+    font-family: Inter, system-ui, sans-serif;
+    color: #6b7280;
+}}
+</style>
+<div class="dataset-loading" id="dataset-loading">Loading dataset landscape...</div>
+{html_content}
+<script>
+document.addEventListener('DOMContentLoaded', function() {{
+    const loading = document.getElementById('dataset-loading');
+    const plot = document.getElementById('dataset-bubble');
+
+    function showPlot() {{
+        if (loading) {{
+            loading.style.display = 'none';
+        }}
+        if (plot) {{
+            plot.style.display = 'block';
+        }}
+    }}
+
+    function hookPlotlyClick(attempts) {{
+        if (!plot || typeof plot.on !== 'function') {{
+            if (attempts < 40) {{
+                window.setTimeout(function() {{ hookPlotlyClick(attempts + 1); }}, 60);
+            }}
+            return;
+        }}
+        plot.on('plotly_click', function(evt) {{
+            const point = evt && evt.points && evt.points[0];
+            const url = point && point.customdata && point.customdata[8];
+            if (url) {{
+                window.open(url, '_blank', 'noopener');
+            }}
+        }});
+        showPlot();
+    }}
+
+    hookPlotlyClick(0);
+    showPlot();
+}});
+</script>
+"""
+
+    out_path.write_text(styled_html, encoding="utf-8")
+    return out_path
+
+
+def _read_dataset(path: Path) -> pd.DataFrame:  # load the dataset summary CSV
+    return pd.read_csv(path, index_col=False, header=0, skipinitialspace=True)
+
+
+def main() -> None:
+    """Command-line entry point: build the bubble chart from a summary CSV."""
+    import argparse
+    cli = argparse.ArgumentParser(description="Generate the dataset bubble chart.")
+    cli.add_argument("source", type=Path, help="Path to dataset summary CSV")
+    cli.add_argument(
+        "--output",
+        type=Path,
+        default=Path("dataset_bubble.html"),
+        help="Output HTML file",
+    )
+    cli.add_argument(
+        "--x-axis",
+        choices=["records", "duration_h", "size_gb", "tasks", "subjects"],
+        default="records",
+        help="Field for the bubble chart x-axis",
+    )
+    opts = cli.parse_args()
+    saved = generate_dataset_bubble(
+        _read_dataset(opts.source), opts.output, x_var=opts.x_axis
+    )
+    print(f"Bubble chart saved to {saved.resolve()}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py
index e201e4ee..f33b6534 100644
--- a/docs/plot_dataset/plot_sankey.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -15,7 +15,11 @@
 
 import pandas as pd
 import plotly.graph_objects as go
-from colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba
+
+try:  # Support execution as a script or as a package module
+    from .colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba
+except ImportError:  # pragma: no cover - fallback for direct script execution
+    from colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba
 
 DEFAULT_COLUMNS = ["Type Subject", "modality of exp", "type of exp"]
 
diff --git a/docs/plot_dataset/ridgeline.py b/docs/plot_dataset/ridgeline.py
new file mode 100644
index 00000000..34d5a83f
--- /dev/null
+++ b/docs/plot_dataset/ridgeline.py
@@ -0,0 +1,331 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from plotly.utils import PlotlyJSONEncoder
+from scipy.stats import gaussian_kde
+
+try:  # Allow execution as a script or module
+    from .colours import MODALITY_COLOR_MAP, hex_to_rgba
+    from .utils import get_dataset_url, primary_modality
+except ImportError:  # pragma: no cover - fallback for direct script execution
+    from colours import MODALITY_COLOR_MAP, hex_to_rgba  # type: ignore
+    from utils import get_dataset_url, primary_modality  # type: ignore
+
+__all__ = ["generate_modality_ridgeline"]
+
+
+def generate_modality_ridgeline(
+    df: pd.DataFrame,
+    out_html: str | Path,
+    *,
+    rng_seed: int = 42,
+) -> Path | None:
+    """Generate a ridgeline (KDE) plot showing participants per modality.
+
+    Datasets without a positive subject count are ignored (log axis). Returns
+    the written path, or ``None`` when no modality has enough data to plot.
+    """
+    data = df[df["dataset"].str.lower() != "test"].copy()
+    data["modality_label"] = data["modality of exp"].apply(primary_modality)
+    data["n_subjects"] = pd.to_numeric(data["n_subjects"], errors="coerce")
+    data = data[data["n_subjects"] > 0]  # also drops NaN; log10 needs > 0
+    data = data[data["modality_label"] != "Other"]
+
+    if data.empty:
+        return None
+
+    median_participants = (
+        data.groupby("modality_label")["n_subjects"].median().sort_values()
+    )
+    order = list(median_participants.index)  # rows sorted by median size
+    if not order:
+        return None
+
+    fig = go.Figure()
+    rng = np.random.default_rng(rng_seed)
+    amplitude = 0.6
+    row_spacing = 0.95
+
+    for idx, label in enumerate(order):
+        subset = data[data["modality_label"] == label].copy()
+        values = subset["n_subjects"].astype(float).dropna()
+        if len(values) < 3 or values.nunique() < 2:  # KDE needs some spread
+            continue
+
+        subset["dataset_url"] = subset["dataset"].apply(get_dataset_url)
+        log_vals = np.log10(values)
+        grid = np.linspace(log_vals.min() - 0.25, log_vals.max() + 0.25, 240)
+        kde = gaussian_kde(log_vals)
+        density = kde(grid)
+        if density.max() <= 0:
+            continue
+
+        density_norm = density / density.max()
+        baseline = idx * row_spacing
+        y_curve = baseline + density_norm * amplitude
+        x_curve = 10**grid
+
+        color = MODALITY_COLOR_MAP.get(label, "#6b7280")
+        fill = hex_to_rgba(color, 0.28)
+
+        fig.add_trace(
+            go.Scatter(
+                x=np.concatenate([x_curve, x_curve[::-1]]),
+                y=np.concatenate([y_curve, np.full_like(y_curve, baseline)]),
+                name=label,
+                fill="toself",
+                fillcolor=fill,
+                line=dict(color="rgba(0,0,0,0)"),
+                hoverinfo="skip",
+                showlegend=False,
+            )
+        )
+
+        fig.add_trace(
+            go.Scatter(
+                x=x_curve,
+                y=y_curve,
+                mode="lines",
+                name=label,
+                line=dict(color=color, width=2),
+                hovertemplate=f"<b>{label}</b><br>#Participants: %{{x:.0f}}<extra></extra>",
+                showlegend=False,
+            )
+        )
+
+        jitter = rng.uniform(0.02, amplitude * 0.5, size=len(values))
+        median_val = float(median_participants.get(label, np.nan))
+        custom_data = np.column_stack(
+            [subset["dataset"].to_numpy(), subset["dataset_url"].to_numpy()]
+        )
+        fig.add_trace(
+            go.Scatter(
+                x=values,
+                y=np.full_like(values, baseline) + jitter,
+                mode="markers",
+                name=label,
+                marker=dict(color=color, size=8, opacity=0.6),
+                customdata=custom_data,
+                hovertemplate="<b><a href='%{customdata[1]}' target='_parent'>%{customdata[0]}</a></b><br>#Participants: %{x}<br><i>Click to view dataset details</i><extra></extra>",
+                showlegend=False,
+            )
+        )
+
+        if np.isfinite(median_val) and median_val > 0:
+            fig.add_trace(
+                go.Scatter(
+                    x=[median_val, median_val],
+                    y=[baseline, baseline + amplitude],
+                    mode="lines",
+                    line=dict(color=color, width=2, dash="dash"),
+                    hovertemplate=(
+                        f"<b>{label}</b><br>Median participants: {median_val:.0f}<extra></extra>"
+                    ),
+                    showlegend=False,
+                )
+            )
+
+    if not fig.data:
+        return None
+
+    kde_height = max(650, 150 * len(order))
+    date_stamp = datetime.now().strftime("%d/%m/%Y")
+    fig.update_layout(
+        height=kde_height,
+        width=1200,
+        template="plotly_white",
+        xaxis=dict(
+            type="log",
+            title=dict(text="Number of Participants (Log Scale)", font=dict(size=18)),
+            showgrid=True,
+            gridcolor="rgba(0,0,0,0.08)",
+            zeroline=False,
+            dtick=1,
+            minor=dict(showgrid=True, gridcolor="rgba(0,0,0,0.04)"),
+            tickfont=dict(size=14),
+        ),
+        yaxis=dict(
+            title=dict(text="Modality", font=dict(size=18)),
+            tickmode="array",
+            tickvals=[idx * row_spacing for idx in range(len(order))],
+            ticktext=order,
+            showgrid=False,
+            range=[-0.25, max(0.35, (len(order) - 1) * row_spacing + amplitude + 0.25)],
+            tickfont=dict(size=14),
+        ),
+        showlegend=False,
+        margin=dict(l=120, r=40, t=108, b=80),
+        title=dict(
+            text=f"<br><sub>Based on EEG-Dash datasets available at {date_stamp}.</sub>",
+            x=0.5,
+            xanchor="center",
+            y=0.98,
+            yanchor="top",
+            font=dict(size=20),
+        ),
+        autosize=True,
+        font=dict(size=16),
+    )
+
+    fig.add_annotation(
+        xref="paper",
+        yref="paper",
+        x=0.98,
+        y=0.02,
+        text="Visual studies consistently use the<br>largest sample sizes, typically 20-30 participants",
+        showarrow=False,
+        font=dict(size=14, color="#111827"),
+        bgcolor="rgba(255,255,255,0.9)",
+        bordercolor="rgba(17,24,39,0.3)",
+        borderwidth=1,
+        borderpad=8,
+        xanchor="right",
+        yanchor="bottom",
+    )
+
+    plot_config = {
+        "responsive": True,
+        "displaylogo": False,
+        "modeBarButtonsToRemove": ["lasso2d", "select2d"],
+        "toImageButtonOptions": {
+            "format": "png",
+            "filename": "participant_kde",
+            "height": kde_height,
+            "width": 1200,
+            "scale": 2,
+        },
+    }
+
+    fig_spec = fig.to_plotly_json()
+    data_json = json.dumps(fig_spec.get("data", []), cls=PlotlyJSONEncoder)
+    layout_json = json.dumps(fig_spec.get("layout", {}), cls=PlotlyJSONEncoder)
+    config_json = json.dumps(plot_config, cls=PlotlyJSONEncoder)
+
+    styled_html = f"""
+<style>
+#dataset-kde-modalities {{
+    width: 100% !important;
+    max-width: 1200px;
+    height: {kde_height}px !important;
+    min-height: {kde_height}px;
+    margin: 0 auto;
+    display: none;
+}}
+#dataset-kde-modalities.plotly-graph-div {{
+    width: 100% !important;
+    height: 100% !important;
+}}
+.kde-loading {{
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    height: {kde_height}px;
+    font-family: Inter, system-ui, sans-serif;
+    color: #6b7280;
+}}
+</style>
+<div class="kde-loading" id="kde-loading">Loading participant distribution...</div>
+<div id="dataset-kde-modalities" class="plotly-graph-div"></div>
+<script>
+(function() {{
+  const TARGET_ID = 'dataset-kde-modalities';
+  const FIG_DATA = {data_json};
+  const FIG_LAYOUT = {layout_json};
+  const FIG_CONFIG = {config_json};
+
+  function onReady(callback) {{
+    if (document.readyState === 'loading') {{
+      document.addEventListener('DOMContentLoaded', callback, {{ once: true }});
+    }} else {{
+      callback();
+    }}
+  }}
+
+  function renderPlot() {{
+    const container = document.getElementById(TARGET_ID);
+    if (!container) {{
+      return;
+    }}
+
+    const draw = () => {{
+      if (!window.Plotly) {{
+        window.requestAnimationFrame(draw);
+        return;
+      }}
+
+      window.Plotly.newPlot(TARGET_ID, FIG_DATA, FIG_LAYOUT, FIG_CONFIG).then((plot) => {{
+        const loading = document.getElementById('kde-loading');
+        if (loading) {{
+          loading.style.display = 'none';
+        }}
+        container.style.display = 'block';
+
+        plot.on('plotly_click', (event) => {{
+          const point = event.points && event.points[0];
+          if (!point || !point.customdata) {{
+            return;
+          }}
+          const url = point.customdata[1];
+          if (url) {{
+            const resolved = new URL(url, window.location.href);
+            window.open(resolved.href, '_self');
+          }}
+        }});
+      }});
+    }};
+
+    draw();
+  }}
+
+  onReady(renderPlot);
+}})();
+</script>
+"""
+
+    out_path = Path(out_html)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(styled_html, encoding="utf-8")
+    return out_path
+
+
+def _read_dataset(path: Path) -> pd.DataFrame:  # load the dataset summary CSV
+    return pd.read_csv(path, index_col=False, header=0, skipinitialspace=True)
+
+
+def main() -> None:
+    """Command-line entry point: build the ridgeline plot from a summary CSV."""
+    import argparse
+
+    cli = argparse.ArgumentParser(
+        description="Generate the modality ridgeline plot from a dataset summary CSV."
+    )
+    cli.add_argument("source", type=Path, help="Path to dataset summary CSV")
+    cli.add_argument(
+        "--output",
+        type=Path,
+        default=Path("dataset_kde_modalities.html"),
+        help="Output HTML file",
+    )
+    cli.add_argument(
+        "--seed",
+        type=int,
+        default=42,
+        help="Random seed controlling jitter placement",
+    )
+    opts = cli.parse_args()
+    frame = _read_dataset(opts.source)
+    saved = generate_modality_ridgeline(frame, opts.output, rng_seed=opts.seed)
+    if saved is not None:
+        print(f"Ridgeline plot saved to {saved.resolve()}")
+    else:
+        print("Ridgeline plot could not be generated (insufficient data).")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/plot_dataset/utils.py b/docs/plot_dataset/utils.py
new file mode 100644
index 00000000..fbce6502
--- /dev/null
+++ b/docs/plot_dataset/utils.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+
+try:  # Allow import both as package and script
+    from .colours import CANONICAL_MAP, MODALITY_COLOR_MAP
+except ImportError:  # pragma: no cover - fallback for direct script execution
+    from colours import CANONICAL_MAP, MODALITY_COLOR_MAP  # type: ignore
+
+__all__ = [
+    "get_dataset_url",
+    "human_readable_size",
+    "primary_modality",
+    "safe_int",
+]
+
+_SEPARATORS = ("/", "|", ";")
+
+
+def primary_modality(value: Any) -> str:
+    """Map a raw modality cell to a single canonical label.
+
+    Only the first token of a multi-valued cell is considered. Missing or
+    blank input yields "Unknown"; an unrecognised token yields "Other".
+    """
+    if value is None or (isinstance(value, float) and pd.isna(value)):
+        return "Unknown"
+
+    unified = str(value).strip()
+    if not unified:
+        return "Unknown"
+
+    # Treat every supported separator as a comma, preserving token order.
+    for delimiter in _SEPARATORS:
+        unified = unified.replace(delimiter, ",")
+    pieces = [part.strip() for part in unified.split(",") if part.strip()]
+    if not pieces:
+        return "Unknown"
+
+    head = pieces[0]
+    canon_table = CANONICAL_MAP.get("modality of exp", {})
+    mapped = canon_table.get(head.lower())
+    if mapped:
+        return mapped
+
+    # Otherwise accept a token that already names a colour-map label,
+    # either verbatim or in title case.
+    for candidate in (head, head.title()):
+        if candidate in MODALITY_COLOR_MAP:
+            return candidate
+
+    return "Other"
+
+
+def safe_int(value: Any, default: int | None = None) -> int | None:
+    """Round *value* to the nearest ``int``; return *default* if that fails."""
+    if value is None or (isinstance(value, float) and pd.isna(value)):
+        return default
+    try:
+        return int(round(float(value)))
+    except Exception:
+        return default
+
+
+def human_readable_size(num_bytes: int | float | None) -> str:
+    """Render a byte count using the largest binary unit that fits.
+
+    TB/GB/MB keep two decimals, KB/B are whole numbers; bad input gives "0 B".
+    """
+    try:
+        total = float(num_bytes)
+    except Exception:
+        return "0 B"
+
+    # (bytes per unit, suffix, keep two decimals?) ordered largest first
+    scale = (
+        (1024**4, "TB", True),
+        (1024**3, "GB", True),
+        (1024**2, "MB", True),
+        (1024**1, "KB", False),
+        (1, "B", False),
+    )
+    for divisor, suffix, fractional in scale:
+        if total >= divisor:
+            scaled = total / divisor
+            if fractional:
+                return f"{scaled:.2f} {suffix}"
+            return f"{int(round(scaled))} {suffix}"
+    return "0 B"
+
+
+def get_dataset_url(name: str) -> str:
+    """Return the API page link of dataset *name*, relative to the summary page."""
+    missing = name is None or (isinstance(name, float) and pd.isna(name))
+    # The link is built from the upper-cased, whitespace-trimmed identifier.
+    slug = "" if missing else str(name).strip().upper()
+    if not slug:
+        return ""
+    return f"../../api/dataset/eegdash.dataset.{slug}.html"
+
+
+def ensure_directory(path: str | Path) -> Path:
+    """Make sure directory *path* exists (parents included) and return it."""
+    target = Path(path)
+    target.mkdir(exist_ok=True, parents=True)
+    return target
diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index f0151bcd..51085274 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -1,5 +1,4 @@
 import glob
-import json
 from argparse import ArgumentParser
 from datetime import datetime
 from pathlib import Path
@@ -7,511 +6,14 @@
 
 import numpy as np
 import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from plotly.utils import PlotlyJSONEncoder
-from scipy.stats import gaussian_kde
+from plot_dataset import generate_dataset_bubble, generate_modality_ridgeline
+from plot_dataset.utils import get_dataset_url, human_readable_size
 from table_tag_utils import wrap_tags
 
 DOCS_DIR = Path(__file__).resolve().parent
 STATIC_DATASET_DIR = DOCS_DIR / "source" / "_static" / "dataset_generated"
 
 
-MODALITY_CANONICAL = {
-    "visual": "Visual",
-    "auditory": "Auditory",
-    "tactile": "Tactile",
-    "somatosensory": "Tactile",
-    "multisensory": "Multisensory",
-    "motor": "Motor",
-    "rest": "Resting State",
-    "resting state": "Resting State",
-    "resting-state": "Resting State",
-    "sleep": "Sleep",
-    "other": "Other",
-}
-
-MODALITY_COLOR_MAP = {
-    "Visual": "#2563eb",
-    "Auditory": "#0ea5e9",
-    "Tactile": "#10b981",
-    "Multisensory": "#ec4899",
-    "Motor": "#f59e0b",
-    "Resting State": "#6366f1",
-    "Sleep": "#7c3aed",
-    "Other": "#14b8a6",
-    "Unknown": "#94a3b8",
-}
-
-
-def _hex_to_rgba(hex_color: str, alpha: float = 0.4) -> str:
-    hex_color = hex_color.lstrip("#")
-    if len(hex_color) != 6:
-        return f"rgba(99, 102, 241, {alpha})"
-    r = int(hex_color[0:2], 16)
-    g = int(hex_color[2:4], 16)
-    b = int(hex_color[4:6], 16)
-    return f"rgba({r}, {g}, {b}, {alpha})"
-
-
-def _primary_modality(value: object) -> str:
-    if value is None:
-        return "Unknown"
-    if isinstance(value, float) and pd.isna(value):
-        return "Unknown"
-    text = str(value).strip()
-    if not text:
-        return "Unknown"
-    for sep in ("/", "|", ";"):
-        text = text.replace(sep, ",")
-    tokens = [tok.strip() for tok in text.split(",") if tok.strip()]
-    if not tokens:
-        return "Unknown"
-    raw = tokens[0].lower()
-    canonical = MODALITY_CANONICAL.get(raw)
-    if canonical:
-        return canonical
-    candidate = tokens[0].strip()
-    title_candidate = candidate.title()
-    if title_candidate in MODALITY_COLOR_MAP:
-        return title_candidate
-    return "Other"
-
-
-def _to_numeric_median_list(val) -> float | None:
-    """Return a numeric value from possible list-like strings.
-
-    Examples
-    --------
-    - "64" -> 64
-    - "6,129" -> median -> 67.5 -> 68
-    - "128, 512" -> 320
-    - 500.0 -> 500
-
-    """
-    if pd.isna(val):
-        return None
-    try:
-        # already numeric
-        return float(val)
-    except Exception:
-        pass
-    s = str(val).strip().strip("[]")
-    if not s:
-        return None
-    try:
-        nums = [float(x) for x in s.split(",") if str(x).strip()]
-        if not nums:
-            return None
-        return float(np.median(nums))
-    except Exception:
-        return None
-
-
-def _safe_int(x, default=None):
-    try:
-        if x is None or pd.isna(x):
-            return default
-        return int(round(float(x)))
-    except Exception:
-        return default
-
-
-def gen_datasets_bubble(
-    df: pd.DataFrame,
-    out_html: str = "_static/dataset/dataset_bubble.html",
-    x_var: str = "records",  # one of: 'records', 'duration_h', 'size_gb', 'tasks'
-):
-    """Generate an interactive bubble chart for datasets.
-
-    - x: total duration (hours)
-    - y: number of subjects
-    - size: on-disk size (GB)
-    - color: dataset modality
-    """
-    d = df.copy()
-    d = d[d["dataset"].str.lower() != "test"]
-
-    # numeric columns
-    d["duration_h"] = pd.to_numeric(d.get("duration_hours_total"), errors="coerce")
-    d["subjects"] = pd.to_numeric(d.get("n_subjects"), errors="coerce")
-    d["records"] = pd.to_numeric(d.get("n_records"), errors="coerce")
-    d["tasks"] = pd.to_numeric(d.get("n_tasks"), errors="coerce")
-    d["size_bytes"] = pd.to_numeric(d.get("size_bytes"), errors="coerce")
-
-    # parse sampling and channels into representative numeric values
-    d["sfreq"] = d["sampling_freqs"].map(_to_numeric_median_list)
-    d["nchans"] = d["nchans_set"].map(_to_numeric_median_list)
-
-    d["modality_label"] = d.get("modality of exp").apply(_primary_modality)
-
-    # disk size in GB for sizing
-    GB = 1024**3
-    d["size_gb"] = d["size_bytes"] / GB
-
-    # hover content
-    def _fmt_size(bytes_):
-        return human_readable_size(_safe_int(bytes_, 0))
-
-    # choose x axis field and labels
-    x_field = (
-        x_var
-        if x_var in {"records", "duration_h", "size_gb", "tasks", "subjects"}
-        else "records"
-    )
-
-    axis_base_labels = {
-        "records": "#Records",
-        "duration_h": "Duration (hours)",
-        "size_gb": "Size (GB)",
-        "tasks": "#Tasks",
-        "subjects": "#Subjects",
-    }
-
-    x_label = f"{axis_base_labels[x_field]} (log scale)"
-    y_field = "subjects"
-    if x_field == "subjects":
-        y_field = "records"
-    y_label = f"{axis_base_labels[y_field]} (log scale)"
-
-    # hover text adapts to axis choices
-    if x_field == "duration_h":
-        x_hover = "Duration (x): %{x:.2f} h"
-    elif x_field == "size_gb":
-        x_hover = "Size (x): %{x:.2f} GB"
-    elif x_field == "tasks":
-        x_hover = "Tasks (x): %{x:,}"
-    elif x_field == "subjects":
-        x_hover = "Subjects (x): %{x:,}"
-    else:
-        x_hover = "Records (x): %{x:,}"
-
-    if y_field == "subjects":
-        y_hover = "Subjects (y): %{y:,}"
-    else:
-        y_hover = "Records (y): %{y:,}"
-
-    hover = (
-        "<b>%{customdata[0]}</b>"  # dataset id
-        f"<br>{x_hover}"
-        f"<br>{y_hover}"
-        "<br>Subjects (total): %{customdata[1]:,}"
-        "<br>Records (total): %{customdata[2]:,}"
-        "<br>Tasks: %{customdata[3]:,}"
-        "<br>Channels: %{customdata[4]}"
-        "<br>Sampling: %{customdata[5]} Hz"
-        "<br>Size: %{customdata[6]}"
-        "<br>Modality: %{customdata[7]}"
-        "<br><i>Click bubble to open dataset page</i>"
-        "<extra></extra>"
-    )
-
-    required_columns = {x_field, y_field, "size_gb"}
-    d = d.replace([np.inf, -np.inf], np.nan)
-    d = d.dropna(subset=list(required_columns))
-    d = d[(d[x_field] > 0) & (d[y_field] > 0)]
-
-    d["dataset_url"] = d["dataset"].apply(get_dataset_url)
-
-    if d.empty:
-        out_path = Path(out_html)
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-        no_data_html = """
-<div class="dataset-loading" id="dataset-loading">No dataset records available for plotting.</div>
-"""
-        with open(str(out_path), "w", encoding="utf-8") as f:
-            f.write(no_data_html)
-        return str(out_path)
-
-    # Marker sizing: scale into a good visual range
-    size_max = d["size_gb"].max()
-    if not np.isfinite(size_max) or size_max <= 0:
-        size_max = 1.0
-    sizeref = (2.0 * size_max) / (40.0**2)  # target ~40px max marker
-
-    # Prepare prettified strings for hover
-    def _fmt_int(v):
-        if v is None or pd.isna(v):
-            return ""
-        try:
-            return str(int(round(float(v))))
-        except Exception:
-            return str(v)
-
-    sfreq_str = d["sfreq"].map(_fmt_int)
-    nchans_str = d["nchans"].map(_fmt_int)
-
-    fig = px.scatter(
-        d,
-        x=x_field,
-        y=y_field,
-        size="size_gb",
-        color="modality_label",
-        hover_name="dataset",
-        custom_data=[
-            d["dataset"],
-            d["subjects"],
-            d["records"],
-            d["tasks"],
-            nchans_str,
-            sfreq_str,
-            d["size_bytes"].map(_fmt_size),
-            d["modality_label"],
-            d["dataset_url"],
-        ],
-        size_max=40,
-        labels={
-            y_field: y_label,
-            "modality_label": "Modality",
-            x_field: x_label,
-        },
-        color_discrete_map=MODALITY_COLOR_MAP,
-        title="",
-        category_orders={
-            "modality_label": [
-                label
-                for label in MODALITY_COLOR_MAP.keys()
-                if label in d["modality_label"].unique()
-            ]
-        },
-        log_x=True,
-        log_y=True,
-    )
-
-    # Add a log-log regression fit line and R² annotation when data permits
-    fit_annotation_text = None
-    numeric_x = pd.to_numeric(d[x_field], errors="coerce")
-    numeric_y = pd.to_numeric(d[y_field], errors="coerce")
-    mask = (
-        np.isfinite(numeric_x)
-        & np.isfinite(numeric_y)
-        & (numeric_x > 0)
-        & (numeric_y > 0)
-    )
-
-    if mask.sum() >= 2:
-        log_x = np.log10(numeric_x[mask])
-        log_y = np.log10(numeric_y[mask])
-        ss_tot = np.sum((log_y - log_y.mean()) ** 2)
-        if np.ptp(log_x) > 0 and np.ptp(log_y) > 0 and ss_tot > 0:
-            slope, intercept = np.polyfit(log_x, log_y, 1)
-            line_log_x = np.linspace(log_x.min(), log_x.max(), 200)
-            line_x = 10**line_log_x
-            line_y = 10 ** (slope * line_log_x + intercept)
-            fig.add_trace(
-                go.Scatter(
-                    x=line_x,
-                    y=line_y,
-                    mode="lines",
-                    name="log-log fit",
-                    line=dict(color="#111827", width=2, dash="dot"),
-                    hoverinfo="skip",
-                    showlegend=False,
-                )
-            )
-            residuals = log_y - (slope * log_x + intercept)
-            r_squared = 1 - np.sum(residuals**2) / ss_tot
-            fit_annotation_text = f"log-log OLS fit R² = {r_squared:.3f}"
-
-    # tune marker sizing explicitly for better control
-    for tr in fig.data:
-        mode = getattr(tr, "mode", "") or ""
-        if "markers" not in mode:
-            continue
-        tr.marker.update(
-            sizemin=6,
-            sizemode="area",
-            sizeref=sizeref,
-            line=dict(width=0.6, color="rgba(0,0,0,0.3)"),
-            opacity=0.75,
-        )
-        tr.hovertemplate = hover
-
-    plot_width = 1280
-    plot_height = 720
-
-    fig.update_layout(
-        height=plot_height,
-        width=plot_width,  # Landscape orientation
-        margin=dict(l=60, r=40, t=80, b=60),
-        template="plotly_white",
-        legend=dict(
-            title="Modality",
-            orientation="h",
-            yanchor="bottom",
-            y=1.02,
-            xanchor="right",
-            x=0.99,
-        ),
-        font=dict(
-            family="Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif",
-            size=14,
-        ),
-        title=dict(
-            text="",
-            x=0.01,
-            xanchor="left",
-            y=0.98,
-            yanchor="top",
-            pad=dict(t=10, b=8),
-        ),
-        autosize=True,  # Enable auto-sizing to fill container
-    )
-
-    if fit_annotation_text:
-        fig.add_annotation(
-            xref="paper",
-            yref="paper",
-            x=0.02,
-            y=0.98,
-            text=fit_annotation_text,
-            showarrow=False,
-            font=dict(size=15, color="#111827"),
-            bgcolor="rgba(255,255,255,0.75)",
-            bordercolor="rgba(17,24,39,0.25)",
-            borderwidth=1,
-            borderpad=6,
-        )
-
-    fig.update_xaxes(
-        showgrid=True,
-        gridcolor="rgba(0,0,0,0.12)",
-        zeroline=False,
-        type="log",
-        dtick=1,
-    )
-    fig.update_yaxes(
-        showgrid=True,
-        gridcolor="rgba(0,0,0,0.12)",
-        zeroline=False,
-        type="log",
-        dtick=1,
-    )
-
-    out_path = Path(out_html)
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    # Add CSS and loading indicator for immediate proper sizing
-    html_content = fig.to_html(
-        full_html=False,
-        include_plotlyjs=False,
-        div_id="dataset-bubble",
-        config={
-            "responsive": True,
-            "displaylogo": False,
-            "modeBarButtonsToRemove": ["lasso2d", "select2d"],
-            "toImageButtonOptions": {
-                "format": "png",
-                "filename": "dataset_landscape",
-                "height": plot_height,
-                "width": plot_width,
-                "scale": 2,
-            },
-        },
-    )
-
-    # Wrap with styling to ensure proper initial sizing
-    styled_html = f"""
-<style>
-#dataset-bubble {{
-    width: 100% !important;
-    max-width: {plot_width}px;
-    height: {plot_height}px !important;
-    min-height: {plot_height}px;
-    margin: 0 auto;
-}}
-#dataset-bubble .plotly-graph-div {{
-    width: 100% !important;
-    height: 100% !important;
-}}
-.dataset-loading {{
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    height: {plot_height}px;
-    font-family: Inter, system-ui, sans-serif;
-    color: #6b7280;
-}}
-</style>
-<div class="dataset-loading" id="dataset-loading">Loading dataset landscape...</div>
-{html_content}
-<script>
-// Hide loading indicator once plot is rendered and make bubbles clickable
-document.addEventListener('DOMContentLoaded', function() {{
-    const loading = document.getElementById('dataset-loading');
-    const plot = document.getElementById('dataset-bubble');
-
-    function showPlot() {{
-        if (loading) {{
-            loading.style.display = 'none';
-        }}
-        if (plot) {{
-            plot.style.display = 'block';
-        }}
-    }}
-
-    function hookPlotlyClick(attempts) {{
-        if (!plot || typeof plot.on !== 'function') {{
-            if (attempts < 40) {{
-                window.setTimeout(function() {{ hookPlotlyClick(attempts + 1); }}, 60);
-            }}
-            return;
-        }}
-        plot.on('plotly_click', function(evt) {{
-            const point = evt && evt.points && evt.points[0];
-            const url = point && point.customdata && point.customdata[8];
-            if (url) {{
-                window.open(url, '_blank', 'noopener');
-            }}
-        }});
-        showPlot();
-    }}
-
-    hookPlotlyClick(0);
-    showPlot();
-}});
-</script>
-"""
-
-    with open(str(out_path), "w", encoding="utf-8") as f:
-        f.write(styled_html)
-    return str(out_path)
-
-
-def human_readable_size(num_bytes: int) -> str:
-    """Format bytes using the closest unit among MB, GB, TB (fallback to KB/B).
-
-    Chooses the largest unit such that the value is >= 1. Uses base 1024.
-    """
-    if num_bytes is None:
-        return "0 B"
-    size = float(num_bytes)
-    units = [
-        (1024**4, "TB"),
-        (1024**3, "GB"),
-        (1024**2, "MB"),
-        (1024**1, "KB"),
-        (1, "B"),
-    ]
-    for factor, unit in units:
-        if size >= factor:
-            value = size / factor
-            # Use no decimals for B/KB; two decimals otherwise
-            if unit in ("B", "KB"):
-                return f"{int(round(value))} {unit}"
-            return f"{value:.2f} {unit}"
-    return "0 B"
-
-
-def get_dataset_url(name: str) -> str:
-    """Generate dataset URL for plots (relative to dataset summary page)."""
-    if name is None or (isinstance(name, float) and pd.isna(name)):
-        return ""
-    text = str(name).strip()
-    if not text:
-        return ""
-    return f"../../api/dataset/eegdash.dataset.{text.upper()}.html"
-
-
 def wrap_dataset_name(name: str):
     # Remove any surrounding whitespace
     name = name.strip()
@@ -658,10 +160,13 @@ def main(source_dir: str, target_dir: str):
             f, index_col=False, header=0, skipinitialspace=True
         )  # , sep=";")
         # Generate bubble chart from the raw data to have access to size_bytes
-        # Use x-axis as number of subjects so participant counts lead the story
         bubble_path = target_dir / "dataset_bubble.html"
-        gen_datasets_bubble(df_raw, str(bubble_path), x_var="subjects")
-        copyfile(bubble_path, STATIC_DATASET_DIR / bubble_path.name)
+        bubble_output = generate_dataset_bubble(
+            df_raw,
+            bubble_path,
+            x_var="subjects",
+        )
+        copyfile(bubble_output, STATIC_DATASET_DIR / bubble_output.name)
 
         df = prepare_table(df_raw)
         # preserve int values
@@ -716,282 +221,10 @@ def main(source_dir: str, target_dir: str):
 
         # Generate KDE ridgeline plot for modality participant distributions
         try:
-            d_modal = df_raw[df_raw["dataset"].str.lower() != "test"].copy()
-            d_modal["modality_label"] = d_modal["modality of exp"].apply(
-                _primary_modality
-            )
-            d_modal["n_subjects"] = pd.to_numeric(
-                d_modal["n_subjects"], errors="coerce"
-            )
-            d_modal = d_modal.dropna(subset=["n_subjects"])
-
-            # Filter out "Other" modality
-            d_modal = d_modal[d_modal["modality_label"] != "Other"]
-
-            # Calculate median participants per modality and reorder ascending
-            median_participants = (
-                d_modal.groupby("modality_label")["n_subjects"].median().sort_values()
-            )
-            order = [
-                label
-                for label in median_participants.index
-                if label in d_modal["modality_label"].unique()
-            ]
-
-            fig_kde = go.Figure()
-            rng = np.random.default_rng(42)
-            amplitude = 0.6
-            row_spacing = 0.95
-
-            for idx, label in enumerate(order):
-                subset = d_modal[d_modal["modality_label"] == label].copy()
-                vals = subset["n_subjects"].astype(float).dropna()
-                if len(vals) < 3:
-                    continue
-                # Generate URLs for datasets in this modality
-                subset["dataset_url"] = subset["dataset"].apply(get_dataset_url)
-                log_vals = np.log10(vals)
-                grid = np.linspace(log_vals.min() - 0.25, log_vals.max() + 0.25, 240)
-                kde = gaussian_kde(log_vals)
-                density = kde(grid)
-                if density.max() <= 0:
-                    continue
-                density_norm = density / density.max()
-                baseline = idx * row_spacing
-                y_curve = baseline + density_norm * amplitude
-                x_curve = 10**grid
-
-                color = MODALITY_COLOR_MAP.get(label, "#6b7280")
-                fill = _hex_to_rgba(color, 0.28)
-
-                fig_kde.add_trace(
-                    go.Scatter(
-                        x=np.concatenate([x_curve, x_curve[::-1]]),
-                        y=np.concatenate([y_curve, np.full_like(y_curve, baseline)]),
-                        name=label,
-                        fill="toself",
-                        fillcolor=fill,
-                        line=dict(color="rgba(0,0,0,0)"),
-                        hoverinfo="skip",
-                        showlegend=False,
-                    )
-                )
-
-                fig_kde.add_trace(
-                    go.Scatter(
-                        x=x_curve,
-                        y=y_curve,
-                        mode="lines",
-                        name=label,
-                        line=dict(color=color, width=2),
-                        hovertemplate=f"<b>{label}</b><br>#Participants: %{{x:.0f}}<extra></extra>",
-                        showlegend=False,
-                    )
-                )
-
-                jitter = rng.uniform(0.02, amplitude * 0.5, size=len(vals))
-                median_val = float(median_participants.get(label, np.nan))
-
-                # Prepare custom data with dataset names and URLs
-                custom_data = np.column_stack(
-                    [subset["dataset"].to_numpy(), subset["dataset_url"].to_numpy()]
-                )
-                fig_kde.add_trace(
-                    go.Scatter(
-                        x=vals,
-                        y=np.full_like(vals, baseline) + jitter,
-                        mode="markers",
-                        name=label,
-                        marker=dict(color=color, size=8, opacity=0.6),
-                        customdata=custom_data,
-                        hovertemplate="<b><a href='%{customdata[1]}' target='_parent'>%{customdata[0]}</a></b><br>#Participants: %{x}<br><i>Click to view dataset details</i><extra></extra>",
-                        showlegend=False,
-                    )
-                )
-
-                if np.isfinite(median_val) and median_val > 0:
-                    fig_kde.add_trace(
-                        go.Scatter(
-                            x=[median_val, median_val],
-                            y=[baseline, baseline + amplitude],
-                            mode="lines",
-                            line=dict(color=color, width=2, dash="dash"),
-                            hovertemplate=(
-                                f"<b>{label}</b><br>Median participants: {median_val:.0f}<extra></extra>"
-                            ),
-                            showlegend=False,
-                        )
-                    )
-
-            if fig_kde.data:
-                fig_kde.update_layout(
-                    height=max(650, 140 * len(order)),
-                    width=1200,  # Set explicit width for consistent sizing
-                    template="plotly_white",
-                    xaxis=dict(
-                        type="log",
-                        title=dict(
-                            text="Number of Participants (Log Scale)",
-                            font=dict(size=18),
-                        ),
-                        showgrid=True,
-                        gridcolor="rgba(0,0,0,0.08)",
-                        zeroline=False,
-                        dtick=1,
-                        minor=dict(showgrid=True, gridcolor="rgba(0,0,0,0.04)"),
-                        tickfont=dict(size=14),
-                    ),
-                    yaxis=dict(
-                        title=dict(text="Modality", font=dict(size=18)),
-                        tickmode="array",
-                        tickvals=[idx * row_spacing for idx in range(len(order))],
-                        ticktext=order,
-                        showgrid=False,
-                        range=[
-                            -0.25,
-                            max(
-                                0.35, (len(order) - 1) * row_spacing + amplitude + 0.25
-                            ),
-                        ],
-                        tickfont=dict(size=14),
-                    ),
-                    showlegend=False,
-                    margin=dict(l=120, r=40, t=108, b=80),
-                    title=dict(
-                        text=f"<br><sub>Based on a EEG-Dash Datasets avaliables at {datetime.now().strftime('%d/%m/%Y')}.</sub>",
-                        x=0.5,
-                        xanchor="center",
-                        y=0.98,
-                        yanchor="top",
-                        font=dict(size=20),
-                    ),
-                    autosize=True,  # Enable auto-sizing to fill container
-                    font=dict(size=16),
-                )
-
-                # Add annotation highlighting Visual distribution
-                fig_kde.add_annotation(
-                    xref="paper",
-                    yref="paper",
-                    x=0.98,
-                    y=0.02,
-                    text="Visual studies consistently use the<br>largest sample sizes, typically 20-30 participants",
-                    showarrow=False,
-                    font=dict(size=14, color="#111827"),
-                    bgcolor="rgba(255,255,255,0.9)",
-                    bordercolor="rgba(17,24,39,0.3)",
-                    borderwidth=1,
-                    borderpad=8,
-                    xanchor="right",
-                    yanchor="bottom",
-                )
-                # Add CSS and loading indicator for immediate proper sizing
-                kde_height = max(650, 150 * len(order))
-                plot_config = {
-                    "responsive": True,
-                    "displaylogo": False,
-                    "modeBarButtonsToRemove": ["lasso2d", "select2d"],
-                    "toImageButtonOptions": {
-                        "format": "png",
-                        "filename": "participant_kde",
-                        "height": kde_height,
-                        "width": 1200,
-                        "scale": 2,
-                    },
-                }
-                fig_spec = fig_kde.to_plotly_json()
-                data_json = json.dumps(fig_spec.get("data", []), cls=PlotlyJSONEncoder)
-                layout_json = json.dumps(
-                    fig_spec.get("layout", {}), cls=PlotlyJSONEncoder
-                )
-                config_json = json.dumps(plot_config, cls=PlotlyJSONEncoder)
-
-                # Wrap with styling to ensure proper initial sizing and defer Plotly rendering
-                styled_html = f"""
-<style>
-#dataset-kde-modalities {{
-    width: 100% !important;
-    max-width: 1200px;
-    height: {kde_height}px !important;
-    min-height: {kde_height}px;
-    margin: 0 auto;
-    display: none;
-}}
-#dataset-kde-modalities.plotly-graph-div {{
-    width: 100% !important;
-    height: 100% !important;
-}}
-.kde-loading {{
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    height: {kde_height}px;
-    font-family: Inter, system-ui, sans-serif;
-    color: #6b7280;
-}}
-</style>
-<div class="kde-loading" id="kde-loading">Loading participant distribution...</div>
-<div id="dataset-kde-modalities" class="plotly-graph-div"></div>
-<script>
-(function() {{
-  const TARGET_ID = 'dataset-kde-modalities';
-  const FIG_DATA = {data_json};
-  const FIG_LAYOUT = {layout_json};
-  const FIG_CONFIG = {config_json};
-
-  function onReady(callback) {{
-    if (document.readyState === 'loading') {{
-      document.addEventListener('DOMContentLoaded', callback, {{ once: true }});
-    }} else {{
-      callback();
-    }}
-  }}
-
-  function renderPlot() {{
-    const container = document.getElementById(TARGET_ID);
-    if (!container) {{
-      return;
-    }}
-
-    const draw = () => {{
-      if (!window.Plotly) {{
-        window.requestAnimationFrame(draw);
-        return;
-      }}
-
-      window.Plotly.newPlot(TARGET_ID, FIG_DATA, FIG_LAYOUT, FIG_CONFIG).then((plot) => {{
-        const loading = document.getElementById('kde-loading');
-        if (loading) {{
-          loading.style.display = 'none';
-        }}
-        container.style.display = 'block';
-
-        plot.on('plotly_click', (event) => {{
-          const point = event.points && event.points[0];
-          if (!point || !point.customdata) {{
-            return;
-          }}
-          const url = point.customdata[1];
-          if (url) {{
-            const resolved = new URL(url, window.location.href);
-            window.open(resolved.href, '_self');
-          }}
-        }});
-      }});
-    }};
-
-    draw();
-  }}
-
-  onReady(renderPlot);
-}})();
-</script>
-"""
-
-                kde_path = Path(target_dir) / "dataset_kde_modalities.html"
-                with open(kde_path, "w", encoding="utf-8") as f:
-                    f.write(styled_html)
-                copyfile(kde_path, STATIC_DATASET_DIR / kde_path.name)
+            kde_path = target_dir / "dataset_kde_modalities.html"
+            kde_output = generate_modality_ridgeline(df_raw, kde_path)
+            if kde_output:
+                copyfile(kde_output, STATIC_DATASET_DIR / kde_output.name)
         except Exception as exc:
             print(f"[dataset KDE] Skipped due to error: {exc}")
 

From 73a54b5e3b1faff72f8d4813e9bd0a2b0a820f3f Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 22:34:27 +0200
Subject: [PATCH 22/30] fixing small details

---
 docs/plot_dataset/__init__.py          |   1 +
 docs/plot_dataset/plot_sankey.py       | 111 +++++++++-
 docs/prepare_summary_tables.py         |  14 +-
 docs/source/dataset_summary.rst        |   4 +
 docs/source/dataset_summary/sankey.rst |  20 ++
 docs/source/dataset_summary/table.rst  | 293 +++++++++++++------------
 6 files changed, 287 insertions(+), 156 deletions(-)
 create mode 100644 docs/source/dataset_summary/sankey.rst

diff --git a/docs/plot_dataset/__init__.py b/docs/plot_dataset/__init__.py
index a258d27f..85942823 100644
--- a/docs/plot_dataset/__init__.py
+++ b/docs/plot_dataset/__init__.py
@@ -9,4 +9,5 @@
     TYPE_COLOR_MAP,
     hex_to_rgba,
 )
+from .plot_sankey import generate_dataset_sankey  # noqa: F401
 from .ridgeline import generate_modality_ridgeline  # noqa: F401
diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py
index f33b6534..a6c163eb 100644
--- a/docs/plot_dataset/plot_sankey.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -22,16 +22,10 @@
     from colours import CANONICAL_MAP, COLUMN_COLOR_MAPS, hex_to_rgba
 
 DEFAULT_COLUMNS = ["Type Subject", "modality of exp", "type of exp"]
+__all__ = ["generate_dataset_sankey", "build_sankey"]
 
 
-def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
-    df = pd.read_csv(
-        path,
-        index_col=False,
-        header=0,
-        skipinitialspace=True,
-    )
-    # Ensure n_subjects is read, as it's needed for weighting
+def _prepare_dataframe(df: pd.DataFrame, columns: Sequence[str]) -> pd.DataFrame:
     all_columns = list(columns)
     if "n_subjects" not in all_columns:
         all_columns.append("n_subjects")
@@ -55,7 +49,7 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
         cleaned[col] = cleaned[col].fillna("Unknown")
 
         # 2. Split multi-valued cells
-        cleaned[col] = cleaned[col].astype(str).str.split("/|;|,")
+        cleaned[col] = cleaned[col].astype(str).str.split(r"/|;|,", regex=True)
         cleaned = cleaned.explode(col)
 
         # 3. Clean up whitespace and any empty strings created by splitting
@@ -77,6 +71,16 @@ def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
     return cleaned[all_columns]
 
 
+def _load_dataframe(path: Path, columns: Sequence[str]) -> pd.DataFrame:
+    """Read the CSV at *path* and pass it through ``_prepare_dataframe``."""
+    # First row is the header, no column is promoted to the index, and
+    # whitespace following each delimiter is skipped.
+    csv_options = {"index_col": False, "header": 0, "skipinitialspace": True}
+    raw = pd.read_csv(path, **csv_options)
+    prepared = _prepare_dataframe(raw, columns)
+    return prepared
+
+
 def _build_sankey_data(df: pd.DataFrame, columns: Sequence[str]):
     node_labels: list[str] = []
     node_colors: list[str] = []
@@ -268,6 +272,95 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
     return fig
 
 
+def generate_dataset_sankey(
+    df: pd.DataFrame,
+    out_html: str | Path,
+    *,
+    columns: Sequence[str] | None = None,
+) -> Path:
+    """Generate the dataset Sankey diagram and write it to *out_html*."""
+    selected_columns = list(columns) if columns is not None else list(DEFAULT_COLUMNS)
+    prepared = _prepare_dataframe(df, selected_columns)
+    fig = build_sankey(prepared, selected_columns)
+
+    out_path = Path(out_html)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    html_content = fig.to_html(
+        full_html=False,
+        include_plotlyjs=False,
+        div_id="dataset-sankey",
+        config={
+            "responsive": True,
+            "displaylogo": False,
+            "modeBarButtonsToRemove": ["lasso2d", "select2d"],
+        },
+    )
+
+    styled_html = f"""
+<style>
+#dataset-sankey {{
+    width: 100% !important;
+    max-width: 1200px;
+    height: 640px !important;
+    min-height: 640px;
+    margin: 0 auto;
+    display: none;
+}}
+#dataset-sankey.plotly-graph-div {{
+    width: 100% !important;
+    height: 100% !important;
+}}
+.sankey-loading {{
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    height: 640px;
+    font-family: Inter, system-ui, sans-serif;
+    color: #6b7280;
+}}
+</style>
+<div class="sankey-loading" id="sankey-loading">Loading dataset flow...</div>
+{html_content}
+<script>
+document.addEventListener('DOMContentLoaded', function() {{
+    const loading = document.getElementById('sankey-loading');
+    const plot = document.getElementById('dataset-sankey');
+
+    function showPlot() {{
+        if (loading) {{
+            loading.style.display = 'none';
+        }}
+        if (plot) {{
+            plot.style.display = 'block';
+        }}
+    }}
+
+    function waitForPlot(attempts) {{
+        if (!plot) {{
+            return;
+        }}
+        if (typeof plot.on === 'function') {{
+            showPlot();
+            return;
+        }}
+        if (attempts > 40) {{
+            showPlot();
+            return;
+        }}
+        window.setTimeout(function() {{ waitForPlot(attempts + 1); }}, 80);
+    }}
+
+    waitForPlot(0);
+    window.setTimeout(showPlot, 1200);
+}});
+</script>
+"""
+
+    out_path.write_text(styled_html, encoding="utf-8")
+    return out_path
+
+
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Generate a Sankey diagram from the dataset summary CSV."
diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index 51085274..e1980c73 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -6,7 +6,11 @@
 
 import numpy as np
 import pandas as pd
-from plot_dataset import generate_dataset_bubble, generate_modality_ridgeline
+from plot_dataset import (
+    generate_dataset_bubble,
+    generate_dataset_sankey,
+    generate_modality_ridgeline,
+)
 from plot_dataset.utils import get_dataset_url, human_readable_size
 from table_tag_utils import wrap_tags
 
@@ -168,6 +172,14 @@ def main(source_dir: str, target_dir: str):
         )
         copyfile(bubble_output, STATIC_DATASET_DIR / bubble_output.name)
 
+        # Generate Sankey diagram showing dataset flow across categories
+        try:
+            sankey_path = target_dir / "dataset_sankey.html"
+            sankey_output = generate_dataset_sankey(df_raw, sankey_path)
+            copyfile(sankey_output, STATIC_DATASET_DIR / sankey_output.name)
+        except Exception as exc:
+            print(f"[dataset Sankey] Skipped due to error: {exc}")
+
         df = prepare_table(df_raw)
         # preserve int values
         df["n_subjects"] = df["n_subjects"].astype(int)
diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst
index 642fbb2e..63abefeb 100644
--- a/docs/source/dataset_summary.rst
+++ b/docs/source/dataset_summary.rst
@@ -29,6 +29,10 @@ To leverage recent and ongoing advancements in large-scale computational methods
 
       .. include:: dataset_summary/kde.rst
 
+   .. tab-item:: Dataset Flow
+
+      .. include:: dataset_summary/sankey.rst
+
    .. tab-item:: Landscape
 
       .. include:: dataset_summary/bubble.rst
diff --git a/docs/source/dataset_summary/sankey.rst b/docs/source/dataset_summary/sankey.rst
new file mode 100644
index 00000000..3403ab62
--- /dev/null
+++ b/docs/source/dataset_summary/sankey.rst
@@ -0,0 +1,20 @@
+.. title:: Dataset flow
+
+.. rubric:: Dataset flow
+
+.. raw:: html
+
+   <figure class="eegdash-figure" style="margin: 0 0 1.25rem 0;">
+
+.. raw:: html
+   :file: ../_static/dataset_generated/dataset_sankey.html
+
+.. raw:: html
+
+   <figcaption class="eegdash-caption">
+     Figure: Dataset flow across population, modality, and cognitive domain.
+     Link thickness is proportional to the total number of subjects, and the tooltip
+     reports both subject and dataset counts. Hover and click legend entries to
+     explore specific segments.
+   </figcaption>
+   </figure>
diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst
index 3975b97f..542a87b5 100644
--- a/docs/source/dataset_summary/table.rst
+++ b/docs/source/dataset_summary/table.rst
@@ -25,151 +25,152 @@ In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMA
    </figure>
 
 Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance.
+
 .. raw:: html
 
-  <!-- jQuery + DataTables core -->
-  <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
-  <link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
-  <script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
-
-  <!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
-  <link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
-  <script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
-  <link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
-  <link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
-  <script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
-  <script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
-
-  <style>
-    /* Styling for the Total row (placed in tfoot) */
-    table.sd-table tfoot td {
-      font-weight: 600;
-      border-top: 2px solid rgba(0,0,0,0.2);
-      background: #f9fafb;
-      /* Match body cell padding to keep perfect alignment */
-      padding: 8px 10px !important;
-      vertical-align: middle;
-    }
-
-    /* Right-align numeric-like columns (2..8) consistently for body & footer */
-    table.sd-table tbody td:nth-child(n+2),
-    table.sd-table tfoot td:nth-child(n+2) {
-      text-align: right;
-    }
-    /* Keep first column (Dataset/Total) left-aligned */
-    table.sd-table tbody td:first-child,
-    table.sd-table tfoot td:first-child {
-      text-align: left;
-    }
-  </style>
-
-  <script>
-  // Helper: robustly extract values for SearchPanes when needed
-  function tagsArrayFromHtml(html) {
-    if (html == null) return [];
-    // If it's numeric or plain text, just return as a single value
-    if (typeof html === 'number') return [String(html)];
-    if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
-    // Else parse any .tag elements inside HTML
-    var tmp = document.createElement('div');
-    tmp.innerHTML = html;
-    var tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
-      return (el.textContent || '').trim();
-    });
-    return tags.length ? tags : [tmp.textContent.trim()];
-  }
-
-  // Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
-  function parseSizeToBytes(text) {
-    if (!text) return 0;
-    var s = String(text).trim();
-    var m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
-    if (!m) return 0;
-    var value = parseFloat(m[1].replace(/,/g, ''));
-    var unit = m[2].toUpperCase();
-    var factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
-    return value * factor;
-  }
-
-  $(function () {
-    var $table = $('#datasets-table');
-    if (!$table.length) {
-      return;
-    }
-    if ($.fn.DataTable && $.fn.DataTable.isDataTable($table[0])) {
-      return;
-    }
-
-    // 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
-    var $tbody = $table.find('tbody');
-    var $total = $tbody.find('tr').filter(function(){
-      return $(this).find('td').eq(0).text().trim() === 'Total';
-    });
-    if ($total.length) {
-      var $tfoot = $table.find('tfoot');
-      if (!$tfoot.length) $tfoot = $('<tfoot/>').appendTo($table);
-      $total.appendTo($tfoot);
-    }
-
-    // 2) Initialize DataTable with SearchPanes button
-    var FILTER_COLS = [1,2,3,4,5,6];
-    // Detect the index of the size column by header text
-    var sizeIdx = (function(){
-      var idx = -1;
-      $table.find('thead th').each(function(i){
-        var t = $(this).text().trim().toLowerCase();
-        if (t === 'size on disk' || t === 'size') idx = i;
-      });
-      return idx;
-    })();
-
-    var table = $table.DataTable({
-      dom: 'Blfrtip',
-      paging: false,
-      searching: true,
-      info: false,
-      language: {
-        search: 'Filter dataset:',
-        searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
-      },
-      buttons: [{
-        extend: 'searchPanes',
-        text: 'Filters',
-        config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
-      }],
-      columnDefs: (function(){
-        var defs = [
-          { searchPanes: { show: true }, targets: FILTER_COLS }
-        ];
-        if (sizeIdx !== -1) {
-          defs.push({
-            targets: sizeIdx,
-            render: function(data, type) {
-              if (type === 'sort' || type === 'type') {
-                return parseSizeToBytes(data);
-              }
-              return data;
-            }
-          });
-        }
-        return defs;
-      })()
-    });
-
-    // 3) UX: click a header to open the relevant filter pane
-    $table.find('thead th').each(function (i) {
-      if ([1,2,3,4].indexOf(i) === -1) return;
-      $(this).css('cursor','pointer').attr('title','Click to filter this column');
-      $(this).on('click', function () {
-        table.button('.buttons-searchPanes').trigger();
-        setTimeout(function () {
-          var idx = [1,2,3,4].indexOf(i);
-          var $container = $(table.searchPanes.container());
-          var $pane = $container.find('.dtsp-pane').eq(idx);
-          var $title = $pane.find('.dtsp-title');
-          if ($title.length) $title.trigger('click');
-        }, 0);
-      });
-    });
-  });
-  </script>
+   <!-- jQuery + DataTables core -->
+   <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
+   <link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
+   <script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
+
+   <!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
+   <link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
+   <script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
+   <link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
+   <link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
+   <script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
+   <script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
+
+   <style>
+     /* Styling for the Total row (placed in tfoot) */
+     table.sd-table tfoot td {
+       font-weight: 600;
+       border-top: 2px solid rgba(0,0,0,0.2);
+       background: #f9fafb;
+       /* Match body cell padding to keep perfect alignment */
+       padding: 8px 10px !important;
+       vertical-align: middle;
+     }
+
+     /* Right-align numeric-like columns (2..8) consistently for body & footer */
+     table.sd-table tbody td:nth-child(n+2),
+     table.sd-table tfoot td:nth-child(n+2) {
+       text-align: right;
+     }
+     /* Keep first column (Dataset/Total) left-aligned */
+     table.sd-table tbody td:first-child,
+     table.sd-table tfoot td:first-child {
+       text-align: left;
+     }
+   </style>
+
+   <script>
+   // Helper: robustly extract values for SearchPanes when needed
+   function tagsArrayFromHtml(html) {
+     if (html == null) return [];
+     // If it's numeric or plain text, just return as a single value
+     if (typeof html === 'number') return [String(html)];
+     if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
+     // Else parse any .tag elements inside HTML
+     var tmp = document.createElement('div');
+     tmp.innerHTML = html;
+     var tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
+       return (el.textContent || '').trim();
+     });
+     return tags.length ? tags : [tmp.textContent.trim()];
+   }
+
+   // Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
+   function parseSizeToBytes(text) {
+     if (!text) return 0;
+     var s = String(text).trim();
+     var m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
+     if (!m) return 0;
+     var value = parseFloat(m[1].replace(/,/g, ''));
+     var unit = m[2].toUpperCase();
+     var factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
+     return value * factor;
+   }
+
+   $(function () {
+     var $table = $('#datasets-table');
+     if (!$table.length) {
+       return;
+     }
+     if ($.fn.DataTable && $.fn.DataTable.isDataTable($table[0])) {
+       return;
+     }
+
+     // 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
+     var $tbody = $table.find('tbody');
+     var $total = $tbody.find('tr').filter(function(){
+       return $(this).find('td').eq(0).text().trim() === 'Total';
+     });
+     if ($total.length) {
+       var $tfoot = $table.find('tfoot');
+       if (!$tfoot.length) $tfoot = $('<tfoot/>').appendTo($table);
+       $total.appendTo($tfoot);
+     }
+
+     // 2) Initialize DataTable with SearchPanes button
+     var FILTER_COLS = [1,2,3,4,5,6];
+     // Detect the index of the size column by header text
+     var sizeIdx = (function(){
+       var idx = -1;
+       $table.find('thead th').each(function(i){
+         var t = $(this).text().trim().toLowerCase();
+         if (t === 'size on disk' || t === 'size') idx = i;
+       });
+       return idx;
+     })();
+
+     var table = $table.DataTable({
+       dom: 'Blfrtip',
+       paging: false,
+       searching: true,
+       info: false,
+       language: {
+         search: 'Filter dataset:',
+         searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
+       },
+       buttons: [{
+         extend: 'searchPanes',
+         text: 'Filters',
+         config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
+       }],
+       columnDefs: (function(){
+         var defs = [
+           { searchPanes: { show: true }, targets: FILTER_COLS }
+         ];
+         if (sizeIdx !== -1) {
+           defs.push({
+             targets: sizeIdx,
+             render: function(data, type) {
+               if (type === 'sort' || type === 'type') {
+                 return parseSizeToBytes(data);
+               }
+               return data;
+             }
+           });
+         }
+         return defs;
+       })()
+     });
+
+     // 3) UX: click a header to open the relevant filter pane
+     $table.find('thead th').each(function (i) {
+       if ([1,2,3,4].indexOf(i) === -1) return;
+       $(this).css('cursor','pointer').attr('title','Click to filter this column');
+       $(this).on('click', function () {
+         table.button('.buttons-searchPanes').trigger();
+         setTimeout(function () {
+           var idx = [1,2,3,4].indexOf(i);
+           var $container = $(table.searchPanes.container());
+           var $pane = $container.find('.dtsp-pane').eq(idx);
+           var $title = $pane.find('.dtsp-title');
+           if ($title.length) $title.trigger('click');
+         }, 0);
+       });
+     });
+   });
+   </script>

From c7ea66924d3eb063e5060d92bd0bfd2c335b93f5 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 22:40:53 +0200
Subject: [PATCH 23/30] make the Sankey container sizing responsive

---
 docs/plot_dataset/plot_sankey.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py
index a6c163eb..f2506ec0 100644
--- a/docs/plot_dataset/plot_sankey.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -300,25 +300,37 @@ def generate_dataset_sankey(
     styled_html = f"""
 <style>
 #dataset-sankey {{
-    width: 100% !important;
-    max-width: 1200px;
-    height: 640px !important;
-    min-height: 640px;
+    width: min(1400px, calc(100vw - 4rem));
+    max-width: 100%;
+    height: clamp(640px, 60vw, 820px);
+    min-height: 560px;
     margin: 0 auto;
     display: none;
 }}
 #dataset-sankey.plotly-graph-div {{
     width: 100% !important;
     height: 100% !important;
+    min-height: inherit;
 }}
 .sankey-loading {{
     display: flex;
     justify-content: center;
     align-items: center;
-    height: 640px;
+    height: clamp(640px, 60vw, 820px);
     font-family: Inter, system-ui, sans-serif;
     color: #6b7280;
 }}
+
+@media (max-width: 768px) {{
+    #dataset-sankey {{
+        width: calc(100vw - 2rem);
+        height: 520px;
+        min-height: 520px;
+    }}
+    .sankey-loading {{
+        height: 520px;
+    }}
+}}
 </style>
 <div class="sankey-loading" id="sankey-loading">Loading dataset flow...</div>
 {html_content}

From 25d36fa98729cee31045d2ecf0134bb610769c9d Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 22:47:40 +0200
Subject: [PATCH 24/30] updating the title

---
 docs/source/dataset_summary/sankey.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/dataset_summary/sankey.rst b/docs/source/dataset_summary/sankey.rst
index 3403ab62..66304778 100644
--- a/docs/source/dataset_summary/sankey.rst
+++ b/docs/source/dataset_summary/sankey.rst
@@ -1,6 +1,6 @@
 .. title:: Dataset flow
 
-.. rubric:: Dataset flow
+.. rubric:: Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain
 
 .. raw:: html
 

From 4c4fdcb12e2ae978201e637a53bd361d955965e2 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 22:47:50 +0200
Subject: [PATCH 25/30] removing the Plotly figure title from the Sankey layout

---
 docs/plot_dataset/plot_sankey.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py
index f2506ec0..511b920b 100644
--- a/docs/plot_dataset/plot_sankey.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -225,7 +225,6 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
     fig = go.Figure(sankey)
 
     fig.update_layout(
-        title_text="Sankey diagrams of EEGDash Datasets by Population, Modality, and Cognitive Domain",
         font=dict(size=14),
         margin=dict(b=100),  # Add bottom margin to make space for the note
         annotations=[

From 7d6a6a88e1510dde8f047b8477954a92b19f7ed3 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 23:05:28 +0200
Subject: [PATCH 26/30] drop the Sankey CSS/JS wrapper and set the figure height

---
 docs/plot_dataset/plot_sankey.py | 79 ++------------------------------
 1 file changed, 5 insertions(+), 74 deletions(-)

diff --git a/docs/plot_dataset/plot_sankey.py b/docs/plot_dataset/plot_sankey.py
index 511b920b..fb41a3a8 100644
--- a/docs/plot_dataset/plot_sankey.py
+++ b/docs/plot_dataset/plot_sankey.py
@@ -226,7 +226,10 @@ def build_sankey(df: pd.DataFrame, columns: Sequence[str]) -> go.Figure:
 
     fig.update_layout(
         font=dict(size=14),
-        margin=dict(b=100),  # Add bottom margin to make space for the note
+        height=900,
+        width=None,
+        autosize=True,
+        margin=dict(t=40, b=40, l=40, r=40),
         annotations=[
             dict(
                 x=0,
@@ -296,79 +299,7 @@ def generate_dataset_sankey(
         },
     )
 
-    styled_html = f"""
-<style>
-#dataset-sankey {{
-    width: min(1400px, calc(100vw - 4rem));
-    max-width: 100%;
-    height: clamp(640px, 60vw, 820px);
-    min-height: 560px;
-    margin: 0 auto;
-    display: none;
-}}
-#dataset-sankey.plotly-graph-div {{
-    width: 100% !important;
-    height: 100% !important;
-    min-height: inherit;
-}}
-.sankey-loading {{
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    height: clamp(640px, 60vw, 820px);
-    font-family: Inter, system-ui, sans-serif;
-    color: #6b7280;
-}}
-
-@media (max-width: 768px) {{
-    #dataset-sankey {{
-        width: calc(100vw - 2rem);
-        height: 520px;
-        min-height: 520px;
-    }}
-    .sankey-loading {{
-        height: 520px;
-    }}
-}}
-</style>
-<div class="sankey-loading" id="sankey-loading">Loading dataset flow...</div>
-{html_content}
-<script>
-document.addEventListener('DOMContentLoaded', function() {{
-    const loading = document.getElementById('sankey-loading');
-    const plot = document.getElementById('dataset-sankey');
-
-    function showPlot() {{
-        if (loading) {{
-            loading.style.display = 'none';
-        }}
-        if (plot) {{
-            plot.style.display = 'block';
-        }}
-    }}
-
-    function waitForPlot(attempts) {{
-        if (!plot) {{
-            return;
-        }}
-        if (typeof plot.on === 'function') {{
-            showPlot();
-            return;
-        }}
-        if (attempts > 40) {{
-            showPlot();
-            return;
-        }}
-        window.setTimeout(function() {{ waitForPlot(attempts + 1); }}, 80);
-    }}
-
-    waitForPlot(0);
-    window.setTimeout(showPlot, 1200);
-}});
-</script>
-"""
-
-    out_path.write_text(styled_html, encoding="utf-8")
+    out_path.write_text(html_content, encoding="utf-8")
     return out_path
 
 

From ae7abada3f16ba650fbde5756394026fbd451454 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 23:06:42 +0200
Subject: [PATCH 27/30] renaming to better categorization

---
 docs/source/dataset_summary.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst
index 63abefeb..c3ef7a33 100644
--- a/docs/source/dataset_summary.rst
+++ b/docs/source/dataset_summary.rst
@@ -25,7 +25,7 @@ To leverage recent and ongoing advancements in large-scale computational methods
 
       .. include:: dataset_summary/table.rst
 
-   .. tab-item:: Participant KDE
+   .. tab-item:: Participant Distribution
 
       .. include:: dataset_summary/kde.rst
 
@@ -33,7 +33,7 @@ To leverage recent and ongoing advancements in large-scale computational methods
 
       .. include:: dataset_summary/sankey.rst
 
-   .. tab-item:: Landscape
+   .. tab-item:: Scatter of Sample Size vs. Recording Duration
 
       .. include:: dataset_summary/bubble.rst
 

From 2102e2c0fb724c0ed158ce83854828812d273dad Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 23:09:52 +0200
Subject: [PATCH 28/30] small note

---
 docs/source/index.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 01b8e41d..632007c6 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -20,11 +20,15 @@ EEG Dash Homepage
 
 
 .. rst-class:: h4 text-center font-weight-light my-4
 
 The EEG-DaSh data archive will establish a data-sharing resource for MEEG (EEG, MEG) data, enabling 
 large-scale computational advancements to preserve and share scientific data from publicly funded 
 research for machine learning and deep learning applications.
 
+.. rst-class:: text-center
+
+**Note:** The "DaSh" in EEG-DaSh stands for **Data Share**.
+
 The EEG-DaSh data archive is a collaborative effort led by the University of California, San Diego (UCSD) and Ben-Gurion University of the Negev (BGU) and partially funded by the National Science Foundation (NSF). All are welcome to contribute to the https://github.com/sccn/EEGDash project.
 
 The archive is currently still in :bdg-danger:`beta testing` mode, so be kind. 

From ad00d264ff8fa8d08b561dae6705dea19c1a5414 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 23:18:57 +0200
Subject: [PATCH 29/30] move the DataTables setup from table.rst into the generated HTML

---
 docs/prepare_summary_tables.py        | 161 +++++++++++++++++++++++++-
 docs/source/dataset_summary/table.rst | 149 ------------------------
 2 files changed, 160 insertions(+), 150 deletions(-)

diff --git a/docs/prepare_summary_tables.py b/docs/prepare_summary_tables.py
index e1980c73..033824f3 100644
--- a/docs/prepare_summary_tables.py
+++ b/docs/prepare_summary_tables.py
@@ -1,4 +1,5 @@
 import glob
+import textwrap
 from argparse import ArgumentParser
 from datetime import datetime
 from pathlib import Path
@@ -53,6 +54,163 @@ def wrap_dataset_name(name: str):
     },
 }
 
+DATA_TABLE_TEMPLATE = textwrap.dedent(
+    r"""
+<!-- jQuery + DataTables core -->
+<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
+<link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
+<script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
+
+<!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
+<link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
+<script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
+<link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
+<link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
+<script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
+<script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
+
+<style>
+    /* Styling for the Total row (placed in tfoot) */
+    table.sd-table tfoot td {
+        font-weight: 600;
+        border-top: 2px solid rgba(0,0,0,0.2);
+        background: #f9fafb;
+        /* Match body cell padding to keep perfect alignment */
+        padding: 8px 10px !important;
+        vertical-align: middle;
+    }
+
+    /* Right-align numeric-like columns (2..8) consistently for body & footer */
+    table.sd-table tbody td:nth-child(n+2),
+    table.sd-table tfoot td:nth-child(n+2) {
+        text-align: right;
+    }
+    /* Keep first column (Dataset/Total) left-aligned */
+    table.sd-table tbody td:first-child,
+    table.sd-table tfoot td:first-child {
+        text-align: left;
+    }
+</style>
+
+<TABLE_HTML>
+
+<script>
+// Helper: robustly extract values for SearchPanes when needed
+function tagsArrayFromHtml(html) {
+    if (html == null) return [];
+    // If it's numeric or plain text, just return as a single value
+    if (typeof html === 'number') return [String(html)];
+    if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
+    // Else parse any .tag elements inside HTML
+    const tmp = document.createElement('div');
+    tmp.innerHTML = html;
+    const tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
+        return (el.textContent || '').trim();
+    });
+    return tags.length ? tags : [tmp.textContent.trim()];
+}
+
+// Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
+function parseSizeToBytes(text) {
+    if (!text) return 0;
+    const s = String(text).trim();
+    const m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
+    if (!m) return 0;
+    const value = parseFloat(m[1].replace(/,/g, ''));
+    const unit = m[2].toUpperCase();
+    const factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
+    return value * factor;
+}
+
+document.addEventListener('DOMContentLoaded', function () {
+    const table = document.getElementById('datasets-table');
+    if (!table || !window.jQuery || !window.jQuery.fn || !window.jQuery.fn.DataTable) {
+        return;
+    }
+
+    const $table = window.jQuery(table);
+    if (window.jQuery.fn.DataTable.isDataTable(table)) {
+        return;
+    }
+
+    // 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
+    const $tbody = $table.find('tbody');
+    const $total = $tbody.find('tr').filter(function(){
+        return window.jQuery(this).find('td').eq(0).text().trim() === 'Total';
+    });
+    if ($total.length) {
+        let $tfoot = $table.find('tfoot');
+        if (!$tfoot.length) $tfoot = window.jQuery('<tfoot/>').appendTo($table);
+        $total.appendTo($tfoot);
+    }
+
+    // 2) Initialize DataTable with SearchPanes button
+    const FILTER_COLS = [1,2,3,4,5,6];
+    // Detect the index of the size column by header text
+    const sizeIdx = (function(){
+        let idx = -1;
+        $table.find('thead th').each(function(i){
+            const t = window.jQuery(this).text().trim().toLowerCase();
+            if (t === 'size on disk' || t === 'size') idx = i;
+        });
+        return idx;
+    })();
+
+    const dataTable = $table.DataTable({
+        dom: 'Blfrtip',
+        paging: false,
+        searching: true,
+        info: false,
+        language: {
+            search: 'Filter dataset:',
+            searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
+        },
+        buttons: [{
+            extend: 'searchPanes',
+            text: 'Filters',
+            config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
+        }],
+        columnDefs: (function(){
+            const defs = [
+                { searchPanes: { show: true }, targets: FILTER_COLS }
+            ];
+            if (sizeIdx !== -1) {
+                defs.push({
+                    targets: sizeIdx,
+                    render: function(data, type) {
+                        if (type === 'sort' || type === 'type') {
+                            return parseSizeToBytes(data);
+                        }
+                        return data;
+                    }
+                });
+            }
+            return defs;
+        })()
+    });
+
+    // 3) UX: click a header to open the relevant filter pane
+    $table.find('thead th').each(function (i) {
+        if ([1,2,3,4].indexOf(i) === -1) return;
+        window.jQuery(this)
+            .css('cursor','pointer')
+            .attr('title','Click to filter this column')
+            .on('click', function () {
+                dataTable.button('.buttons-searchPanes').trigger();
+                window.setTimeout(function () {
+                    const idx = [1,2,3,4].indexOf(i);
+                    const $container = window.jQuery(dataTable.searchPanes.container());
+                    const $pane = $container.find('.dtsp-pane').eq(idx);
+                    const $title = $pane.find('.dtsp-title');
+                    if ($title.length) $title.trigger('click');
+                }, 0);
+            });
+    });
+});
+</script>
+"""
+)
+
 
 def _tag_normalizer(kind: str):
     canonical = {k.lower(): v for k, v in DATASET_CANONICAL_MAP.get(kind, {}).items()}
@@ -226,8 +384,9 @@ def main(source_dir: str, target_dir: str):
             escape=False,
             table_id="datasets-table",
         )
+        html_table = DATA_TABLE_TEMPLATE.replace("<TABLE_HTML>", html_table)
         table_path = target_dir / "dataset_summary_table.html"
-        with open(table_path, "+w", encoding="utf-8") as f:
+        with open(table_path, "w", encoding="utf-8") as f:
             f.write(html_table)
         copyfile(table_path, STATIC_DATASET_DIR / table_path.name)
 
diff --git a/docs/source/dataset_summary/table.rst b/docs/source/dataset_summary/table.rst
index 542a87b5..b409b575 100644
--- a/docs/source/dataset_summary/table.rst
+++ b/docs/source/dataset_summary/table.rst
@@ -25,152 +25,3 @@ In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMA
    </figure>
 
 Pathology, modality, and dataset type now surface as consistent color-coded tags so you can scan the table at a glance.
-
-.. raw:: html
-
-   <!-- jQuery + DataTables core -->
-   <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
-   <link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
-   <script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
-
-   <!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
-   <link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
-   <script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
-   <link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
-   <link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
-   <script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
-   <script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
-
-   <style>
-     /* Styling for the Total row (placed in tfoot) */
-     table.sd-table tfoot td {
-       font-weight: 600;
-       border-top: 2px solid rgba(0,0,0,0.2);
-       background: #f9fafb;
-       /* Match body cell padding to keep perfect alignment */
-       padding: 8px 10px !important;
-       vertical-align: middle;
-     }
-
-     /* Right-align numeric-like columns (2..8) consistently for body & footer */
-     table.sd-table tbody td:nth-child(n+2),
-     table.sd-table tfoot td:nth-child(n+2) {
-       text-align: right;
-     }
-     /* Keep first column (Dataset/Total) left-aligned */
-     table.sd-table tbody td:first-child,
-     table.sd-table tfoot td:first-child {
-       text-align: left;
-     }
-   </style>
-
-   <script>
-   // Helper: robustly extract values for SearchPanes when needed
-   function tagsArrayFromHtml(html) {
-     if (html == null) return [];
-     // If it's numeric or plain text, just return as a single value
-     if (typeof html === 'number') return [String(html)];
-     if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
-     // Else parse any .tag elements inside HTML
-     var tmp = document.createElement('div');
-     tmp.innerHTML = html;
-     var tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
-       return (el.textContent || '').trim();
-     });
-     return tags.length ? tags : [tmp.textContent.trim()];
-   }
-
-   // Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
-   function parseSizeToBytes(text) {
-     if (!text) return 0;
-     var s = String(text).trim();
-     var m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
-     if (!m) return 0;
-     var value = parseFloat(m[1].replace(/,/g, ''));
-     var unit = m[2].toUpperCase();
-     var factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
-     return value * factor;
-   }
-
-   $(function () {
-     var $table = $('#datasets-table');
-     if (!$table.length) {
-       return;
-     }
-     if ($.fn.DataTable && $.fn.DataTable.isDataTable($table[0])) {
-       return;
-     }
-
-     // 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
-     var $tbody = $table.find('tbody');
-     var $total = $tbody.find('tr').filter(function(){
-       return $(this).find('td').eq(0).text().trim() === 'Total';
-     });
-     if ($total.length) {
-       var $tfoot = $table.find('tfoot');
-       if (!$tfoot.length) $tfoot = $('<tfoot/>').appendTo($table);
-       $total.appendTo($tfoot);
-     }
-
-     // 2) Initialize DataTable with SearchPanes button
-     var FILTER_COLS = [1,2,3,4,5,6];
-     // Detect the index of the size column by header text
-     var sizeIdx = (function(){
-       var idx = -1;
-       $table.find('thead th').each(function(i){
-         var t = $(this).text().trim().toLowerCase();
-         if (t === 'size on disk' || t === 'size') idx = i;
-       });
-       return idx;
-     })();
-
-     var table = $table.DataTable({
-       dom: 'Blfrtip',
-       paging: false,
-       searching: true,
-       info: false,
-       language: {
-         search: 'Filter dataset:',
-         searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
-       },
-       buttons: [{
-         extend: 'searchPanes',
-         text: 'Filters',
-         config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
-       }],
-       columnDefs: (function(){
-         var defs = [
-           { searchPanes: { show: true }, targets: FILTER_COLS }
-         ];
-         if (sizeIdx !== -1) {
-           defs.push({
-             targets: sizeIdx,
-             render: function(data, type) {
-               if (type === 'sort' || type === 'type') {
-                 return parseSizeToBytes(data);
-               }
-               return data;
-             }
-           });
-         }
-         return defs;
-       })()
-     });
-
-     // 3) UX: click a header to open the relevant filter pane
-     $table.find('thead th').each(function (i) {
-       if ([1,2,3,4].indexOf(i) === -1) return;
-       $(this).css('cursor','pointer').attr('title','Click to filter this column');
-       $(this).on('click', function () {
-         table.button('.buttons-searchPanes').trigger();
-         setTimeout(function () {
-           var idx = [1,2,3,4].indexOf(i);
-           var $container = $(table.searchPanes.container());
-           var $pane = $container.find('.dtsp-pane').eq(idx);
-           var $title = $pane.find('.dtsp-title');
-           if ($title.length) $title.trigger('click');
-         }, 0);
-       });
-     });
-   });
-   </script>

From 638bd3d8b37ebe3cc2966634aa5750411228fe02 Mon Sep 17 00:00:00 2001
From: bruAristimunha <b.aristimunha@gmail.com>
Date: Wed, 1 Oct 2025 23:25:43 +0200
Subject: [PATCH 30/30] removing relative path

---
 docs/plot_dataset/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/plot_dataset/utils.py b/docs/plot_dataset/utils.py
index fbce6502..2a518d69 100644
--- a/docs/plot_dataset/utils.py
+++ b/docs/plot_dataset/utils.py
@@ -99,7 +99,7 @@ def get_dataset_url(name: str) -> str:
     text = str(name).strip()
     if not text:
         return ""
-    return f"../../api/dataset/eegdash.dataset.{text.upper()}.html"
+    return f"api/dataset/eegdash.dataset.{text.upper()}.html"
 
 
 def ensure_directory(path: str | Path) -> Path: