This notebook reproduces the **Nuclear tracking and motion analysis along the radial axis** described in the manuscript.  

## Analysis Workflow
- **Preprocessing**  
  - Sample drift is corrected with MultiStackReg.
  - Nuclear radial trajectories are extracted in Fiji using the Manual Track plugin.  
  - CSV files are imported, headers cleaned, and tracks sorted by time.  
  - Derived columns include:
    - `time` (frame index × interval in minutes)
    - `index` (track identifier)
    - Interval displacements (relative to the previous frame)
    - Instantaneous speeds  

- **Filtering**  
  - **Radial fluctuations**: keep only trajectories ≥150 min; truncate to the first 150 min.  
  - **Basal migrations** (i.e., **migration from the apical surface**): keep only trajectories ≥40 min; truncate to the first 40 min.  

- **Quantifications**  
  - **Mean speed**: calculated as the total path length (sum of interval displacements) divided by total duration.  
  - **Net displacement**: displacement vectors relative to the initial position (t=0), projected onto the radial axis.

- **Variability Analysis**  
  - Net displacement traces are fit with linear regression.  
  - Root-mean-square error (RMSE) of residuals is computed to quantify fluctuation variability for both radial fluctuations and basal migrations.  

- **Outputs**  
  - Filtered and truncated trajectory tables (Excel).  
  - Net displacement, mean speed, and instantaneous speed values.  
  - Net displacement traces with regression overlays.  
  - RMSE values per nucleus (Excel summary + plots).  

In [None]:
# imports
import pandas as pd
import math
import numpy as np
import os
import seaborn as sns
from glob import glob
from pathlib import Path
import scipy.stats as stats
import itertools
from sklearn.linear_model import LinearRegression
from matplotlib.cm import get_cmap
import matplotlib.pyplot as plt
from statistics import mean 

# Section 1: Calculate the net displacement from the initial position

**NOTE:** The initial position refers to the apical point, and the net displacement is stored as `DistanceToApical` in the script.

**NOTE:** The blocks under Section 1 apply to both nuclear radial fluctuations and basal migrations.

In [None]:
# === Read data ===
# Folder holding the tracking exports; expected layout: file_path (one condition) >> .csv files
file_path = "input_folder_name"
# List the folder contents (displayed as the cell output in a notebook) to check the inputs
os.listdir(file_path)

In [None]:
pixel = 0.5303  # image resolution; multiplies pixel distances below (presumably µm per pixel — TODO confirm)

In [None]:
# Convert every tracking CSV in `file_path` into a per-track table holding the
# distance of each point from the track's first point, then save it as Excel.
for f in os.listdir(file_path):
    file_name, file_extension = os.path.splitext(f)
    if file_extension.lower() != '.csv':
        continue

    # Read the CSV data; os.path.join makes this work whether or not
    # `file_path` ends with a path separator
    data = pd.read_csv(os.path.join(file_path, f), encoding='utf-8', encoding_errors="ignore")

    # Rename columns to avoid special characters
    # (requires the CSV to have exactly these 7 columns, in this order)
    data.columns = ["Track n", "Slice", "X", "Y", "Distance", "Velocity", "Pixel_Value"]
    # .copy() gives an independent frame, so adding columns below does not
    # write into a slice of `data`
    df = data[["Track n", "X", "Y", "Distance", "Velocity"]].copy()

    # Group by 'Track n'
    grouped_data = df.groupby('Track n')

    # Distance of every point from the first point of its track (the apical
    # point), converted with `pixel` and rounded to 2 decimals
    first_xy = grouped_data[["X", "Y"]].transform("first")
    df['DistanceToApical'] = (
        np.hypot(df['X'] - first_xy['X'], df['Y'] - first_xy['Y']) * pixel
    ).round(2)

    # Drop the first three rows of every track, keeping tracks in sorted order.
    # NOTE(review): the original note says the first 2 points only label the
    # apical and basal surfaces, yet three rows are dropped — confirm the count.
    keep = df.groupby('Track n').cumcount() >= 3
    df = df[keep].sort_values('Track n', kind='stable').reset_index(drop=True)

    # Time in minutes, restarting at 0 for each track (10-min frame interval is hard-coded)
    df['Time'] = df.groupby('Track n').cumcount() * 10

    # Save the full table (Track n, X, Y, Distance, Velocity, DistanceToApical, Time)
    df.to_excel(os.path.join(file_path, f"{file_name}_distance_to_apical.xlsx"), index=False)

# Section 2: Below is for nuclear **radial fluctuations**

### Step 1: read data 

In [None]:
BASE_FOLDER = "input-folder-name"  # folder scanned for input .xlsx workbooks; the outputs below are written here too
output_folder_path = 'output-folder-name'  # NOTE(review): not referenced by the code visible in this notebook
N_POINTS = 16   # read only the first 16 time points (i.e., 150-min trajectory)

DEFAULT_DT_MIN = 10.0  # fallback frame interval (minutes) when it cannot be inferred from the Time column

OUT_SUMMARY_XLSX = "metrics_summary.xlsx"  # file name of the per-track summary workbook
OUT_SPEED_TS_XLSX = "instantaneous_speeds.xlsx"  # file name of the per-interval speed workbook

### Step 2: data processing

In [None]:
def coerce_numeric(s):
    """Convert *s* to numeric values; entries that cannot be parsed become NaN."""
    converted = pd.to_numeric(s, errors="coerce")
    return converted

def infer_dt_minutes(time_col):
    """Estimate dt (in minutes) as the median gap between sorted time stamps.

    Returns DEFAULT_DT_MIN when fewer than two numeric time stamps exist or
    when the median gap is not a finite, strictly positive number.
    """
    stamps = coerce_numeric(time_col).dropna().to_numpy()
    if stamps.size < 2:
        return DEFAULT_DT_MIN

    gaps = np.diff(np.sort(stamps))
    median_gap = np.median(gaps) if gaps.size else DEFAULT_DT_MIN

    if np.isfinite(median_gap) and median_gap > 0:
        return float(median_gap)
    return DEFAULT_DT_MIN

def make_index_final(idx_val, track_val):
    """Build the combined label that identifies one track."""
    label = f"Index={idx_val}|Track={track_val}"
    return label

def process_group_firstN(g, file_name, sheet_name, idx_val, track_val, N=N_POINTS):
    """
    Clean one track, keep its first N time points, and compute speed metrics.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single track; must contain "Time" and "Distance" columns.
    file_name, sheet_name
        Source labels copied verbatim into the outputs.
    idx_val, track_val
        Group keys identifying the track.
    N : int
        Number of time points required and kept (after sorting by "Time").

    Returns
    -------
    tuple
        (summary_dict, timeseries_df), or (None, None) when a required column
        is missing or fewer than N rows have numeric Time and Distance.
    """
    if "Time" not in g or "Distance" not in g:
        return None, None

    df = g.copy()
    df["Time"] = coerce_numeric(df["Time"])
    df["Distance"] = coerce_numeric(df["Distance"])  # signed Δx per interval (µm)
    df = df.dropna(subset=["Time", "Distance"]).sort_values("Time")

    if len(df) < N:
        return None, None

    # Keep only the first N time points
    dfN = df.head(N)
    index_final = make_index_final(idx_val, track_val)

    # Infer dt using the selected N rows
    dt_min = infer_dt_minutes(dfN["Time"])

    # Instantaneous speed (µm/min): |Δx| / dt
    delta_x = dfN["Distance"].to_numpy()
    v = np.abs(delta_x) / dt_min

    # Path length & mean speed; every kept row is counted as one interval of
    # length dt, so the total time is (number of rows) * dt
    path_length_um = float(np.sum(np.abs(delta_x)))
    total_time_min = float(len(dfN) * dt_min)
    mean_speed = path_length_um / total_time_min if total_time_min > 0 else np.nan

    # Summary row (per track)
    summary = dict(
        file=file_name,
        sheet=sheet_name,
        Index=idx_val,
        Track_n=track_val,
        index_final=index_final,
        # was the undefined name `n`, which raised NameError for every track
        n_timepoints_used=len(dfN),
        dt_minutes=dt_min,
        total_time_minutes=total_time_min,
        path_length_um=path_length_um,
        mean_speed_um_per_min=mean_speed,
    )

    # Per-interval time series (use [t_start, t_end] for clarity)
    t_end = dfN["Time"].to_numpy()
    t_start = t_end - dt_min

    ts = pd.DataFrame({
        "file": file_name,
        "sheet": sheet_name,
        "Index": idx_val,
        "Track_n": track_val,
        "index-final": index_final,
        "t_start_min": t_start,
        "t_end_min": t_end,
        "delta_x_um": delta_x,
        "inst_speed_um_per_min": v,
    })

    return summary, ts

def run_pipeline():
    """
    Compute speed metrics for every track in every .xlsx workbook of BASE_FOLDER.

    Each sheet needs the columns "Index", "Track n", "Time" and "Distance";
    workbooks or sheets that cannot be read, or that lack a column, are skipped
    with a message. Tracks are grouped by (Index, Track n) and passed to
    process_group_firstN with N=N_POINTS. Results are written into BASE_FOLDER
    as OUT_SUMMARY_XLSX (sheet "summary") and OUT_SPEED_TS_XLSX (sheet
    "instantaneous"). Returns None.
    """
    all_summaries = []
    all_ts = []

    # The file imports the function (`from glob import glob`), so it is called
    # directly; `glob.glob(...)` raised AttributeError here.
    xlsx_paths = glob(os.path.join(BASE_FOLDER, "*.xlsx"))
    if not xlsx_paths:
        print(f"[warn] No .xlsx files found in {BASE_FOLDER}")
        return

    for filepath in xlsx_paths:
        file_name = Path(filepath).name
        # Best effort: an unreadable workbook is reported and skipped
        try:
            xls = pd.ExcelFile(filepath)
        except Exception as e:
            print(f"[skip] Could not open {file_name}: {e}")
            continue

        for sheet_name in xls.sheet_names:
            try:
                df = xls.parse(sheet_name)
            except Exception as e:
                print(f"[skip] {file_name} / {sheet_name}: {e}")
                continue

            # Check required columns
            required = {"Index", "Track n", "Time", "Distance"}
            missing = required - set(df.columns)
            if missing:
                print(f"[skip] {file_name} / {sheet_name}: missing columns {missing}")
                continue

            # Group by droplet (Index × Track n); dropna=False keeps NaN keys as groups
            groups = df.groupby(["Index", "Track n"], dropna=False)
            for (idx_val, track_val), g in groups:
                summary, ts = process_group_firstN(
                    g, file_name, sheet_name, idx_val, track_val, N=N_POINTS
                )
                if summary is None:
                    continue
                all_summaries.append(summary)
                all_ts.append(ts)

    # Collate and write Excel outputs
    if all_summaries:
        summary_df = pd.DataFrame(all_summaries).sort_values(
            ["file", "sheet", "Index", "Track_n"]
        )
        out_path = os.path.join(BASE_FOLDER, OUT_SUMMARY_XLSX)
        with pd.ExcelWriter(out_path, engine="xlsxwriter") as writer:
            summary_df.to_excel(writer, index=False, sheet_name="summary")
        print(f"[ok] wrote {OUT_SUMMARY_XLSX} with {len(summary_df)} tracks")
    else:
        # Report the threshold actually applied instead of a hard-coded number
        print(f"[warn] No qualifying tracks (>= {N_POINTS} time points). No summary written.")

    if all_ts:
        ts_df = pd.concat(all_ts, ignore_index=True).sort_values(
            ["file", "sheet", "Index", "Track_n", "t_end_min"]
        )
        out_path = os.path.join(BASE_FOLDER, OUT_SPEED_TS_XLSX)
        with pd.ExcelWriter(out_path, engine="xlsxwriter") as writer:
            ts_df.to_excel(writer, index=False, sheet_name="instantaneous")
        print(f"[ok] wrote {OUT_SPEED_TS_XLSX} with {len(ts_df)} intervals")
    else:
        print(f"[warn] No timeseries written (no groups with >= {N_POINTS} points).")

if __name__ == "__main__":
    run_pipeline()


### Step 3: visualize instantaneous speed

In [None]:
# ==== LOAD DATA ====
INSTANT_XLSX = OUT_SPEED_TS_XLSX   # excel generated from the cell above
SHEET_NAME = "instantaneous"  # sheet name used when the workbook was written
xlsx_path = os.path.join(BASE_FOLDER, INSTANT_XLSX)  # the workbook is read from BASE_FOLDER
df = pd.read_excel(xlsx_path, sheet_name=SHEET_NAME)  # per-interval table used by the plotting cell

In [None]:
# ==== CONFIG ====
POINT_SIZE = 25  # scatter marker size (passed as `s`)
LINE_WIDTH = 1  # line width (passed as `lw`)
ALPHA_LINE = 0.9  # opacity of the connecting lines
ALPHA_SCAT = 0.75  # opacity of the scatter markers
CMAP_NAME = "tab20"  # colormap from which per-track colors are drawn
LEGEND_OUTSIDE = True  # NOTE(review): not referenced by the plotting code visible in this notebook
GROUP_BY_SHEET = False  # False: one figure per file; True: one figure per (file, sheet)

In [None]:
# Ensure the plotted columns are numeric; values that cannot be parsed become NaN
for col in ["t_end_min", "inst_speed_um_per_min"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")
# Drop rows lacking any value needed to place a point or assign it to a file/track
df = df.dropna(subset=["file", "index-final", "t_end_min", "inst_speed_um_per_min"])

# helper: nice title
def pretty_title(file_name, sheet_name=None):
    """Return the figure title: a fixed heading, the file name, and the sheet when one is given."""
    pieces = [f"Instantaneous speed (line + scatter)\n{file_name}"]
    if sheet_name:
        pieces.append(f"  |  sheet: {sheet_name}")
    return "".join(pieces)

# helper: color iterator per plot
def color_cycle(n, cmap_name=CMAP_NAME):
    """Return a list of n colors from the named colormap, wrapping around after cmap.N entries."""
    palette = plt.get_cmap(cmap_name)
    picked = []
    for position in range(n):
        picked.append(palette(position % palette.N))
    return picked

# ==== PLOTTING ====
# One figure per file (or per file and sheet when GROUP_BY_SHEET is True),
# one line + scatter series per track; each figure is saved as a PDF.
out_dir = BASE_FOLDER  # figures are written next to the input workbooks
files = df["file"].unique()

for file_name in files:
    df_file = df[df["file"] == file_name]

    # Without sheet grouping, use a single pseudo-group with an empty sheet name
    sheet_groups = [("", df_file)] if not GROUP_BY_SHEET else df_file.groupby("sheet", dropna=False)

    for sheet_name, df_subset in sheet_groups:
        tracks = list(df_subset["index-final"].unique())
        if len(tracks) == 0:
            continue

        # One color per track, in order of first appearance
        colors = color_cycle(len(tracks))


        plt.figure(figsize=(5, 4))

        for color, idx_final in zip(colors, tracks):
            g = df_subset[df_subset["index-final"] == idx_final].copy()
            g = g.sort_values("t_end_min")  # draw the line in time order

            x = g["t_end_min"].to_numpy()
            y = g["inst_speed_um_per_min"].to_numpy()

            plt.plot(
                x, y, color=color, lw=LINE_WIDTH, alpha=ALPHA_LINE
            )
            plt.scatter(
                x, y, color=color, s=POINT_SIZE, alpha=ALPHA_SCAT, edgecolors="none"
            )

        plt.xlabel("Time (min)")
        plt.ylabel("Instantaneous speed (µm/min)")
        ttl = pretty_title(file_name, sheet_name if GROUP_BY_SHEET else None)
        plt.title(ttl)


        # Pad the x-range by 2% on each side, unless it is degenerate or non-finite
        xmin, xmax = np.nanmin(df_subset["t_end_min"]), np.nanmax(df_subset["t_end_min"])
        if np.isfinite(xmin) and np.isfinite(xmax) and xmin != xmax:
            pad = 0.02 * (xmax - xmin)
            plt.xlim(xmin - pad, xmax + pad)

        # Output name is derived from the workbook name (and the sheet, with "/" replaced)
        stem = Path(file_name).stem
        if GROUP_BY_SHEET:
            sheet_label = str(sheet_name).replace("/", "_")
            out_pdf = os.path.join(out_dir, f"inst_speed_{stem}__{sheet_label}.pdf")
        else:
            out_pdf = os.path.join(out_dir, f"inst_speed_{stem}.pdf")

        plt.savefig(out_pdf, dpi=300)
        # plt.close()
        # NOTE(review): with the close call disabled, each figure stays open
        # after saving, so open figures accumulate across files.

        print(f"[ok] saved {out_pdf}")

### Step 4: fit the net displacement with linear regression and compute the RMSE

In [None]:
# Rebinds the summary file name so the RMSE pipeline below writes its own workbook
OUT_SUMMARY_XLSX = "rmse_regression-radial-fluctuations-summary.xlsx"

In [None]:
# ==== HELPERS ====
def coerce_numeric(s):
    """Return *s* as numeric data, with unparseable entries replaced by NaN."""
    numeric = pd.to_numeric(s, errors="coerce")
    return numeric

def make_index_final(idx_val, track_val):
    """Return the combined Index/Track label for one track."""
    combined = f"Index={idx_val}|Track={track_val}"
    return combined

def process_group_rmse_regression(g, file_name, sheet_name, idx_val, track_val, N=N_POINTS):
    """
    Fit a straight line to one track's net displacement and report the RMSE.

    Steps:
    - return None unless "Time" and "Normalized_DistanceToApical" are present
    - coerce both columns to numeric, drop incomplete rows, sort by time
    - return None if fewer than N rows remain; otherwise keep the first N
    - fit a linear regression of displacement on time
    - RMSE = sqrt(mean(residual**2))

    Returns a summary dict (file, sheet, keys, label, points used, slope,
    intercept, rmse_regression) or None.
    """
    time_col = "Time"
    disp_col = "Normalized_DistanceToApical"
    if any(col not in g.columns for col in (time_col, disp_col)):
        return None

    track = g.copy()
    for col in (time_col, disp_col):
        track[col] = coerce_numeric(track[col])
    track = track.dropna(subset=[time_col, disp_col]).sort_values(time_col)

    # Not enough points for this track: skip it
    if len(track) < N:
        return None

    # Restrict to the first N time points
    head = track.head(N).copy()

    # Regression inputs: sklearn expects a 2-D feature matrix
    times = head[time_col].to_numpy().reshape(-1, 1)
    displacement = head[disp_col].to_numpy()

    fit = LinearRegression()
    fit.fit(times, displacement)
    fitted = fit.predict(times)

    # Root-mean-square of the residuals around the fitted line
    rmse = float(np.sqrt(np.mean((displacement - fitted) ** 2)))

    return {
        "file": file_name,
        "sheet": sheet_name,
        "Index": idx_val,
        "Track_n": track_val,
        "index_final": make_index_final(idx_val, track_val),
        "n_points_used": len(head),
        "slope": fit.coef_[0],
        "intercept": fit.intercept_,
        "rmse_regression": rmse,
    }

# ==== MAIN ====
def run_pipeline():
    """
    Compute the regression RMSE for every track in every .xlsx workbook of BASE_FOLDER.

    Each sheet needs the columns "Index", "Track n", "Time" and
    "Normalized_DistanceToApical"; workbooks or sheets that cannot be read, or
    that lack a column, are skipped with a message. Tracks are grouped by
    (Index, Track n) and passed to process_group_rmse_regression with
    N=N_POINTS. The collected rows are written to OUT_SUMMARY_XLSX (sheet
    "rmse_regression") inside BASE_FOLDER. Returns None.
    """
    results = []

    # The file imports the function (`from glob import glob`), so it is called
    # directly; `glob.glob(...)` raised AttributeError here.
    xlsx_paths = glob(os.path.join(BASE_FOLDER, "*.xlsx"))
    if not xlsx_paths:
        print(f"[warn] No .xlsx files found in {BASE_FOLDER}")
        return

    for filepath in xlsx_paths:
        file_name = Path(filepath).name
        # Best effort: an unreadable workbook is reported and skipped
        try:
            xls = pd.ExcelFile(filepath)
        except Exception as e:
            print(f"[skip] Could not open {file_name}: {e}")
            continue

        for sheet_name in xls.sheet_names:
            try:
                df = xls.parse(sheet_name)
            except Exception as e:
                print(f"[skip] {file_name} / {sheet_name}: {e}")
                continue

            required = {"Index", "Track n", "Time", "Normalized_DistanceToApical"}
            missing = required - set(df.columns)
            if missing:
                print(f"[skip] {file_name} / {sheet_name}: missing {missing}")
                continue

            # group by droplet track; dropna=False keeps NaN keys as groups
            groups = df.groupby(["Index", "Track n"], dropna=False)
            for (idx_val, track_val), g in groups:
                summary = process_group_rmse_regression(
                    g, file_name, sheet_name, idx_val, track_val, N=N_POINTS
                )
                if summary is not None:
                    results.append(summary)

    # write output
    if results:
        summary_df = pd.DataFrame(results).sort_values(
            ["file", "sheet", "Index", "Track_n"]
        )
        out_path = os.path.join(BASE_FOLDER, OUT_SUMMARY_XLSX)
        summary_df.to_excel(out_path, index=False, sheet_name="rmse_regression")
        print(f"[ok] wrote {OUT_SUMMARY_XLSX} with {len(summary_df)} tracks")
    else:
        print("[warn] No groups processed.")

if __name__ == "__main__":
    run_pipeline()

# Section 3: Below is for nuclear **basal migrations**

### Step 1: read data 

In [None]:
BASE_FOLDER = "input-folder-name"  # folder scanned for input .xlsx workbooks; the outputs below are written here too
output_folder_path = 'output-folder-name'  # NOTE(review): not referenced by the code visible in this notebook
N_POINTS = 5   # read only the first 5 time points (i.e., 40-min trajectory)

DEFAULT_DT_MIN = 10.0  # fallback frame interval (minutes) when it cannot be inferred from the Time column

OUT_SUMMARY_XLSX = "metrics_summary.xlsx"  # file name of the per-track summary workbook
OUT_SPEED_TS_XLSX = "instantaneous_speeds.xlsx"  # file name of the per-interval speed workbook

### Step 2: data processing

In [None]:
def coerce_numeric(s):
    """Parse *s* as numbers; anything that fails to parse becomes NaN."""
    parsed = pd.to_numeric(s, errors="coerce")
    return parsed

def infer_dt_minutes(time_col):
    """Estimate dt (in minutes) as the median gap between sorted time stamps.

    Returns DEFAULT_DT_MIN when fewer than two numeric time stamps exist or
    when the median gap is not a finite, strictly positive number.
    """
    stamps = coerce_numeric(time_col).dropna().to_numpy()
    if stamps.size < 2:
        return DEFAULT_DT_MIN

    gaps = np.diff(np.sort(stamps))
    median_gap = np.median(gaps) if gaps.size else DEFAULT_DT_MIN

    if np.isfinite(median_gap) and median_gap > 0:
        return float(median_gap)
    return DEFAULT_DT_MIN

def make_index_final(idx_val, track_val):
    """Build the combined label that identifies one track."""
    label = f"Index={idx_val}|Track={track_val}"
    return label

def process_group_firstN(g, file_name, sheet_name, idx_val, track_val, N=N_POINTS):
    """
    Clean one track, keep its first N time points, and compute speed metrics.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single track; must contain "Time" and "Distance" columns.
    file_name, sheet_name
        Source labels copied verbatim into the outputs.
    idx_val, track_val
        Group keys identifying the track.
    N : int
        Number of time points required and kept (after sorting by "Time").

    Returns
    -------
    tuple
        (summary_dict, timeseries_df), or (None, None) when a required column
        is missing or fewer than N rows have numeric Time and Distance.
    """
    if "Time" not in g or "Distance" not in g:
        return None, None

    df = g.copy()
    df["Time"] = coerce_numeric(df["Time"])
    df["Distance"] = coerce_numeric(df["Distance"])  # signed Δx per interval (µm)
    df = df.dropna(subset=["Time", "Distance"]).sort_values("Time")

    if len(df) < N:
        return None, None

    # Keep only the first N time points
    dfN = df.head(N)
    index_final = make_index_final(idx_val, track_val)

    # Infer dt using the selected N rows
    dt_min = infer_dt_minutes(dfN["Time"])

    # Instantaneous speed (µm/min): |Δx| / dt
    delta_x = dfN["Distance"].to_numpy()
    v = np.abs(delta_x) / dt_min

    # Path length & mean speed; every kept row is counted as one interval of
    # length dt, so the total time is (number of rows) * dt
    path_length_um = float(np.sum(np.abs(delta_x)))
    total_time_min = float(len(dfN) * dt_min)
    mean_speed = path_length_um / total_time_min if total_time_min > 0 else np.nan

    # Summary row (per track)
    summary = dict(
        file=file_name,
        sheet=sheet_name,
        Index=idx_val,
        Track_n=track_val,
        index_final=index_final,
        # was the undefined name `n`, which raised NameError for every track
        n_timepoints_used=len(dfN),
        dt_minutes=dt_min,
        total_time_minutes=total_time_min,
        path_length_um=path_length_um,
        mean_speed_um_per_min=mean_speed,
    )

    # Per-interval time series (use [t_start, t_end] for clarity)
    t_end = dfN["Time"].to_numpy()
    t_start = t_end - dt_min

    ts = pd.DataFrame({
        "file": file_name,
        "sheet": sheet_name,
        "Index": idx_val,
        "Track_n": track_val,
        "index-final": index_final,
        "t_start_min": t_start,
        "t_end_min": t_end,
        "delta_x_um": delta_x,
        "inst_speed_um_per_min": v,
    })

    return summary, ts

def run_pipeline():
    """
    Compute speed metrics for every track in every .xlsx workbook of BASE_FOLDER.

    Each sheet needs the columns "Index", "Track n", "Time" and "Distance";
    workbooks or sheets that cannot be read, or that lack a column, are skipped
    with a message. Tracks are grouped by (Index, Track n) and passed to
    process_group_firstN with N=N_POINTS. Results are written into BASE_FOLDER
    as OUT_SUMMARY_XLSX (sheet "summary") and OUT_SPEED_TS_XLSX (sheet
    "instantaneous"). Returns None.
    """
    all_summaries = []
    all_ts = []

    # The file imports the function (`from glob import glob`), so it is called
    # directly; `glob.glob(...)` raised AttributeError here.
    xlsx_paths = glob(os.path.join(BASE_FOLDER, "*.xlsx"))
    if not xlsx_paths:
        print(f"[warn] No .xlsx files found in {BASE_FOLDER}")
        return

    for filepath in xlsx_paths:
        file_name = Path(filepath).name
        # Best effort: an unreadable workbook is reported and skipped
        try:
            xls = pd.ExcelFile(filepath)
        except Exception as e:
            print(f"[skip] Could not open {file_name}: {e}")
            continue

        for sheet_name in xls.sheet_names:
            try:
                df = xls.parse(sheet_name)
            except Exception as e:
                print(f"[skip] {file_name} / {sheet_name}: {e}")
                continue

            # Check required columns
            required = {"Index", "Track n", "Time", "Distance"}
            missing = required - set(df.columns)
            if missing:
                print(f"[skip] {file_name} / {sheet_name}: missing columns {missing}")
                continue

            # Group by droplet (Index × Track n); dropna=False keeps NaN keys as groups
            groups = df.groupby(["Index", "Track n"], dropna=False)
            for (idx_val, track_val), g in groups:
                summary, ts = process_group_firstN(
                    g, file_name, sheet_name, idx_val, track_val, N=N_POINTS
                )
                if summary is None:
                    continue
                all_summaries.append(summary)
                all_ts.append(ts)

    # Collate and write Excel outputs
    if all_summaries:
        summary_df = pd.DataFrame(all_summaries).sort_values(
            ["file", "sheet", "Index", "Track_n"]
        )
        out_path = os.path.join(BASE_FOLDER, OUT_SUMMARY_XLSX)
        with pd.ExcelWriter(out_path, engine="xlsxwriter") as writer:
            summary_df.to_excel(writer, index=False, sheet_name="summary")
        print(f"[ok] wrote {OUT_SUMMARY_XLSX} with {len(summary_df)} tracks")
    else:
        # Report the threshold actually applied instead of a hard-coded number
        print(f"[warn] No qualifying tracks (>= {N_POINTS} time points). No summary written.")

    # The per-interval tables were collected but never saved; write them the
    # same way the radial-fluctuation pipeline does.
    if all_ts:
        ts_df = pd.concat(all_ts, ignore_index=True).sort_values(
            ["file", "sheet", "Index", "Track_n", "t_end_min"]
        )
        out_path = os.path.join(BASE_FOLDER, OUT_SPEED_TS_XLSX)
        with pd.ExcelWriter(out_path, engine="xlsxwriter") as writer:
            ts_df.to_excel(writer, index=False, sheet_name="instantaneous")
        print(f"[ok] wrote {OUT_SPEED_TS_XLSX} with {len(ts_df)} intervals")
    else:
        print(f"[warn] No timeseries written (no groups with >= {N_POINTS} points).")

# This pipeline was defined but never invoked, so no basal-migration metrics
# were produced; run it like the other pipeline cells.
if __name__ == "__main__":
    run_pipeline()


### Step 3: fit the net displacement with linear regression and compute the RMSE

In [None]:
# Rebinds the summary file name so the RMSE pipeline below writes its own workbook
OUT_SUMMARY_XLSX = "rmse_regression-basal-fluctuation-summary.xlsx"

In [None]:
# ==== HELPERS ====
def coerce_numeric(s):
    """Return *s* converted to numbers, with invalid entries set to NaN."""
    as_numbers = pd.to_numeric(s, errors="coerce")
    return as_numbers

def process_group_rmse_regression(g, file_name, sheet_name, idx_val, N=N_POINTS):
    """
    Fit a line to one track's baseline-subtracted distance and report the RMSE.

    For one droplet track (grouped by 'index1' in the caller):
      - x is normalised_time; y is DistanceToApical minus its first value in time
      - rows with non-numeric x or y are dropped before sorting by time
      - at least N rows are required; only the first N by time are used
      - y = slope*x + intercept is fitted with numpy.polyfit
      - RMSE = sqrt(mean((y - yhat)^2))
    Returns a summary dict, or None if columns are missing or rows are too few.
    """
    time_col, dist_col = "normalised_time", "DistanceToApical"
    if not {time_col, dist_col} <= set(g.columns):
        return None

    track = g.copy()
    for col in (time_col, dist_col):
        track[col] = coerce_numeric(track[col])
    track = track.dropna(subset=[time_col, dist_col]).sort_values(time_col)
    if track.empty:
        return None

    # Baseline: the first observed distance in time order
    baseline = track[dist_col].iloc[0]
    track["Normalized_DistanceToApical"] = track[dist_col] - baseline

    # Too few points for this track
    if len(track) < N:
        return None

    # Keep the first N rows by time
    head = track.head(N).copy()
    x = head[time_col].to_numpy()
    y = head["Normalized_DistanceToApical"].to_numpy()

    # Degree-1 polyfit returns the coefficients as [slope, intercept]
    coefficients = np.polyfit(x, y, 1)
    slope, intercept = coefficients
    y_hat = slope * x + intercept

    # RMSE of the residuals around the fitted line
    squared_error = (y - y_hat) ** 2
    rmse = float(np.sqrt(np.mean(squared_error)))

    return {
        "file": file_name,
        "sheet": sheet_name,
        "index_final": idx_val,
        "n_points_used": len(head),
        "slope": slope,
        "intercept": intercept,
        "rmse_regression": rmse,
    }

# ==== MAIN ====
def run_pipeline():
    """
    Compute the regression RMSE for every track in every .xlsx workbook of BASE_FOLDER.

    Each sheet needs the columns "index1", "normalised_time" and
    "DistanceToApical"; workbooks or sheets that cannot be read, or that lack a
    column, are skipped with a message. Tracks are grouped by "index1" and
    passed to process_group_rmse_regression with N=N_POINTS. The collected rows
    are written to OUT_SUMMARY_XLSX (sheet "rmse_regression") inside
    BASE_FOLDER. Returns None.
    """
    results = []

    # The file imports the function (`from glob import glob`), so it is called
    # directly; `glob.glob(...)` raised AttributeError here.
    xlsx_paths = glob(os.path.join(BASE_FOLDER, "*.xlsx"))
    if not xlsx_paths:
        print(f"[warn] No .xlsx files found in {BASE_FOLDER}")
        return

    for filepath in xlsx_paths:
        file_name = Path(filepath).name
        # Best effort: an unreadable workbook is reported and skipped
        try:
            xls = pd.ExcelFile(filepath)
        except Exception as e:
            print(f"[skip] Could not open {file_name}: {e}")
            continue

        for sheet_name in xls.sheet_names:
            try:
                df = xls.parse(sheet_name)
            except Exception as e:
                print(f"[skip] {file_name} / {sheet_name}: {e}")
                continue

            required = {"index1", "normalised_time", "DistanceToApical"}
            missing = required - set(df.columns)
            if missing:
                print(f"[skip] {file_name} / {sheet_name}: missing {missing}")
                continue

            # group by droplet track (index1); dropna=False keeps NaN keys as a group
            for idx_val, g in df.groupby("index1", dropna=False):
                summary = process_group_rmse_regression(
                    g, file_name, sheet_name, idx_val, N=N_POINTS
                )
                if summary is not None:
                    results.append(summary)

    # write output
    if results:
        summary_df = pd.DataFrame(results).sort_values(["file", "sheet", "index_final"])
        out_path = os.path.join(BASE_FOLDER, OUT_SUMMARY_XLSX)
        summary_df.to_excel(out_path, index=False, sheet_name="rmse_regression")
        print(f"[ok] wrote {OUT_SUMMARY_XLSX} with {len(summary_df)} tracks")
    else:
        print("[warn] No groups processed (check columns / N_POINTS threshold).")

if __name__ == "__main__":
    run_pipeline()