In [1]:
# -*- coding: utf-8 -*-
import os, re, json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import curve_fit

# ======================================================
# ===================== CONFIG =========================
# ======================================================
HQNO_CSV   = r'C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3A_hqno_gfp\3A_HQNO_gfp_gradient_visualization.csv'
JSON_CAL   = r'C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3B_hqno_calibration\3B_hqno_calibration.json'

# Optional: filter to a subset of replicates (e.g., {2,3}) or set to None for all
REPLICATE_FILTER: set[int] | None = {4,5,7}  # e.g., {4,5,7}

# User-tunable options
intensity_col = 'median_intensity'    # column to fit (in HQNO_CSV)
bin_size = 50/30                      # µm-per-bin for x-axis scaling
figsize = (5, 4)
plot_each = False                     # True → save per-chamber diagnostic plots

# Physical constraint: L must be beyond the last fitted bin by at least this many bins
L_margin_bins = 1.0

# Sweep settings
SWEEP_CUTOFFS = [0, 1, 2, 3, 4, 5, 6]
OUT_ROOT = Path('hqno_cutoff_sweep').resolve()
OUT_ROOT.mkdir(exist_ok=True, parents=True)

# ======================================================
# ===================== HELPERS ========================
# ======================================================

def ensure_dir(p: Path) -> Path:
    p.mkdir(exist_ok=True, parents=True)
    return p


def load_calibration(json_path: str | Path):
    with open(json_path, 'r', encoding='utf-8') as f:
        cal = json.load(f)
    a = float(cal['params']['a'])
    c = float(cal['params']['c'])
    b = float(cal['params']['b'])
    return a, c, b


def uptake_diffusion_sinh(x, c_max, L, A):
    """
    Numerically stable uptake–diffusion profile WITHOUT offset:
        c(x) = c_max * sinh(sqrt(A)*(L - x)) / sinh(sqrt(A)*L)
    """
    sA = np.sqrt(A)
    return c_max * np.sinh(sA * (L - x)) / np.sinh(sA * L)


def hill_inverse(y, a, c, b):
    """Inverse Hill: c * (y / (a - y))**(1/b)"""
    return c * (y / (a - y))**(1/b)


def propagate_tail_nans_after_second(x, arrays):
    """
    Given x (y_bin) and a list of arrays (e.g., [intensity_bg, concentration]),
    find the first index where the *second* NaN occurs (consider NaN in ANY array).
    NaN-out all entries at strictly larger x in all arrays. Returns new arrays.
    """
    x = np.asarray(x)
    order = np.argsort(x)
    nan_count = 0
    cut_x = None
    for k in order:
        if any(not np.isfinite(a[k]) for a in arrays):
            nan_count += 1
            if nan_count >= 2:
                cut_x = x[k]
                break
    if cut_x is None:
        return arrays
    tail_mask = x > cut_x
    out = []
    for a in arrays:
        a2 = a.copy()
        a2[tail_mask] = np.nan
        out.append(a2)
    return out


def extract_repl_num(x):
    """Parse replicate number from values like 4, '4', 'R4', 'rep4', etc."""
    if pd.isna(x):
        return np.nan
    m = re.search(r'\d+', str(x))
    return int(m.group()) if m else np.nan

# ======================================================
# ================= CORE FITTER ========================
# ======================================================

def run_hqno_fit_for_cutoff(
    cut_off: int,
    hqno_csv: str | Path,
    calibration_json: str | Path,
    out_dir: Path,
    *,
    intensity_col: str,
    bin_size: float | None,
    figsize=(5,4),
    plot_each=False,
    L_margin_bins=1.0,
    replicate_filter: set[int] | None = None,
):
    out_dir = ensure_dir(out_dir)
    OUT_CSV = out_dir / '3C_HQNO_concentration.csv'
    OUT_PROFILE_CSV = out_dir / '3C_HQNO_concentration_profiles.csv'

    # Load calibration (Hill)
    a_cal, c_cal, b_cal = load_calibration(calibration_json)

    # Load main data (+ optional replicate filter)
    outer = pd.read_csv(hqno_csv)
    outer['replicate_num'] = outer['replicate'].apply(extract_repl_num)
    before_n = len(outer)
    if replicate_filter:
        outer = outer[outer['replicate_num'].isin(replicate_filter)].copy()
    after_n = len(outer)
    if after_n == 0:
        print(f"[cut_off={cut_off}] WARNING: No rows after replicate filter {sorted(replicate_filter) if replicate_filter else 'None'}")
    else:
        print(f"[cut_off={cut_off}] Rows kept: {after_n}/{before_n} (replicate filter = {sorted(replicate_filter) if replicate_filter else 'None'})")

    # unique (replicate, pos) combos to fit
    combos = outer[['replicate', 'pos']].drop_duplicates().reset_index(drop=True)

    results = []
    profile_rows = []

    for _, row in combos.iterrows():
        rep, pos = row['replicate'], row['pos']
        df_o = outer[(outer['replicate'] == rep) & (outer['pos'] == pos)].copy()

        xdata_bins = df_o['y_bin'].to_numpy(dtype=float)
        ydata_raw  = df_o[intensity_col].to_numpy(dtype=float)

        # ---------------- Background (RHL-style): per-chamber min finite bin ----------------
        valid_raw = np.isfinite(ydata_raw)
        if not np.any(valid_raw):
            BG_VALUE = np.nan
        else:
            BG_VALUE = float(np.nanmin(ydata_raw[valid_raw]))

        # Subtract background: NEGATIVES -> NaN (match your sweep style)
        y_bg_corrected = ydata_raw - BG_VALUE
        y_bg_corrected = np.where(y_bg_corrected < 0, np.nan, y_bg_corrected)

        # Inverse-Hill only where valid
        valid_inv = np.isfinite(y_bg_corrected) & (y_bg_corrected >= 0) & (y_bg_corrected < a_cal)
        ydata = np.full_like(y_bg_corrected, np.nan, dtype=float)
        if np.any(valid_inv):
            ydata[valid_inv] = hill_inverse(y_bg_corrected[valid_inv], a_cal, c_cal, b_cal)

        # After the *second* NaN, NaN-out all larger x-bins (in both arrays)
        y_bg_corrected, ydata = propagate_tail_nans_after_second(xdata_bins, [y_bg_corrected, ydata])

        # Trim by cut_off for fitting (xfit purely positional)
        if isinstance(cut_off, (int, np.integer)) and cut_off >= 0:
            xfit_bins = xdata_bins[cut_off:]
            yfit_all  = ydata[cut_off:]
        else:
            raise ValueError('cut_off must be a non-negative integer')

        # Fit with bounds: L outside chamber, c_max≥0, A>0 (NO offset)
        if xfit_bins.size == 0:
            c_max_fit = A_fit = L_fit = np.nan
            r2 = rmse = np.nan
            status = 'no_data'
            fit_at_bins = np.full_like(xdata_bins, np.nan, dtype=float)
        else:
            # Enforce L based on the chamber's x-range (not on NaN-masked points)
            last_bin_available = float(np.max(xfit_bins))
            L_lower = last_bin_available + max(1.0, float(L_margin_bins))  # strictly beyond data

            # Use only finite y for fitting
            mask_fit = np.isfinite(yfit_all)
            x_fit = xfit_bins[mask_fit]
            y_fit = yfit_all[mask_fit]

            if y_fit.size < 3:
                c_max_fit = A_fit = L_fit = np.nan
                r2 = rmse = np.nan
                status = 'too_few_points'
                fit_at_bins = np.full_like(xdata_bins, np.nan, dtype=float)
            else:
                def model_free_L(x, c_max, L, A):
                    return uptake_diffusion_sinh(x, c_max, L, A)

                # Initial guesses (inside bounds)
                yfit_max = float(np.max(y_fit))
                p0 = [
                    max(1e-9, 3.0 * yfit_max),  # c_max
                    L_lower + 1.0,              # L
                    1.0                         # A
                ]

                eps = 1e-12
                lower_bounds = [0.0, L_lower, eps]
                upper_bounds = [np.inf, 2*L_lower, np.inf]

                try:
                    popt, _ = curve_fit(
                        model_free_L,
                        x_fit, y_fit,
                        p0=p0,
                        bounds=(lower_bounds, upper_bounds),
                        maxfev=10000
                    )
                    c_max_fit, L_fit, A_fit = map(float, popt)
                    status = 'success'

                    # Metrics
                    y_pred = model_free_L(x_fit, *popt)
                    resid = y_fit - y_pred
                    ssr = float(np.sum(resid**2))
                    tss = float(np.sum((y_fit - np.mean(y_fit))**2))
                    r2 = 1 - ssr / tss if tss > 0 else np.nan
                    rmse = float(np.sqrt(ssr / y_fit.size)) if y_fit.size > 0 else np.nan

                    # Model values at original bin positions
                    fit_at_bins = model_free_L(xdata_bins, c_max_fit, L_fit, A_fit)

                except Exception as e:
                    c_max_fit = A_fit = L_fit = np.nan
                    r2 = rmse = np.nan
                    status = f'fit_error: {e}'
                    fit_at_bins = np.full_like(xdata_bins, np.nan, dtype=float)

        # store per-fit summary row (NO offset)
        results.append({
            'replicate': rep,
            'pos': pos,
            'bg_offset': (float(BG_VALUE) if np.isfinite(BG_VALUE) else np.nan),
            'bg_method': 'min_bin_in_chamber',
            'c_max': c_max_fit,
            'L': L_fit,
            'A': A_fit,
            'r2': r2,
            'rmse': rmse,
            'status': status
        })

        # per-bin profile rows (intensity_bg_corrected & concentrations can be NaN)
        if bin_size is not None:
            x_phys = xdata_bins * float(bin_size)
        else:
            x_phys = np.full_like(xdata_bins, np.nan, dtype=float)

        for yb, xp, raw_v, bg_corr_v, conc, fy in zip(
            xdata_bins, x_phys, ydata_raw, y_bg_corrected, ydata, fit_at_bins
        ):
            profile_rows.append({
                'replicate': rep,
                'pos': pos,
                'y_bin': float(yb),
                'x_physical': float(xp) if np.isfinite(xp) else np.nan,
                'intensity_raw': (float(raw_v) if np.isfinite(raw_v) else np.nan),
                'intensity_bg_corrected': (float(bg_corr_v) if np.isfinite(bg_corr_v) else np.nan),
                'bg_offset': (float(BG_VALUE) if np.isfinite(BG_VALUE) else np.nan),
                'bg_method': 'min_bin_in_chamber',
                'estimated_hqno_concentration': (float(conc) if np.isfinite(conc) else np.nan),
                'fit_y': (float(fy) if np.isfinite(fy) else np.nan),
                'status': status
            })

        # Optional per-chamber plotting
        if plot_each and xdata_bins.size:
            x_plot = x_phys if bin_size is not None else xdata_bins
            x_label = f"Distance to PA ({'µm' if bin_size and bin_size > 0 else 'bins'})"

            x_dense_bins = np.linspace(np.nanmin(xdata_bins), np.nanmax(xdata_bins), 200)
            y_dense = None
            if status == 'success':
                y_dense = uptake_diffusion_sinh(x_dense_bins, c_max_fit, L_fit, A_fit)
                y_dense = np.clip(y_dense, 0.0, None)  # display-only guard
            x_dense_plot = x_dense_bins * float(bin_size) if bin_size is not None else x_dense_bins

            fig, ax = plt.subplots(figsize=figsize)
            ax.scatter(x_plot, ydata, s=20, label='bg(min bin) → inverse-Hill', zorder=2)
            if y_dense is not None:
                ax.plot(x_dense_plot, y_dense, 'r-', lw=2, label='fit', zorder=3)
            ax.set_xlabel(x_label)
            ax.set_ylabel(f"{intensity_col} → HQNO concentration")
            ttl_suffix = f"  [r²={r2:.2f}, RMSE={rmse:.2f}]" if np.isfinite(r2) else ""
            ax.set_title(f"{rep} — {pos}{ttl_suffix}")
            ax.grid(True, alpha=0.3)
            ax.legend()
            plt.tight_layout()
            plt.savefig(out_dir / f"fit_{rep}_{pos}.png", dpi=200, bbox_inches='tight')
            plt.close(fig)

    # save per-fit summary to CSV
    cols = ['replicate','pos','bg_offset','bg_method','c_max','L','A','r2','rmse','status']
    df_results = pd.DataFrame(results, columns=cols)
    df_results.to_csv(OUT_CSV, index=False)
    print(f"[cut_off={cut_off}] Saved {len(df_results)} fits to {OUT_CSV}")

    # save per-bin concentration profiles to CSV
    profile_cols = ['replicate','pos','y_bin','x_physical','intensity_raw',
                    'intensity_bg_corrected','bg_offset','bg_method','estimated_hqno_concentration',
                    'fit_y','status']
    df_profiles = pd.DataFrame(profile_rows, columns=profile_cols)
    df_profiles.to_csv(OUT_PROFILE_CSV, index=False)
    print(f"[cut_off={cut_off}] Saved {len(df_profiles)} profile rows to {OUT_PROFILE_CSV}")

    # metadata
    meta = {
        'cut_off': cut_off,
        'bg_method': 'min_bin_in_chamber',
        'bin_size': bin_size,
        'L_margin_bins': L_margin_bins,
        'intensity_col': intensity_col,
        'replicate_filter': sorted(replicate_filter) if replicate_filter else None,
        'calibration_json': str(Path(calibration_json)),
        'hqno_csv': str(Path(hqno_csv)),
    }
    with open(out_dir / 'run_meta.json', 'w', encoding='utf-8') as f:
        json.dump(meta, f, indent=2)

    return OUT_CSV  # for QC


# ======================================================
# ================== QC PLOTTING =======================
# ======================================================

def run_qc_plots(fits_csv: Path, out_dir: Path):
    out_dir = ensure_dir(out_dir / 'fit_qc_plots')

    # skip if file missing or empty
    if not Path(fits_csv).exists() or os.path.getsize(fits_csv) == 0:
        print(f"[qc] Skip: {fits_csv} missing or empty")
        return

    try:
        df = pd.read_csv(fits_csv)
    except pd.errors.EmptyDataError:
        print(f"[qc] Skip: {fits_csv} has no columns")
        return

    if df.empty:
        print(f"[qc] Skip: {fits_csv} has headers but no rows")
        return

    # Keep a copy with all rows (for status plot)
    df_all = df.copy()

    # Filter to successful fits for parameter analysis
    df = df[df['status'] == 'success'].copy()
    if df.empty:
        print(f"[qc] Skip plots: no successful fits in {fits_csv}")
        return

    # Coerce numerics
    for col in ['c_max','L','A','r2','rmse']:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Derived logs
    df['log10_A'] = np.log10(df['A'].where(df['A'] > 0))
    df['log10_cmax'] = np.log10(df['c_max'].where(df['c_max'] > 0))

    # ---- Summary table by replicate ----
    summary = (df.groupby('replicate', dropna=True)
                 .agg(
                     n=('pos','count'),
                     cmax_med=('c_max','median'),
                     cmax_iqr=('c_max', lambda s: np.subtract(*np.percentile(s.dropna(), [75,25]))),
                     A_med=('A','median'),
                     A_iqr=('A', lambda s: np.subtract(*np.percentile(s.dropna(), [75,25]))),
                     L_med=('L','median'),
                     r2_med=('r2','median'),
                     rmse_med=('rmse','median')
                 )
                 .reset_index())
    summary.to_csv(out_dir/'fit_summary_by_replicate.csv', index=False)

    # ---- 20/50/80 percentiles by replicate ----
    def _p(series, q):
        s = pd.to_numeric(series, errors='coerce').dropna()
        return np.nan if s.empty else np.percentile(s, q)

    percentiles = (df.groupby('replicate', dropna=True)
                     .apply(lambda g: pd.Series({
                         'c_max_p20': _p(g['c_max'], 20), 'c_max_med': _p(g['c_max'], 50), 'c_max_p80': _p(g['c_max'], 80),
                         'A_p20':     _p(g['A'], 20),     'A_med':     _p(g['A'], 50),     'A_p80':     _p(g['A'], 80),
                         'L_p20':     _p(g['L'], 20),     'L_med':     _p(g['L'], 50),     'L_p80':     _p(g['L'], 80),
                     }))
                     .reset_index())
    percentiles.to_csv(out_dir/'percentiles_20_50_80_by_replicate.csv', index=False)

    # ---- Plots ----
    sns.set(style='whitegrid', context='talk')
    palette = sns.color_palette('tab10', n_colors=max(1, df['replicate'].nunique()))

    # 1) c_max by replicate
    plt.figure(figsize=(10,6))
    sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)
    sns.stripplot(data=df, x='replicate', y='c_max', color='k', size=4, alpha=0.6, jitter=0.25)
    plt.title('HQNO: c_max by replicate')
    plt.tight_layout()
    plt.savefig(out_dir/'cmax_by_replicate.png', dpi=200)
    plt.close()

    # 2) A by replicate (log scale)
    plt.figure(figsize=(10,6))
    sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)
    sns.stripplot(data=df, x='replicate', y='log10_A', color='k', size=4, alpha=0.6, jitter=0.25)
    plt.ylabel('log10(A)')
    plt.title('HQNO: A (log-scale) by replicate')
    plt.tight_layout()
    plt.savefig(out_dir/'A_log_by_replicate.png', dpi=200)
    plt.close()

    # 3) L by replicate
    plt.figure(figsize=(10,6))
    sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)
    sns.stripplot(data=df, x='replicate', y='L', color='k', size=4, alpha=0.6, jitter=0.25)
    plt.title('HQNO: L by replicate')
    plt.tight_layout()
    plt.savefig(out_dir/'L_by_replicate.png', dpi=200)
    plt.close()

    # 4) Fit quality: r2 and rmse
    fig, axes = plt.subplots(1,2, figsize=(14,6))
    sns.boxplot(data=df, x='replicate', y='r2', ax=axes[0], palette=palette, showfliers=False)
    sns.stripplot(data=df, x='replicate', y='r2', ax=axes[0], color='k', size=4, alpha=0.6, jitter=0.25)
    axes[0].set_title('HQNO: R² by replicate')
    sns.boxplot(data=df, x='replicate', y='rmse', ax=axes[1], palette=palette, showfliers=False)
    sns.stripplot(data=df, x='replicate', y='rmse', ax=axes[1], color='k', size=4, alpha=0.6, jitter=0.25)
    axes[1].set_title('HQNO: RMSE by replicate')
    plt.tight_layout()
    plt.savefig(out_dir/'fit_quality_by_replicate.png', dpi=200)
    plt.close(fig)

    # 5) Status counts (QC)
    plt.figure(figsize=(8,5))
    status_counts = (df_all
                     .groupby(['replicate','status'])
                     .size()
                     .reset_index(name='count'))
    sns.barplot(data=status_counts, x='replicate', y='count', hue='status', palette='Set2')
    plt.title('HQNO: Fit status counts by replicate')
    plt.tight_layout()
    plt.savefig(out_dir/'status_counts_by_replicate.png', dpi=200)
    plt.close()

    # 6) c_max vs log10(A) with size ~ RMSE (use Seaborn size mapping)
    plt.figure(figsize=(8,6))
    plot_df = df.dropna(subset=['c_max','log10_A','rmse']).copy()
    sns.scatterplot(
        data=plot_df,
        x='c_max', y='log10_A',
        hue='replicate',
        size='rmse',
        sizes=(20, 200),
        palette=palette,
        alpha=0.8,
        edgecolor='white',
        linewidth=0.5,
        legend='full'
    )
    plt.title('HQNO: c_max vs log10(A) (point size ~ RMSE)')
    plt.tight_layout()
    plt.savefig(out_dir/'cmax_vs_logA_scatter.png', dpi=200)
    plt.close()

# ======================================================
# =========== CROSS-CUTOFF SUMMARY & PLOTS ============
# ======================================================

def build_cross_cutoff_summary(out_root: Path, sweep_cutoffs: list[int]):
    """
    Aggregates all per-run 3C_HQNO_concentration.csv files,
    computes summaries by cut_off, and makes comparison plots.
    """
    records = []
    status_records = []

    for c in sweep_cutoffs:
        run_dir = out_root / f"cutoff_{c}"
        csv_path = run_dir / '3C_HQNO_concentration.csv'
        if not csv_path.exists() or os.path.getsize(csv_path) == 0:
            print(f"[warn] Missing/empty results for cut_off={c}: {csv_path}")
            continue
        try:
            df = pd.read_csv(csv_path)
        except pd.errors.EmptyDataError:
            print(f"[warn] No columns to parse for cut_off={c}: {csv_path}")
            continue

        df['cut_off'] = c

        # status counts
        sc = (df.groupby('status').size().reset_index(name='count'))
        sc['cut_off'] = c
        status_records.append(sc)

        # only success for param stats
        dfg = df[df['status'] == 'success'].copy()
        for col in ['c_max','L','A','r2','rmse']:
            dfg[col] = pd.to_numeric(dfg[col], errors='coerce')

        if not dfg.empty:
            rec = {
                'cut_off': c,
                'n_success': int(dfg.shape[0]),
                'c_max_med': float(dfg['c_max'].median()),
                'c_max_iqr': float(np.subtract(*np.percentile(dfg['c_max'].dropna(), [75,25]))) if dfg['c_max'].notna().any() else np.nan,
                'A_med': float(dfg['A'].median()),
                'A_iqr': float(np.subtract(*np.percentile(dfg['A'].dropna(), [75,25]))) if dfg['A'].notna().any() else np.nan,
                'L_med': float(dfg['L'].median()),
                'L_iqr': float(np.subtract(*np.percentile(dfg['L'].dropna(), [75,25]))) if dfg['L'].notna().any() else np.nan,
                'r2_med': float(dfg['r2'].median()),
                'rmse_med': float(dfg['rmse'].median()),
            }
        else:
            rec = {'cut_off': c, 'n_success': 0,
                   'c_max_med': np.nan, 'c_max_iqr': np.nan,
                   'A_med': np.nan, 'A_iqr': np.nan,
                   'L_med': np.nan, 'L_iqr': np.nan,
                   'r2_med': np.nan, 'rmse_med': np.nan}
        records.append(rec)

    summary_dir = ensure_dir(out_root / 'sweep_summary')
    if not records:
        print('[warn] No records to summarize.')
        return

    df_sum = pd.DataFrame.from_records(records).sort_values('cut_off')
    df_sum.to_csv(summary_dir / 'summary_by_cutoff.csv', index=False)

    if status_records:
        df_status = pd.concat(status_records, ignore_index=True)
        df_status.to_csv(summary_dir / 'status_counts_by_cutoff.csv', index=False)
    else:
        df_status = pd.DataFrame(columns=['status','count','cut_off'])

    # ---- Comparison plots ----
    sns.set(style='whitegrid', context='talk')

    # 1) Success counts vs cut_off
    plt.figure(figsize=(8,5))
    plt.plot(df_sum['cut_off'], df_sum['n_success'], marker='o')
    plt.title('HQNO: Number of successful fits vs cut_off')
    plt.xlabel('cut_off (bins dropped at start)')
    plt.ylabel('n_success')
    plt.tight_layout()
    plt.savefig(summary_dir / 'n_success_vs_cutoff.png', dpi=200)
    plt.close()

    # 2) Medians with IQR ribbons for key params
    def line_with_iqr(metric_med, metric_iqr, ylabel, fname):
        plt.figure(figsize=(8,5))
        x = df_sum['cut_off'].to_numpy()
        y = df_sum[metric_med].to_numpy(dtype=float)
        iqr = df_sum[metric_iqr].to_numpy(dtype=float)
        ylo = y - 0.5*iqr
        yhi = y + 0.5*iqr
        plt.plot(x, y, marker='o', label=f"{metric_med.replace('_',' ')}")
        plt.fill_between(x, ylo, yhi, alpha=0.2, label='± IQR/2')
        plt.xlabel('cut_off')
        plt.ylabel(ylabel)
        plt.title(f'HQNO: {ylabel} vs cut_off (median ± half-IQR)')
        plt.tight_layout()
        plt.savefig(summary_dir / fname, dpi=200)
        plt.close()

    line_with_iqr('c_max_med','c_max_iqr', 'c_max', 'cmax_vs_cutoff.png')
    line_with_iqr('A_med','A_iqr', 'A', 'A_vs_cutoff.png')
    line_with_iqr('L_med','L_iqr', 'L', 'L_vs_cutoff.png')

    # 3) r2 and rmse medians vs cut_off
    plt.figure(figsize=(8,5))
    plt.plot(df_sum['cut_off'], df_sum['r2_med'], marker='o')
    plt.xlabel('cut_off')
    plt.ylabel('median R²')
    plt.title('HQNO: Fit quality (R² median) vs cut_off')
    plt.tight_layout()
    plt.savefig(summary_dir / 'r2_median_vs_cutoff.png', dpi=200)
    plt.close()

    plt.figure(figsize=(8,5))
    plt.plot(df_sum['cut_off'], df_sum['rmse_med'], marker='o')
    plt.xlabel('cut_off')
    plt.ylabel('median RMSE')
    plt.title('HQNO: Fit quality (RMSE median) vs cut_off')
    plt.tight_layout()
    plt.savefig(summary_dir / 'rmse_median_vs_cutoff.png', dpi=200)
    plt.close()

    # 4) Stacked status bars by cut_off
    if not df_status.empty:
        pivot = df_status.pivot_table(index='cut_off', columns='status', values='count', aggfunc='sum').fillna(0)
        pivot = pivot.sort_index()
        pivot.plot(kind='bar', stacked=True, figsize=(12,6))
        plt.title('HQNO: Status counts by cut_off')
        plt.xlabel('cut_off')
        plt.ylabel('count')
        plt.tight_layout()
        plt.savefig(summary_dir / 'status_stacked_by_cutoff.png', dpi=200)
        plt.close()


# ======================================================
# ====================== MAIN ==========================
# ======================================================
if __name__ == '__main__':
    # Run sweep
    for c in SWEEP_CUTOFFS:
        run_dir = OUT_ROOT / f'cutoff_{c}'
        fits_csv = run_hqno_fit_for_cutoff(
            cut_off=c,
            hqno_csv=HQNO_CSV,
            calibration_json=JSON_CAL,
            out_dir=run_dir,
            intensity_col=intensity_col,
            bin_size=bin_size,
            figsize=figsize,
            plot_each=plot_each,
            L_margin_bins=L_margin_bins,
            replicate_filter=REPLICATE_FILTER,
        )
        if Path(fits_csv).exists() and os.path.getsize(fits_csv) > 0:
            run_qc_plots(fits_csv=fits_csv, out_dir=run_dir)
        else:
            print(f"[main] Skipping QC for cut_off={c}: {fits_csv} missing/empty")

    # Cross-cutoff comparison
    build_cross_cutoff_summary(OUT_ROOT, SWEEP_CUTOFFS)
    print(f"\nAll done. Replicate filter: {sorted(REPLICATE_FILTER) if REPLICATE_FILTER else 'None'}. Results in: {OUT_ROOT}")


[cut_off=0] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=0] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_0\3C_HQNO_concentration.csv
[cut_off=0] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_0\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=1] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=1] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_1\3C_HQNO_concentration.csv
[cut_off=1] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_1\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=2] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=2] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_2\3C_HQNO_concentration.csv
[cut_off=2] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_2\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=3] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=3] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_3\3C_HQNO_concentration.csv
[cut_off=3] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_3\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=4] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=4] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_4\3C_HQNO_concentration.csv
[cut_off=4] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_4\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=5] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=5] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_5\3C_HQNO_concentration.csv
[cut_off=5] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_5\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax

[cut_off=6] Rows kept: 974/2083 (replicate filter = [4, 5, 7])
[cut_off=6] Saved 33 fits to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_6\3C_HQNO_concentration.csv
[cut_off=6] Saved 974 profile rows to C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep\cutoff_6\3C_HQNO_concentration_profiles.csv


  .apply(lambda g: pd.Series({

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='c_max', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='log10_A', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='L', palette=palette, showfliers=False)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df, x='replicate', y='r2', ax=ax


All done. Replicate filter: [4, 5, 7]. Results in: C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3C_hqno_concentration\hqno_cutoff_sweep
