In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import curve_fit
import json
import re

# =========================
# Config
# =========================
# Per-bin (y_bin) HQNO data for fitting.
# The CSV must contain the columns 'replicate', 'pos', 'y_bin' and the column
# named by `intensity_col`; each (replicate, pos) pair is processed as one chamber.
HQNO_CSV    = r'C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3A_hqno_gfp\3A_HQNO_gfp_gradient_visualization.csv'

# Hill calibration JSON (HQNO calibration).
# Read for cal["params"]["a"], cal["params"]["c"] and cal["params"]["b"].
JSON        = r'C:\Users\raachben\Documents\PhD\projets\Guilia\Paper\Analysis\Figures\Figures3\3B_hqno_calibration\3B_hqno_calibration.json'

# Outputs (relative paths, i.e. written to the current working directory)
OUTPUT_CSV           = r'3C_HQNO_concentration.csv'                # one summary row per (replicate, pos) fit
OUTPUT_PROFILE_CSV   = r'3C_HQNO_concentration_profiles.csv'       # one row per bin: raw, bg-corrected, concentration, fit
OUTPUT_BGCORR_CSV    = r'3C_HQNO_bg_corrected_intensities.csv'   # tidy per-bin, bg-corrected intensities
OUTPUT_BG_CHAMBER    = r'3C_HQNO_bg_minbin_by_chamber.csv'        # per-chamber bg log (RHL-style)

# User-tunable analysis options
intensity_col  = 'median_intensity'  # column in HQNO_CSV to background-correct
cut_off        = 3                   # drop this many leading y_bins of each chamber before fitting (non-negative int)
plot_each      = False               # True → build one matplotlib figure per (replicate, pos)
bin_size       = 50 / 30             # µm per bin (for plotting/export convenience); None → x_physical is NaN, plots use bins
figsize        = (5, 4)              # passed to plt.subplots() for the per-chamber figures
L_margin_bins  = 1.0                 # L must be >= last fitted bin + this margin (the fit applies max(1.0, margin))

# =========================
# Load calibration
# =========================
# JSON text is UTF-8; pin the encoding instead of relying on the platform
# locale (e.g. cp1252 on Windows), which can mis-decode non-ASCII content.
with open(JSON, "r", encoding="utf-8") as f:
    cal = json.load(f)
# Hill parameters, passed later as hill_inverse(y, a, c, b).
calibration_a  = cal["params"]["a"]
calibration_c  = cal["params"]["c"]
calibration_b  = cal["params"]["b"]

# =========================
# Helpers
# =========================

def uptake_diffusion_sinh(x, c_max, L, A):
    """
    Uptake–diffusion profile (no offset), evaluated without overflow:

        c(x) = c_max * sinh(sqrt(A)*(L - x)) / sinh(sqrt(A)*L)

    Calling np.sinh directly overflows to inf once sqrt(A)*L exceeds ~710,
    which turns the ratio into inf/inf = NaN. The ratio is therefore computed
    from the algebraically identical form

        sinh(u)/sinh(v) = sign(u)*sign(v) * exp(|u| - |v|)
                          * expm1(-2|u|) / expm1(-2|v|)

    whose exponentials only have non-positive arguments whenever |u| <= |v|
    (i.e. for 0 <= x <= L).

    Parameters
    ----------
    x : scalar or array-like
        Position(s) at which to evaluate the profile.
    c_max : float
        Value of the profile at x = 0.
    L : float
        Position at which the profile reaches zero.
    A : float
        Squared decay rate; sqrt(A) multiplies both L - x and L.

    Returns
    -------
    Same shape as `x`. NaN when A < 0 (sqrt is undefined) or when
    sqrt(A)*L == 0 and L == x (0/0), as with the direct sinh ratio.
    """
    sA = np.sqrt(A)
    u = sA * (L - np.asarray(x, dtype=float))
    v = sA * L
    abs_u = np.abs(u)
    abs_v = np.abs(v)
    # copysign (rather than sign) keeps the ±inf result of the direct formula
    # when v == 0 instead of collapsing it to NaN.
    sign = np.sign(u) * np.copysign(1.0, v)
    ratio = sign * np.exp(abs_u - abs_v) * np.expm1(-2.0 * abs_u) / np.expm1(-2.0 * abs_v)
    return c_max * ratio


def hill_inverse(y, a, c, b):
    """
    Inverse Hill transform: returns c * (y / (a - y))**(1/b).

    No guarding is done here: y == a divides by zero and y outside [0, a)
    gives a negative base, so callers must restrict y beforehand.
    """
    odds = y / (a - y)
    exponent = 1 / b
    return c * odds**exponent


def _normalize_replicate_key(rep_val) -> str:
    """
    Normalize replicate labels like 1, '1', 'replicate_1', 'Rep1' → 'replicate_1'.
    (Kept for consistency; not used for background anymore.)
    """
    if pd.isna(rep_val):
        raise ValueError("Replicate is NaN/None.")
    s = str(rep_val).strip().lower()
    m = re.search(r'(\d+)', s)
    if m:
        return f"replicate_{int(m.group(1))}"
    return s

# =========================
# Load main per-bin data & fit (RHL-style per-chamber min-bin background)
# =========================
# Pipeline, per (replicate, pos) chamber:
#   1. background = minimum finite bin intensity of the chamber
#   2. subtract background, clip negatives to 0
#   3. intensity → concentration through the inverse Hill calibration
#   4. fit the uptake–diffusion profile to the bins left after `cut_off`
#   5. collect summary / per-bin / background rows, optionally plot
# Finally the four collections are written as CSV files.
outer = pd.read_csv(HQNO_CSV)

needed_cols = {'replicate', 'pos', 'y_bin', intensity_col}
missing = needed_cols - set(outer.columns)
if missing:
    raise RuntimeError(f"HQNO_CSV missing required columns: {sorted(missing)}")

# cut_off does not depend on the chamber: validate it once, up front.
if not (isinstance(cut_off, (int, np.integer)) and cut_off >= 0):
    raise ValueError("cut_off must be a non-negative integer")

BG_METHOD = 'min_bin_in_chamber'


def _finite_or_nan(value):
    """Return `value` as a Python float, or NaN when it is NaN or ±inf."""
    return float(value) if np.isfinite(value) else np.nan


combos = outer[['replicate', 'pos']].drop_duplicates().reset_index(drop=True)

results = []
profile_rows = []
bgcorr_rows = []
bg_log_rows = []

# itertuples (not iterrows): iterrows packs each row into one Series and
# upcasts mixed columns to a common dtype, which can turn integer labels into
# floats in the exported CSVs.
for rep_raw, pos in combos.itertuples(index=False, name=None):
    in_chamber = (outer['replicate'] == rep_raw) & (outer['pos'] == pos)
    # Stable sort by y_bin so that "the first cut_off bins" really are the
    # lowest y_bins, whatever the row order of the CSV.
    df_o = outer[in_chamber].sort_values('y_bin', kind='mergesort')
    xdata_bins = df_o['y_bin'].to_numpy(dtype=float)
    ydata_raw  = df_o[intensity_col].to_numpy(dtype=float)

    # --- RHL-style: chamber-specific background = minimum finite bin intensity in this chamber ---
    valid_raw = np.isfinite(ydata_raw)
    BG_VALUE = float(ydata_raw[valid_raw].min()) if valid_raw.any() else np.nan

    # subtract; clip negatives → 0 (non-finite bins stay NaN)
    y_bg_corrected = ydata_raw - BG_VALUE
    y_bg_corrected = np.where(np.isfinite(y_bg_corrected), np.maximum(y_bg_corrected, 0.0), np.nan)

    # --- Convert intensity → concentration via inverse Hill (guard y < a) ---
    # The inverse Hill curve diverges at y == a, so a bin at or above the
    # plateau has no finite concentration. Mark it NaN (it is then excluded
    # from the fit) rather than clipping to `a - 1e-12`: that absolute offset
    # rounds back to `a` for large `a` (division by zero) and otherwise yields
    # an astronomically large value that would dominate the least-squares fit.
    y_for_inv = np.where(y_bg_corrected < calibration_a, y_bg_corrected, np.nan)
    ydata = hill_inverse(y_for_inv, calibration_a, calibration_c, calibration_b)

    # Trim by cut_off for fitting
    xfit_bins = xdata_bins[cut_off:]
    yfit_all  = ydata[cut_off:]

    # Defaults describe "no fit"; they are overwritten only on success.
    c_max_fit = A_fit = L_fit = np.nan
    r2 = rmse = np.nan
    fit_at_bins = np.full_like(xdata_bins, np.nan, dtype=float)

    # --- Fit with bounds: L beyond data, c_max≥0, A>0 (NO offset) ---
    if xfit_bins.size == 0:
        status = 'no_data'
    else:
        mask_fit = np.isfinite(yfit_all)
        x_fit = xfit_bins[mask_fit]
        y_fit = yfit_all[mask_fit]

        if y_fit.size < 3:
            # three free parameters need at least three points
            status = 'too_few_points'
        else:
            # L (zero crossing of the profile) must lie beyond the last
            # fitted bin by at least one bin, and at most at twice that bound.
            last_bin_fit = float(np.max(xfit_bins))
            L_lower = last_bin_fit + max(1.0, float(L_margin_bins))

            yfit_max = float(np.max(y_fit))
            p0 = [max(1e-9, 3.0 * yfit_max),  # c_max
                  L_lower + 1.0,              # L initial
                  1.0]                        # A
            lb_eps = 1e-12                    # keeps A strictly positive
            lower_bounds = [0.0, L_lower, lb_eps]
            upper_bounds = [np.inf, 2*L_lower, np.inf]

            try:
                popt, _ = curve_fit(
                    uptake_diffusion_sinh, x_fit, y_fit,
                    p0=p0, bounds=(lower_bounds, upper_bounds),
                    maxfev=10000
                )
            except Exception as e:
                # Best effort by design: a failed chamber is recorded in
                # `status` and must not abort the remaining chambers.
                status = f'fit_error: {e}'
            else:
                c_max_fit, L_fit, A_fit = map(float, popt)
                status = 'success'

                # Goodness of fit on the points actually used
                y_pred = uptake_diffusion_sinh(x_fit, *popt)
                resid = y_fit - y_pred
                ssr = float(np.sum(resid**2))
                tss = float(np.sum((y_fit - np.mean(y_fit))**2))
                r2 = 1 - ssr / tss if tss > 0 else np.nan
                rmse = float(np.sqrt(ssr / y_fit.size)) if y_fit.size > 0 else np.nan

                # Model evaluated at every bin, including the cut-off ones
                fit_at_bins = uptake_diffusion_sinh(xdata_bins, c_max_fit, L_fit, A_fit)

    bg_offset = _finite_or_nan(BG_VALUE)

    # Per-fit summary
    results.append({
        'replicate': rep_raw,
        'pos': pos,
        'bg_offset': bg_offset,
        'bg_method': BG_METHOD,
        'c_max': c_max_fit,
        'L': L_fit,
        'A': A_fit,
        'r2': r2,
        'rmse': rmse,
        'status': status
    })

    # Per-bin profile rows (+ tidy bg-corrected intensities)
    if bin_size is not None:
        x_phys = xdata_bins * float(bin_size)
    else:
        x_phys = np.full_like(xdata_bins, np.nan, dtype=float)

    for yb, xp, raw_v, bg_corr_v, conc, fy in zip(
        xdata_bins, x_phys, ydata_raw, y_bg_corrected, ydata, fit_at_bins
    ):
        profile_rows.append({
            'replicate': rep_raw,
            'pos': pos,
            'y_bin': float(yb),
            'x_physical': _finite_or_nan(xp),
            'intensity_raw': _finite_or_nan(raw_v),
            'intensity_bg_corrected': _finite_or_nan(bg_corr_v),
            'bg_offset': bg_offset,
            'bg_method': BG_METHOD,
            'estimated_hqno_concentration': _finite_or_nan(conc),
            'fit_y': _finite_or_nan(fy),
            'status': status
        })

        bgcorr_rows.append({
            'replicate': rep_raw,
            'pos': pos,
            'y_bin': float(yb),
            'x_physical': _finite_or_nan(xp),
            'intensity_bg_corrected': _finite_or_nan(bg_corr_v),
            'bg_offset': bg_offset,
            'bg_method': BG_METHOD
        })

    # Log per-chamber background ('n_bins' counts bins with a finite raw intensity)
    bg_log_rows.append({
        'replicate': rep_raw,
        'pos': pos,
        'bg_offset_min_bin': bg_offset,
        'n_bins': int(valid_raw.sum()),
        'bg_method': BG_METHOD
    })

    # Optional per-chamber plotting
    if plot_each and xdata_bins.size:
        if bin_size is not None:
            x_plot = xdata_bins * float(bin_size)
            x_label = f"Distance to PA ({'µm' if bin_size and bin_size > 0 else 'units'})"
        else:
            x_plot = xdata_bins
            x_label = "Distance to PA (bins)"

        # Dense curve over the observed bin range; drawn only for a successful fit
        x_dense_bins = np.linspace(np.nanmin(xdata_bins), np.nanmax(xdata_bins), 200)
        y_dense = None
        if np.isfinite(c_max_fit) and np.isfinite(L_fit) and np.isfinite(A_fit):
            y_dense = uptake_diffusion_sinh(x_dense_bins, c_max_fit, L_fit, A_fit)
            y_dense = np.clip(y_dense, 0.0, None)
        x_dense_plot = x_dense_bins * float(bin_size) if bin_size is not None else x_dense_bins

        fig, ax = plt.subplots(figsize=figsize)
        ax.scatter(x_plot, ydata, s=20, label='bg(min bin) → concentration', zorder=2)
        if y_dense is not None:
            ax.plot(x_dense_plot, y_dense, 'r-', lw=2, label='fit', zorder=3)
        ax.set_xlabel(x_label)
        ax.set_ylabel(f"{intensity_col} (bg-corrected → inverse-Hill)")
        ttl_suffix = f"  [r²={r2:.2f}, RMSE={rmse:.2f}]" if np.isfinite(r2) else ""
        ax.set_title(f"{rep_raw} — {pos}{ttl_suffix}")
        ax.grid(True, alpha=0.3)
        ax.legend()
        plt.tight_layout()
        # Render now and release the figure; otherwise one figure per chamber
        # stays open until the end of the run.
        plt.show()
        plt.close(fig)

# Save outputs
pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
print(f"Saved {len(results)} fits (per-chamber min-bin HQNO backgrounds) to {OUTPUT_CSV}")

pd.DataFrame(profile_rows).to_csv(OUTPUT_PROFILE_CSV, index=False)
print(f"Saved {len(profile_rows)} profile rows to {OUTPUT_PROFILE_CSV}")

pd.DataFrame(bgcorr_rows).to_csv(OUTPUT_BGCORR_CSV, index=False)
print(f"Saved {len(bgcorr_rows)} bg-corrected intensity rows to {OUTPUT_BGCORR_CSV}")

pd.DataFrame(bg_log_rows).to_csv(OUTPUT_BG_CHAMBER, index=False)
print(f"Saved per-chamber backgrounds (min bin) to {OUTPUT_BG_CHAMBER}")


Saved 71 fits (per-chamber min-bin HQNO backgrounds) to 3C_HQNO_concentration.csv
Saved 2092 profile rows to 3C_HQNO_concentration_profiles.csv
Saved 2092 bg-corrected intensity rows to 3C_HQNO_bg_corrected_intensities.csv
Saved per-chamber backgrounds (min bin) to 3C_HQNO_bg_minbin_by_chamber.csv


: 