## Celerite2 fit with Auxtel data

- author : Sylvie Dagoret-Campagne
- creation date : 2025-11-27
- last update : 2025-11-30
- on my Mac : use kernel gp_celerite2_py312

https://celerite2.readthedocs.io/en/latest/

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from celerite2 import terms, GaussianProcess
import scipy.optimize as op
from scipy.optimize import minimize

In [None]:
# Path (relative to the working directory) of the Auxtel PWV CSV file that is
# passed to run_auxtel_gp() at the end of this notebook.
filename_auxtel_data = "data/pwv/auxtel_PWV_data.csv"

In [None]:
# ================================================================
# 1️⃣ Load Auxtel CSV
# ================================================================

In [None]:
def load_auxtel(path_csv):
    """Read the Auxtel PWV CSV and return the series ordered by time.

    Parameters
    ----------
    path_csv : str
        Path of a CSV file with the columns ``Time``, ``PWV [mm]_x`` and
        ``PWV [mm]_err_x``.

    Returns
    -------
    tuple
        ``(t_days, y, yerr, t0)`` where ``t_days`` is the elapsed time in days
        since the earliest timestamp ``t0`` (parsed as UTC), and ``y`` /
        ``yerr`` are the PWV values and their errors, all three sorted by
        increasing ``t_days``.
    """
    table = pd.read_csv(path_csv)
    table["Time"] = pd.to_datetime(table["Time"], utc=True)

    # Express every timestamp as a float number of days since the first one.
    start = table["Time"].min()
    elapsed = table["Time"] - start
    elapsed_days = elapsed.dt.total_seconds().values / 86400.0

    # A single permutation keeps the three arrays aligned after sorting.
    order = np.argsort(elapsed_days)
    pwv = table["PWV [mm]_x"].values[order]
    pwv_err = table["PWV [mm]_err_x"].values[order]
    return elapsed_days[order], pwv, pwv_err, start

In [None]:
# ================================================================
# 2️⃣ Build multi-periodic kernel
# ================================================================

In [None]:
def build_kernel():
    """Build the multi-periodic celerite2 kernel used for the PWV fit.

    The kernel is a sum of ``SHOTerm`` components: one per entry of
    ``periods`` (expressed in days) with ``S0`` set to the squared entry of
    ``amplitudes`` and ``Q = 1/sqrt(2)``, plus one low-frequency drift term.

    Returns
    -------
    celerite2.terms.TermSum
        One flat sum whose ``.terms`` contains every ``SHOTerm`` directly
        (the periodic terms first, the drift term last).
    """
    periods = [365.25, 182.625, 91.3125, 1.0, 0.5]  # days
    amplitudes = [3.0, 2.0, 1.5, 0.5, 0.3]
    quality = 1.0 / np.sqrt(2)

    periodic_terms = [
        terms.SHOTerm(S0=amp**2, Q=quality, w0=2 * np.pi / period)
        for period, amp in zip(periods, amplitudes)
    ]

    # Low-frequency drift
    drift = terms.SHOTerm(S0=5.0**2, Q=0.5, w0=2 * np.pi / 2000.0)

    # Build ONE flat sum. Writing ``TermSum(*periodic_terms) + drift`` instead
    # wraps the periodic terms in an inner TermSum, so code that loops over
    # ``kernel.terms`` and filters on ``isinstance(term, terms.SHOTerm)``
    # would only ever see the drift term and leave the others un-optimised.
    return terms.TermSum(*periodic_terms, drift)

In [None]:
# ================================================================
# 3️⃣ Run GP fit
# ================================================================

In [None]:
def run_auxtel_gp(path_csv, jitter=0.05):
    """Fit the celerite2 GP to the Auxtel PWV series, then predict and plot.

    The SHO parameters ``(S0, Q, w0)`` of every ``SHOTerm`` in the kernel are
    optimised with bounded L-BFGS-B by minimising the negative GP
    log-likelihood.

    Parameters
    ----------
    path_csv : str
        Path of the CSV file handed to ``load_auxtel``.
    jitter : float, optional
        White-noise level; ``jitter**2`` is added to ``yerr**2`` on the
        diagonal passed to ``gp.compute``.

    Returns
    -------
    tuple
        ``(kernel, t, y, yerr, t_pred, mu, std)``: the optimised kernel, the
        loaded data, the 2000-point prediction grid, and the predictive mean
        and standard deviation on that grid.
    """
    t, y, yerr, t0 = load_auxtel(path_csv)
    print(f"Loaded {len(t)} Auxtel PWV points")

    # ------------------------------
    # Build kernel and GP
    # ------------------------------
    kernel = build_kernel()
    gp = GaussianProcess(kernel)
    diag = yerr**2 + jitter**2  # measurement variance + white jitter
    gp.compute(t, diag=diag)
    print("Initial log-likelihood:", gp.log_likelihood(y))

    # ------------------------------
    # Collect the SHO terms to optimise
    # ------------------------------
    def iter_leaf_terms(node):
        # Descend through nested TermSum objects: a kernel composed with
        # ``+`` may hold its SHO terms inside an inner TermSum, and a plain
        # loop over ``kernel.terms`` would then miss them.
        if isinstance(node, terms.TermSum):
            for child in node.terms:
                yield from iter_leaf_terms(child)
        else:
            yield node

    sho_terms = [
        leaf for leaf in iter_leaf_terms(kernel)
        if isinstance(leaf, terms.SHOTerm)
    ]

    # ------------------------------
    # Initial vector and bounds, laid out as (S0, Q, w0) per term
    # ------------------------------
    per_term_bounds = [
        (1e-3, 50.0),                      # S0
        (0.01, 10.0),                      # Q
        (2 * np.pi / 2000, 2 * np.pi / 0.1),  # w0
    ]
    x0 = [value for sho in sho_terms for value in (sho.S0, sho.Q, sho.w0)]
    bounds = per_term_bounds * len(sho_terms)

    def set_parameters(x):
        # Write the flat parameter vector back into the terms and refactorise
        # the GP so that likelihood/prediction use these exact values.
        for k, sho in enumerate(sho_terms):
            sho.S0, sho.Q, sho.w0 = x[3 * k: 3 * k + 3]
        gp.compute(t, diag=diag)

    # ------------------------------
    # Negative log-likelihood
    # ------------------------------
    def neg_log_like(x):
        set_parameters(x)
        return -gp.log_likelihood(y)

    # ------------------------------
    # Optimisation
    # ------------------------------
    res = minimize(neg_log_like, x0, method="L-BFGS-B", bounds=bounds)
    print("Optimization success:", res.success)
    print("Final negative log-likelihood:", res.fun)

    # ------------------------------
    # Apply the optimised parameters
    # ------------------------------
    # The optimiser's last objective call may not have been at ``res.x``, so
    # the parameters are re-applied AND the GP is recomputed before predicting.
    set_parameters(res.x)

    # ------------------------------
    # Prediction
    # ------------------------------
    t_pred = np.linspace(t.min(), t.max(), 2000)
    mu, variance = gp.predict(y, t=t_pred, return_var=True)
    std = np.sqrt(variance)

    # ------------------------------
    # Plot
    # ------------------------------
    plt.figure(figsize=(14,5))
    plt.errorbar(t, y, yerr, fmt=".k", alpha=0.3, label="Auxtel PWV")
    plt.plot(t_pred, mu, lw=2, label="GP mean")
    plt.fill_between(t_pred, mu-std, mu+std, alpha=0.3)
    plt.xlabel(f"Days since {t0.date()}")
    plt.ylabel("PWV [mm]")
    plt.title("celerite2 GP fit — Auxtel PWV")
    plt.legend()
    plt.tight_layout()
    plt.show()

    return kernel, t, y, yerr, t_pred, mu, std

In [None]:
def run_auxtel_gp(path_csv, jitter=0.001):
    """Fit the celerite2 GP to the Auxtel PWV series, predict, clip and plot.

    The SHO parameters ``(S0, Q, w0)`` of every ``SHOTerm`` in the kernel are
    optimised with bounded L-BFGS-B by minimising the negative GP
    log-likelihood. The predictive mean and standard deviation are clipped to
    ``[0, 20]`` before plotting and returning.

    Parameters
    ----------
    path_csv : str
        Path of the CSV file handed to ``load_auxtel``.
    jitter : float, optional
        White-noise level; ``jitter**2`` is added to ``yerr**2`` on the
        diagonal passed to ``gp.compute``.

    Returns
    -------
    tuple
        ``(kernel, t, y, yerr, t_pred, mu, std)``: the optimised kernel, the
        loaded data, the 2000-point prediction grid, and the clipped
        predictive mean and standard deviation on that grid.
    """
    t, y, yerr, t0 = load_auxtel(path_csv)
    print(f"Loaded {len(t)} Auxtel PWV points")

    # ------------------------------
    # Build kernel and GP
    # ------------------------------
    kernel = build_kernel()
    gp = GaussianProcess(kernel)
    diag = yerr**2 + jitter**2  # measurement variance + white jitter
    gp.compute(t, diag=diag)
    print("Initial log-likelihood:", gp.log_likelihood(y))

    # ------------------------------
    # Collect the SHO terms to optimise
    # ------------------------------
    def iter_leaf_terms(node):
        # Descend through nested TermSum objects: a kernel composed with
        # ``+`` may hold its SHO terms inside an inner TermSum, and a plain
        # loop over ``kernel.terms`` would then miss them.
        if isinstance(node, terms.TermSum):
            for child in node.terms:
                yield from iter_leaf_terms(child)
        else:
            yield node

    sho_terms = [
        leaf for leaf in iter_leaf_terms(kernel)
        if isinstance(leaf, terms.SHOTerm)
    ]

    # ------------------------------
    # Initial vector and bounds, laid out as (S0, Q, w0) per term
    # ------------------------------
    per_term_bounds = [
        (1e-3, 50.0),                      # S0
        (0.01, 10.0),                      # Q
        (2 * np.pi / 2000, 2 * np.pi / 0.1),  # w0
    ]
    x0 = [value for sho in sho_terms for value in (sho.S0, sho.Q, sho.w0)]
    bounds = per_term_bounds * len(sho_terms)

    def set_parameters(x):
        # Write the flat parameter vector back into the terms and refactorise
        # the GP on the observed points with these exact values.
        for k, sho in enumerate(sho_terms):
            sho.S0, sho.Q, sho.w0 = x[3 * k: 3 * k + 3]
        gp.compute(t, diag=diag)

    # ------------------------------
    # Negative log-likelihood
    # ------------------------------
    def neg_log_like(x):
        set_parameters(x)
        return -gp.log_likelihood(y)

    # ------------------------------
    # Optimisation
    # ------------------------------
    res = minimize(neg_log_like, x0, method="L-BFGS-B", bounds=bounds)
    print("Optimization success:", res.success)
    print("Final negative log-likelihood:", res.fun)

    # ------------------------------
    # Apply the optimised parameters (also recomputes the GP)
    # ------------------------------
    set_parameters(res.x)

    # ------------------------------
    # Prediction
    # ------------------------------
    t_pred = np.linspace(t.min(), t.max(), 2000)
    mu, variance = gp.predict(y, t=t_pred, return_var=True)
    std = np.sqrt(variance)

    # ------------------------------
    # Clip PWV between 0 and 20
    # ------------------------------
    mu = np.clip(mu, 0.0, 20.0)
    std = np.clip(std, 0.0, 20.0)

    # ------------------------------
    # Plot (the band is drawn from the clipped mean and std)
    # ------------------------------
    plt.figure(figsize=(14,5))
    plt.errorbar(t, y, yerr, fmt=".k", alpha=0.3, label="Auxtel PWV")
    plt.plot(t_pred, mu, lw=2, label="GP mean")
    plt.fill_between(t_pred, mu-std, mu+std, alpha=0.3)
    plt.xlabel(f"Days since {t0.date()}")
    plt.ylabel("PWV [mm]")
    plt.title("celerite2 GP fit — Auxtel PWV")
    plt.legend()
    plt.tight_layout()
    plt.show()

    return kernel, t, y, yerr, t_pred, mu, std


In [None]:
# ================================================================
# 4️⃣ Run script
# ================================================================

In [None]:
#if __name__ == "__main__":
    # Replace with your own CSV file
#    filename = "auxtel_full_pwv.csv"
#    run_auxtel_gp(filename)

In [None]:
# Run the full load / fit / predict / plot pipeline on the Auxtel PWV file
# named by filename_auxtel_data.
run_auxtel_gp(filename_auxtel_data)