# 0. Read and prepare data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from sklearn.linear_model import LinearRegression
from scipy.optimize import curve_fit

# Divisor applied to dL to obtain kdL.
# NOTE(review): the meaning/units of 16667 are not stated in this file — confirm.
dt = 16667

# Load the fit results, then append the derived columns in the order
# Lbar, dL, kdL (each is the midpoint of a Min/Max column pair, or dL / dt).
df = pd.read_csv('synthetic_fit_results.csv')
df = df.assign(
    Lbar=lambda t: (t["MeanMin"] + t["MeanMax"]) / 2,
    dL=lambda t: (t["DiffMin"] + t["DiffMax"]) / 2,
)
df = df.assign(kdL=lambda t: t["dL"] / dt)

# Per-row number of data points, as floats.
w = df["Count"].astype(float)

# Bare expression so the notebook displays the prepared table.
df



# 1. Regression for each $\bar{L}$ (1st Regression)

In [None]:
# 1st regression: for every (P, Lbar) bin fit the weighted straight line
#     Mu = a * kdL + b
# using Count as the sample weight, and collect one row per bin in `ab`
# (columns: P, Lbar, a, b, Count where Count is the bin's summed weight).

# Bins with fewer rows than this are skipped.
MIN_POINTS_PER_BIN = 5

records = []
for (p, Lbar_bin), df_bin in df.groupby(['P', 'Lbar']):
    if len(df_bin) < MIN_POINTS_PER_BIN:
        continue

    lin1 = LinearRegression()
    lin1.fit(df_bin[['kdL']], df_bin['Mu'],
             sample_weight=df_bin['Count'])

    a, b = lin1.coef_[0], lin1.intercept_
    records.append([p, Lbar_bin, a, b, df_bin['Count'].sum()])

ab = pd.DataFrame(records, columns=["P", "Lbar", "a", "b", "Count"])
print(ab)

In [None]:
# Scatter of 1/a (inverse slope of the 1st regression) against Lbar,
# one colour/marker per polarity P, marker area proportional to Count.
plt.figure(figsize=(10, 6))

# Colour and marker per polarity; unknown polarities fall back to gray 'x'
# instead of raising KeyError.
colors = {1: 'red', 0: 'blue', -1: 'green'}
markers = {1: 'o', 0: 's', -1: '^'}

# Sizes are normalised by the global maximum so they are comparable
# across polarities.
count_max = ab['Count'].max()

for p, ab_p in ab.groupby('P', sort=False):
    plt.scatter(ab_p['Lbar'],
                1 / ab_p['a'],
                c=colors.get(p, 'gray'),
                marker=markers.get(p, 'x'),
                s=ab_p['Count'] / count_max * 300,
                alpha=0.6,
                label=f'P={p}')

plt.xlabel(r'$\bar{L}$')
# The plotted quantity is 1/a, so label it as such.
plt.ylabel(r'$1/a$')
plt.title('Lbar vs 1/a by Polarity')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Overlay the raw (kdL, Mu) points and the 1st-regression line of every bin.
fig, ax = plt.subplots()
color_map = {1: 'red', 0: 'blue', -1: 'blue'}

# `ab` holds one fit per (P, Lbar) bin, so the lookup must use both keys;
# looking up by Lbar alone would pick an arbitrary polarity's fit.
fits = {(row.P, row.Lbar): (row.a, row.b) for row in ab.itertuples(index=False)}

for (p, Lbar_bin), df_bin in df.groupby(["P", "Lbar"]):
    # Bins that were skipped by the 1st regression have no fit to draw.
    if (p, Lbar_bin) not in fits:
        continue
    a, b = fits[(p, Lbar_bin)]

    # Raw data points, coloured by polarity.
    color = color_map.get(p, "gray")
    ax.scatter(df_bin["kdL"], df_bin["Mu"], color=color, alpha=0.5)

    # Regression line over the kdL range covered by this bin.
    x_vals = np.linspace(df_bin["kdL"].min(), df_bin["kdL"].max(), 100)
    y_vals = a * x_vals + b
    ax.plot(x_vals, y_vals, label=f"P={p}, Lbar={Lbar_bin}")


ax.set_xlabel("kdL")
ax.set_ylabel("Mu")
ax.set_title("Linear Regression Fits by Lbar Bin")
ax.legend(loc='upper left')
plt.show()


# 2. Use $a^{-1}$ to fit $k_1$, $k_2$ (2nd Regression)

In [None]:
# 2nd regression, per polarity: fit the weighted straight line
#     1/a = (1/k1) * Lbar + k2/k1
# so that k1 = 1/slope and k2 = intercept * k1. Results are stored in
# fit2_result[p] = (k1, k2) and one diagnostic figure is drawn per polarity.
fit2_result = {}
for p, df_pol in ab.groupby('P'):
    # Work on a copy: adding a column to a groupby slice would otherwise
    # trigger pandas' SettingWithCopyWarning.
    df_pol = df_pol.copy()
    df_pol["inv_a"] = 1 / df_pol["a"]

    lin2 = LinearRegression()
    x_hat2 = df_pol[["Lbar"]]
    y_hat2 = df_pol["inv_a"]
    lin2.fit(x_hat2, y_hat2, sample_weight=df_pol["Count"])

    k1 = 1 / lin2.coef_[0]
    k2 = lin2.intercept_ * k1
    fit2_result[p] = (k1, k2)
    print(f"k1_{p}={k1:.3e},  k2_{p}={k2:.3e}")

    plt.figure(figsize=(8, 6))
    # Marker area proportional to the bin's Count within this polarity.
    sizes = df_pol["Count"] / df_pol["Count"].max() * 300

    # Draw scatter plot
    plt.scatter(x_hat2, y_hat2, s=sizes, alpha=.7,
                label="data points", color='tab:blue')

    # Draw fitting line over the Lbar range of all polarities, so the
    # per-polarity figures share the same x extent.
    x_line = np.linspace(ab["Lbar"].min(), ab["Lbar"].max(), 200)
    y_line = (1 / k1) * x_line + k2 / k1
    plt.plot(x_line, y_line, 'r', lw=2)

    # Set plot style
    plt.xlabel(r"$\bar L$")
    plt.ylabel(r"$1/a$")
    plt.title("2nd Regression")
    plt.grid(ls='--', alpha=.4)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

In [None]:
# Summary figure for the 2nd regression: every polarity's (Lbar, 1/a)
# points and its fitted line on one set of axes.
#
# The data and coefficients are taken from `ab` and `fit2_result` rather than
# from variables left over from the last loop iteration of the fitting cell;
# those only describe the last polarity and do not match a size array built
# from the whole of `ab`.
plt.figure(figsize=(8, 6))

palette = {1: 'tab:red', 0: 'tab:blue', -1: 'tab:green'}
count_max = ab["Count"].max()   # common scale for marker sizes
x_line = np.linspace(ab["Lbar"].min(), ab["Lbar"].max(), 200)

for p, ab_pol in ab.groupby("P"):
    # Skip polarities for which no 2nd-regression result is available.
    if p not in fit2_result:
        continue
    k1_p, k2_p = fit2_result[p]
    color = palette.get(p, 'tab:gray')

    # Draw scatter plot
    plt.scatter(ab_pol["Lbar"], 1 / ab_pol["a"],
                s=ab_pol["Count"] / count_max * 300, alpha=.7,
                label=f"P={p} data points", color=color)

    # Draw fitting line: 1/a = (1/k1) * Lbar + k2/k1
    y_line = (1 / k1_p) * x_line + k2_p / k1_p
    plt.plot(x_line, y_line, color=color, lw=2, label=f"P={p} fit")

# Set plot style
plt.xlabel(r"$\bar L$")
plt.ylabel(r"$1/a$")
plt.title("2nd Regression")
plt.grid(ls='--', alpha=.4)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# 3. Fit $k_4$, $k_5$ and cross-validate $k_1$ (3rd Regression)

In [None]:
# 3rd regression, per polarity: fit the weighted plane
#     Mu = k1_check * c + k5 * Lbar + k4,   c = kdL / (Lbar + k2)
# where k2 is that polarity's value from the 2nd regression.
fit3_results = {}  # per-polarity (k1_check, k4, k5)

# Attach each row's own (P, Lbar) fit. `ab` is keyed by both columns, so
# merging on Lbar alone would pair rows with the other polarities' fits and
# duplicate them (and their weights). Dropping any existing a/b columns first
# keeps the cell re-runnable without producing a_x/a_y columns.
df = (
    df.drop(columns=["a", "b"], errors="ignore")
      .merge(ab[["P", "Lbar", "a", "b"]], on=["P", "Lbar"], how="inner")
)

for p, df_pol in df.groupby('P'):
    # Copy so that adding column "c" does not write onto a groupby slice.
    df_pol = df_pol.copy()

    # Use this polarity's k2, not whatever value the global `k2` was left
    # with by the last iteration of the 2nd-regression loop.
    k2_pol = fit2_result[p][1]
    df_pol["c"] = df_pol["kdL"] / (df_pol["Lbar"] + k2_pol)

    x_hat3 = df_pol[["c", "Lbar"]]
    y_hat3 = df_pol["Mu"]
    lin3 = LinearRegression()
    lin3.fit(x_hat3, y_hat3, sample_weight=df_pol["Count"])

    # Coefficients come back in feature order: ["c", "Lbar"].
    k1_check, k5 = lin3.coef_
    k4 = lin3.intercept_
    fit3_results[p] = (k1_check, k4, k5)
    print(f"k1_check_{p}={k1_check:.3e}, k4_{p}={k4:.3e}, k5_{p}={k5:.3e}")

In [None]:
# --- Visualize Mu = k1·c + k5·L̄ + k4 (3rd regression), one surface per polarity ---
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the '3d' projection on older matplotlib)
from matplotlib import cm

fig = plt.figure(figsize=(7,5))
ax = fig.add_subplot(111, projection='3d')

for p, df_pol in df.groupby("P"):
    # Nothing to draw for a polarity that has no fitted coefficients.
    if p not in fit3_results or p not in fit2_result:
        continue

    df_pol = df_pol.copy()
    k1, k4, k5 = fit3_results[p]
    k2_pol = fit2_result[p][1]

    # Same regressor as used in the 3rd regression.
    df_pol["c"] = df_pol["kdL"] / (df_pol["Lbar"] + k2_pol)

    # --- Plots ---
    color = 'red' if p == 1 else 'blue'
    ax.scatter(df_pol["c"], df_pol["Lbar"], df_pol["Mu"],
               color=color, s=df_pol["Count"]/df_pol["Count"].max()*50,
               alpha=0.7, label=f"{p} events")

    # --- Surface ---
    # Fitted plane evaluated on a 30x30 grid spanning this polarity's data.
    c_span = np.linspace(df_pol["c"].min(), df_pol["c"].max(), 30)
    L_span = np.linspace(df_pol["Lbar"].min(), df_pol["Lbar"].max(), 30)
    C, L = np.meshgrid(c_span, L_span)
    B = k1 * C + k5 * L + k4
    ax.plot_surface(C, L, B, color=color, alpha=0.25, edgecolor='none')

ax.set_xlabel(r"$c=k_{dL}/(\bar L+k_2)$")
ax.set_ylabel(r"$\bar L$")
# Raw string: "\m" is an invalid escape sequence in a normal string literal.
ax.set_zlabel(r"$\mu$")
ax.set_title("Third regression surface")
# The scatter calls set labels; show them.
ax.legend()

ax.view_init(elev=25, azim=45)
plt.tight_layout()
plt.show()
