In [1]:
import numpy as np
import pandas as pd

# Module-level NumPy Generator with a fixed seed; simulate_kinetic_data draws
# its temperature and pH values from it.
rng = np.random.default_rng(seed=42)

# Simulate kinetic data with hidden temperature/pH dependency
def simulate_kinetic_data(n_samples=100, generator=None):
    """Simulate noisy product-vs-time curves for ``n_samples`` samples.

    Each sample gets a temperature ``T`` drawn uniformly from [20, 50) and a
    ``pH`` drawn uniformly from [5, 9). A latent rate constant is derived as

        log(k) = -0.03 * (T - 30) - 0.5 * (pH - 7) ** 2

    i.e. ``k`` decreases with temperature and peaks at pH 7. For 20 evenly
    spaced time points in [0, 10] the product is ``P = S0 * (1 - exp(-k*t))``
    with ``S0 = 5.0``; Gaussian noise (standard deviation 0.02) is added and
    the result is clipped below at 0.

    Args:
        n_samples: Number of independent samples (curves) to simulate.
        generator: Optional ``numpy.random.Generator`` supplying *all* random
            draws. When ``None`` the module-level ``rng`` is used.

    Returns:
        A long-format ``pandas.DataFrame`` with one row per (sample, time
        point) and columns ``SampleID``, ``Time``, ``P_obs``, ``T``, ``pH``.
        Rows are ordered by sample, then by time.
    """
    # Every draw (T, pH and the noise) comes from this one generator, so a
    # seeded generator yields a fully reproducible dataset. Drawing the noise
    # from the unseeded legacy ``np.random`` global would defeat the seed.
    gen = rng if generator is None else generator

    T = gen.uniform(20, 50, n_samples)  # temperature in Celsius
    pH = gen.uniform(5, 9, n_samples)   # pH range

    # Latent model for log(k): linear decrease with T, quadratic in pH so
    # that k itself is a Gaussian bell curve in pH centered at 7.
    log_k_true = (
        -0.03 * (T - 30)        # decreases with temperature
        - 0.5 * (pH - 7) ** 2   # bell-shaped pH dependence
    )
    k = np.exp(log_k_true)

    S0 = 5.0
    time_points = np.linspace(0, 10, 20)
    n_times = time_points.size

    # Shape (n_samples, n_times): one row per sample, one column per time.
    P = S0 * (1 - np.exp(-np.outer(k, time_points)))
    noise = gen.normal(0, 0.02, size=P.shape)
    P_obs = np.clip(P + noise, 0, None)  # observed product cannot be negative

    # Flatten sample-major so the rows read sample 0 at all times, then
    # sample 1, and so on.
    return pd.DataFrame({
        'SampleID': np.repeat(np.arange(n_samples, dtype=np.int64), n_times),
        'Time': np.tile(time_points, n_samples),
        'P_obs': P_obs.ravel(),
        'T': np.repeat(T, n_times),
        'pH': np.repeat(pH, n_times),
    })

# Run the simulation with its default settings, write the long-format table
# to "kinetic_data.csv" without the index column, then preview the first
# 20 rows (one full sample's time course).
kinetic_data = simulate_kinetic_data()
kinetic_data.to_csv(path_or_buf="kinetic_data.csv", index=False)
kinetic_data.head(n=20)

Unnamed: 0,SampleID,Time,P_obs,T,pH
0,0,0.0,0.0,43.218681,8.634323
1,0,0.526316,0.466136,43.218681,8.634323
2,0,1.052632,0.849512,43.218681,8.634323
3,0,1.578947,1.229015,43.218681,8.634323
4,0,2.105263,1.516633,43.218681,8.634323
5,0,2.631579,1.869528,43.218681,8.634323
6,0,3.157895,2.14196,43.218681,8.634323
7,0,3.684211,2.422761,43.218681,8.634323
8,0,4.210526,2.611833,43.218681,8.634323
9,0,4.736842,2.840877,43.218681,8.634323
