In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import statsmodels.formula.api as smf
from statsmodels.iolib.smpickle import load_pickle
from scipy.stats import chi2, norm

from saturation.utils import *

# Configuration variables

In [3]:
BASE_PATH = "/data/saturation/thesis_run_20250223/"

# Range of simulation IDs to be used for fitting, inclusive
MIN_SIMULATION_ID = 1
MAX_SIMULATION_ID = 150

# Filter on the simulations to be used for fitting, inclusive
MAX_SLOPE = -2.5

N_NSTATS = 100

# Retrieve simulation configuration

In [4]:
configs_df = read_configs_pandas(base_path=BASE_PATH)
configs_dict = configs_df.set_index("simulation_id").to_dict(orient="index")

In [5]:
steep_slope_simulation_ids = list(
    configs_df[configs_df.simulation_id.between(MIN_SIMULATION_ID, MAX_SIMULATION_ID) & (configs_df.slope <= MAX_SLOPE)].simulation_id
)
shallow_slope_simulation_ids = list(
    configs_df[configs_df.simulation_id.between(MIN_SIMULATION_ID, MAX_SIMULATION_ID) & (configs_df.slope > MAX_SLOPE)].simulation_id
)
all_simulation_ids = list(configs_dict.keys())

In [6]:
statistics_by_simulation_id = []
for simulation_id in steep_slope_simulation_ids + shallow_slope_simulation_ids:
    statistics = pd.read_parquet(f"data/statistics_{simulation_id}.parquet")
    statistics["slope"] = configs_dict[simulation_id]["slope"]
    statistics["rim_erasure_exponent"] = configs_dict[simulation_id]["rim_erasure_method"]["exponent"]
    statistics_by_simulation_id.append(statistics)
statistics = pd.concat(statistics_by_simulation_id, axis=0)

# Fit the pooled model

## Load the statistics data back from disk

In [7]:
statistics_by_simulation_id = []
for simulation_id in steep_slope_simulation_ids + shallow_slope_simulation_ids:
    statistics = pd.read_parquet(f"data/statistics_{simulation_id}.parquet")
    statistics["slope"] = configs_dict[simulation_id]["slope"]
    statistics["rim_erasure_exponent"] = configs_dict[simulation_id]["rim_erasure_method"]["exponent"]
    statistics_by_simulation_id.append(statistics)
statistics = pd.concat(statistics_by_simulation_id, axis=0)

## Fit the model, write out to disk

In [8]:
# Sample the data; 10% is nearly the max that can fit into memory
steep_slope_statistics_sample = statistics.loc[steep_slope_simulation_ids].sample(frac=0.1).copy()

model_formula = (
    "lifespan ~ 1 "
    "+ slope:rim_erasure_exponent:np.log(radius) "
    "+ rim_erasure_exponent:np.log(radius) "
    "+ np.log(-slope)"
)

lifespan_model = smf.negativebinomial(
    data=steep_slope_statistics_sample,
    formula=model_formula
).fit(
    maxiter=1000,
    method="BFGS"
)
lifespan_model.summary()

Optimization terminated successfully.
         Current function value: 15.813099
         Iterations: 18
         Function evaluations: 23
         Gradient evaluations: 23


0,1,2,3
Dep. Variable:,lifespan,No. Observations:,8999998.0
Model:,NegativeBinomial,Df Residuals:,8999994.0
Method:,MLE,Df Model:,3.0
Date:,"Mon, 03 Mar 2025",Pseudo R-squ.:,0.05726
Time:,17:40:34,Log-Likelihood:,-142320000.0
converged:,True,LL-Null:,-150960000.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,12.6877,0.002,5887.874,0.000,12.683,12.692
slope:rim_erasure_exponent:np.log(radius),-0.9633,0.001,-1685.301,0.000,-0.964,-0.962
rim_erasure_exponent:np.log(radius),-1.8366,0.002,-923.941,0.000,-1.841,-1.833
np.log(-slope),1.1023,0.002,635.514,0.000,1.099,1.106
alpha,0.2316,0.000,2200.828,0.000,0.231,0.232


In [9]:
lifespan_model.save("data/pooled_lifespan_model_steep_slope.pkl")

# Create the prediction dataset

## Reload the model from disk

In [10]:
lifespan_model = load_pickle("data/pooled_lifespan_model_steep_slope.pkl")

In [11]:
lifespan_model.summary()

0,1,2,3
Dep. Variable:,lifespan,No. Observations:,8999998.0
Model:,NegativeBinomial,Df Residuals:,8999994.0
Method:,MLE,Df Model:,3.0
Date:,"Mon, 03 Mar 2025",Pseudo R-squ.:,0.05726
Time:,17:40:37,Log-Likelihood:,-142320000.0
converged:,True,LL-Null:,-150960000.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,12.6877,0.002,5887.874,0.000,12.683,12.692
slope:rim_erasure_exponent:np.log(radius),-0.9633,0.001,-1685.301,0.000,-0.964,-0.962
rim_erasure_exponent:np.log(radius),-1.8366,0.002,-923.941,0.000,-1.841,-1.833
np.log(-slope),1.1023,0.002,635.514,0.000,1.099,1.106
alpha,0.2316,0.000,2200.828,0.000,0.231,0.232


## Predict using Little's Law (or simply production slope) for all simulations

In [12]:
# Extract coefficients
slope_exponent_radius_coeff = lifespan_model.params["slope:rim_erasure_exponent:np.log(radius)"]
exponent_radius_coeff = lifespan_model.params["rim_erasure_exponent:np.log(radius)"]

In [13]:
# Construct Little's Law model predictions for each simulation
predictions = pd.DataFrame(all_simulation_ids, columns=["simulation_id"])
predictions["slope"] = [configs_dict[x]["slope"] for x in all_simulation_ids]
predictions["rim_erasure_exponent"] = [configs_dict[x]["rim_erasure_method"]["exponent"] for x in all_simulation_ids]
predictions["lifespan_model_exponent"] = [
    (
    slope_exponent_radius_coeff * x.slope * x.rim_erasure_exponent
    + exponent_radius_coeff * x.rim_erasure_exponent
    ) if x.slope <= MAX_SLOPE else 0
    for x in predictions.itertuples()
]
predictions["littles_law_model_slope_prediction"] = predictions.slope + predictions.lifespan_model_exponent
predictions.set_index("simulation_id", inplace=True)

## Estimate slopes for each simulation using MLE

### Load states data

In [14]:
states = None
for simulation_id in all_simulation_ids:
    state = pd.read_parquet(f"data/states_{simulation_id}_{N_NSTATS}.parquet")
    if states is None:
        states = state
    else:
        states = pd.concat([states, state], axis=0)

### Estimate MLE slope and sigma for each simulation

In [15]:
for simulation_id in all_simulation_ids:
    state = states.loc[simulation_id]
    mle_slope, sigma = estimate_cumulative_slope(
        radii=state.radius,
        rmin=configs_dict[simulation_id]["rstat"],
        rmax=configs_dict[simulation_id]["rmax"],
        min_search_slope=-10.0,
        max_search_slope=-1
    )
    predictions.loc[simulation_id, "mle_slope"] = mle_slope
    predictions.loc[simulation_id, "mle_slope_sigma"] = sigma

In [16]:
predictions

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12,-4.25,0.2,0.451461,-3.798539,-3.792793,0.024017
122,-1.50,0.2,0.000000,-1.500000,-1.531532,0.003645
141,-1.00,0.1,0.000000,-1.000000,-1.045045,0.005201
28,-4.00,0.8,1.613186,-2.386814,-2.396396,0.005140
74,-2.75,0.4,0.324952,-2.425048,-2.477477,0.004375
...,...,...,...,...,...,...
139,-1.25,0.9,0.000000,-1.250000,-1.261261,0.004197
159,-4.70,1.0,2.690780,-2.009220,-2.045045,0.004070
69,-3.00,0.9,0.947880,-2.052120,-2.108108,0.002822
131,-1.25,0.1,0.000000,-1.250000,-1.297297,0.004293


## Save off predictions

In [17]:
predictions.to_parquet("data/predictions.parquet")

# Perform hypothesis testing

## Reload predictions from disk

In [18]:
predictions = pd.read_parquet("data/predictions.parquet")

## Perform the TOST

In [19]:
def tost_equivalence_test(
    *,
    mle_slope: float,
    mle_slope_sigma: float,
    predicted_slope: float,
    margin: float
) -> float:
    """
    Perform a TOST (Two One-Sided Test) equivalence check for a single simulation.

    Null hypothesis (H0): The true slope is outside ±margin of predicted_slope.
    Alternative (H1): The true slope is within ±margin of predicted_slope.

    This function returns a single p-value for the equivalence test,
    following the approach of taking the maximum of the two one-sided p-values
    (i.e., p_equiv = max(p1, p2)).

    Parameters:
    -----------
    mle_slope : float
        MLE estimate of the slope for this simulation.
    mle_slope_sigma : float
        Standard error of the MLE slope estimate.
    predicted_slope : float
        The predicted slope from the model (Little's Law).
    margin : float
        Equivalence margin (e.g., ±0.05).

    Returns:
    --------
    p_equiv : float
        A single p-value for the TOST equivalence test.
        Typically compared to alpha (e.g., 0.05).
        A smaller value indicates stronger evidence of equivalence.
    """
    # Two one-sided tests:
    #  1) slope > predicted_slope - margin
    #  2) slope < predicted_slope + margin

    z1 = (
        mle_slope
        - (predicted_slope - margin)
    ) / mle_slope_sigma
    p1 = 1.0 - norm.cdf(z1)

    z2 = (
        (predicted_slope + margin)
        - mle_slope
    ) / mle_slope_sigma
    p2 = 1.0 - norm.cdf(z2)

    # A single TOST p-value is often the max of these two p-values.
    p_equiv = max(p1, p2)

    return p_equiv


def fishers_method(
    p_values: list[float]
) -> tuple[float, float]:
    """
    Combine a list of p-values using Fisher's method.

    Returns:
    --------
    chi2_stat : float
        The combined chi-square statistic.
    combined_pval : float
        p-value for the combined test.
    """
    valid_pvals = [
        p for p in p_values
        if 0.0 < p < 1.0
    ]

    if not valid_pvals:
        # If no valid p-values, return defaults
        return 0.0, 1.0

    chi2_stat = -2.0 * np.sum(
        np.log(valid_pvals)
    )
    df = 2 * len(valid_pvals)
    combined_pval = 1.0 - chi2.cdf(
        chi2_stat,
        df
    )

    return chi2_stat, combined_pval


def run_equivalence_testing(
    *,
    df: pd.DataFrame,
    margin: float,
    alpha: float
) -> pd.DataFrame:
    """
    Main routine to run TOST per simulation, then apply Fisher's method
    to combine p-values for an overall conclusion.

    The DataFrame `df` must contain columns:
      - "littles_law_model_slope_prediction"
      - "mle_slope"
      - "mle_slope_sigma"

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with required columns, indexed by simulation_id or similar.
    margin : float
        Equivalence margin (e.g., ±0.05).
    alpha : float
        Significance level for TOST (e.g., 0.05).

    Returns:
    --------
    results_df : pd.DataFrame
        A DataFrame with:
          - "predicted_slope"
          - "predicted_lower_ci"
          - "predicted_upper_ci"
          - "mle_slope"
          - "mle_slope_sigma"
          - "mle_lower_ci"
          - "mle_upper_ci"
          - "p_tost"
          - "equivalent"
        The function prints a Fisher combined p-value for the entire set.
    """
    # z-critical value for the (1 - 2*alpha)% CI
    # e.g., alpha=0.05 => 1 - 2*0.05=0.90 => z ~1.645
    z_crit = norm.ppf(1.0 - alpha)

    p_values = []
    dfs_list = []

    for idx, row in df.iterrows():
        predicted_slope = row["littles_law_model_slope_prediction"]
        est_slope = row["mle_slope"]
        est_sigma = row["mle_slope_sigma"]

        # 1) TOST p-value
        p_tost = tost_equivalence_test(
            mle_slope=est_slope,
            mle_slope_sigma=est_sigma,
            predicted_slope=predicted_slope,
            margin=margin
        )

        # 2) Equivalence pass/fail
        equivalent = (p_tost < alpha)

        # 3) Confidence Interval for predicted slope: simply ± margin
        predicted_lower_ci = predicted_slope - margin
        predicted_upper_ci = predicted_slope + margin

        # 4) (1 - 2*alpha)% CI for MLE slope
        # e.g. for alpha=0.05 => 90% CI
        mle_lower_ci = est_slope - z_crit * est_sigma
        mle_upper_ci = est_slope + z_crit * est_sigma

        p_values.append(p_tost)
        dfs_list.append({
            "simulation_id": idx,
            "predicted_slope": predicted_slope,
            "predicted_lower_ci": predicted_lower_ci,
            "predicted_upper_ci": predicted_upper_ci,
            "mle_slope": est_slope,
            "mle_slope_sigma": est_sigma,
            "mle_lower_ci": mle_lower_ci,
            "mle_upper_ci": mle_upper_ci,
            "p_tost": p_tost,
            "equivalent": equivalent
        })

    results_df = pd.DataFrame(dfs_list).set_index("simulation_id")

    # Combine p-values via Fisher
    chi2_stat, combined_pval = fishers_method(p_values)

    dof = 2 * len(p_values)
    print(f"Fisher Combined chi-square = {chi2_stat:.4f} with dof={dof}")
    print(f"Fisher Combined p-value = {combined_pval}")
    print("Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.")

    return results_df

In [20]:
predictions_training_set = predictions.loc[steep_slope_simulation_ids + shallow_slope_simulation_ids].copy()
equivalence_test_results = run_equivalence_testing(
    df=predictions_training_set,
    alpha=0.05,
    margin=0.1
)

result_cols = [
    "mle_lower_ci",
    "mle_upper_ci",
    "predicted_lower_ci",
    "predicted_upper_ci",
    "p_tost",
    "equivalent"
]
predictions_training_set[result_cols] = equivalence_test_results[result_cols]

Fisher Combined chi-square = 1278.7377 with dof=300
Fisher Combined p-value = 0.0
Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.


In [21]:
equivalence_test_results

Unnamed: 0_level_0,predicted_slope,predicted_lower_ci,predicted_upper_ci,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
12,-3.798539,-3.898539,-3.698539,-3.792793,0.024017,-3.832297,-3.753288,0.000043,True
28,-2.386814,-2.486814,-2.286814,-2.396396,0.005140,-2.404851,-2.387942,0.000000,True
74,-2.425048,-2.525048,-2.325048,-2.477477,0.004375,-2.484673,-2.470282,0.000000,True
18,-2.444158,-2.544158,-2.344158,-2.477477,0.005985,-2.487322,-2.467633,0.000000,True
68,-2.157440,-2.257440,-2.057440,-2.207207,0.003209,-2.212485,-2.201929,0.000000,True
...,...,...,...,...,...,...,...,...,...
145,-1.000000,-1.100000,-0.900000,-1.036036,0.005124,-1.044465,-1.027607,0.000000,True
123,-1.500000,-1.600000,-1.400000,-1.549550,0.003686,-1.555613,-1.543486,0.000000,True
115,-1.750000,-1.850000,-1.650000,-1.774775,0.003276,-1.780163,-1.769387,0.000000,True
139,-1.250000,-1.350000,-1.150000,-1.261261,0.004197,-1.268164,-1.254358,0.000000,True


In [22]:
predictions_training_set[~predictions_training_set.equivalent].sort_values(["slope", "rim_erasure_exponent"])

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,predicted_lower_ci,predicted_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,-4.5,0.1,0.249812,-4.250188,-4.288288,0.039797,-4.353748,-4.222828,-4.350188,-4.150188,0.059928,False
95,-2.25,0.5,0.0,-2.25,-2.144144,0.003209,-2.149422,-2.138866,-2.35,-2.15,0.965999,False
96,-2.25,0.6,0.0,-2.25,-2.099099,0.003057,-2.104127,-2.094071,-2.35,-2.15,1.0,False
97,-2.25,0.7,0.0,-2.25,-2.063063,0.002908,-2.067846,-2.05828,-2.35,-2.15,1.0,False
98,-2.25,0.8,0.0,-2.25,-2.018018,0.002762,-2.022562,-2.013474,-2.35,-2.15,1.0,False
99,-2.25,0.9,0.0,-2.25,-1.963964,0.002595,-1.968232,-1.959696,-2.35,-2.15,1.0,False
100,-2.25,1.0,0.0,-2.25,-1.90991,0.002463,-1.913961,-1.905859,-2.35,-2.15,1.0,False
108,-2.0,0.8,0.0,-2.0,-1.891892,0.002859,-1.896595,-1.887189,-2.1,-1.9,0.997714,False
109,-2.0,0.9,0.0,-2.0,-1.864865,0.002763,-1.86941,-1.860319,-2.1,-1.9,1.0,False
110,-2.0,1.0,0.0,-2.0,-1.81982,0.00265,-1.824179,-1.81546,-2.1,-1.9,1.0,False


# Hypothesis test on the testing set

In [23]:
testing_set_simulation_ids = [x.simulation_id for x in predictions.reset_index().itertuples() if x.simulation_id not in (steep_slope_simulation_ids + shallow_slope_simulation_ids)]

In [24]:
predictions_testing_set = predictions.loc[testing_set_simulation_ids].copy()
equivalence_test_results = run_equivalence_testing(
    df=predictions_testing_set,
    alpha=0.05,
    margin=0.1
)

result_cols = [
    "mle_lower_ci",
    "mle_upper_ci",
    "predicted_lower_ci",
    "predicted_upper_ci",
    "p_tost",
    "equivalent"
]
predictions_testing_set[result_cols] = equivalence_test_results[result_cols]

Fisher Combined chi-square = 191.5634 with dof=24
Fisher Combined p-value = 0.0
Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.


In [25]:
equivalence_test_results

Unnamed: 0_level_0,predicted_slope,predicted_lower_ci,predicted_upper_ci,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
156,-1.952308,-2.052308,-1.852308,-2.0,0.002513,-2.004134,-1.995866,0.0,True
158,-3.220071,-3.320071,-3.120071,-3.252252,0.016128,-3.278781,-3.225724,1.305631e-05,True
151,-2.470987,-2.570987,-2.370987,-2.540541,0.004658,-2.548203,-2.532878,3.164446e-11,True
157,-4.027305,-4.127305,-3.927305,-4.018018,0.034577,-4.074891,-3.961145,0.004351053,True
155,-2.491269,-2.591269,-2.391269,-2.540541,0.00485,-2.548518,-2.532563,0.0,True
162,-2.020236,-2.120236,-1.920236,-2.036036,0.004541,-2.043506,-2.028566,0.0,True
160,-4.255059,-4.355059,-4.155059,-4.252252,0.04816,-4.331468,-4.173037,0.0217879,True
161,-3.36113,-3.46113,-3.26113,-3.36036,0.020409,-3.393931,-3.32679,5.810187e-07,True
154,-2.850577,-2.950577,-2.750577,-2.891892,0.007189,-2.903718,-2.880066,1.110223e-16,True
153,-1.933949,-2.033949,-1.833949,-1.972973,0.002374,-1.976877,-1.969069,0.0,True


In [26]:
predictions_testing_set[~predictions_testing_set.equivalent].sort_values(["slope", "rim_erasure_exponent"])

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,predicted_lower_ci,predicted_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
