In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import statsmodels.formula.api as smf
from statsmodels.iolib.smpickle import load_pickle
from scipy.stats import chi2, norm

from saturation.utils import *

# Configuration variables

In [3]:
BASE_PATH = "/data/saturation/thesis_run_20250223/"

# Range of simulation IDs to be used for fitting, inclusive
MIN_SIMULATION_ID = 1
MAX_SIMULATION_ID = 150

# Filter on the simulations to be used for fitting, inclusive
MAX_SLOPE = -2

N_NSTATS = 100

# Retrieve simulation configuration

In [4]:
configs_df = read_configs_pandas(base_path=BASE_PATH)
configs_dict = configs_df.set_index("simulation_id").to_dict(orient="index")

In [5]:
steep_slope_simulation_ids = list(
    configs_df[configs_df.simulation_id.between(MIN_SIMULATION_ID, MAX_SIMULATION_ID) & (configs_df.slope <= MAX_SLOPE)].simulation_id
)
shallow_slope_simulation_ids = list(
    configs_df[configs_df.simulation_id.between(MIN_SIMULATION_ID, MAX_SIMULATION_ID) & (configs_df.slope > MAX_SLOPE)].simulation_id
)
all_simulation_ids = list(configs_dict.keys())

In [6]:
statistics_by_simulation_id = []
for simulation_id in steep_slope_simulation_ids + shallow_slope_simulation_ids:
    statistics = pd.read_parquet(f"data/statistics_{simulation_id}.parquet")
    statistics["slope"] = configs_dict[simulation_id]["slope"]
    statistics["rim_erasure_exponent"] = configs_dict[simulation_id]["rim_erasure_method"]["exponent"]
    statistics_by_simulation_id.append(statistics)
statistics = pd.concat(statistics_by_simulation_id, axis=0)

# Fit the pooled model

## Load the statistics data back from disk

In [7]:
statistics_by_simulation_id = []
for simulation_id in steep_slope_simulation_ids + shallow_slope_simulation_ids:
    statistics = pd.read_parquet(f"data/statistics_{simulation_id}.parquet")
    statistics["slope"] = configs_dict[simulation_id]["slope"]
    statistics["rim_erasure_exponent"] = configs_dict[simulation_id]["rim_erasure_method"]["exponent"]
    statistics_by_simulation_id.append(statistics)
statistics = pd.concat(statistics_by_simulation_id, axis=0)

## Fit the model, write out to disk

In [8]:
# Sample the data; 10% is nearly the max that can fit into memory
steep_slope_statistics_sample = statistics.loc[steep_slope_simulation_ids].sample(frac=0.1).copy()

model_formula = (
    "lifespan ~ 1 "
    "+ slope:rim_erasure_exponent:np.log(radius) "
    "+ rim_erasure_exponent:np.log(radius) "
    "+ np.log(-slope)"
)

lifespan_model = smf.negativebinomial(
    data=steep_slope_statistics_sample,
    formula=model_formula
).fit(
    maxiter=1000,
    method="BFGS"
)
lifespan_model.summary()

  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts)


         Current function value: 15.594769
         Iterations: 19
         Function evaluations: 88
         Gradient evaluations: 75




0,1,2,3
Dep. Variable:,lifespan,No. Observations:,10999999.0
Model:,NegativeBinomial,Df Residuals:,10999995.0
Method:,MLE,Df Model:,3.0
Date:,"Wed, 05 Mar 2025",Pseudo R-squ.:,0.0571
Time:,19:22:38,Log-Likelihood:,-171540000.0
converged:,False,LL-Null:,-181930000.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,11.9369,0.002,7376.949,0.000,11.934,11.940
slope:rim_erasure_exponent:np.log(radius),-0.9171,0.000,-1880.532,0.000,-0.918,-0.916
rim_erasure_exponent:np.log(radius),-1.6872,0.002,-1081.656,0.000,-1.690,-1.684
np.log(-slope),1.6945,0.001,1213.842,0.000,1.692,1.697
alpha,0.3188,0.000,2464.362,0.000,0.319,0.319


In [9]:
lifespan_model.save("data/pooled_lifespan_model_steep_slope.pkl")

In [10]:
# Dispersion statistic
np.sum(lifespan_model.resid_pearson**2) / lifespan_model.df_resid

0.7846409323940443

# Create the prediction dataset

## Reload the model from disk

In [11]:
lifespan_model = load_pickle("data/pooled_lifespan_model_steep_slope.pkl")

In [12]:
lifespan_model.summary()

0,1,2,3
Dep. Variable:,lifespan,No. Observations:,10999999.0
Model:,NegativeBinomial,Df Residuals:,10999995.0
Method:,MLE,Df Model:,3.0
Date:,"Wed, 05 Mar 2025",Pseudo R-squ.:,0.0571
Time:,19:22:41,Log-Likelihood:,-171540000.0
converged:,False,LL-Null:,-181930000.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,11.9369,0.002,7376.949,0.000,11.934,11.940
slope:rim_erasure_exponent:np.log(radius),-0.9171,0.000,-1880.532,0.000,-0.918,-0.916
rim_erasure_exponent:np.log(radius),-1.6872,0.002,-1081.656,0.000,-1.690,-1.684
np.log(-slope),1.6945,0.001,1213.842,0.000,1.692,1.697
alpha,0.3188,0.000,2464.362,0.000,0.319,0.319


## Predict using Little's Law (or simply production slope) for all simulations

In [13]:
# Extract coefficients
slope_exponent_radius_coeff = lifespan_model.params["slope:rim_erasure_exponent:np.log(radius)"]
exponent_radius_coeff = lifespan_model.params["rim_erasure_exponent:np.log(radius)"]

In [14]:
# Construct Little's Law model predictions for each simulation
predictions = pd.DataFrame(all_simulation_ids, columns=["simulation_id"])
predictions["slope"] = [configs_dict[x]["slope"] for x in all_simulation_ids]
predictions["rim_erasure_exponent"] = [configs_dict[x]["rim_erasure_method"]["exponent"] for x in all_simulation_ids]
predictions["lifespan_model_exponent"] = [
    (
    slope_exponent_radius_coeff * x.slope * x.rim_erasure_exponent
    + exponent_radius_coeff * x.rim_erasure_exponent
    ) if x.slope <= MAX_SLOPE else 0
    for x in predictions.itertuples()
]
predictions["littles_law_model_slope_prediction"] = predictions.slope + predictions.lifespan_model_exponent
predictions.set_index("simulation_id", inplace=True)

## Estimate slopes for each simulation using MLE

### Load states data

In [15]:
states = None
for simulation_id in all_simulation_ids:
    state = pd.read_parquet(f"data/states_{simulation_id}_{N_NSTATS}.parquet")
    if states is None:
        states = state
    else:
        states = pd.concat([states, state], axis=0)

### Estimate MLE slope and sigma for each simulation

In [16]:
for simulation_id in all_simulation_ids:
    state = states.loc[simulation_id]
    mle_slope, sigma = estimate_cumulative_slope(
        radii=state.radius,
        rmin=configs_dict[simulation_id]["rstat"],
        rmax=configs_dict[simulation_id]["rmax"],
        min_search_slope=-10.0,
        max_search_slope=-1
    )
    predictions.loc[simulation_id, "mle_slope"] = mle_slope
    predictions.loc[simulation_id, "mle_slope_sigma"] = sigma

In [17]:
predictions

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12,-4.25,0.2,0.442138,-3.807862,-3.792793,0.024017
122,-1.50,0.2,0.000000,-1.500000,-1.531532,0.003645
141,-1.00,0.1,0.000000,-1.000000,-1.045045,0.005201
28,-4.00,0.8,1.585125,-2.414875,-2.396396,0.005140
74,-2.75,0.4,0.333993,-2.416007,-2.477477,0.004375
...,...,...,...,...,...,...
139,-1.25,0.9,0.000000,-1.250000,-1.261261,0.004197
159,-4.70,1.0,2.623404,-2.076596,-2.045045,0.004070
69,-3.00,0.9,0.957841,-2.042159,-2.108108,0.002822
131,-1.25,0.1,0.000000,-1.250000,-1.297297,0.004293


## Save off predictions

In [18]:
predictions.to_parquet("data/predictions.parquet")

# Perform hypothesis testing

## Reload predictions from disk

In [19]:
predictions = pd.read_parquet("data/predictions.parquet")

## Perform the TOST

In [20]:
def tost_equivalence_test(
    *,
    mle_slope: float,
    mle_slope_sigma: float,
    predicted_slope: float,
    margin: float
) -> float:
    """
    Perform a TOST (Two One-Sided Test) equivalence check for a single simulation.

    Null hypothesis (H0): The true slope is outside ±margin of predicted_slope.
    Alternative (H1): The true slope is within ±margin of predicted_slope.

    This function returns a single p-value for the equivalence test,
    following the approach of taking the maximum of the two one-sided p-values
    (i.e., p_equiv = max(p1, p2)).

    Parameters:
    -----------
    mle_slope : float
        MLE estimate of the slope for this simulation.
    mle_slope_sigma : float
        Standard error of the MLE slope estimate.
    predicted_slope : float
        The predicted slope from the model (Little's Law).
    margin : float
        Equivalence margin (e.g., ±0.05).

    Returns:
    --------
    p_equiv : float
        A single p-value for the TOST equivalence test.
        Typically compared to alpha (e.g., 0.05).
        A smaller value indicates stronger evidence of equivalence.
    """
    # Two one-sided tests:
    #  1) slope > predicted_slope - margin
    #  2) slope < predicted_slope + margin

    z1 = (
        mle_slope
        - (predicted_slope - margin)
    ) / mle_slope_sigma
    p1 = 1.0 - norm.cdf(z1)

    z2 = (
        (predicted_slope + margin)
        - mle_slope
    ) / mle_slope_sigma
    p2 = 1.0 - norm.cdf(z2)

    # A single TOST p-value is often the max of these two p-values.
    p_equiv = max(p1, p2)

    return p_equiv


def fishers_method(
    p_values: list[float]
) -> tuple[float, float]:
    """
    Combine a list of p-values using Fisher's method.

    Returns:
    --------
    chi2_stat : float
        The combined chi-square statistic.
    combined_pval : float
        p-value for the combined test.
    """
    valid_pvals = [
        p for p in p_values
        if 0.0 < p < 1.0
    ]

    if not valid_pvals:
        # If no valid p-values, return defaults
        return 0.0, 1.0

    chi2_stat = -2.0 * np.sum(
        np.log(valid_pvals)
    )
    df = 2 * len(valid_pvals)
    combined_pval = 1.0 - chi2.cdf(
        chi2_stat,
        df
    )

    return chi2_stat, combined_pval


def run_equivalence_testing(
    *,
    df: pd.DataFrame,
    margin: float,
    alpha: float
) -> pd.DataFrame:
    """
    Main routine to run TOST per simulation, then apply Fisher's method
    to combine p-values for an overall conclusion.

    The DataFrame `df` must contain columns:
      - "littles_law_model_slope_prediction"
      - "mle_slope"
      - "mle_slope_sigma"

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with required columns, indexed by simulation_id or similar.
    margin : float
        Equivalence margin (e.g., ±0.05).
    alpha : float
        Significance level for TOST (e.g., 0.05).

    Returns:
    --------
    results_df : pd.DataFrame
        A DataFrame with:
          - "predicted_slope"
          - "predicted_lower_ci"
          - "predicted_upper_ci"
          - "mle_slope"
          - "mle_slope_sigma"
          - "mle_lower_ci"
          - "mle_upper_ci"
          - "p_tost"
          - "equivalent"
        The function prints a Fisher combined p-value for the entire set.
    """
    # z-critical value for the (1 - 2*alpha)% CI
    # e.g., alpha=0.05 => 1 - 2*0.05=0.90 => z ~1.645
    z_crit = norm.ppf(1.0 - alpha)

    p_values = []
    dfs_list = []

    for idx, row in df.iterrows():
        predicted_slope = row["littles_law_model_slope_prediction"]
        est_slope = row["mle_slope"]
        est_sigma = row["mle_slope_sigma"]

        # 1) TOST p-value
        p_tost = tost_equivalence_test(
            mle_slope=est_slope,
            mle_slope_sigma=est_sigma,
            predicted_slope=predicted_slope,
            margin=margin
        )

        # 2) Equivalence pass/fail
        equivalent = (p_tost < alpha)

        # 3) Confidence Interval for predicted slope: simply ± margin
        predicted_lower_ci = predicted_slope - margin
        predicted_upper_ci = predicted_slope + margin

        # 4) (1 - 2*alpha)% CI for MLE slope
        # e.g. for alpha=0.05 => 90% CI
        mle_lower_ci = est_slope - z_crit * est_sigma
        mle_upper_ci = est_slope + z_crit * est_sigma

        p_values.append(p_tost)
        dfs_list.append({
            "simulation_id": idx,
            "predicted_slope": predicted_slope,
            "predicted_lower_ci": predicted_lower_ci,
            "predicted_upper_ci": predicted_upper_ci,
            "mle_slope": est_slope,
            "mle_slope_sigma": est_sigma,
            "mle_lower_ci": mle_lower_ci,
            "mle_upper_ci": mle_upper_ci,
            "p_tost": p_tost,
            "equivalent": equivalent
        })

    results_df = pd.DataFrame(dfs_list).set_index("simulation_id")

    # Combine p-values via Fisher
    chi2_stat, combined_pval = fishers_method(p_values)

    dof = 2 * len(p_values)
    print(f"Fisher Combined chi-square = {chi2_stat:.4f} with dof={dof}")
    print(f"Fisher Combined p-value = {combined_pval}")
    print("Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.")

    return results_df

In [34]:
predictions_training_set = predictions.loc[steep_slope_simulation_ids + shallow_slope_simulation_ids].copy()
equivalence_test_results = run_equivalence_testing(
    df=predictions_training_set,
    alpha=0.05,
    margin=0.1
)

result_cols = [
    "mle_lower_ci",
    "mle_upper_ci",
    "predicted_lower_ci",
    "predicted_upper_ci",
    "p_tost",
    "equivalent"
]
predictions_training_set[result_cols] = equivalence_test_results[result_cols]

Fisher Combined chi-square = 1907.7410 with dof=300
Fisher Combined p-value = 0.0
Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.


In [35]:
equivalence_test_results

Unnamed: 0_level_0,predicted_slope,predicted_lower_ci,predicted_upper_ci,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
12,-3.807862,-3.907862,-3.707862,-3.792793,0.024017,-3.832297,-3.753288,2.029116e-04,True
28,-2.414875,-2.514875,-2.314875,-2.396396,0.005140,-2.404851,-2.387942,0.000000e+00,True
74,-2.416007,-2.516007,-2.316007,-2.477477,0.004375,-2.484673,-2.470282,0.000000e+00,True
107,-1.897010,-1.997010,-1.797010,-1.927928,0.002948,-1.932778,-1.923078,0.000000e+00,True
101,-1.985287,-2.085287,-1.885287,-2.063063,0.003459,-2.068752,-2.057374,6.586720e-11,True
...,...,...,...,...,...,...,...,...,...
145,-1.000000,-1.100000,-0.900000,-1.036036,0.005124,-1.044465,-1.027607,0.000000e+00,True
123,-1.500000,-1.600000,-1.400000,-1.549550,0.003686,-1.555613,-1.543486,0.000000e+00,True
115,-1.750000,-1.850000,-1.650000,-1.774775,0.003276,-1.780163,-1.769387,0.000000e+00,True
139,-1.250000,-1.350000,-1.150000,-1.261261,0.004197,-1.268164,-1.254358,0.000000e+00,True


In [36]:
predictions_training_set[~predictions_training_set.equivalent].sort_values(["slope", "rim_erasure_exponent"])

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,predicted_lower_ci,predicted_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [24]:
def significance_marker(p: float) -> str:
    """Assign significance markers based on p-value thresholds."""
    if p < 0.001:
        return "*"
    elif p < 0.05:
        return "x"
    else:
        return ""

def format_pvalue(p: float) -> str:
    """Format p-value, aligning numbers neatly even with significance markers."""
    marker = significance_marker(p)
    if p < 0.001:
        return marker
    else:
        return f"{p:<5.3f}"

def create_pvalue_table(
    df: pd.DataFrame,
    *,
    field: str,
    exponent_digits: int = 1,
    slope_digits: int = 2
) -> str:
    """Generate a formatted LaTeX table summarizing p-values significance."""
    pivot_table = df.rename(columns={"rim_erasure_exponent": "nu"}).pivot_table(
        index="slope",
        columns="nu",
        values=field,
        aggfunc="mean"
    )

    pivot_table.index = pivot_table.index.map(lambda x: fr"\textbf{{{x:.{slope_digits}f}}}")
    pivot_table.columns = pivot_table.columns.map(lambda x: fr"\textbf{{{x:.{exponent_digits}f}}}")

    formatted_table = pivot_table.map(
        lambda p: fr"\textbf{{{format_pvalue(p)}}}" if significance_marker(p) else format_pvalue(p) if pd.notna(p) else ""
    )

    latex_table = formatted_table.to_latex(
        caption="",
        label="tab:pvalue_summary",
        na_rep="",
        escape=False,
        column_format="r" + "r" * formatted_table.shape[1],
        index_names=False,
        index=True
    )

    # Add clearer headers for exponent and slope
    latex_table = latex_table.replace(
        r"\toprule",
        r"\toprule \\"
        + "\n"
        + r"\bprod & \multicolumn{"
        + f"{formatted_table.shape[1]}"
        + r"}{c}{\textbf{\eexp}} \\"
        + "\n"
        rf"\cmidrule(lr){{2-{formatted_table.shape[1] + 1}}}"
    )

    return latex_table

In [25]:
print(create_pvalue_table(predictions_training_set, field="p_tost"))

\begin{table}
\label{tab:pvalue_summary}
\begin{tabular}{rrrrrrrrrrr}
\toprule \\
\bprod & \multicolumn{10}{c}{\textbf{\eexp}} \\
\cmidrule(lr){2-11}
nu & \textbf{0.1} & \textbf{0.2} & \textbf{0.3} & \textbf{0.4} & \textbf{0.5} & \textbf{0.6} & \textbf{0.7} & \textbf{0.8} & \textbf{0.9} & \textbf{1.0} \\
\midrule
\textbf{-4.50} & \textbf{0.044} & \textbf{0.002} & \textbf{0.002} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} \\
\textbf{-4.25} & \textbf{0.006} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} \\
\textbf{-4.00} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} \\
\textbf{-3.75} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} \\
\textbf{-3.50} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{*} & \textbf{

# Hypothesis test on the testing set

In [26]:
testing_set_simulation_ids = [x.simulation_id for x in predictions.reset_index().itertuples() if x.simulation_id not in (steep_slope_simulation_ids + shallow_slope_simulation_ids)]

In [27]:
predictions_testing_set = predictions.loc[testing_set_simulation_ids].copy()
equivalence_test_results = run_equivalence_testing(
    df=predictions_testing_set,
    alpha=0.05,
    margin=0.1
)

result_cols = [
    "mle_lower_ci",
    "mle_upper_ci",
    "predicted_lower_ci",
    "predicted_upper_ci",
    "p_tost",
    "equivalent"
]
predictions_testing_set[result_cols] = equivalence_test_results[result_cols]

Fisher Combined chi-square = 257.2391 with dof=24
Fisher Combined p-value = 0.0
Conclusion: If combined_p-value < alpha, the model's predicted slope is strongly supported across all simulations.


In [28]:
equivalence_test_results

Unnamed: 0_level_0,predicted_slope,predicted_lower_ci,predicted_upper_ci,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
156,-1.948162,-2.048162,-1.848162,-2.0,0.002513,-2.004134,-1.995866,0.0,True
158,-3.257128,-3.357128,-3.157128,-3.252252,0.016128,-3.278781,-3.225724,1.839973e-09,True
151,-2.464183,-2.564183,-2.364183,-2.540541,0.004658,-2.548203,-2.532878,1.936492e-07,True
157,-4.044149,-4.144149,-3.944149,-4.018018,0.034577,-4.074891,-3.961145,0.01632377,True
155,-2.488989,-2.588989,-2.388989,-2.540541,0.00485,-2.548518,-2.532563,0.0,True
162,-2.101454,-2.201454,-2.001454,-2.036036,0.004541,-2.043506,-2.028566,1.321165e-14,True
160,-4.275364,-4.375364,-4.175364,-4.252252,0.04816,-4.331468,-4.173037,0.05518451,False
161,-3.4058,-3.5058,-3.3058,-3.36036,0.020409,-3.393931,-3.32679,0.003755503,True
154,-2.84954,-2.94954,-2.74954,-2.891892,0.007189,-2.903718,-2.880066,5.551115e-16,True
153,-1.906731,-2.006731,-1.806731,-1.972973,0.002374,-1.976877,-1.969069,0.0,True


In [29]:
predictions_testing_set[~predictions_testing_set.equivalent].sort_values(["slope", "rim_erasure_exponent"])

Unnamed: 0_level_0,slope,rim_erasure_exponent,lifespan_model_exponent,littles_law_model_slope_prediction,mle_slope,mle_slope_sigma,mle_lower_ci,mle_upper_ci,predicted_lower_ci,predicted_upper_ci,p_tost,equivalent
simulation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
160,-5.0,0.25,0.724636,-4.275364,-4.252252,0.04816,-4.331468,-4.173037,-4.375364,-4.175364,0.055185,False


In [30]:
print(create_pvalue_table(predictions_testing_set, field="p_tost", exponent_digits=2, slope_digits=2))

\begin{table}
\label{tab:pvalue_summary}
\begin{tabular}{rrrr}
\toprule \\
\bprod & \multicolumn{3}{c}{\textbf{\eexp}} \\
\cmidrule(lr){2-4}
nu & \textbf{0.25} & \textbf{0.55} & \textbf{1.00} \\
\midrule
\textbf{-5.00} & 0.055 & \textbf{0.004} & \textbf{*} \\
\textbf{-4.70} & \textbf{0.016} & \textbf{*} & \textbf{*} \\
\textbf{-3.15} & \textbf{*} & \textbf{*} & \textbf{*} \\
\textbf{-2.65} & \textbf{*} & \textbf{*} & \textbf{*} \\
\bottomrule
\end{tabular}
\end{table}



# Comparison of number of craters to different studies

## This thesis

In [49]:
# Total N_g for all simulations
states.groupby("simulation_id").agg("max").sum().crater_id / 1e9

1813.675037854

In [50]:
# Number of simulations
states.index.drop_duplicates().shape[0]

162

## Woronow (1977)

In [63]:
slopes = [-3, -2, -1.5]
dmin = 10
F = 0.5
dstat = dmin / F
rmax = 160
nstat = 1000

In [67]:
# Pareto CDF
sum([(dmin / dstat)**slope * nstat for slope in slopes])

18000.0

## Squyres (1999)

In [71]:
slope = -2.73
n_repeats = 5
n_betas = 5
n_zetas = 4
rmins = [10, 30]
rstat = 30
rmax = 1000
nstat = 4000

In [72]:
# Pareto CDF times nstat
sum(
    (rmin / rstat)**slope * nstat * n_repeats * n_betas * n_zetas
    for rmin in rmins
)

8427882.67369005

In [73]:
n_repeats * len(rmins) * n_betas * n_zetas

200

In [74]:
n_repeats * len(rmins) * n_betas * n_zetas * nstat

800000

## Kirchoff (2018)

In [66]:
slopes = [-1, -2]
rmults = [1.3, 1.5, 1.7, 1.9]
mrps = [.3, .4, .5]
rstats = [3, 6, 9, 12, 15]
rmin = 2.5
rmax = 2500
nstat = 3000

In [68]:
# Pareto CDF
sum(
    [
        (rmin / rstat)**slope * nstat
        for x in slopes
        for rstat in rstats
        for _ in mrps
        for _ in rmults
    ]
)

18081744.141249813

In [70]:
5 * 3 * 4 * 2 * nstat

360000