In [1]:
import math
import platform

import numpy as np

import plotly.graph_objects as go
import os
import plotly.io as pio

# Notebook-wide plotting defaults. The renderer can be overridden through the
# PLOTLY_RENDERER environment variable; it falls back to "notebook".
pio.templates.default = "plotly_white"
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

# Compact array printing and a fixed seed so the synthetic sample is reproducible.
np.set_printoptions(precision=4, suppress=True)
rng = np.random.default_rng(7)

# Record the environment next to the results.
print("Python", platform.python_version())
print("NumPy ", np.__version__)
try:
    import plotly

    print("Plotly", plotly.__version__)
except Exception as exc:
    # Version reporting stays best-effort, but say why it was skipped
    # instead of silently hiding the failure.
    print("Plotly version unavailable:", exc)
try:
    import scipy

    print("SciPy ", scipy.__version__)
except Exception as exc:
    # SciPy is only used for the optional cross-check cell, so a missing
    # install must not stop the notebook; report it and move on.
    print("SciPy  version unavailable:", exc)


Python 3.12.9
NumPy  1.26.2
Plotly 6.5.2
SciPy  1.15.0


In [2]:
# Reference value under H0: the fill volume printed on the label (ml).
mu0 = 250.0

# Synthetic sample of 20 fill volumes: the generating mean (252) sits slightly
# above the label, and the variance is treated as unknown by the test.
x = rng.normal(loc=252.0, scale=4.0, size=20)

# Quick numeric summary; the standard deviation is the sample one (ddof=1).
for label, value in (
    ("n=", x.size),
    ("mean=", float(x.mean())),
    ("std=", float(x.std(ddof=1))),
):
    print(label, value)
x[:10]


n= 20
mean= 250.7343483866156
std= 3.124737477155004


array([252.0049, 253.195 , 250.9034, 248.4376, 250.1813, 248.0334,
       252.2406, 257.3609, 250.0312, 249.5181])

In [3]:
# Look at the raw sample before testing: outliers and skewness matter.
ref_color = "rgba(214, 39, 40, 1)"

sample_violin = go.Violin(
    y=x,
    name="sample",
    points="all",
    jitter=0.2,
    box_visible=True,
    meanline_visible=True,
)
fig = go.Figure()
fig.add_trace(sample_violin)

# Dashed horizontal line at mu0 spanning the full plot width
# (x is given in paper coordinates, y in data coordinates).
fig.add_shape(
    type="line",
    xref="paper",
    x0=0,
    x1=1,
    y0=mu0,
    y1=mu0,
    line=dict(color=ref_color, dash="dash", width=2),
)

# Label for the reference line, nudged 10px above it near the right edge.
fig.add_annotation(
    xref="paper",
    x=0.98,
    y=mu0,
    yshift=10,
    text="μ0",
    showarrow=False,
    font=dict(color=ref_color),
)

layout_options = dict(
    title="Sample vs reference μ0",
    yaxis_title="measurement (ml)",
)
fig.update_layout(**layout_options)
fig.show()


In [4]:
def student_t_pdf(x: np.ndarray, df: int) -> np.ndarray:
    """Evaluate the Student t density at ``x`` using only NumPy and ``math``.

    Parameters
    - x: point(s) at which to evaluate the density; converted to a float array
    - df: degrees of freedom; passed through ``int()`` and must end up positive

    Returns the density values with the same shape as ``x``.

    Raises ValueError if ``int(df)`` is zero or negative.
    """
    points = np.asarray(x, dtype=float)
    dof = int(df)
    if dof <= 0:
        raise ValueError("df must be a positive integer")

    half_dof_plus_one = (dof + 1) / 2

    # Normalising constant Γ((df+1)/2) / (sqrt(df·π) · Γ(df/2)), assembled in
    # log space via lgamma before exponentiating.
    log_denominator = 0.5 * math.log(dof * math.pi) + math.lgamma(dof / 2)
    log_norm = math.lgamma(half_dof_plus_one) - log_denominator

    kernel = (1 + (points**2) / dof) ** (-half_dof_plus_one)
    return np.exp(log_norm) * kernel


def normal_pdf(x: np.ndarray) -> np.ndarray:
    """Standard normal density, exp(-x²/2) / sqrt(2π), evaluated elementwise.

    ``x`` is converted to a float array; the result has the same shape.
    """
    z = np.asarray(x, dtype=float)
    norm_const = 1 / np.sqrt(2 * np.pi)
    return norm_const * np.exp(-0.5 * z**2)


def ttest_1samp_numpy(
    x: np.ndarray,
    mu0: float,
    *,
    alternative: str = "two-sided",
    alpha: float = 0.05,
    n_mc: int = 300_000,
    seed: int = 123,
) -> dict:
    """One-sample t-test with a NumPy-only Monte Carlo p-value.

    Parameters
    - x: sample; NaN entries are dropped before any computation
    - mu0: null mean
    - alternative: 'two-sided', 'greater', 'less'
    - alpha: significance level for CI and decision, strictly between 0 and 1
    - n_mc: Monte Carlo sample size for approximating p-value and t critical values
    - seed: seed of the local generator used for the Monte Carlo draws

    Returns
    A dict with keys n, df, mu0, mean, std, se, t_stat, p_value, alpha,
    alternative, t_crit, ci (lower, upper), cohen_d, decision, mc_samples,
    mc_seed.

    Zero-variance samples (se == 0) skip the Monte Carlo step: the statistic is
    0 when the mean equals mu0 and ±inf otherwise, t_crit is NaN, and the
    p-value is 0 or 1 depending on whether the sign of the deviation supports
    the requested alternative.

    Raises
    ValueError if alternative is not one of the three supported values, alpha
    is outside (0, 1), n_mc is below 1, or fewer than 2 non-NaN observations
    remain.
    """
    # Validate options up front so bad input is rejected on every code path,
    # including the zero-variance shortcut below.
    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("alternative must be 'two-sided', 'greater', or 'less'")
    if not 0.0 < alpha < 1.0:
        raise ValueError("alpha must be strictly between 0 and 1")
    n_mc = int(n_mc)
    if n_mc < 1:
        raise ValueError("n_mc must be a positive integer")

    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]

    n = int(x.size)
    if n < 2:
        raise ValueError("Need at least 2 non-NaN observations.")

    df = n - 1
    mean = float(x.mean())
    s = float(x.std(ddof=1))
    se = s / np.sqrt(n) if s > 0 else 0.0

    if se == 0.0:
        # Degenerate sample: every observation is identical, so there is no
        # sampling variability to compare the deviation against.
        if mean == mu0:
            t_stat = 0.0
            cohen_d = 0.0
            p_value = 1.0
        else:
            direction = float(np.sign(mean - mu0))
            t_stat = float(np.inf * direction)
            cohen_d = t_stat
            # An infinite statistic is only evidence against H0 when it points
            # in the direction named by the alternative.
            if alternative == "two-sided":
                p_value = 0.0
            elif alternative == "greater":
                p_value = 0.0 if direction > 0 else 1.0
            else:
                p_value = 0.0 if direction < 0 else 1.0
        t_crit = float("nan")
        margin = 0.0
    else:
        t_stat = float((mean - mu0) / se)
        cohen_d = float((mean - mu0) / s)

        # Null distribution of the statistic, approximated by simulation.
        rng_local = np.random.default_rng(seed)
        t_null = rng_local.standard_t(df, size=n_mc)

        if alternative == "two-sided":
            p_value = float(np.mean(np.abs(t_null) >= abs(t_stat)))
            t_crit = float(np.quantile(t_null, 1 - alpha / 2))
        elif alternative == "greater":
            p_value = float(np.mean(t_null >= t_stat))
            t_crit = float(np.quantile(t_null, 1 - alpha))
        else:
            p_value = float(np.mean(t_null <= t_stat))
            t_crit = float(np.quantile(t_null, 1 - alpha))
        margin = t_crit * se

    # Interval shape follows the alternative; with se == 0 the margin is 0, so
    # the finite bound(s) collapse onto the sample mean.
    if alternative == "two-sided":
        ci = (mean - margin, mean + margin)
    elif alternative == "greater":
        ci = (mean - margin, np.inf)
    else:
        ci = (-np.inf, mean + margin)

    decision = "reject H0" if p_value <= alpha else "fail to reject H0"

    return {
        "n": n,
        "df": df,
        "mu0": float(mu0),
        "mean": mean,
        "std": s,
        "se": float(se),
        "t_stat": t_stat,
        "p_value": p_value,
        "alpha": float(alpha),
        "alternative": alternative,
        "t_crit": t_crit,
        "ci": (float(ci[0]), float(ci[1])),
        "cohen_d": cohen_d,
        "decision": decision,
        "mc_samples": n_mc,
        "mc_seed": int(seed),
    }


In [5]:
# Run the test on the sample; the fixed seed keeps the Monte Carlo p-value reproducible.
res = ttest_1samp_numpy(x, mu0, alternative="two-sided", alpha=0.05, n_mc=500_000, seed=42)

# Confidence level implied by the alpha actually used, so the printed label
# cannot drift from the interval if alpha is changed above.
conf_level_pct = 100 * (1 - res["alpha"])

print(f"n={res['n']}, mean={res['mean']:.3f}, std={res['std']:.3f}, SE={res['se']:.3f}")
print(f"t={res['t_stat']:.3f} (df={res['df']}), p≈{res['p_value']:.4f}, alpha={res['alpha']}")
print(f"{conf_level_pct:g}% CI for μ: [{res['ci'][0]:.3f}, {res['ci'][1]:.3f}]")
print(f"Cohen's d (one-sample): {res['cohen_d']:.3f}")
print("Decision:", res["decision"])


n=20, mean=250.734, std=3.125, SE=0.699
t=1.051 (df=19), p≈0.3066, alpha=0.05
95% CI for μ: [249.270, 252.199]
Cohen's d (one-sample): 0.235
Decision: fail to reject H0


In [6]:
# Cross-check against SciPy's one-sample t-test. The whole check is optional:
# any failure (including a missing SciPy install) is reported and skipped.
try:
    from scipy.stats import ttest_1samp

    scipy_res = ttest_1samp(x, popmean=mu0, alternative="two-sided")
    for label, value in (
        ("SciPy t:", scipy_res.statistic),
        ("SciPy p:", scipy_res.pvalue),
    ):
        print(label, float(value))
except Exception as exc:
    print("SciPy check skipped:", exc)


SciPy t: 1.0510021553137605
SciPy p: 0.30644215697136534


In [7]:
# Visualise the two-sided p-value as the area of both tails beyond |t_obs|.
t_obs = res["t_stat"]
df = res["df"]
tcrit = res["t_crit"]

# Plot range: at least [-6, 6], widened so t_obs and ±tcrit are inside it.
xmax = max(6.0, abs(t_obs) + 1.0, abs(tcrit) + 1.0)
xx = np.linspace(-xmax, xmax, 3001)
yy = student_t_pdf(xx, df)

abs_t = abs(t_obs)
mask_left = xx <= -abs_t
mask_right = xx >= abs_t

fig = go.Figure()
fig.add_trace(go.Scatter(x=xx, y=yy, mode="lines", name=f"t pdf (df={df})"))

# Both tails share one definition so they stay consistent; in particular both
# carry the same name, instead of the right tail falling back to an
# auto-generated "trace N" label.
for tail_mask in (mask_left, mask_right):
    fig.add_trace(
        go.Scatter(
            x=xx[tail_mask],
            y=yy[tail_mask],
            mode="lines",
            line=dict(width=0),
            fill="tozeroy",
            name="p-value tail",
            showlegend=False,
            fillcolor="rgba(214, 39, 40, 0.35)",
        )
    )

# Vertical markers: observed statistic (red dashed) and ±critical value (grey dotted).
ymax = float(yy.max())
for xline, dash, color, width in [
    (t_obs, "dash", "rgba(214, 39, 40, 1)", 2),
    (tcrit, "dot", "rgba(0, 0, 0, 0.6)", 1),
    (-tcrit, "dot", "rgba(0, 0, 0, 0.6)", 1),
]:
    fig.add_shape(
        type="line",
        x0=xline,
        x1=xline,
        y0=0,
        y1=ymax,
        xref="x",
        yref="y",
        line=dict(color=color, dash=dash, width=width),
    )

fig.update_layout(
    title=f"Two-sided p-value as tail area (t={t_obs:.3f}, p≈{res['p_value']:.4f})",
    xaxis_title="t",
    yaxis_title="density",
)
fig.show()


In [8]:
# Interactive comparison: Student t densities for several df against Normal(0,1).
dfs = [1, 2, 5, 10, 30, 100]
xx = np.linspace(-5, 5, 2001)

fig = go.Figure()

# Trace 0: the normal reference curve, visible for every slider position.
fig.add_trace(
    go.Scatter(
        x=xx,
        y=normal_pdf(xx),
        mode="lines",
        name="Normal(0,1)",
        line=dict(color="black", dash="dash"),
    )
)

# Traces 1..len(dfs): one t density per df; only the first starts visible.
for i, df_ in enumerate(dfs):
    fig.add_trace(
        go.Scatter(
            x=xx,
            y=student_t_pdf(xx, df_),
            mode="lines",
            name=f"t(df={df_})",
            visible=(i == 0),
        )
    )

# One slider step per df: show the reference plus exactly one t curve.
steps = []
for i, df_ in enumerate(dfs):
    visible = [True] + [False] * len(dfs)
    visible[1 + i] = True
    steps.append(
        dict(
            method="update",
            # Step args are passed to plotly.js as-is, so the title must be
            # addressed as "title.text"; the string shorthand for `title` is
            # only expanded on the Python side (e.g. in update_layout).
            args=[{"visible": visible}, {"title.text": f"Student t vs Normal — df={df_}"}],
            label=str(df_),
        )
    )

fig.update_layout(
    title=f"Student t vs Normal — df={dfs[0]}",
    xaxis_title="x",
    yaxis_title="density",
    sliders=[
        dict(
            active=0,
            currentvalue={"prefix": "df: "},
            pad={"t": 30},
            steps=steps,
        )
    ],
)
fig.show()


In [9]:
# Simulate the t-statistic under H0 and compare its histogram with the t(df) density.
rng_sim = np.random.default_rng(123)

B = 50_000
n = res["n"]
df = n - 1

# B independent samples of size n, drawn with the null mean as their true mean.
mu0_sim = 0.0
x0 = rng_sim.normal(loc=mu0_sim, scale=1.0, size=(B, n))

# One t-statistic per simulated sample (row).
row_means = x0.mean(axis=1)
row_se = x0.std(axis=1, ddof=1) / np.sqrt(n)
t_stats = (row_means - mu0_sim) / row_se

xx = np.linspace(-6, 6, 2001)
yy = student_t_pdf(xx, df)

simulated_hist = go.Histogram(
    x=t_stats,
    nbinsx=80,
    histnorm="probability density",
    name="Simulated t-stat",
    opacity=0.65,
)
theory_curve = go.Scatter(
    x=xx,
    y=yy,
    mode="lines",
    name=f"t pdf (df={df})",
    line=dict(width=3),
)

fig = go.Figure()
for trace in (simulated_hist, theory_curve):
    fig.add_trace(trace)

fig.update_layout(
    barmode="overlay",
    title=f"t-statistic under H0 matches t(df={df})",
    xaxis_title="t",
    yaxis_title="density",
)
fig.show()


In [10]:
# Monte Carlo power of the two-sided one-sample t-test as a function of n.
alpha = 0.05

# Generic setup for power: mu0=0, sigma=1 so δ is an effect size in "sigma units"
mu0_power = 0.0
sigma_power = 1.0
deltas = [0.2, 0.5, 0.8]

n_grid = np.array([5, 8, 12, 20, 30, 40, 60, 80, 100])

# Simulation sizes: B_power samples per (n, δ) pair, B_crit draws per n for
# the critical value.
B_power = 12_000
B_crit = 120_000

rng_power = np.random.default_rng(202)

power = {d: [] for d in deltas}

for n in n_grid:
    df = n - 1
    # Two-sided critical value, estimated as a quantile of simulated t(df) draws.
    t_null = rng_power.standard_t(df, size=B_crit)
    tcrit = float(np.quantile(t_null, 1 - alpha / 2))

    for d in deltas:
        # Samples generated with mean shifted by d * sigma away from mu0.
        x_alt = rng_power.normal(
            loc=mu0_power + d * sigma_power,
            scale=sigma_power,
            size=(B_power, n),
        )
        t_alt = (x_alt.mean(axis=1) - mu0_power) / (
            x_alt.std(axis=1, ddof=1) / np.sqrt(n)
        )
        # Power estimate = share of simulated samples that reject H0.
        power[d].append(float(np.mean(np.abs(t_alt) >= tcrit)))

fig = go.Figure()
for d in deltas:
    fig.add_trace(
        go.Scatter(
            x=n_grid,
            y=power[d],
            mode="lines+markers",
            name=f"δ={d}σ",
        )
    )

fig.update_layout(
    # Take α from the variable used in the simulation so the title cannot go stale.
    title=f"Estimated power vs sample size (two-sided t-test, α={alpha})",
    xaxis_title="n",
    yaxis_title="power",
    yaxis=dict(range=[0, 1]),
)
fig.show()
