In [1]:
import numpy as np
import plotly.graph_objects as go
import os
import plotly.io as pio
from plotly.subplots import make_subplots

# Two independent seeds: one for simulating data, one for bootstrap resampling.
SEED_DATA = 42
SEED_BOOTSTRAP = 123

# Generator for synthetic data, seeded once here.
rng_data = np.random.default_rng(SEED_DATA)

# Print arrays with 4 decimals in fixed-point notation.
np.set_printoptions(suppress=True, precision=4)

# Plotly defaults; the renderer can be overridden through PLOTLY_RENDERER.
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")
pio.templates.default = "plotly_white"


In [2]:
# Draw a small right-skewed sample and record its mean.
sample = rng_data.exponential(scale=1.0, size=40)
observed_mean = float(sample.mean())

# Resample with replacement: each row of `idx` holds the indices of one
# bootstrap replicate, and the mean is recomputed per replicate.
n_boot_demo = 5_000
rng_demo = np.random.default_rng(SEED_BOOTSTRAP)
idx = rng_demo.integers(0, sample.size, size=(n_boot_demo, sample.size))
bootstrap_means = np.mean(sample[idx], axis=1)

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Observed sample", "Bootstrap distribution of the mean"),
)

# One histogram per panel: (data, max bins, trace name, x-axis title).
for panel_col, (panel_values, panel_bins, panel_name, panel_xlabel) in enumerate(
    (
        (sample, 30, "sample", "value"),
        (bootstrap_means, 40, "boot means", "bootstrap mean"),
    ),
    start=1,
):
    fig.add_trace(
        go.Histogram(x=panel_values, nbinsx=panel_bins, name=panel_name),
        row=1,
        col=panel_col,
    )
    fig.update_xaxes(title_text=panel_xlabel, row=1, col=panel_col)
    fig.update_yaxes(title_text="count", row=1, col=panel_col)

# Mark the observed mean on the bootstrap panel only.
fig.add_vline(
    x=observed_mean,
    row=1,
    col=2,
    line_color="crimson",
    line_width=3,
    annotation_position="top",
    annotation_text=f"observed mean = {observed_mean:.3f}",
)

fig.update_layout(
    title_text="Bootstrap intuition: resample → recompute → distribution",
    showlegend=False,
    width=950,
    height=380,
)
fig.show()


In [3]:
def bootstrap_diff_in_means(sample_a, sample_b, *, n_boot=20_000, rng):
    """Bootstrap replicates of mean(sample_b) - mean(sample_a).

    Each group is resampled with replacement, independently of the other and
    at its own original size, ``n_boot`` times.

    Args:
        sample_a: Array-like of observations for group A (converted to float).
        sample_b: Array-like of observations for group B (converted to float).
        n_boot: Number of bootstrap replicates.
        rng: Random generator exposing ``integers(low, high, size=...)``,
            e.g. ``np.random.default_rng(seed)``.

    Returns:
        Array with one mean difference (B minus A) per replicate.
    """
    groups = [np.asarray(values, dtype=float) for values in (sample_a, sample_b)]

    # Index matrices are drawn for A first, then B, so a given generator
    # state always yields the same replicates.
    replicate_means = []
    for values in groups:
        picks = rng.integers(0, values.size, size=(n_boot, values.size))
        replicate_means.append(values[picks].mean(axis=1))

    means_a, means_b = replicate_means
    return means_b - means_a


def bootstrap_null_diff_in_means(sample_a, sample_b, *, delta0=0.0, n_boot=20_000, rng):
    """Bootstrap replicates of mean(B) - mean(A) under H0: the difference is delta0.

    Both groups are translated so that their means differ by exactly
    ``delta0`` while the pooled mean is left unchanged. The translated groups
    are then resampled with replacement, each at its own original size.

    Args:
        sample_a: Array-like of observations for group A (converted to float).
        sample_b: Array-like of observations for group B (converted to float).
        delta0: Hypothesized value of mean(B) - mean(A).
        n_boot: Number of bootstrap replicates.
        rng: Random generator exposing ``integers(low, high, size=...)``.

    Returns:
        Array with one mean difference (B minus A) per replicate.
    """
    group_a = np.asarray(sample_a, dtype=float)
    group_b = np.asarray(sample_b, dtype=float)
    size_a, size_b = group_a.size, group_b.size
    n_total = size_a + size_b

    grand_mean = float(np.mean(np.concatenate([group_a, group_b])))

    # Target means: their size-weighted average equals the pooled mean and
    # target_b - target_a == delta0.
    target_a = grand_mean - (size_b / n_total) * delta0
    target_b = grand_mean + (size_a / n_total) * delta0

    # Center each group on zero, then move it to its target mean.
    null_a = group_a - np.mean(group_a) + target_a
    null_b = group_b - np.mean(group_b) + target_b

    # Draw order (A, then B) is kept fixed for reproducibility.
    picks_a = rng.integers(0, size_a, size=(n_boot, size_a))
    picks_b = rng.integers(0, size_b, size=(n_boot, size_b))

    return null_b[picks_b].mean(axis=1) - null_a[picks_a].mean(axis=1)


def bootstrap_p_value(null_stats, observed_stat, *, delta0=0.0, alternative="two-sided"):
    """Compute a bootstrap p-value given a null distribution of the statistic.

    Args:
        null_stats: Array-like of statistic values simulated under H0.
        observed_stat: Statistic computed on the observed data.
        delta0: Null value of the statistic. Only used for the two-sided
            test, where extremeness is the absolute distance from ``delta0``.
        alternative: ``"two-sided"``, ``"greater"`` or ``"less"``.

    Returns:
        ``(k + 1) / (B + 1)``, where ``B`` is the number of null replicates
        and ``k`` the number that are at least as extreme as the observed
        statistic. Returns NaN when the observed statistic or any null
        replicate is NaN, or when ``delta0`` is NaN for a two-sided test.

    Raises:
        ValueError: If ``alternative`` is not one of the supported values.
    """
    null_stats = np.asarray(null_stats, dtype=float)
    observed_stat = float(observed_stat)

    if alternative not in {"two-sided", "greater", "less"}:
        raise ValueError("alternative must be one of: 'two-sided', 'greater', 'less'")

    # Every comparison against NaN is False. Without this guard an undefined
    # observed statistic would count zero extreme replicates and report the
    # smallest attainable p-value, and NaN replicates would silently count as
    # "not extreme". Propagate NaN instead of returning a misleading number.
    undefined = np.isnan(observed_stat) or bool(np.isnan(null_stats).any())
    if alternative == "two-sided":
        undefined = undefined or bool(np.isnan(delta0))
    if undefined:
        return float("nan")

    if alternative == "two-sided":
        extreme = np.abs(null_stats - delta0) >= abs(observed_stat - delta0)
    elif alternative == "greater":
        extreme = null_stats >= observed_stat
    else:  # "less"
        extreme = null_stats <= observed_stat

    # +1 correction: avoids returning exactly 0 for finite B.
    return (int(np.count_nonzero(extreme)) + 1) / (null_stats.size + 1)


def bootstrap_ci_percentile(bootstrap_stats, *, conf_level=0.95):
    """Percentile bootstrap confidence interval.

    Args:
        bootstrap_stats: Array-like of bootstrap replicates of the statistic.
        conf_level: Coverage as a probability in ``[0, 1]`` (``0.95``, not
            ``95``).

    Returns:
        ``(low, high)``: the ``alpha / 2`` and ``1 - alpha / 2`` quantiles of
        the replicates as floats, where ``alpha = 1 - conf_level``.

    Raises:
        ValueError: If ``conf_level`` is outside ``[0, 1]`` or is NaN.
    """
    bootstrap_stats = np.asarray(bootstrap_stats, dtype=float)
    conf_level = float(conf_level)

    # Validate here so that a percentage such as 95 produces an error naming
    # this parameter instead of numpy's generic quantile-range message.
    # The negated form also rejects NaN.
    if not 0.0 <= conf_level <= 1.0:
        raise ValueError(
            f"conf_level must be a probability in [0, 1] (e.g. 0.95), got {conf_level!r}"
        )

    alpha = 1.0 - conf_level
    low, high = np.quantile(bootstrap_stats, [alpha / 2, 1 - alpha / 2])
    return float(low), float(high)


In [4]:
# Both groups come from the shared `rng_data` generator, which the earlier
# sample draw has already advanced, so the values depend on cell run order.
control = rng_data.exponential(scale=1.0, size=50)
# Treatment has the same skewed shape, shifted upward by a constant 0.35.
treatment = 0.35 + rng_data.exponential(scale=1.0, size=45)

# Effect estimate: treatment mean minus control mean.
observed_diff = float(treatment.mean() - control.mean())
observed_diff


0.31111429489836495

In [5]:
fig = go.Figure()

# One violin per group with identical styling: embedded box plot, mean line,
# and every raw observation drawn with jitter.
for group_label, group_values in (("Control", control), ("Treatment", treatment)):
    fig.add_trace(
        go.Violin(
            y=group_values,
            name=group_label,
            points="all",
            jitter=0.25,
            box_visible=True,
            meanline_visible=True,
        )
    )

fig.update_layout(
    title="Raw data (skewed): control vs treatment",
    yaxis_title="metric value",
    width=750,
    height=420,
)
fig.show()


In [6]:
n_boot = 30_000

# Replicates of the mean difference under H0 (no difference between groups),
# drawn from a freshly seeded generator.
null_stats = bootstrap_null_diff_in_means(
    control,
    treatment,
    delta0=0.0,
    n_boot=n_boot,
    rng=np.random.default_rng(SEED_BOOTSTRAP),
)

# Same null distribution, evaluated against both alternatives of interest.
p_two_sided, p_greater = (
    bootstrap_p_value(null_stats, observed_diff, alternative=side)
    for side in ("two-sided", "greater")
)

print(
    f"Observed mean difference (treatment - control): {observed_diff:.4f}",
    f"Bootstrap p-value (two-sided): {p_two_sided:.4f}",
    f"Bootstrap p-value (greater, i.e. treatment > control): {p_greater:.4f}",
    sep="\n",
)


Observed mean difference (treatment - control): 0.3111
Bootstrap p-value (two-sided): 0.0867
Bootstrap p-value (greater, i.e. treatment > control): 0.0404


In [7]:
# Null replicates at least as far from 0 as the observed difference
# (two-sided criterion with a null value of 0).
extreme = np.abs(null_stats) >= abs(observed_diff)

fig = go.Figure()
# Both traces share one bingroup. In "overlay" mode Plotly otherwise chooses
# bins separately for each trace; the central and tail subsets span different
# ranges, so they would get different bin widths and their bar heights would
# not be comparable.
fig.add_trace(
    go.Histogram(
        x=null_stats[~extreme],
        nbinsx=70,
        bingroup="null",
        name="null distribution",
        marker_color="#4C78A8",
        opacity=0.85,
    )
)
fig.add_trace(
    go.Histogram(
        x=null_stats[extreme],
        nbinsx=70,
        bingroup="null",
        name="as/extreme as observed",
        marker_color="#E45756",
        opacity=0.95,
    )
)

# Mark the observed statistic on the null distribution.
fig.add_vline(
    x=observed_diff,
    line_width=3,
    line_color="crimson",
    annotation_text=f"observed = {observed_diff:.3f}",
    annotation_position="top",
)

fig.update_layout(
    barmode="overlay",
    title=f"Null distribution via bootstrap (two-sided p ≈ {p_two_sided:.4f})",
    xaxis_title="mean difference under H0",
    yaxis_title="count",
    height=430,
    width=900,
)
fig.show()


In [8]:
# Ordinary (non-null) bootstrap of the effect, from a freshly seeded generator.
boot_stats = bootstrap_diff_in_means(
    control,
    treatment,
    n_boot=n_boot,
    rng=np.random.default_rng(SEED_BOOTSTRAP),
)

# Spread of the replicates (sample standard deviation) and the central 95% range.
boot_se = float(boot_stats.std(ddof=1))
ci_low, ci_high = bootstrap_ci_percentile(boot_stats, conf_level=0.95)

print(
    f"Observed mean difference: {observed_diff:.4f}",
    f"Bootstrap SE (approx): {boot_se:.4f}",
    f"95% percentile bootstrap CI: [{ci_low:.4f}, {ci_high:.4f}]",
    sep="\n",
)


Observed mean difference: 0.3111
Bootstrap SE (approx): 0.1816
95% percentile bootstrap CI: [-0.0519, 0.6593]


In [9]:
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=boot_stats,
        nbinsx=70,
        name="bootstrap effect distribution",
        opacity=0.9,
        marker_color="#4C78A8",
    )
)

# Reference lines, added in this order: null value, observed effect, then the
# two CI bounds. Each entry is (x position, line styling, label, label position).
for marker_x, marker_line, marker_text, marker_where in (
    (0.0, dict(line_width=2, line_color="black", line_dash="dash"), "null (0)", "top"),
    (observed_diff, dict(line_width=3, line_color="crimson"), f"observed = {observed_diff:.3f}", "top"),
    (ci_low, dict(line_width=3, line_color="#72B7B2"), f"2.5% = {ci_low:.3f}", "bottom"),
    (ci_high, dict(line_width=3, line_color="#72B7B2"), f"97.5% = {ci_high:.3f}", "bottom"),
):
    fig.add_vline(
        x=marker_x,
        annotation_text=marker_text,
        annotation_position=marker_where,
        **marker_line,
    )

fig.update_layout(
    title="Standard bootstrap distribution for the effect + 95% CI",
    xaxis_title="mean difference (treatment - control)",
    yaxis_title="count",
    width=900,
    height=430,
)
fig.show()
