In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

from scipy import stats, special

# Plotly rendering (CKC convention): the PLOTLY_RENDERER environment variable wins, "notebook" otherwise
pio.renderers.default = os.environ.get(
    "PLOTLY_RENDERER",
    "notebook",
)

# Reproducibility: one seeded generator, passed explicitly to the sampling calls
rng = np.random.default_rng(seed=7)

# Array display: 4 decimals, no scientific notation for small values
np.set_printoptions(suppress=True, precision=4)


In [2]:
def beta_pdf(x: np.ndarray, alpha: float, beta: float) -> np.ndarray:
    '''Numerically stable Beta(alpha, beta) PDF, evaluated in log-space.

    Parameters
    ----------
    x:
        point(s) at which to evaluate the density (scalar or array-like)
    alpha, beta:
        shape parameters (expected > 0; not validated here)

    Returns
    -------
    The density at each ``x``: 0 outside [0, 1], NaN where ``x`` is NaN.
    At an endpoint the value is the limit of the density there
    (e.g. 1 for the uniform case alpha = beta = 1, inf when the shape is < 1).
    '''
    x = np.asarray(x, dtype=float)
    in_support = (x >= 0.0) & (x <= 1.0)

    # Out-of-support (and NaN) points are swapped for a harmless interior value
    # so the special functions never see an invalid argument; they are masked below.
    x_safe = np.where(in_support, x, 0.5)

    # xlogy / xlog1py return 0 when the multiplier is 0, so alpha == 1 at x == 0
    # (or beta == 1 at x == 1) no longer produces 0 * -inf = nan.
    log_pdf = (
        special.xlogy(alpha - 1, x_safe)
        + special.xlog1py(beta - 1, -x_safe)
        - special.betaln(alpha, beta)
    )

    log_pdf = np.where(in_support, log_pdf, -np.inf)  # exp(-inf) == 0 outside the support
    log_pdf = np.where(np.isnan(x), np.nan, log_pdf)  # NaN inputs stay NaN
    return np.exp(log_pdf)


def beta_cdf(x: np.ndarray, alpha: float, beta: float) -> np.ndarray:
    '''Beta CDF via the regularized incomplete beta function I_x(alpha, beta).

    Parameters
    ----------
    x:
        point(s) at which to evaluate the CDF (scalar or array-like)
    alpha, beta:
        shape parameters (expected > 0; not validated here)

    Returns
    -------
    P(X <= x) for X ~ Beta(alpha, beta): 0 for x <= 0, 1 for x >= 1,
    NaN where ``x`` is NaN.
    '''
    x = np.asarray(x, dtype=float)
    # betainc is only defined for 0 <= x <= 1; clipping maps everything left of the
    # support to I_0 = 0 and everything right of it to I_1 = 1. NaN passes through clip.
    return special.betainc(alpha, beta, np.clip(x, 0.0, 1.0))


# Quick sanity check: PDF integrates to ~1
alpha0, beta0 = 2.0, 5.0
xgrid = np.linspace(1e-6, 1 - 1e-6, 200_000)

# NumPy 2.0 introduced np.trapezoid and deprecated np.trapz; use whichever this
# NumPy provides so the cell runs on both old and new releases.
_trapezoid = getattr(np, "trapezoid", None) or np.trapz

area = _trapezoid(beta_pdf(xgrid, alpha0, beta0), xgrid)
area


0.9999999999225003

In [3]:
def beta_moments(alpha: float, beta: float) -> dict:
    '''Closed-form summary quantities of Beta(alpha, beta).

    Returns a dict with keys "mean", "var", "skew", "excess_kurtosis", "mode",
    "entropy" and "mgf". "mode" is NaN unless both parameters exceed 1, and
    "mgf" is a callable t -> 1F1(alpha; alpha + beta; t).
    '''
    a = float(alpha)
    b = float(beta)

    mean = a / (a + b)
    var = a * b / ((a + b) ** 2 * (a + b + 1))

    skew_numer = 2 * (b - a) * np.sqrt(a + b + 1)
    skew_denom = (a + b + 2) * np.sqrt(a * b)
    skew = skew_numer / skew_denom

    kurt_numer = 6 * (((a - b) ** 2) * (a + b + 1) - a * b * (a + b + 2))
    kurt_denom = a * b * (a + b + 2) * (a + b + 3)
    excess_kurt = kurt_numer / kurt_denom

    # The interior-mode formula is only used when both shapes are above 1.
    if a > 1 and b > 1:
        mode = (a - 1) / (a + b - 2)
    else:
        mode = np.nan

    def mgf(t):
        '''Moment generating function E[exp(t X)] via Kummer's 1F1.'''
        return special.hyp1f1(a, a + b, t)

    # Differential entropy: ln B(a, b) plus digamma terms.
    entropy = (
        special.betaln(a, b)
        - (a - 1) * special.digamma(a)
        - (b - 1) * special.digamma(b)
        + (a + b - 2) * special.digamma(a + b)
    )

    return dict(
        mean=mean,
        var=var,
        skew=skew,
        excess_kurtosis=excess_kurt,
        mode=mode,
        entropy=entropy,
        mgf=mgf,
    )


# Closed-form quantities for (alpha0, beta0); the "mgf" callable is left out of the display
m = beta_moments(alpha0, beta0)
{name: value for name, value in m.items() if name != "mgf"}


{'mean': 0.2857142857142857,
 'var': 0.025510204081632654,
 'skew': 0.5962847939999439,
 'excess_kurtosis': -0.12,
 'mode': 0.2,
 'entropy': -0.48453071499548805}

In [4]:
# Monte Carlo cross-check: sample mean/variance and the empirical MGF at t = +1 and t = -1,
# displayed next to the closed-form values from `m`
n = 200_000
samples_scipy = stats.beta(alpha0, beta0).rvs(size=n, random_state=rng)

mc_mean = np.mean(samples_scipy)
mc_var = np.var(samples_scipy)  # ddof=0, i.e. the population variance

# Empirical E[exp(t X)] for t = 1 and t = -1
mc_mgf_1 = np.exp(samples_scipy).mean()
mc_mgf_m1 = np.exp(-samples_scipy).mean()

# Theory / Monte Carlo pairs, in the order: mean, variance, MGF(1), MGF(-1)
(m["mean"], mc_mean, m["var"], mc_var, m["mgf"](1.0), mc_mgf_1, m["mgf"](-1.0), mc_mgf_m1)


(0.2857142857142857,
 0.2850010733170977,
 0.025510204081632654,
 0.02540504391457564,
 1.3483340379497217,
 1.3473002240829173,
 0.7608141393691706,
 0.7613179409507954)

In [5]:
# Evaluation grid with the endpoints kept slightly inside (0, 1)
x = np.linspace(1e-4, 1 - 1e-4, num=600)

# One (alpha, beta, legend label) triple per curve
param_sets = [
    (0.5, 0.5, "U-shaped (0.5,0.5)"),
    (1.0, 1.0, "Uniform (1,1)"),
    (2.0, 2.0, "Symmetric peak (2,2)"),
    (2.0, 5.0, "Skewed right (2,5)"),
    (5.0, 2.0, "Skewed left (5,2)"),
    (8.0, 1.5, "Mass near 1 (8,1.5)"),
]

fig = go.Figure(
    layout={
        "title": "Beta PDF for different (α, β)",
        "xaxis_title": "x",
        "yaxis_title": "density",
        "width": 900,
        "height": 450,
    }
)
for a, b, label in param_sets:
    fig.add_trace(
        go.Scatter(
            x=x,
            y=beta_pdf(x, a, b),
            name=label,
            mode="lines",
        )
    )

fig


In [6]:
# Hold the mean at m_fixed and vary the concentration κ: alpha = κ·m, beta = κ·(1 − m)
m_fixed = 0.3
kappas = [2, 5, 20, 100]

fig = go.Figure(
    layout={
        "title": "Same mean (m=0.3), increasing concentration κ",
        "xaxis_title": "x",
        "yaxis_title": "density",
        "width": 900,
        "height": 420,
    }
)
for kappa in kappas:
    a, b = kappa * m_fixed, kappa * (1 - m_fixed)
    fig.add_trace(
        go.Scatter(
            x=x,
            y=beta_pdf(x, a, b),
            name=f"κ={kappa}",
            mode="lines",
        )
    )

fig


In [7]:
def beta_loglikelihood(x: np.ndarray, alpha: float, beta: float) -> float:
    '''Log-likelihood of the observations ``x`` under Beta(alpha, beta).

    Returns -inf as soon as any observation is <= 0 or >= 1; otherwise
    -n * ln B(alpha, beta) + (alpha - 1) * sum(ln x) + (beta - 1) * sum(ln(1 - x)).
    '''
    obs = np.asarray(x, dtype=float)

    # Observations on or beyond the boundary are treated as impossible.
    on_or_outside_boundary = (obs <= 0) | (obs >= 1)
    if on_or_outside_boundary.any():
        return -np.inf

    sum_log_x = np.log(obs).sum()
    sum_log_one_minus_x = np.log1p(-obs).sum()

    return (
        -obs.size * special.betaln(alpha, beta)
        + (alpha - 1) * sum_log_x
        + (beta - 1) * sum_log_one_minus_x
    )


# Log-likelihood of the first 1000 SciPy draws at the parameters that generated them
beta_loglikelihood(samples_scipy[:1000], alpha=alpha0, beta=beta0)


493.07429490684376

In [8]:
def gamma_rvs_numpy(shape: float, size: int, rng: np.random.Generator) -> np.ndarray:
    '''Sample Gamma(shape, scale=1) using NumPy only (Marsaglia-Tsang).

    Parameters
    ----------
    shape:
        k > 0
    size:
        number of samples
    rng:
        NumPy Generator

    Returns
    -------
    1-D float array of length ``size``.

    Raises
    ------
    ValueError
        if ``shape`` is not strictly positive (this includes NaN).
    '''

    k = float(shape)
    # Written as `not k > 0` so that NaN is rejected too: a NaN shape would make
    # every candidate below fail the acceptance tests and the loop would never end.
    if not k > 0:
        raise ValueError("shape must be > 0")

    # k < 1: boost to k+1 and apply power transform
    if k < 1:
        g = gamma_rvs_numpy(k + 1.0, size, rng)
        u = rng.random(size)
        return g * (u ** (1.0 / k))

    # k >= 1: Marsaglia-Tsang
    d = k - 1.0 / 3.0
    c = 1.0 / np.sqrt(9.0 * d)

    out = np.empty(size, dtype=float)
    filled = 0

    # Vectorised rejection sampling: propose as many candidates as are still
    # missing, keep the accepted ones, repeat until the output is full.
    while filled < size:
        n = size - filled
        x = rng.standard_normal(n)
        v = (1.0 + c * x)
        v = v * v * v  # (1 + c x)^3
        u = rng.random(n)

        positive = v > 0

        # First (cheap) acceptance
        accept = positive & (u < 1.0 - 0.0331 * (x**4))

        # Second acceptance (log test) - compute log(v) only where v > 0 to avoid warnings
        log_v = np.zeros_like(v)
        log_v[positive] = np.log(v[positive])

        accept2 = positive & (~accept) & (
            np.log(u) < 0.5 * x * x + d * (1.0 - v + log_v)
        )

        accept = accept | accept2
        accepted = d * v[accept]

        # `accept` masks exactly n candidates, so at most n values are accepted
        # and they always fit in the remaining slots.
        take = accepted.size
        out[filled : filled + take] = accepted
        filled += take

    return out


def beta_rvs_numpy(alpha: float, beta: float, size: int, rng: np.random.Generator) -> np.ndarray:
    '''Draw ``size`` Beta(alpha, beta) samples as Ga / (Ga + Gb).

    Ga and Gb are unit-scale gamma draws with shapes ``alpha`` and ``beta``,
    both produced by the NumPy-only ``gamma_rvs_numpy`` from the same ``rng``.
    '''
    # The alpha-shaped draw is taken first, then the beta-shaped one.
    gamma_a = gamma_rvs_numpy(alpha, size, rng)
    gamma_b = gamma_rvs_numpy(beta, size, rng)
    total = gamma_a + gamma_b
    return gamma_a / total


# Validate the NumPy-only sampler: its sample mean/variance next to the closed-form values
n = 200_000
samples_numpy = beta_rvs_numpy(alpha0, beta0, size=n, rng=rng)

(samples_numpy.mean(), samples_numpy.var(), m["mean"], m["var"])


(0.2857694499926869,
 0.025682506385920862,
 0.2857142857142857,
 0.025510204081632654)

In [9]:
# Two-sample KS test on the first 20,000 draws from each sampler (NumPy-only vs SciPy)
ks = stats.ks_2samp(
    samples_numpy[:20_000],
    samples_scipy[:20_000],
)
ks


KstestResult(statistic=0.00824999999999998, pvalue=0.5014359645922872, statistic_location=0.2744678864589129, statistic_sign=-1)

In [10]:
# Density-normalised histogram of the NumPy-only draws with SciPy's PDF drawn on top
x = np.linspace(1e-4, 1 - 1e-4, num=800)

fig = go.Figure(
    data=[
        go.Histogram(
            x=samples_numpy,
            nbinsx=60,
            histnorm="probability density",
            name="Monte Carlo (NumPy-only)",
            opacity=0.55,
        ),
        go.Scatter(
            x=x,
            y=stats.beta(alpha0, beta0).pdf(x),
            mode="lines",
            name="True PDF (SciPy)",
            line={"width": 3},
        ),
    ]
)

fig.update_layout(
    {
        "title": f"Beta({alpha0}, {beta0}): histogram vs PDF",
        "xaxis_title": "x",
        "yaxis_title": "density",
        "width": 900,
        "height": 420,
    }
)
fig


In [11]:
# Theoretical CDF (SciPy) against the empirical CDF of the NumPy-only draws
x = np.linspace(0, 1, num=600)

# Empirical CDF: the i-th smallest draw (1-based) sits at height i / N
emp_x = np.sort(samples_numpy)
emp_cdf = np.arange(1, emp_x.size + 1) / emp_x.size

fig = go.Figure(
    data=[
        go.Scatter(
            x=x,
            y=stats.beta(alpha0, beta0).cdf(x),
            mode="lines",
            name="True CDF",
        ),
        # Only every 200th empirical point is plotted
        go.Scatter(
            x=emp_x[::200],
            y=emp_cdf[::200],
            mode="markers",
            name="Empirical CDF (subsampled)",
            marker={"size": 4, "opacity": 0.6},
        ),
    ]
)

fig.update_layout(
    {
        "title": f"Beta({alpha0}, {beta0}): theoretical CDF vs empirical CDF",
        "xaxis_title": "x",
        "yaxis_title": "CDF",
        "width": 900,
        "height": 420,
    }
)
fig


In [12]:
# Frozen SciPy distribution on the standard support (loc=0, scale=1 are the defaults)
dist = stats.beta(alpha0, beta0)

# Six evenly spaced points covering [0, 1], endpoints included
x = np.linspace(0, 1, num=6)

pdf, cdf = dist.pdf(x), dist.cdf(x)
samples = dist.rvs(size=5, random_state=rng)

(pdf, cdf, samples)


(array([0.    , 2.4576, 1.5552, 0.4608, 0.0384, 0.    ]),
 array([0.    , 0.3446, 0.7667, 0.959 , 0.9984, 1.    ]),
 array([0.3445, 0.4805, 0.0498, 0.2403, 0.4281]))

In [13]:
# Maximum-likelihood fit with SciPy on the first 10,000 NumPy-only draws.
# When the data are known to live on [0, 1], loc and scale are usually pinned
# (floc=0, fscale=1) so that only the two shape parameters are estimated.
a_hat, b_hat, loc_hat, scale_hat = stats.beta.fit(
    samples_numpy[:10_000],
    floc=0,
    fscale=1,
)
(a_hat, b_hat, loc_hat, scale_hat)


(1.9849242586340088, 4.951405202339398, 0, 1)

In [14]:
# Example: Clopper–Pearson interval from k out of n, at level alpha_level
n = 100
k = 37
alpha_level = 0.05

# Lower bound: Beta(k, n - k + 1) quantile at alpha/2, pinned to 0 when k == 0
cp_low = 0.0 if k == 0 else stats.beta.ppf(alpha_level / 2, k, n - k + 1)

# Upper bound: Beta(k + 1, n - k) quantile at 1 - alpha/2, pinned to 1 when k == n
cp_high = 1.0 if k == n else stats.beta.ppf(1 - alpha_level / 2, k + 1, n - k)

(cp_low, cp_high)


(0.2755665796145515, 0.47235164055168316)

In [15]:
# Example: Bayesian update for a Bernoulli probability.
# Reuses n and k from the Clopper–Pearson cell: k is added to alpha, n - k to beta.
alpha_prior = beta_prior = 2.0
alpha_post, beta_post = alpha_prior + k, beta_prior + (n - k)

prior = stats.beta(alpha_prior, beta_prior)
post = stats.beta(alpha_post, beta_post)

x = np.linspace(1e-4, 1 - 1e-4, num=600)

fig = go.Figure(
    data=[
        go.Scatter(
            x=x,
            y=prior.pdf(x),
            mode="lines",
            name=f"Prior Beta({alpha_prior:.0f},{beta_prior:.0f})",
        ),
        go.Scatter(
            x=x,
            y=post.pdf(x),
            mode="lines",
            name=f"Posterior Beta({alpha_post:.0f},{beta_post:.0f})",
            line={"width": 3},
        ),
    ]
)

fig.update_layout(
    {
        "title": f"Bayesian update (n={n}, k={k})",
        "xaxis_title": "p",
        "yaxis_title": "density",
        "width": 900,
        "height": 420,
    }
)
fig


In [16]:
# Posterior mass above the threshold, P(p > threshold), together with the posterior mean
threshold = 0.4
post_prob, post_mean = 1 - post.cdf(threshold), post.mean()
(post_prob, post_mean)


(0.29530027558732863, 0.375)