In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

from scipy import special, stats

# Use the white-background Plotly template for every figure in the notebook.
pio.templates.default = "plotly_white"
# The renderer can be overridden through the PLOTLY_RENDERER environment
# variable; it falls back to "notebook" when the variable is not set.
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

# Print arrays with 6 decimals and no scientific notation. Note that very small
# values (e.g. 1e-12) are therefore displayed as 0.
np.set_printoptions(precision=6, suppress=True)
# Seeded generator shared by the cells below so that random draws are reproducible
# when the notebook is run top to bottom.
rng = np.random.default_rng(42)


In [2]:
def _check_ab(a: float, b: float) -> None:
    if not (np.isfinite(a) and np.isfinite(b) and a < b):
        raise ValueError("Require finite endpoints with a < b.")


def arcsine_pdf(x, a: float = 0.0, b: float = 1.0):
    '''Arcsine pdf on [a,b]. Vectorized over x.

    f(x) = 1 / (pi * sqrt((x - a) * (b - x)))  for a < x < b.

    Parameters:
        x: scalar or array-like of evaluation points (converted to float).
        a, b: finite support endpoints with a < b.

    Returns:
        ndarray with the shape of x holding
        * the density for points strictly inside (a, b),
        * +inf exactly at x == a or x == b (the density diverges there),
        * 0.0 outside [a, b],
        * NaN where x is NaN (NaN is propagated rather than reported as 0.0).

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)

    out = np.zeros_like(x, dtype=float)
    interior = (x > a) & (x < b)
    boundary = (x == a) | (x == b)

    # NaN fails every comparison above, so without this line a NaN input would
    # silently keep the "outside the support" value 0.0.
    out[np.isnan(x)] = np.nan
    out[boundary] = np.inf
    out[interior] = 1.0 / (np.pi * np.sqrt((x[interior] - a) * (b - x[interior])))
    return out


def arcsine_logpdf(x, a: float = 0.0, b: float = 1.0):
    '''Numerically friendlier log-pdf (still +inf at boundaries).

    log f(x) = -log(pi) - 0.5 * log(x - a) - 0.5 * log(b - x)  for a < x < b.
    Working with the two logarithms separately avoids forming the product
    (x - a) * (b - x) before taking the log.

    Parameters:
        x: scalar or array-like of evaluation points (converted to float).
        a, b: finite support endpoints with a < b.

    Returns:
        ndarray with the shape of x holding
        * the log-density for points strictly inside (a, b),
        * +inf exactly at x == a or x == b,
        * -inf outside [a, b] (log of zero density),
        * NaN where x is NaN (NaN is propagated rather than reported as -inf).

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)

    out = np.full_like(x, -np.inf, dtype=float)
    interior = (x > a) & (x < b)
    boundary = (x == a) | (x == b)

    # NaN fails every comparison above, so without this line a NaN input would
    # silently keep the "outside the support" value -inf.
    out[np.isnan(x)] = np.nan
    out[boundary] = np.inf
    out[interior] = (
        -np.log(np.pi)
        - 0.5 * np.log(x[interior] - a)
        - 0.5 * np.log(b - x[interior])
    )
    return out


def arcsine_cdf(x, a: float = 0.0, b: float = 1.0):
    '''Arcsine CDF on [a,b], vectorized over x.

    F(x) = (2 / pi) * arcsin(sqrt((x - a) / (b - a))) inside the support,
    0 for x <= a and 1 for x >= b.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)

    # Map onto the unit interval; the clip keeps sqrt/arcsin inside their
    # domain when floating-point drift pushes the ratio slightly past 0 or 1.
    unit = np.clip((x - a) / (b - a), 0.0, 1.0)
    inside = (2.0 / np.pi) * np.arcsin(np.sqrt(unit))

    # Pin the exact values at and beyond the endpoints.
    return np.where(x >= b, 1.0, np.where(x <= a, 0.0, inside))


def arcsine_ppf(u, a: float = 0.0, b: float = 1.0):
    '''Inverse CDF (percent point function), vectorized over u.

    Q(u) = a + (b - a) * sin(pi * u / 2) ** 2.

    Raises:
        ValueError: if the endpoints are invalid or any u lies outside [0,1].
    '''
    _check_ab(a, b)
    u = np.asarray(u, dtype=float)

    out_of_range = np.logical_or(u < 0.0, u > 1.0)
    if out_of_range.any():
        raise ValueError("u must be in [0,1].")

    width = b - a
    return a + width * np.sin(0.5 * np.pi * u) ** 2


def arcsine_rvs(size=None, a: float = 0.0, b: float = 1.0, rng: np.random.Generator | None = None):
    '''NumPy-only sampling via inverse transform.'''
    _check_ab(a, b)
    if rng is None:
        rng = np.random.default_rng()
    u = rng.random(size=size)
    return arcsine_ppf(u, a=a, b=b)


# Side-by-side comparison with SciPy on [0,1]: columns are x, our pdf, SciPy's pdf.
# The grid stays 1e-6 inside the endpoints, where the density diverges.
x_grid = np.linspace(1e-6, 1 - 1e-6, 5)
np.column_stack((x_grid, arcsine_pdf(x_grid), stats.arcsine.pdf(x_grid)))


array([[  0.000001, 318.310045, 318.310045],
       [  0.25    ,   0.735105,   0.735105],
       [  0.5     ,   0.63662 ,   0.63662 ],
       [  0.75    ,   0.735105,   0.735105],
       [  0.999999, 318.310045, 318.310045]])

In [3]:
def arcsine_mean(a: float = 0.0, b: float = 1.0) -> float:
    '''Mean of the arcsine law on [a,b]: the midpoint of the support.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    midpoint = (a + b) * 0.5
    return midpoint


def arcsine_var(a: float = 0.0, b: float = 1.0) -> float:
    '''Variance of the arcsine law on [a,b]: (b - a)^2 / 8.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    width = b - a
    return width ** 2 / 8.0


def arcsine_mgf(t, a: float = 0.0, b: float = 1.0):
    '''MGF using modified Bessel I0, vectorized over t.

    M(t) = exp(t * (a + b) / 2) * I0(t * (b - a) / 2).

    The Bessel factor is evaluated with the exponentially scaled
    ``special.i0e`` (i0e(z) = exp(-|z|) * I0(z)) and the two exponents are
    added before exponentiating. The naive product exp(...) * i0(...) turns
    into 0 * inf = nan whenever one factor underflows while the other
    overflows (e.g. t = -1500 on [0,1]), although the true value is finite.

    Parameters:
        t: scalar or array-like argument of the MGF (converted to float).
        a, b: finite support endpoints with a < b.

    Returns:
        M(t) with the shape of t; +inf where the MGF genuinely overflows.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    t = np.asarray(t, dtype=float)
    center = t * 0.5 * (a + b)        # exponent contributed by the midpoint
    half_width = t * 0.5 * (b - a)    # argument of the Bessel function
    # I0(z) = exp(|z|) * i0e(z); fold exp(|z|) into the midpoint exponent.
    return np.exp(center + np.abs(half_width)) * special.i0e(half_width)


def arcsine_cf(t, a: float = 0.0, b: float = 1.0):
    '''Characteristic function using Bessel J0, vectorized over t.

    phi(t) = exp(i * t * (a + b) / 2) * J0(t * (b - a) / 2): a complex phase
    from the midpoint times a real Bessel envelope from the half-width.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    t = np.asarray(t, dtype=float)
    phase = np.exp(1j * t * 0.5 * (a + b))
    envelope = special.j0(t * 0.5 * (b - a))
    return phase * envelope


def arcsine_entropy(a: float = 0.0, b: float = 1.0) -> float:
    '''Differential entropy of the arcsine law on [a,b]: log(pi * (b - a) / 4).

    Returned as a plain Python float.

    Raises:
        ValueError: if the endpoints are not finite or a >= b.
    '''
    _check_ab(a, b)
    scaled_width = (b - a) * np.pi / 4.0
    return float(np.log(scaled_width))


# Monte Carlo check on [0,1]
# Draw n_mc inverse-transform samples from the shared, seeded generator and
# compare their empirical moments with the closed-form values.
n_mc = 200_000
x_mc = arcsine_rvs(n_mc, rng=rng)

# x_mc.var() uses NumPy's default ddof=0 (population variance).
print("Monte Carlo mean:", x_mc.mean(), "| theory:", arcsine_mean())
print("Monte Carlo var :", x_mc.var(), "| theory:", arcsine_var())
# The entropy is only reported from the closed form; it is not estimated from x_mc.
print("Entropy theory  :", arcsine_entropy())


Monte Carlo mean: 0.4999078439779247 | theory: 0.5
Monte Carlo var : 0.12498062340322307 | theory: 0.125
Entropy theory  : -0.2415644752704905


In [4]:
# Endpoint pairs (a, b) whose densities are overlaid in one figure.
intervals = [(0.0, 1.0), (-1.0, 1.0), (2.0, 5.0)]

fig = go.Figure()

for a, b in intervals:
    # Stay a relative 1e-4 of the width inside each endpoint: the density
    # diverges at a and b, so the endpoints themselves cannot be plotted.
    margin = 1e-4 * (b - a)
    x = np.linspace(a + margin, b - margin, 800)
    fig.add_trace(go.Scatter(x=x, y=arcsine_pdf(x, a=a, b=b), name=f"pdf on [{a:g}, {b:g}]"))

fig.update_layout(
    title="Arcsine pdf for different endpoints (note the endpoint spikes)",
    xaxis_title="x",
    yaxis_title="density",
)
fig.show()


In [5]:
# Sampling demo
# Ten inverse-transform draws on [a, b] = [0, 1] from the shared generator `rng`;
# the bare `x` on the last line makes the cell display the sampled array.
a, b = 0.0, 1.0
x = arcsine_rvs(10, a=a, b=b, rng=rng)
x


array([0.35102 , 0.333278, 0.957626, 0.620713, 0.864426, 0.331615,
       0.148247, 0.814071, 0.448362, 0.69346 ])

In [6]:
# PDF and CDF on [0,1]
# The grid stops eps short of each endpoint because the pdf diverges there.
eps = 1e-4
x = np.linspace(eps, 1 - eps, 1000)

# Density: arcsine (solid) against the flat uniform reference (dashed).
fig_pdf = go.Figure(
    data=[
        go.Scatter(x=x, y=arcsine_pdf(x), name="arcsine pdf"),
        go.Scatter(x=x, y=np.ones_like(x), name="uniform(0,1) pdf", line=dict(dash="dash")),
    ]
)
fig_pdf.update_layout(
    title="PDF on [0,1] (arcsine vs uniform)",
    xaxis_title="x",
    yaxis_title="density",
)
fig_pdf.show()

# Distribution function: arcsine (solid) against the uniform diagonal (dashed).
fig_cdf = go.Figure(
    data=[
        go.Scatter(x=x, y=arcsine_cdf(x), name="arcsine cdf"),
        go.Scatter(x=x, y=x, name="uniform(0,1) cdf", line=dict(dash="dash")),
    ]
)
fig_cdf.update_layout(
    title="CDF on [0,1] (arcsine vs uniform)",
    xaxis_title="x",
    yaxis_title="F(x)",
)
fig_cdf.show()


In [7]:
# Monte Carlo samples vs pdf
n = 60_000
samples = arcsine_rvs(n, rng=rng)

# Evaluation grid for the pdf overlay, defined here so the cell does not depend
# on whatever `x` a previously executed cell happened to leave in the kernel.
# It stops 1e-4 short of each endpoint because the pdf diverges there.
x = np.linspace(1e-4, 1 - 1e-4, 1000)

fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=samples,
        nbinsx=80,
        histnorm="probability density",  # normalise bars so they are comparable to a pdf
        name="samples (hist)",
        opacity=0.6,
    )
)
fig.add_trace(go.Scatter(x=x, y=arcsine_pdf(x), name="theoretical pdf", line=dict(color="black")))
fig.update_layout(
    title="Monte Carlo histogram with theoretical pdf overlay",
    xaxis_title="x",
    yaxis_title="density",
    barmode="overlay",
)
fig.show()

print("sample mean/var:", samples.mean(), samples.var())


sample mean/var: 0.5012310506354244 0.12544470668500474


In [8]:
# SciPy: pdf/cdf/rvs
# Grid stays 1e-4 inside (0, 1); the pdf is +inf exactly at the endpoints.
x = np.linspace(1e-4, 1 - 1e-4, 1000)

pdf_scipy = stats.arcsine.pdf(x)
cdf_scipy = stats.arcsine.cdf(x)

# Identity: arcsine == Beta(1/2, 1/2)
pdf_beta = stats.beta(a=0.5, b=0.5).pdf(x)

# Largest pointwise gap between the two pdf evaluations on the grid.
print("max |pdf_scipy - pdf_beta|:", np.max(np.abs(pdf_scipy - pdf_beta)))

# Sampling
# Both samplers draw from the same shared generator `rng`, one after the other,
# so the two printed arrays are different draws rather than a like-for-like check.
s_scipy = stats.arcsine.rvs(size=5, random_state=rng)
s_numpy = arcsine_rvs(5, rng=rng)
print("SciPy rvs:", s_scipy)
print("NumPy rvs:", s_numpy)

# Fitting loc/scale (note: likelihood is tricky near the endpoints)
# Data are simulated on [2, 5], i.e. loc = a = 2 and scale = b - a = 3.
data = arcsine_rvs(2_000, a=2.0, b=5.0, rng=rng)
loc_hat, scale_hat = stats.arcsine.fit(data)
print("fit loc, scale:", loc_hat, scale_hat)
print("true loc, scale:", 2.0, 3.0)


max |pdf_scipy - pdf_beta|: 3.552713678800501e-15
SciPy rvs: [0.425294 0.712055 0.864634 0.234072 0.38168 ]
NumPy rvs: [0.638791 0.463073 0.730777 0.226557 0.552068]
fit loc, scale: 2.0000041654452665 3.00059414578592
true loc, scale: 2.0 3.0


In [9]:
# Simulate symmetric +/-1 random walks and compare the fraction of time each
# path spends strictly above zero with the arcsine law on [0, 1].
n_paths = 4000
n_steps = 600

# One row per path: i.i.d. +/-1 increments, accumulated along the time axis.
steps = rng.choice([-1, 1], size=(n_paths, n_steps))
paths = np.cumsum(steps, axis=1)

# Per-path share of time steps with S_t > 0 (steps where S_t == 0 count as not positive).
frac_positive = (paths > 0).mean(axis=1)

# One-sample Kolmogorov-Smirnov test against the continuous arcsine CDF.
# NOTE(review): frac_positive only takes multiples of 1/n_steps (including exact 0s
# and 1s), while the reference CDF is continuous, so the p-value presumably reflects
# finite-n discreteness as well as sampling noise -- confirm before interpreting it.
ks = stats.kstest(frac_positive, stats.arcsine.cdf)
print("KS statistic:", ks.statistic)
print("KS p-value   :", ks.pvalue)

# Grid for the theoretical pdf overlay; stays 1e-4 inside the endpoints.
x = np.linspace(1e-4, 1 - 1e-4, 800)

fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=frac_positive,
        nbinsx=60,
        histnorm="probability density",
        name="random-walk fractions",
        opacity=0.6,
    )
)
fig.add_trace(go.Scatter(x=x, y=stats.arcsine.pdf(x), name="arcsine pdf", line=dict(color="black")))
fig.update_layout(
    title="Fraction of time positive in a symmetric random walk (simulation)",
    xaxis_title="fraction of steps with S_t > 0",
    yaxis_title="density",
    barmode="overlay",
)
fig.show()


KS statistic: 0.02575
KS p-value   : 0.009764424160149908


In [10]:
# Observed data: k successes out of n Bernoulli trials.
n, k = 20, 3

# Beta priors on the success probability, and the conjugate
# Beta(k + alpha, n - k + beta) posterior that each one produces.
prior_jeffreys, prior_uniform = stats.beta(0.5, 0.5), stats.beta(1.0, 1.0)
post_jeffreys = stats.beta(k + 0.5, n - k + 0.5)
post_uniform = stats.beta(k + 1.0, n - k + 1.0)

# Grid stays 1e-4 inside (0, 1): the Jeffreys prior density diverges at 0 and 1.
x = np.linspace(1e-4, 1 - 1e-4, 1000)

# Priors are drawn dashed, posteriors with the default solid line.
fig = go.Figure(
    data=[
        go.Scatter(x=x, y=dist.pdf(x), name=label, **style)
        for dist, label, style in [
            (prior_jeffreys, "Jeffreys prior Beta(1/2,1/2)", {"line": dict(dash="dash")}),
            (prior_uniform, "Uniform prior Beta(1,1)", {"line": dict(dash="dash")}),
            (post_jeffreys, f"Posterior (Jeffreys), k={k}, n={n}", {}),
            (post_uniform, f"Posterior (uniform), k={k}, n={n}", {}),
        ]
    ]
)

# Equal-tailed 95% credible interval under the Jeffreys posterior, marked with dotted lines.
ci_low, ci_high = post_jeffreys.ppf([0.025, 0.975])
fig.add_vline(x=ci_low, line=dict(color="black", dash="dot"))
fig.add_vline(x=ci_high, line=dict(color="black", dash="dot"))

fig.update_layout(
    title="Jeffreys (arcsine) prior and resulting posterior for a Binomial proportion",
    xaxis_title="p",
    yaxis_title="density",
)
fig.show()

print("Jeffreys posterior mean:", post_jeffreys.mean())
print("Jeffreys 95% credible interval:", (ci_low, ci_high))


Jeffreys posterior mean: 0.16666666666666666
Jeffreys 95% credible interval: (0.04413134197515587, 0.348577710858454)


In [11]:
# Prior-predictive comparison: draw p from each prior, then draw one
# Binomial(n, p) count per draw, and compare the resulting count distributions.
n = 50
m = 30_000

# m success probabilities from each prior, all taken from the shared generator `rng`.
p_arcsine = stats.arcsine.rvs(size=m, random_state=rng)
p_uniform = rng.random(size=m)

# One Binomial(n, p_i) count for every sampled p_i.
k_arcsine = rng.binomial(n, p_arcsine)
k_uniform = rng.binomial(n, p_uniform)

fig = go.Figure()
fig.add_trace(go.Histogram(x=k_arcsine, histnorm="probability", name="p ~ arcsine", opacity=0.6, nbinsx=n + 1))
fig.add_trace(go.Histogram(x=k_uniform, histnorm="probability", name="p ~ uniform", opacity=0.6, nbinsx=n + 1))
fig.update_layout(
    title=f"Counts from Binomial(n={n}, p) with different priors over p",
    xaxis_title="k (number of successes)",
    yaxis_title="probability",
    barmode="overlay",
)
fig.show()

# Empirical frequency of the two extreme outcomes (no successes / all successes).
print("P(k=0)  arcsine vs uniform:", np.mean(k_arcsine == 0), np.mean(k_uniform == 0))
print("P(k=n)  arcsine vs uniform:", np.mean(k_arcsine == n), np.mean(k_uniform == n))


P(k=0)  arcsine vs uniform: 0.08146666666666667 0.0201
P(k=n)  arcsine vs uniform: 0.07906666666666666 0.020866666666666665


In [12]:
# Numerical illustration: pdf spikes and logpdf is usually safer
# Test points: both endpoints, points 1e-12 inside each endpoint, and the midpoint.
x_test = np.array([0.0, 1e-12, 0.5, 1 - 1e-12, 1.0])
# With the print options set at the top of the notebook (precision=6, suppress=True),
# 1e-12 and 1 - 1e-12 are displayed as 0. and 1. even though they differ from the endpoints.
print("x:", x_test)
print("pdf:", stats.arcsine.pdf(x_test))
print("logpdf:", stats.arcsine.logpdf(x_test))


x: [0.  0.  0.5 1.  1. ]
pdf: [          inf 318309.886184      0.63662  318313.407023           inf]
logpdf: [      inf 12.670781 -0.451583 12.670792       inf]
