In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

import scipy
from scipy import special, stats

pio.templates.default = "plotly_white"
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

np.set_printoptions(precision=6, suppress=True)
rng = np.random.default_rng(42)

print("numpy:", np.__version__)
print("scipy:", scipy.__version__)


numpy: 1.26.2
scipy: 1.15.0


In [2]:
def _check_c(c: float) -> float:
    c = float(c)
    if not (np.isfinite(c) and c > 0):
        raise ValueError("Require a finite shape parameter c > 0.")
    return c


def rdist_logpdf(x, c: float):
    '''Log-PDF of the canonical rdist(c) on [-1, 1]. Vectorized over x.'''
    c = _check_c(c)
    a = 0.5 * c

    x = np.asarray(x, dtype=float)
    out = np.full_like(x, -np.inf, dtype=float)

    inside = (x > -1.0) & (x < 1.0)
    xi = x[inside]

    # log f(x|c) = (a-1) * log(1-x^2) - log B(1/2, a)
    out[inside] = (a - 1.0) * np.log1p(-(xi * xi)) - special.betaln(0.5, a)

    # Values at exactly x=±1 do not affect probabilities; we special-case c=2 (uniform).
    edges = np.isclose(np.abs(x), 1.0)
    if np.any(edges) and np.isclose(c, 2.0):
        out[edges] = -np.log(2.0)

    return out


def rdist_pdf(x, c: float):
    return np.exp(rdist_logpdf(x, c))


def rdist_cdf(x, c: float):
    '''CDF of the canonical rdist(c) on [-1, 1]. Vectorized over x.'''
    c = _check_c(c)
    a = 0.5 * c

    x = np.asarray(x, dtype=float)
    y = np.clip((x + 1.0) / 2.0, 0.0, 1.0)
    out = special.betainc(a, a, y)

    out = np.where(x <= -1.0, 0.0, out)
    out = np.where(x >= 1.0, 1.0, out)

    return out


def rdist_rvs_numpy(c: float, size=1, rng=None):
    '''NumPy-only sampler for rdist(c) using Beta(a,a) with a=c/2.

    Uses the identity:
        If G1,G2 ~ Gamma(a,1) iid, then G1/(G1+G2) ~ Beta(a,a).
        Then X = 2*Beta(a,a) - 1.
    '''
    c = _check_c(c)
    a = 0.5 * c

    rng = np.random.default_rng() if rng is None else rng
    g1 = rng.gamma(shape=a, scale=1.0, size=size)
    g2 = rng.gamma(shape=a, scale=1.0, size=size)

    y = g1 / (g1 + g2)
    return 2.0 * y - 1.0


In [3]:
def rdist_mean(c: float) -> float:
    _check_c(c)
    return 0.0


def rdist_var(c):
    c = np.asarray(c, dtype=float)
    if np.any(~np.isfinite(c)) or np.any(c <= 0):
        raise ValueError("Require finite c > 0.")
    return 1.0 / (c + 1.0)


def rdist_excess_kurtosis(c: float) -> float:
    c = _check_c(c)
    return -6.0 / (c + 3.0)


def rdist_even_moment(n: int, c: float) -> float:
    '''Return E[X^(2n)] for X ~ rdist(c) (n >= 0).'''
    c = _check_c(c)
    n = int(n)
    if n < 0:
        raise ValueError("Require n >= 0.")
    a = 0.5 * c
    return float(np.exp(special.betaln(n + 0.5, a) - special.betaln(0.5, a)))


def rdist_mgf(t, c: float):
    '''MGF M_X(t) using the Bessel-I representation (real, even in t).'''
    c = _check_c(c)
    a = 0.5 * c
    nu = a - 0.5

    t = np.asarray(t, dtype=float)
    out = np.ones_like(t, dtype=float)

    mask = t != 0
    abs_t = np.abs(t[mask])
    out[mask] = special.gamma(a + 0.5) * (2.0 / abs_t) ** nu * special.iv(nu, abs_t)

    return out


def rdist_cf(w, c: float):
    '''Characteristic function φ_X(ω) using the Bessel-J representation (real, even in ω).'''
    c = _check_c(c)
    a = 0.5 * c
    nu = a - 0.5

    w = np.asarray(w, dtype=float)
    out = np.ones_like(w, dtype=float)

    mask = w != 0
    abs_w = np.abs(w[mask])
    out[mask] = special.gamma(a + 0.5) * (2.0 / abs_w) ** nu * special.jv(nu, abs_w)

    return out


def rdist_entropy(c: float) -> float:
    '''Differential entropy of rdist(c) on [-1,1].'''
    c = _check_c(c)
    a = 0.5 * c
    return float(
        np.log(2.0)
        + special.betaln(a, a)
        - 2.0 * (a - 1.0) * special.digamma(a)
        + (2.0 * a - 2.0) * special.digamma(2.0 * a)
    )


# Quick numerical cross-check against SciPy
c0 = 5.0
x0 = np.linspace(-0.999, 0.999, 7)

print("max |pdf - scipy|:", np.max(np.abs(rdist_pdf(x0, c0) - stats.rdist.pdf(x0, c0))))
print("max |cdf - scipy|:", np.max(np.abs(rdist_cdf(x0, c0) - stats.rdist.cdf(x0, c0))))

print("mean,var,skew,kurt(excess) (formula):", rdist_mean(c0), rdist_var(c0), 0.0, rdist_excess_kurtosis(c0))
print("mean,var,skew,kurt(excess) (scipy):  ", stats.rdist.stats(c0, moments="mvsk"))

print("entropy (formula):", rdist_entropy(c0))
print("entropy (scipy):  ", stats.rdist.entropy(c0))

# Monte Carlo check
n_mc = 200_000
s = rdist_rvs_numpy(c0, size=n_mc, rng=rng)
print("MC mean/var:", s.mean(), s.var(ddof=0))
print("MGF(t=1) formula vs MC:", float(rdist_mgf(1.0, c0)), float(np.mean(np.exp(1.0 * s))))


max |pdf - scipy|: 2.220446049250313e-16
max |cdf - scipy|: 0.0
mean,var,skew,kurt(excess) (formula): 0.0 0.16666666666666666 0.0 -0.75
mean,var,skew,kurt(excess) (scipy):   (0.0, 0.16666666666666663, 0.0, -0.7499999999999991)
entropy (formula): 0.49334217451750995
entropy (scipy):   0.4933421745175011
MC mean/var: -0.0004876065533899298 0.1668664435284834
MGF(t=1) formula vs MC: 1.0859813581363065 1.0855688443043081


In [4]:
# How the PDF changes with c
cs = [0.5, 1.0, 2.0, 3.0, 10.0]
eps = 1e-4
x = np.linspace(-1 + eps, 1 - eps, 1500)

fig = go.Figure()
for c in cs:
    fig.add_trace(go.Scatter(x=x, y=rdist_pdf(x, c), name=f"c={c}"))

fig.update_layout(
    title="R distribution PDF for different c",
    xaxis_title="x",
    yaxis_title="density",
)
fig.show()

# Variance as a function of c
c_grid = np.linspace(0.2, 20, 200)
fig2 = px.line(
    x=c_grid,
    y=rdist_var(c_grid),
    labels={"x": "c", "y": "Var(X)"},
    title="Variance Var(X)=1/(c+1)",
)
fig2.show()


In [5]:
def rdist_loglik(c: float, x: np.ndarray) -> float:
    c = _check_c(c)
    x = np.asarray(x, dtype=float)
    if np.any((x <= -1.0) | (x >= 1.0)):
        return -np.inf
    return float(np.sum(rdist_logpdf(x, c)))


def rdist_mom_c(x: np.ndarray) -> float:
    '''Method-of-moments estimate from Var(X)=1/(c+1).'''
    x = np.asarray(x, dtype=float)
    v = float(np.var(x, ddof=0))
    if v <= 0:
        return np.inf
    return max(1e-8, 1.0 / v - 1.0)


# Compare MOM vs SciPy's MLE fit on synthetic data
c_true = 8.0
n = 4000
x = stats.rdist.rvs(c_true, size=n, random_state=rng)

c_mom = rdist_mom_c(x)
(c_mle, loc_mle, scale_mle) = stats.rdist.fit(x, floc=0, fscale=1)

print("true c:", c_true)
print("MOM c :", c_mom)
print("MLE c :", c_mle)

print("loglik(c_true):", rdist_loglik(c_true, x))
print("loglik(c_mom): ", rdist_loglik(c_mom, x))
print("loglik(c_mle): ", rdist_loglik(c_mle, x))


true c: 8.0
MOM c : 7.837035002401219
MLE c : 7.830761718750015
loglik(c_true): -1271.0385836930934
loglik(c_mom):  -1270.5214543060163
loglik(c_mle):  -1270.5207357833792


In [6]:
# Sampling demo
c_demo = 1.5
samples = rdist_rvs_numpy(c_demo, size=10, rng=rng)
print(samples)

# Basic sanity: samples lie in [-1,1]
print("min/max:", samples.min(), samples.max())


[-0.687984 -0.570477  0.755541 -0.147472  0.250969 -0.816589  0.7435
 -0.996741 -0.828464  0.881516]
min/max: -0.9967410604892193 0.8815164217779459


In [7]:
# PDF and CDF for a chosen c
c_vis = 5.0
x = np.linspace(-1 + 1e-4, 1 - 1e-4, 1500)

fig_pdf = go.Figure()
fig_pdf.add_trace(go.Scatter(x=x, y=rdist_pdf(x, c_vis), name="pdf (formula)"))
fig_pdf.add_trace(go.Scatter(x=x, y=stats.rdist.pdf(x, c_vis), name="pdf (scipy)", line=dict(dash="dash")))
fig_pdf.update_layout(title=f"PDF for c={c_vis}", xaxis_title="x", yaxis_title="density")
fig_pdf.show()

fig_cdf = go.Figure()
fig_cdf.add_trace(go.Scatter(x=x, y=rdist_cdf(x, c_vis), name="cdf (formula)"))
fig_cdf.add_trace(go.Scatter(x=x, y=stats.rdist.cdf(x, c_vis), name="cdf (scipy)", line=dict(dash="dash")))
fig_cdf.update_layout(title=f"CDF for c={c_vis}", xaxis_title="x", yaxis_title="F(x)")
fig_cdf.show()

# Monte Carlo histogram
n_hist = 80_000
s = rdist_rvs_numpy(c_vis, size=n_hist, rng=rng)

fig_hist = go.Figure()
fig_hist.add_trace(
    go.Histogram(
        x=s,
        nbinsx=90,
        histnorm="probability density",
        name="samples (hist)",
        opacity=0.6,
    )
)
fig_hist.add_trace(go.Scatter(x=x, y=rdist_pdf(x, c_vis), name="theoretical pdf", line=dict(color="black")))
fig_hist.update_layout(
    title=f"Monte Carlo samples vs pdf (c={c_vis})",
    xaxis_title="x",
    yaxis_title="density",
    barmode="overlay",
)
fig_hist.show()


In [8]:
# Basic SciPy usage
c = 3.0
x = np.linspace(-0.999, 0.999, 9)

print("pdf:", stats.rdist.pdf(x, c))
print("cdf:", stats.rdist.cdf(x, c))
print("rvs:", stats.rdist.rvs(c, size=5, random_state=rng))

# Fit example: recover c when loc/scale are known
c_true = 6.0
data = stats.rdist.rvs(c_true, size=3000, random_state=rng)
(c_hat, loc_hat, scale_hat) = stats.rdist.fit(data, floc=0, fscale=1)
print("true c:", c_true)
print("fit  c:", c_hat)

# Identity check: (X+1)/2 ~ Beta(c/2, c/2)
y = (data + 1) / 2
pdf_beta = stats.beta.pdf((x + 1) / 2, a=c_true / 2, b=c_true / 2) / 2  # Jacobian factor
pdf_r = stats.rdist.pdf(x, c_true)
print("max |pdf_r - transformed_beta_pdf|:", np.max(np.abs(pdf_r - pdf_beta)))


pdf: [0.028463 0.421625 0.551513 0.616446 0.63662  0.616446 0.551513 0.421625
 0.028463]
cdf: [0.000019 0.072463 0.195777 0.342673 0.5      0.657327 0.804223 0.927537
 0.999981]
rvs: [ 0.022809 -0.961042 -0.883764 -0.258481 -0.646669]
true c: 6.0
fit  c: 6.054687500000011
max |pdf_r - transformed_beta_pdf|: 3.3306690738754696e-16


In [9]:
# 10.1 Likelihood ratio test: H0 c=2 (uniform) vs H1 c!=2

def rdist_loglik_scipy(c: float, x: np.ndarray) -> float:
    return float(np.sum(stats.rdist.logpdf(x, c)))


c_true = 5.0
n = 2000
x = stats.rdist.rvs(c_true, size=n, random_state=rng)

c_hat, _, _ = stats.rdist.fit(x, floc=0, fscale=1)

ll_hat = rdist_loglik_scipy(c_hat, x)
ll_null = rdist_loglik_scipy(2.0, x)

lr_stat = 2.0 * (ll_hat - ll_null)
p_value = stats.chi2.sf(lr_stat, df=1)

print("true c:", c_true)
print("MLE  c:", c_hat)
print("LR stat:", lr_stat)
print("approx p-value (chi^2_1):", p_value)


true c: 5.0
MLE  c: 5.0624023437500085
LR stat: 817.0196089693518
approx p-value (chi^2_1): 1.0758209264796379e-179


In [10]:
# 10.2 Bayesian modeling (simple grid posterior)
# Prior: rho ~ rdist(c_prior)
# Likelihood: y | rho ~ Normal(rho, sigma^2)

r = np.linspace(-1 + 1e-4, 1 - 1e-4, 2000)

y_obs = 0.55
sigma = 0.12

fig = go.Figure()

for c_prior in [1.0, 5.0, 20.0]:
    prior = rdist_pdf(r, c_prior)
    like = stats.norm.pdf(y_obs, loc=r, scale=sigma)
    post_unnorm = prior * like
    post = post_unnorm / np.trapz(post_unnorm, r)

    fig.add_trace(go.Scatter(x=r, y=prior, name=f"prior c={c_prior}", line=dict(dash="dash")))
    fig.add_trace(go.Scatter(x=r, y=post, name=f"posterior c={c_prior}"))

fig.update_layout(
    title=f"Grid posterior for rho in [-1,1] (y={y_obs}, sigma={sigma})",
    xaxis_title="rho",
    yaxis_title="density",
)
fig.show()


In [11]:
# 10.3 Generative modeling: coordinate of a random direction on S^{d-1}

d = 10
c_sphere = d - 1

n = 120_000
z = rng.normal(size=(n, d))
z = z / np.linalg.norm(z, axis=1, keepdims=True)
x1 = z[:, 0]

x_grid = np.linspace(-1 + 1e-4, 1 - 1e-4, 1500)

fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=x1,
        nbinsx=90,
        histnorm="probability density",
        name=f"sphere coord (d={d})",
        opacity=0.6,
    )
)
fig.add_trace(
    go.Scatter(
        x=x_grid,
        y=stats.rdist.pdf(x_grid, c_sphere),
        name=f"rdist(c={c_sphere}) pdf",
        line=dict(color="black"),
    )
)
fig.update_layout(
    title="First coordinate of a random unit vector matches rdist",
    xaxis_title="x1",
    yaxis_title="density",
    barmode="overlay",
)
fig.show()


In [12]:
# Numerical stability: pdf vs logpdf near the boundary for large c
c_big = 50.0
x_edge = np.array([0.0, 0.9, 0.99, 0.999, 0.9999])

pdf_direct = rdist_pdf(x_edge, c_big)
logpdf = rdist_logpdf(x_edge, c_big)

print("x:", x_edge)
print("pdf:", pdf_direct)
print("logpdf:", logpdf)
print("exp(logpdf):", np.exp(logpdf))


x: [0.     0.9    0.99   0.999  0.9999]
pdf: [2.806879 0.       0.       0.       0.      ]
logpdf: [   1.032073  -38.825476  -92.97678  -148.130524 -203.381763]
exp(logpdf): [2.806879 0.       0.       0.       0.      ]
