In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

from scipy import optimize, special, stats
import scipy

pio.templates.default = "plotly_white"
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

np.set_printoptions(precision=6, suppress=True)
rng = np.random.default_rng(42)

# Reproducibility / environment info
import sys
import plotly

print("python:", sys.version.split()[0])
print("numpy :", np.__version__)
print("scipy :", scipy.__version__)
print("plotly:", plotly.__version__)


python: 3.12.9
numpy : 1.26.2
scipy : 1.15.0
plotly: 6.5.2


In [2]:
def _validate_c(c: float) -> float:
    c = float(c)
    if not np.isfinite(c) or c <= 0:
        raise ValueError(f"c must be finite and > 0, got {c!r}")
    return c


def bradford_pdf(x: np.ndarray, c: float) -> np.ndarray:
    '''Bradford(c) PDF on [0,1].'''
    c = _validate_c(c)
    x = np.asarray(x, dtype=float)
    L = np.log1p(c)
    out = np.zeros_like(x)
    mask = (0.0 <= x) & (x <= 1.0)
    out[mask] = c / (L * (1.0 + c * x[mask]))
    return out


def bradford_cdf(x: np.ndarray, c: float) -> np.ndarray:
    '''Bradford(c) CDF on [0,1].'''
    c = _validate_c(c)
    x = np.asarray(x, dtype=float)
    L = np.log1p(c)
    out = np.zeros_like(x)
    out[x >= 1.0] = 1.0
    mask = (0.0 <= x) & (x < 1.0)
    out[mask] = np.log1p(c * x[mask]) / L
    return out


def bradford_ppf(u: np.ndarray, c: float) -> np.ndarray:
    '''Bradford(c) quantile function (inverse CDF) for u in [0,1].'''
    c = _validate_c(c)
    u = np.asarray(u, dtype=float)
    if np.any((u < 0) | (u > 1)):
        raise ValueError("u must be in [0,1]")

    # Q(u) = ((1+c)^u - 1) / c = expm1(u * log1p(c)) / c
    return np.expm1(u * np.log1p(c)) / c


In [3]:
# Sanity checks: PDF integrates to ~1 and matches SciPy
c = 7.5
x_grid = np.linspace(0, 1, 20001)

pdf_vals = bradford_pdf(x_grid, c)
area = np.trapz(pdf_vals, x_grid)
print("∫ pdf dx ≈", area)

dist = stats.bradford(c)
max_abs_pdf_diff = np.max(np.abs(pdf_vals - dist.pdf(x_grid)))
max_abs_cdf_diff = np.max(np.abs(bradford_cdf(x_grid, c) - dist.cdf(x_grid)))
print("max |pdf - scipy|:", max_abs_pdf_diff)
print("max |cdf - scipy|:", max_abs_cdf_diff)


∫ pdf dx ≈ 1.0000000054000913
max |pdf - scipy|: 8.881784197001252e-16
max |cdf - scipy|: 2.220446049250313e-16


In [4]:
def bradford_raw_moments_1_to_4(c: float) -> tuple[float, float, float, float]:
    '''Return (E[X], E[X^2], E[X^3], E[X^4]) for X ~ Bradford(c) on [0,1].'''
    c = _validate_c(c)
    L = np.log1p(c)

    ex1 = 1.0 / L - 1.0 / c
    ex2 = 1.0 / (2.0 * L) - 1.0 / (c * L) + 1.0 / (c**2)
    ex3 = 1.0 / (3.0 * L) - 1.0 / (2.0 * c * L) + 1.0 / (c**2 * L) - 1.0 / (c**3)
    ex4 = (
        1.0 / (4.0 * L)
        - 1.0 / (3.0 * c * L)
        + 1.0 / (2.0 * c**2 * L)
        - 1.0 / (c**3 * L)
        + 1.0 / (c**4)
    )
    return ex1, ex2, ex3, ex4


def bradford_mean_var(c: float) -> tuple[float, float]:
    c = _validate_c(c)
    L = np.log1p(c)
    mean = 1.0 / L - 1.0 / c
    var = ((c + 2.0) * L - 2.0 * c) / (2.0 * c * L**2)
    return mean, var


def bradford_skew_kurtosis_excess(c: float) -> tuple[float, float]:
    '''Return (skewness, excess kurtosis) via the first four raw moments.'''
    ex1, ex2, ex3, ex4 = bradford_raw_moments_1_to_4(c)

    mean = ex1
    var = ex2 - mean**2
    sigma = np.sqrt(var)

    mu3 = ex3 - 3 * mean * ex2 + 2 * mean**3
    mu4 = ex4 - 4 * mean * ex3 + 6 * mean**2 * ex2 - 3 * mean**4

    skew = mu3 / sigma**3
    excess_kurt = mu4 / var**2 - 3.0
    return skew, excess_kurt


def bradford_entropy(c: float) -> float:
    c = _validate_c(c)
    L = np.log1p(c)
    return np.log(L / c) + 0.5 * L


def bradford_mgf(t: np.ndarray, c: float) -> np.ndarray:
    '''MGF M(t) using SciPy's Ei implementation (works for real or complex t).'''
    c = _validate_c(c)
    t = np.asarray(t)
    L = np.log1p(c)

    # Ei(0) = -inf, but the limit M(0) = 1; handle t=0 explicitly.
    out = np.empty_like(t, dtype=np.result_type(t, float))
    mask0 = t == 0
    out[mask0] = 1.0
    mask = ~mask0
    if np.any(mask):
        tt = t[mask]
        out[mask] = np.exp(-tt / c) / L * (special.expi(tt * (1.0 + c) / c) - special.expi(tt / c))
    return out

c = 7.5
mean, var = bradford_mean_var(c)
skew, excess_kurt = bradford_skew_kurtosis_excess(c)

print("mean   :", mean)
print("var    :", var)
print("skew   :", skew)
print("ex.kurt:", excess_kurt)
print("entropy:", bradford_entropy(c))

# Compare to SciPy's built-in stats
m, v, s, k = stats.bradford(c).stats(moments="mvsk")
print("
SciPy mvsk:", float(m), float(v), float(s), float(k))

# Quick check of MGF at t=0 (should be 1)
print("
M(0) ≈", bradford_mgf(0.0, c))

SyntaxError: unterminated string literal (detected at line 77) (1927523450.py, line 77)

In [5]:
# Visualize how the PDF and CDF change with c
c_values = [0.2, 1.0, 5.0, 20.0]
x = np.linspace(0, 1, 400)

fig_pdf = go.Figure()
fig_cdf = go.Figure()

for c in c_values:
    fig_pdf.add_trace(go.Scatter(x=x, y=bradford_pdf(x, c), mode="lines", name=f"c={c}"))
    fig_cdf.add_trace(go.Scatter(x=x, y=bradford_cdf(x, c), mode="lines", name=f"c={c}"))

fig_pdf.update_layout(title="Bradford PDF on [0,1]", xaxis_title="x", yaxis_title="f(x)")
fig_cdf.update_layout(title="Bradford CDF on [0,1]", xaxis_title="x", yaxis_title="F(x)")

fig_pdf.show()
fig_cdf.show()


In [6]:
# How mean/variance evolve with c
c_grid = np.logspace(-2, 2, 250)  # 0.01 to 100
means = np.array([bradford_mean_var(c)[0] for c in c_grid])
vars_ = np.array([bradford_mean_var(c)[1] for c in c_grid])

fig = go.Figure()
fig.add_trace(go.Scatter(x=c_grid, y=means, mode="lines", name="mean"))
fig.add_trace(go.Scatter(x=c_grid, y=vars_, mode="lines", name="variance", yaxis="y2"))

fig.update_layout(
    title="Mean and variance vs c",
    xaxis_title="c (log scale)",
    xaxis_type="log",
    yaxis=dict(title="mean"),
    yaxis2=dict(title="variance", overlaying="y", side="right"),
)
fig.show()


NameError: name 'bradford_mean_var' is not defined

In [7]:
def bradford_loglik(c: float, x: np.ndarray) -> float:
    c = _validate_c(c)
    x = np.asarray(x, dtype=float)
    if np.any((x < 0) | (x > 1)):
        raise ValueError("All x must be in [0,1] for the standard Bradford distribution")

    L = np.log1p(c)
    return x.size * np.log(c) - x.size * np.log(L) - np.sum(np.log1p(c * x))


def bradford_mle_c(x: np.ndarray, c_init: float = 1.0) -> float:
    x = np.asarray(x, dtype=float)

    def nll(log_c: float) -> float:
        c = np.exp(log_c)
        return -bradford_loglik(c, x)

    res = optimize.minimize(nll, x0=np.log(c_init), method="BFGS")
    if not res.success:
        raise RuntimeError(f"MLE optimization failed: {res.message}")
    return float(np.exp(res.x[0]))


# Demonstrate MLE recovery on simulated data
c_true = 8.0
n = 2000
x_sim = bradford_ppf(rng.random(n), c_true)

c_hat = bradford_mle_c(x_sim, c_init=5.0)
print("c_true:", c_true)
print("c_hat :", c_hat)

# SciPy fit: fixing loc=0, scale=1 focuses fit on shape parameter
c_scipy, loc_scipy, scale_scipy = stats.bradford.fit(x_sim, floc=0, fscale=1)
print("c_fit (scipy):", c_scipy)


c_true: 8.0
c_hat : 7.768058113979663
c_fit (scipy): 7.768066406250014



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)



In [8]:
def bradford_rvs_numpy(c: float, size: int | tuple[int, ...], rng: np.random.Generator) -> np.ndarray:
    c = _validate_c(c)
    u = rng.random(size)
    return np.expm1(u * np.log1p(c)) / c


c = 10.0
samples = bradford_rvs_numpy(c, size=200_000, rng=rng)

mean_theory, var_theory = bradford_mean_var(c)
print("empirical mean:", samples.mean())
print("theory mean   :", mean_theory)
print("empirical var :", samples.var())
print("theory var    :", var_theory)


NameError: name 'bradford_mean_var' is not defined

In [9]:
# Histogram (Monte Carlo) + theoretical PDF overlay
c = 10.0
n = 50_000
x = bradford_rvs_numpy(c, size=n, rng=rng)

x_plot = np.linspace(0, 1, 400)

hist = px.histogram(
    x,
    nbins=60,
    histnorm="probability density",
    opacity=0.6,
    title=f"Monte Carlo samples vs PDF (c={c})",
)

hist.add_trace(go.Scatter(x=x_plot, y=bradford_pdf(x_plot, c), mode="lines", name="theoretical pdf"))
hist.update_layout(xaxis_title="x", yaxis_title="density")
hist.show()


In [10]:
# Empirical CDF vs theoretical CDF
c = 10.0
x = bradford_rvs_numpy(c, size=30_000, rng=rng)

x_sorted = np.sort(x)
emp_cdf = np.arange(1, x_sorted.size + 1) / x_sorted.size

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_sorted, y=emp_cdf, mode="lines", name="empirical CDF"))

x_plot = np.linspace(0, 1, 400)
fig.add_trace(go.Scatter(x=x_plot, y=bradford_cdf(x_plot, c), mode="lines", name="theoretical CDF"))

fig.update_layout(title=f"Empirical vs theoretical CDF (c={c})", xaxis_title="x", yaxis_title="CDF")
fig.show()


In [11]:
c = 4.0
x = np.linspace(0, 1, 6)

dist = stats.bradford(c)
print("pdf:", dist.pdf(x))
print("cdf:", dist.cdf(x))
print("rvs:", dist.rvs(size=5, random_state=rng))

# Fit on synthetic data (standard support): fix loc and scale
x_sim = dist.rvs(size=5000, random_state=rng)

c_hat, loc_hat, scale_hat = stats.bradford.fit(x_sim, floc=0, fscale=1)
print("
true c:", c)
print("fit  c:", c_hat)


SyntaxError: unterminated string literal (detected at line 13) (4288739442.py, line 13)

In [12]:
# A) Likelihood ratio test example: H0: c = c0
from scipy.stats import chi2

c0 = 6.0
c_true = 10.0
n = 3000

x = stats.bradford(c_true).rvs(size=n, random_state=rng)

c_hat = bradford_mle_c(x, c_init=c0)
ll_hat = bradford_loglik(c_hat, x)
ll_0 = bradford_loglik(c0, x)

lrt = 2 * (ll_hat - ll_0)
p_value = 1 - chi2.cdf(lrt, df=1)

print("c0   :", c0)
print("c_hat:", c_hat)
print("LRT statistic:", lrt)
print("p-value ~", p_value)



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)


Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)



RuntimeError: MLE optimization failed: Desired error not necessarily achieved due to precision loss.

In [13]:
# B) Bayesian modeling: Bradford prior for p in a Binomial model
# Observations: k successes out of n
n = 50
k = 3

c_prior = 12.0  # prior mass concentrated near 0

p_grid = np.linspace(1e-6, 1 - 1e-6, 4000)
prior = bradford_pdf(p_grid, c_prior)

# Binomial likelihood up to proportionality: p^k (1-p)^(n-k)
log_like = k * np.log(p_grid) + (n - k) * np.log1p(-p_grid)
like = np.exp(log_like - log_like.max())

posterior_unnorm = prior * like
posterior = posterior_unnorm / np.trapz(posterior_unnorm, p_grid)

post_mean = np.trapz(p_grid * posterior, p_grid)
post_map = p_grid[np.argmax(posterior)]

print("posterior mean:", post_mean)
print("posterior MAP :", post_map)

fig = go.Figure()
fig.add_trace(go.Scatter(x=p_grid, y=prior / np.trapz(prior, p_grid), mode="lines", name="prior (normalized)"))
fig.add_trace(go.Scatter(x=p_grid, y=posterior, mode="lines", name="posterior"))
fig.update_layout(
    title=f"Bradford prior (c={c_prior}) updated by Binomial(n={n}, k={k})",
    xaxis_title="p",
    yaxis_title="density",
)
fig.show()


posterior mean: 0.06932741764435588
posterior MAP : 0.05276408527131783


In [14]:
# C) Generative modeling: placing Bradford on an arbitrary interval [a, b]
a, b = 2.0, 7.0
c = 8.0

# Sample X on [0,1], then map to [a,b]
x = stats.bradford(c).rvs(size=50_000, random_state=rng)
y = a + (b - a) * x

fig = px.histogram(y, nbins=60, histnorm="probability density", title=f"Bradford(c={c}) scaled to [{a}, {b}]")
fig.update_layout(xaxis_title="y", yaxis_title="density")
fig.show()
