In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

from scipy import stats, special

# Plotly renderer (CKC convention): take PLOTLY_RENDERER from the environment,
# falling back to "notebook" when it is not set
pio.renderers.default = os.getenv("PLOTLY_RENDERER", "notebook")

# Reproducibility: one seeded generator shared by the stochastic cells
rng = np.random.default_rng(seed=7)

# Array display: six decimals, suppress scientific notation for small values
np.set_printoptions(suppress=True, precision=6)


In [2]:
def _check_ab(a: float, b: float) -> None:
    a = float(a)
    b = float(b)
    if not (np.isfinite(a) and np.isfinite(b)):
        raise ValueError('a and b must be finite.')
    if a <= 0:
        raise ValueError('a must be > 0.')
    if b <= a:
        raise ValueError('b must be > a.')


def loguniform_pdf(x: np.ndarray, a: float, b: float) -> np.ndarray:
    '''Density of LogUniform(a,b): 1/(x*log(b/a)) for a <= x <= b, else 0.

    The bounds are validated by _check_ab; x is converted to a float array and
    the result has the same shape.
    '''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)
    width = np.log(b) - np.log(a)  # log(b/a) without forming b/a

    density = np.zeros_like(x, dtype=float)
    inside = (a <= x) & (x <= b)
    density[inside] = 1.0 / (x[inside] * width)
    return density


def loguniform_logpdf(x: np.ndarray, a: float, b: float) -> np.ndarray:
    '''Log-density of LogUniform(a,b): -log(x) - log(log(b/a)) on [a,b], -inf elsewhere.'''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)
    width = np.log(b) - np.log(a)  # log(b/a) without forming b/a

    # Start from "outside the support" everywhere, then fill in the support
    result = np.full_like(x, fill_value=-np.inf, dtype=float)
    inside = (a <= x) & (x <= b)
    result[inside] = -np.log(x[inside]) - np.log(width)
    return result


def loguniform_cdf(x: np.ndarray, a: float, b: float) -> np.ndarray:
    '''CDF of LogUniform(a,b): 0 below a, (log x - log a)/log(b/a) on [a,b), 1 from b upward.'''
    _check_ab(a, b)
    x = np.asarray(x, dtype=float)
    log_a = np.log(a)
    width = np.log(b) - log_a

    result = np.zeros_like(x, dtype=float)

    # Interior of the support: linear in log x
    interior = (a <= x) & (x < b)
    result[interior] = (np.log(x[interior]) - log_a) / width

    # At or beyond the upper bound all mass has accumulated
    result[b <= x] = 1.0
    return result


def loguniform_ppf(u: np.ndarray, a: float, b: float) -> np.ndarray:
    '''Inverse CDF of LogUniform(a,b): exp(log a + u*(log b - log a)).

    Raises ValueError if any entry of u lies outside [0,1].
    '''
    _check_ab(a, b)
    u = np.asarray(u, dtype=float)
    out_of_range = (u < 0) | (u > 1)
    if out_of_range.any():
        raise ValueError('u must be in [0,1].')

    # Interpolate linearly between the log-bounds, then map back
    lo, hi = np.log(a), np.log(b)
    return np.exp(lo + u * (hi - lo))


# Quick sanity check: PDF integrates to ~1
a0, b0 = 1e-2, 1e2
xgrid = np.geomspace(a0, b0, 200_000)

# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
# pick whichever this NumPy provides so the cell runs on both.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

area = _trapezoid(loguniform_pdf(xgrid, a0, b0), xgrid)

# Fail loudly on Restart & Run All if the density stops integrating to one
np.testing.assert_allclose(area, 1.0, rtol=1e-6)
area


1.0000000003534637

In [3]:
def loguniform_raw_moment(k: float, a: float, b: float) -> float:
    '''Raw moment E[X^k] for X~LogUniform(a,b).

    Closed form: E[X^k] = (b^k - a^k) / (k*log(b/a)) for k != 0, and 1 for k == 0.
    Powers are formed as exp(k*log(.)), so (b/a) is never computed directly.

    Parameters
    ----------
    k : order of the moment (converted with float()).
    a, b : support bounds, validated by _check_ab.

    Returns
    -------
    The k-th raw moment.

    Raises
    ------
    ValueError if _check_ab rejects the bounds.
    '''
    _check_ab(a, b)
    k = float(k)
    if k == 0:
        return 1.0

    log_a, log_b = np.log(a), np.log(b)
    L = log_b - log_a
    kL = k * L

    if abs(kL) < 1.0:
        # Here b^k and a^k nearly coincide, so subtracting them directly loses
        # digits to cancellation. Use b^k - a^k = a^k * (e^{kL} - 1) and let
        # expm1 evaluate the small factor accurately.
        return np.exp(k * log_a) * np.expm1(kL) / kL

    # For larger |kL| the direct difference is accurate, and it avoids the
    # overflow expm1(kL) would hit when b/a is astronomically large.
    return (np.exp(k * log_b) - np.exp(k * log_a)) / kL


def loguniform_entropy(a: float, b: float) -> float:
    '''Differential entropy (nats): midpoint of the log-bounds plus log of the log-width.'''
    _check_ab(a, b)
    lo, hi = np.log(a), np.log(b)
    return 0.5 * (lo + hi) + np.log(hi - lo)


def loguniform_mgf(t: np.ndarray, a: float, b: float):
    '''MGF M(t)=E[e^{tX}] via the exponential integral Ei.

    For t != 0, M(t) = (Ei(t*b) - Ei(t*a)) / log(b/a); M(0) is set to 1.

    Parameters
    ----------
    t : scalar or array of real arguments (converted to a float array).
    a, b : support bounds, validated by _check_ab.

    Returns
    -------
    A Python scalar when t is 0-dimensional, otherwise an array shaped like t.

    Raises
    ------
    ValueError if _check_ab rejects the bounds.
    '''
    _check_ab(a, b)
    t = np.asarray(t, dtype=float)
    L = np.log(b) - np.log(a)

    # Work on an at-least-1-D view so boolean masking also covers scalar t.
    t_flat = np.atleast_1d(t)

    # M(0) = 1. Ei(0) = -inf, so evaluating the closed form at t == 0 yields
    # inf - inf = nan plus a RuntimeWarning; evaluate it only where t != 0.
    out = np.ones_like(t_flat)
    nonzero = t_flat != 0
    t_nz = t_flat[nonzero]
    out[nonzero] = (special.expi(t_nz * b) - special.expi(t_nz * a)) / L

    out = out.reshape(t.shape)
    return out.item() if out.ndim == 0 else out


def loguniform_cf(t: np.ndarray, a: float, b: float):
    '''Characteristic function φ(t)=E[e^{itX}] via Ei.

    For t != 0, φ(t) = (Ei(i*t*b) - Ei(i*t*a)) / log(b/a); φ(0) is set to 1.

    Parameters
    ----------
    t : scalar or array of real arguments (converted to a float array).
    a, b : support bounds, validated by _check_ab.

    Returns
    -------
    A Python complex when t is 0-dimensional, otherwise a complex array
    shaped like t.

    Raises
    ------
    ValueError if _check_ab rejects the bounds.
    '''
    _check_ab(a, b)
    t = np.asarray(t, dtype=float)
    L = np.log(b) - np.log(a)

    # Work on an at-least-1-D view so boolean masking also covers scalar t.
    t_flat = np.atleast_1d(t)

    # φ(0) = 1. Ei is singular at 0, so evaluating the closed form at t == 0
    # yields nan (and a RuntimeWarning); evaluate it only where t != 0.
    out = np.ones(t_flat.shape, dtype=complex)
    nonzero = t_flat != 0
    z = 1j * t_flat[nonzero]
    out[nonzero] = (special.expi(z * b) - special.expi(z * a)) / L

    out = out.reshape(t.shape)
    return out.item() if out.ndim == 0 else out


def loguniform_moments(a: float, b: float) -> dict:
    '''Summary of LogUniform(a,b) as a dict.

    Keys: 'mean', 'var', 'skew', 'excess_kurtosis', 'median', 'entropy', plus
    'mgf' and 'cf', which are one-argument callables bound to these a and b.
    '''
    _check_ab(a, b)

    # Raw moments E[X^k] for k = 1..4
    m1, m2, m3, m4 = (loguniform_raw_moment(order, a, b) for order in (1, 2, 3, 4))

    # Central moments expressed through the raw ones
    variance = m2 - m1**2
    third_central = m3 - 3 * m1 * m2 + 2 * m1**3
    fourth_central = m4 - 4 * m1 * m3 + 6 * (m1**2) * m2 - 3 * m1**4

    def mgf(t):
        return loguniform_mgf(t, a, b)

    def cf(t):
        return loguniform_cf(t, a, b)

    return {
        'mean': m1,
        'var': variance,
        'skew': third_central / (variance ** 1.5),
        'excess_kurtosis': fourth_central / (variance**2) - 3.0,
        'median': np.sqrt(a * b),
        'entropy': loguniform_entropy(a, b),
        'mgf': mgf,
        'cf': cf,
    }


m = loguniform_moments(a0, b0)
# Show only the numeric entries; the MGF/CF callables have no useful repr
{name: value for name, value in m.items() if name not in ('mgf', 'cf')}


{'mean': 10.85627631137654,
 'var': 425.00936160142885,
 'skew': 2.4046918148882312,
 'excess_kurtosis': 5.220825298069535,
 'median': 1.0,
 'entropy': 2.2203268063678467}

In [4]:
# Monte Carlo check (using SciPy's sampler)
n = 200_000
samples_scipy = stats.loguniform(a0, b0).rvs(size=n, random_state=rng)

# Population-style (biased) sample statistics
mc_mean = np.mean(samples_scipy)
mc_var = np.var(samples_scipy, ddof=0)
mc_skew = stats.skew(samples_scipy, bias=True)
mc_excess_kurt = stats.kurtosis(samples_scipy, fisher=True, bias=True)

# MGF at a couple of small t (avoid overflow from e^{tX} when b is large)
t1, t2 = 0.05, -0.05
mc_mgf_t1 = np.exp(t1 * samples_scipy).mean()
mc_mgf_t2 = np.exp(t2 * samples_scipy).mean()

# (theory, Monte Carlo) pairs, flattened into one tuple
(
    m['mean'], mc_mean,
    m['var'], mc_var,
    m['skew'], mc_skew,
    m['excess_kurtosis'], mc_excess_kurt,
    m['mgf'](t1), mc_mgf_t1,
    m['mgf'](t2), mc_mgf_t2,
)


(10.85627631137654,
 10.811263090405161,
 425.00936160142885,
 420.44185606541447,
 2.4046918148882312,
 2.4113563079778877,
 5.220825298069535,
 5.272143004630019,
 5.125593645982451,
 5.064962298664382,
 0.7625167097977015,
 0.7624525613686373)

In [5]:
# Same lower bound a, different upper bounds b
params = [(1e-2, 1e0), (1e-2, 1e2), (1e-2, 1e4)]
a_min = min(lo for lo, _ in params)
b_max = max(hi for _, hi in params)

# One log-spaced grid spanning the widest support
x = np.geomspace(a_min, b_max, 2000)

fig = go.Figure()
for a, b in params:
    fig.add_scatter(
        x=x,
        y=loguniform_pdf(x, a, b),
        mode='lines',
        name=f'a={a:g}, b={b:g}',
    )

fig.update_layout(
    title='Loguniform PDF: effect of widening [a,b]',
    xaxis_title='x',
    xaxis_type='log',
    yaxis_title='density',
    legend_title='parameters',
)
fig


In [6]:
# In log-space the density is constant:
# if Y = log X then Y ~ Uniform(log a, log b).

fig = go.Figure()
for a, b in params:
    log_a, log_b = np.log(a), np.log(b)
    const = 1.0 / (log_b - log_a)  # height of the uniform density on [log a, log b]
    y = np.linspace(log_a, log_b, 200)
    fig.add_scatter(
        x=y,
        y=np.full_like(y, const),
        mode='lines',
        name=f'log a={log_a:.2f}, log b={log_b:.2f}',
    )

fig.update_layout(
    title='Density of Y = log X is uniform',
    xaxis_title='y = log x',
    yaxis_title='density',
)
fig


In [7]:
def loguniform_loglik(data: np.ndarray, a: float, b: float) -> float:
    '''Log-likelihood of i.i.d. data under LogUniform(a,b).

    Returns -inf as soon as any observation lies outside [a,b]; otherwise
    -sum(log x_i) - n*log(log(b/a)).
    '''
    _check_ab(a, b)
    obs = np.asarray(data, dtype=float)

    outside = (obs < a) | (obs > b)
    if outside.any():
        return -np.inf

    log_width = np.log(b) - np.log(a)
    return -np.log(obs).sum() - obs.size * np.log(log_width)


data = stats.loguniform(a0, b0).rvs(size=2_000, random_state=rng)

# Bounds estimated by the sample extremes
a_hat, b_hat = data.min(), data.max()

# Likelihood at the estimate, at a widened interval, and at a narrowed one
ll_mle = loguniform_loglik(data, a_hat, b_hat)
ll_expanded = loguniform_loglik(data, 0.9 * a_hat, 1.1 * b_hat)
ll_shrunk_invalid = loguniform_loglik(data, 1.1 * a_hat, 0.9 * b_hat)  # infeasible

ll_mle, ll_expanded, ll_shrunk_invalid


(-4559.564681994808, -4602.7252905037785, -inf)

In [8]:
def loguniform_rvs_numpy(a: float, b: float, size: int, rng: np.random.Generator) -> np.ndarray:
    '''Draw LogUniform(a,b) samples with NumPy only.

    Uniform draws from rng are pushed through the inverse CDF, evaluated in
    log-space as exp(log a + u*(log b - log a)).
    '''
    _check_ab(a, b)
    u = rng.uniform(size=size)  # defaults: low=0.0, high=1.0
    lo, hi = np.log(a), np.log(b)
    return np.exp(lo + u * (hi - lo))


samples_np = loguniform_rvs_numpy(a0, b0, size=200_000, rng=rng)

# Monte Carlo check vs theory: (mean, sample mean, variance, sample variance)
m['mean'], np.mean(samples_np), m['var'], np.var(samples_np, ddof=0)


(10.85627631137654, 10.789027117487663, 425.00936160142885, 420.90882792302494)

In [9]:
a1, b1 = 1e-3, 1e3
samples = loguniform_rvs_numpy(a1, b1, size=60_000, rng=rng)

x = np.geomspace(a1, b1, 1500)

# PDF + histogram (log-x axis)
fig_pdf = go.Figure()
fig_pdf.add_scatter(x=x, y=loguniform_pdf(x, a1, b1), mode='lines', name='theory')
fig_pdf.add_histogram(
    x=samples,
    nbinsx=70,
    histnorm='probability density',
    name='samples',
    opacity=0.55,
)
fig_pdf.update_layout(
    title='Loguniform PDF (log-x axis)',
    xaxis_title='x',
    xaxis_type='log',
    yaxis_title='density',
)
fig_pdf


In [10]:
# CDF + empirical CDF
xs = np.sort(samples)
ecdf = (np.arange(xs.size) + 1) / xs.size  # i/n at the i-th order statistic

fig_cdf = go.Figure()
fig_cdf.add_scatter(x=x, y=loguniform_cdf(x, a1, b1), mode='lines', name='theory')
fig_cdf.add_scatter(x=xs, y=ecdf, mode='lines', name='empirical', line={'width': 1})
fig_cdf.update_layout(
    title='CDF vs empirical CDF',
    xaxis_title='x',
    xaxis_type='log',
    yaxis_title='F(x)',
)
fig_cdf


In [11]:
# Check uniformity in log-space
log_samples = np.log(samples)
log_a, log_b = np.log(a1), np.log(b1)
const = 1.0 / (log_b - log_a)  # reference height of Uniform(log a, log b)

fig_log = px.histogram(
    x=log_samples,
    nbins=60,
    histnorm='probability density',
    title='Histogram of log(X) (should be uniform)',
    labels={'x': 'log x'},
)

# Overlay the flat reference density across the support
fig_log.add_scatter(
    x=[log_a, log_b],
    y=[const, const],
    mode='lines',
    name='uniform density',
)
fig_log


In [12]:
dist = stats.loguniform(a0, b0)  # loc=0, scale=1 by default
# Evaluate at the lower bound, the geometric midpoint, and the upper bound
x_test = np.array([a0, np.sqrt(a0 * b0), b0])

pdf, cdf = dist.pdf(x_test), dist.cdf(x_test)
ppf = dist.ppf(np.array([0.0, 0.5, 1.0]))
samples_scipy_small = dist.rvs(size=5, random_state=rng)

pdf, cdf, ppf, samples_scipy_small


(array([10.857362,  0.108574,  0.001086]),
 array([0. , 0.5, 1. ]),
 array([  0.01,   1.  , 100.  ]),
 array([ 0.199854,  0.361304,  4.721396, 13.553983,  0.034943]))

In [13]:
# Equivalence to reciprocal: compare PDF and CDF on the shared grid
recip = stats.reciprocal(a0, b0)
tuple(
    np.allclose(getattr(dist, fn)(xgrid), getattr(recip, fn)(xgrid))
    for fn in ('pdf', 'cdf')
)


(True, True)

In [14]:
# Fitting (MLE) with SciPy
# If you KNOW loc=0 and scale=1 are appropriate, it's common to fix them.

data_fit = dist.rvs(size=20_000, random_state=rng)
a_hat, b_hat, loc_hat, scale_hat = stats.loguniform.fit(data_fit, floc=0, fscale=1)

# Fitted parameters next to the sample extremes for comparison
(a_hat, b_hat, loc_hat, scale_hat), (np.min(data_fit), np.max(data_fit))


((0.01001312790477221, 99.9984421995704, 0, 1),
 (0.01001312790477221, 99.9984421995704))

In [15]:
# 10.1 Hypothesis test demo: KS test in log-space (parameters known)
a_test, b_test = 1e-3, 1e2
x = stats.loguniform(a_test, b_test).rvs(size=2_000, random_state=rng)
y = np.log(x)

# Reference law for log X: Uniform on [log a, log b]
u = stats.uniform(
    loc=np.log(a_test),
    scale=np.log(b_test) - np.log(a_test),
)
D, p_value = stats.kstest(y, u.cdf)
D, p_value


(0.011160865291876099, 0.9621323409117997)

In [16]:
# 10.2 Bayesian modeling demo: loguniform prior on sigma in a Normal(0, sigma^2)

# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
# pick whichever this NumPy provides so the cell runs on both.
_trapezoid = np.trapezoid if hasattr(np, 'trapezoid') else np.trapz

sigma_true = 0.3
n = 200
obs = rng.normal(0.0, sigma_true, size=n)
S = np.sum(obs**2)  # sufficient statistic for sigma when the mean is known to be 0

sigma_min, sigma_max = 1e-3, 3.0
sigma_grid = np.geomspace(sigma_min, sigma_max, 2000)

# Proper loguniform prior on [sigma_min, sigma_max]
log_prior = -np.log(sigma_grid) - np.log(np.log(sigma_max / sigma_min))

# Log-likelihood up to an additive constant (drop -(n/2)log(2π))
log_lik = -n * np.log(sigma_grid) - 0.5 * S / (sigma_grid**2)

# Subtract the maximum before exponentiating so the largest term is exp(0)
log_post = log_prior + log_lik
post_unnorm = np.exp(log_post - log_post.max())
Z = _trapezoid(post_unnorm, sigma_grid)
post = post_unnorm / Z

# Posterior mean and a 90% credible interval
# Cumulative trapezoid rule on the (non-uniform) grid, starting from 0
cdf = np.cumsum(0.5 * (post[1:] + post[:-1]) * np.diff(sigma_grid))
cdf = np.concatenate([[0.0], cdf])

q05 = np.interp(0.05, cdf, sigma_grid)
q95 = np.interp(0.95, cdf, sigma_grid)
post_mean = _trapezoid(sigma_grid * post, sigma_grid)
post_mode = sigma_grid[np.argmax(post)]

post_mean, post_mode, (q05, q95)


(0.2998699911173841,
 0.2974883391590261,
 (0.27616277085842267, 0.3257213674590149))

In [17]:
# Posterior density with the true value and the 90% interval marked
fig_post = go.Figure()
fig_post.add_scatter(x=sigma_grid, y=post, mode='lines', name='posterior')
fig_post.add_vline(
    x=sigma_true,
    line_dash='dash',
    line_color='black',
    annotation_text='true σ',
)
fig_post.add_vrect(
    x0=q05,
    x1=q95,
    opacity=0.15,
    fillcolor='blue',
    line_width=0,
    annotation_text='90% CI',
)

fig_post.update_layout(title='Posterior over σ with LogUniform prior', yaxis_title='density')
fig_post.update_xaxes(title='σ', type='log')
fig_post


In [18]:
# 10.3 Generative modeling / hyperparameter sampling: learning rates across decades
lr = loguniform_rvs_numpy(1e-5, 1e-1, size=10_000, rng=rng)

# Histogram in log10 units
fig_lr = px.histogram(
    x=np.log10(lr),
    title='log10(learning rate) sampled LogUniform(1e-5, 1e-1)',
    labels={'x': 'log10(lr)'},
    nbins=60,
)
fig_lr


In [19]:
# Numerical pitfall demo: power form can overflow when b/a is huge

a_big, b_big = 1e-200, 1e200
u = rng.uniform(size=5)

# Log-space form: only log a and log b are ever formed
x_logspace = np.exp(np.log(a_big) + u * (np.log(b_big) - np.log(a_big)))

# This overflows because (b/a) is inf in float64
x_power = a_big * np.power(b_big / a_big, u)

x_logspace, x_power


(array([3.746323e-040, 1.919517e-194, 1.269969e-157, 3.665470e+127,
        5.658093e-176]),
 array([inf, inf, inf, inf, inf]))