In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os
import plotly.io as pio

from scipy import stats
from scipy.special import gamma

# Plotly defaults: white template; the renderer is read from the
# PLOTLY_RENDERER environment variable and falls back to "notebook" when unset.
pio.templates.default = "plotly_white"
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")
# Print NumPy arrays with 6 digits of precision and without scientific notation.
np.set_printoptions(precision=6, suppress=True)

# Single seeded generator passed to the sampling calls in the cells below, so
# the numbers they produce depend on the order in which the cells are run.
rng = np.random.default_rng(42)

In [2]:
def weibull_max_logpdf(x, c, loc=0.0, scale=1.0):
    '''Log-PDF of weibull_max(c, loc, scale) with support (-inf, loc].

    With y = (loc - x) / scale, the log-density for y > 0 is
    log(c / scale) + (c - 1) * log(y) - y**c.

    Parameters
    ----------
    x : array_like
        Evaluation points.
    c : float
        Shape parameter, must be > 0.
    loc : float, optional
        Upper endpoint of the support.
    scale : float, optional
        Scale parameter, must be > 0.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``x``. Points above ``loc`` (and x = -inf)
        give -inf. At x == loc the value is +inf for c < 1, -log(scale) for
        c == 1 and -inf for c > 1. NaN inputs give NaN.

    Raises
    ------
    ValueError
        If ``c <= 0`` or ``scale <= 0``.
    '''
    x = np.asarray(x, dtype=float)
    c = float(c)
    loc = float(loc)
    scale = float(scale)

    if c <= 0 or scale <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")

    y = (loc - x) / scale  # y >= 0 on the support
    out = np.full_like(x, -np.inf, dtype=float)

    # A NaN input has no defined density: propagate it instead of reporting -inf.
    out[np.isnan(y)] = np.nan

    # Only an *exact* c == 1 takes the exponential shortcut (it avoids
    # 0 * log(0) at the endpoint). A tolerance-based comparison would silently
    # evaluate nearby shapes such as c = 1.000005 with the wrong formula.
    if c == 1.0:
        mask = y >= 0
        out[mask] = -np.log(scale) - y[mask]
        return out

    # Restrict to finite y: at y = +inf with c > 1 the formula is inf - inf
    # (NaN), while the correct limit is -inf, which is already the fill value.
    mask_pos = (y > 0) & np.isfinite(y)
    y_pos = y[mask_pos]
    out[mask_pos] = np.log(c / scale) + (c - 1) * np.log(y_pos) - y_pos ** c

    # Endpoint x == loc: the density diverges for c < 1 and vanishes for c > 1.
    mask_zero = y == 0
    if np.any(mask_zero):
        out[mask_zero] = np.inf if c < 1 else -np.inf

    return out


def weibull_max_pdf(x, c, loc=0.0, scale=1.0):
    '''PDF of weibull_max(c, loc, scale), computed as exp of the log-PDF.'''
    log_density = weibull_max_logpdf(x, c, loc=loc, scale=scale)
    return np.exp(log_density)


def weibull_max_cdf(x, c, loc=0.0, scale=1.0):
    '''CDF of weibull_max(c, loc, scale).

    With y = (loc - x) / scale, F(x) = exp(-y**c) for x <= loc and 1 above loc.

    Parameters
    ----------
    x : array_like
        Evaluation points.
    c : float
        Shape parameter, must be > 0.
    loc : float, optional
        Upper endpoint of the support.
    scale : float, optional
        Scale parameter, must be > 0.

    Returns
    -------
    numpy.ndarray
        Float array shaped like ``x``; NaN inputs give NaN.

    Raises
    ------
    ValueError
        If ``c <= 0`` or ``scale <= 0``.
    '''
    x = np.asarray(x, dtype=float)
    c = float(c)
    loc = float(loc)
    scale = float(scale)

    if c <= 0 or scale <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")

    y = (loc - x) / scale
    out = np.ones_like(x, dtype=float)

    # NaN fails the `y >= 0` test below and would otherwise keep the fill
    # value 1.0, i.e. be reported as "certainly below x". Propagate it instead.
    out[np.isnan(y)] = np.nan

    mask = y >= 0
    out[mask] = np.exp(-(y[mask] ** c))
    return out


def weibull_max_ppf(p, c, loc=0.0, scale=1.0):
    '''Quantile function (inverse CDF) of weibull_max(c, loc, scale).

    Interior probabilities map to loc - scale * (-log p)**(1/c); p == 0 maps
    to -inf and p == 1 maps to loc. NaN probabilities stay NaN.

    Raises ValueError when c <= 0, scale <= 0, or any p lies outside [0, 1].
    '''
    p = np.asarray(p, dtype=float)
    c = float(c)
    loc = float(loc)
    scale = float(scale)

    if c <= 0 or scale <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")
    out_of_range = np.logical_or(p < 0, p > 1)
    if np.any(out_of_range):
        raise ValueError("p must be in [0, 1]")

    # Start from NaN so that anything not matched below (NaN inputs) stays NaN.
    quantiles = np.full_like(p, np.nan, dtype=float)

    interior = np.logical_and(p > 0, p < 1)
    neg_log_p = -np.log(p[interior])
    quantiles[interior] = loc - scale * neg_log_p ** (1.0 / c)

    # Endpoints are assigned explicitly rather than evaluated through log(0).
    quantiles[p == 1] = loc
    quantiles[p == 0] = -np.inf
    return quantiles


def weibull_max_entropy(c, scale=1.0):
    '''Differential entropy of weibull_max; it does not depend on loc.

    Computed as 1 + log(scale / c) + euler_gamma * (1 - 1/c).
    Raises ValueError when c <= 0 or scale <= 0.
    '''
    c = float(c)
    scale = float(scale)
    if scale <= 0 or c <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")

    log_term = np.log(scale / c)
    gamma_term = np.euler_gamma * (1.0 - 1.0 / c)
    return 1.0 + log_term + gamma_term


def weibull_max_moments(c, loc=0.0, scale=1.0):
    '''Return (mean, variance, skewness, excess_kurtosis) of weibull_max.

    All four are built from g_k = Gamma(1 + k/c), k = 1..4. The mean is
    loc - scale * g_1 and the skewness carries a leading minus sign.
    Raises ValueError when c <= 0 or scale <= 0.
    '''
    c = float(c)
    loc = float(loc)
    scale = float(scale)
    if scale <= 0 or c <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")

    g1, g2, g3, g4 = (gamma(1.0 + k / c) for k in (1.0, 2.0, 3.0, 4.0))

    # Central moments expressed through the g_k (unit scale).
    central2 = g2 - g1**2
    central3 = g3 - 3 * g2 * g1 + 2 * g1**3
    central4 = g4 - 4 * g3 * g1 + 6 * g2 * g1**2 - 3 * g1**4

    mean = loc - scale * g1
    var = scale**2 * central2
    skew = -central3 / central2 ** 1.5
    excess_kurt = central4 / central2 ** 2 - 3

    return mean, var, skew, excess_kurt

In [3]:
# Example parameters for comparing the closed-form helpers against SciPy.
c_ex, loc_ex, scale_ex = 1.7, 2.0, 1.5

# Closed-form moments and entropy from the helper functions of this notebook.
mean_th, var_th, skew_th, exkurt_th = weibull_max_moments(c_ex, loc=loc_ex, scale=scale_ex)
entropy_th = weibull_max_entropy(c_ex, scale=scale_ex)

# The same quantities from SciPy's frozen distribution, used as the reference.
dist = stats.weibull_max(c_ex, loc=loc_ex, scale=scale_ex)
mean_s, var_s, skew_s, exkurt_s = dist.stats(moments="mvsk")
entropy_s = dist.entropy()

# Side-by-side display as the cell's output; float(...) turns SciPy's return
# values into plain Python floats.
{
    "mean (theory)": mean_th,
    "mean (scipy)": float(mean_s),
    "var (theory)": var_th,
    "var (scipy)": float(var_s),
    "skew (theory)": skew_th,
    "skew (scipy)": float(skew_s),
    "excess_kurtosis (theory)": exkurt_th,
    "excess_kurtosis (scipy)": float(exkurt_s),
    "entropy (theory)": entropy_th,
    "entropy (scipy)": float(entropy_s),
}

{'mean (theory)': 0.6616332462510139,
 'mean (scipy)': 0.6616332462510139,
 'var (theory)': 0.656673361303171,
 'var (scipy)': 0.656673361303171,
 'skew (theory)': -0.865023442264403,
 'skew (scipy)': -0.8650234422644058,
 'excess_kurtosis (theory)': 0.7723789795422564,
 'excess_kurtosis (scipy)': 0.7723789795422564,
 'entropy (theory)': 1.1125138955348604,
 'entropy (scipy)': 1.1125138955348604}

In [4]:
# Shape changes (standardized loc=0, scale=1)
# One PDF curve per shape value, including c < 1, c = 1 and c > 1.
c_values = [0.6, 1.0, 1.8, 4.0]
# The grid stops at -1e-6, just short of the endpoint x = 0, where the
# density is infinite for c < 1.
x = np.linspace(-6, -1e-6, 800)

fig = go.Figure()
for c in c_values:
    fig.add_trace(go.Scatter(x=x, y=weibull_max_pdf(x, c), mode="lines", name=f"c={c}"))

fig.update_layout(
    title="weibull_max PDF shape for different c (loc=0, scale=1)",
    xaxis_title="x",
    yaxis_title="pdf(x)",
)
fig.show()

In [5]:
def weibull_max_loglik(x, c, loc=0.0, scale=1.0):
    '''Total log-likelihood of the observations x under weibull_max(c, loc, scale).

    Sums the pointwise log-densities and returns the result as a Python float.
    '''
    observations = np.asarray(x, dtype=float)
    pointwise = weibull_max_logpdf(observations, c, loc=loc, scale=scale)
    return float(np.sum(pointwise))


# Sanity check: log-likelihood matches SciPy's logpdf sum
c_true, loc_true, scale_true = 1.8, 2.0, 1.2
n = 5000

# Synthetic sample drawn with the notebook-wide `rng`, so the values depend on
# how many draws earlier cells have already taken from it.
x_synth = stats.weibull_max(c_true, loc=loc_true, scale=scale_true).rvs(
    size=n, random_state=rng
)
# Same data and parameters evaluated twice: with the notebook's own
# implementation and with SciPy's logpdf.
ll_manual = weibull_max_loglik(x_synth, c_true, loc=loc_true, scale=scale_true)
ll_scipy = float(
    np.sum(stats.weibull_max(c_true, loc=loc_true, scale=scale_true).logpdf(x_synth))
)
# Displayed as the cell's output; the two numbers should agree.
ll_manual, ll_scipy

(-4263.328347635146, -4263.328347635146)

In [6]:
def weibull_max_rvs_numpy(c, loc=0.0, scale=1.0, size=1, rng=None):
    '''Draw weibull_max(c, loc, scale) samples using NumPy only.

    Uses the transform X = loc - scale * E**(1/c) with E ~ Exp(1). When `rng`
    is None a fresh, unseeded generator is created for the call.
    Raises ValueError when c <= 0 or scale <= 0.
    '''
    generator = np.random.default_rng() if rng is None else rng

    c = float(c)
    loc = float(loc)
    scale = float(scale)
    if scale <= 0 or c <= 0:
        raise ValueError("c must be > 0 and scale must be > 0")

    unit_exponential = generator.exponential(scale=1.0, size=size)  # Exp(1)
    inverse_shape = 1.0 / c
    return loc - scale * unit_exponential ** inverse_shape


# Monte Carlo check
# Compare sample mean/variance of the NumPy sampler with the closed-form
# values; a shape c < 1 is used here.
c_mc, loc_mc, scale_mc = 0.7, 1.5, 0.8
n_mc = 200_000

# Draws come from the notebook-wide `rng`.
x_mc = weibull_max_rvs_numpy(c_mc, loc=loc_mc, scale=scale_mc, size=n_mc, rng=rng)

# Only mean and variance are kept; skewness and kurtosis are discarded.
# Note: this rebinds the names mean_th / var_th.
mean_th, var_th, *_ = weibull_max_moments(c_mc, loc=loc_mc, scale=scale_mc)
mean_mc = float(np.mean(x_mc))
# ddof=0: population-style variance (divide by n).
var_mc = float(np.var(x_mc, ddof=0))

{
    "mean (MC)": mean_mc,
    "mean (theory)": mean_th,
    "var (MC)": var_mc,
    "var (theory)": var_th,
}

{'mean (MC)': 0.48703560503710946,
 'mean (theory)': 0.48734119515417307,
 'var (MC)': 2.2115187094928475,
 'var (theory)': 2.1931747543380187}

In [7]:
def ecdf(samples):
    '''Empirical CDF of `samples`.

    Returns the sorted values together with the step heights k/n for
    k = 1..n, where n is the number of samples.
    '''
    ordered = np.sort(np.asarray(samples))
    count = ordered.size
    heights = np.arange(1, count + 1) / count
    return ordered, heights


# Parameters and sample size for the visual comparison of samples vs. theory.
c_viz, loc_viz, scale_viz = 1.2, 2.0, 1.0
n_viz = 80_000

# Draws come from the notebook-wide `rng`.
samples = weibull_max_rvs_numpy(c_viz, loc=loc_viz, scale=scale_viz, size=n_viz, rng=rng)

# pick a finite plotting window using quantiles (support extends to -inf)
x_lo = float(np.quantile(samples, 0.001))
x_hi = float(np.quantile(samples, 0.999))
# The upper end is capped just below loc so the grid never reaches the endpoint.
x_grid = np.linspace(x_lo, min(x_hi, loc_viz - 1e-6), 700)

# Theoretical curves evaluated with the notebook's own pdf/cdf helpers.
pdf_grid = weibull_max_pdf(x_grid, c_viz, loc=loc_viz, scale=scale_viz)
cdf_grid = weibull_max_cdf(x_grid, c_viz, loc=loc_viz, scale=scale_viz)

# PDF + histogram
# histnorm="probability density" puts the histogram on the same scale as the PDF.
fig_pdf = go.Figure()
fig_pdf.add_trace(
    go.Histogram(
        x=samples,
        histnorm="probability density",
        nbinsx=80,
        name="samples",
        opacity=0.55,
    )
)
fig_pdf.add_trace(go.Scatter(x=x_grid, y=pdf_grid, mode="lines", name="theory pdf"))
fig_pdf.update_layout(
    title="weibull_max: histogram vs PDF",
    xaxis_title="x",
    yaxis_title="density",
)
fig_pdf.show()

# CDF + ECDF
x_ecdf, y_ecdf = ecdf(samples)
fig_cdf = go.Figure()
fig_cdf.add_trace(go.Scatter(x=x_grid, y=cdf_grid, mode="lines", name="theory cdf"))
# Only every 200th ECDF point is plotted to keep the marker count small.
fig_cdf.add_trace(
    go.Scatter(
        x=x_ecdf[::200],
        y=y_ecdf[::200],
        mode="markers",
        name="empirical cdf",
        marker=dict(size=4),
    )
)
fig_cdf.update_layout(
    title="weibull_max: empirical CDF vs CDF",
    xaxis_title="x",
    yaxis_title="F(x)",
)
fig_cdf.show()

In [8]:
# Ground-truth parameters for the SciPy fitting demo (rebinds c_true, loc_true,
# scale_true and dist).
c_true, loc_true, scale_true = 1.5, 3.0, 0.9

dist = stats.weibull_max(c_true, loc=loc_true, scale=scale_true)

# Grid from 5 scale units below loc up to just short of the endpoint.
# NOTE(review): x, pdf and cdf are not used again in the cells visible here —
# confirm whether a later cell relies on them.
x = np.linspace(loc_true - 5 * scale_true, loc_true - 1e-6, 500)
pdf = dist.pdf(x)
cdf = dist.cdf(x)

# Draws come from the notebook-wide `rng`.
samples_scipy = dist.rvs(size=5000, random_state=rng)

# MLE fit (free loc)
c_hat, loc_hat, scale_hat = stats.weibull_max.fit(samples_scipy)

# If you know the upper endpoint in advance, fixing loc can be much more stable.
c_hat_fix, loc_hat_fix, scale_hat_fix = stats.weibull_max.fit(samples_scipy, floc=loc_true)

# Each entry is a (c, loc, scale) triple.
{
    "true": (c_true, loc_true, scale_true),
    "fit (free loc)": (c_hat, loc_hat, scale_hat),
    "fit (fixed loc=true loc)": (c_hat_fix, loc_hat_fix, scale_hat_fix),
}

{'true': (1.5, 3.0, 0.9),
 'fit (free loc)': (1.5248013159497331,
  2.9960156700152405,
  0.8989292504413816),
 'fit (fixed loc=true loc)': (1.5377865960944537, 3.0, 0.9045985622077737)}

In [9]:
# (A) KS test + parametric bootstrap (illustration)

c0, loc0, scale0 = 1.4, 2.0, 1.0
n0 = 400

# data0 is drawn from the notebook-wide `rng`.
d0 = stats.weibull_max(c0, loc=loc0, scale=scale0)
data0 = d0.rvs(size=n0, random_state=rng)

# KS test when the null distribution is fully specified
ks_stat, ks_p = stats.kstest(data0, d0.cdf)

# Now pretend loc is known but (c, scale) are estimated and calibrate with a bootstrap
# Note: this rebinds c_hat, loc_hat and scale_hat.
c_hat, loc_hat, scale_hat = stats.weibull_max.fit(data0, floc=loc0)
d_hat = stats.weibull_max(c_hat, loc=loc0, scale=scale_hat)
# Only the statistic is kept; the accompanying p-value is discarded.
ks_obs, _ = stats.kstest(data0, d_hat.cdf)

# Each replicate repeats the full procedure: simulate from the fitted model,
# refit (c, scale) with loc fixed, and compute the KS statistic against the refit.
B = 200
ks_boot = np.empty(B)
for b in range(B):
    sim = d_hat.rvs(size=n0, random_state=rng)
    c_b, _, scale_b = stats.weibull_max.fit(sim, floc=loc0)
    d_b = stats.weibull_max(c_b, loc=loc0, scale=scale_b)
    ks_boot[b] = stats.kstest(sim, d_b.cdf).statistic

# Fraction of bootstrap statistics at least as large as the observed one.
p_boot = float(np.mean(ks_boot >= ks_obs))

{
    "KS (known params) statistic": float(ks_stat),
    "KS (known params) pvalue": float(ks_p),
    "KS (fit params) statistic": float(ks_obs),
    "bootstrap pvalue (approx)": p_boot,
}

{'KS (known params) statistic': 0.02737955313267515,
 'KS (known params) pvalue': 0.9170449942340866,
 'KS (fit params) statistic': 0.023270299995085675,
 'bootstrap pvalue (approx)': 0.855}

In [10]:
# (B) Simple grid posterior for (c, scale) when loc is known

# Same value as loc0 in the KS cell that generated data0.
loc_known = 2.0
# Reuses data0 from the KS cell, so that cell must have been run first.
x = data0

# Reflect about the known endpoint: y = loc - x is non-negative on the support.
y = loc_known - x
if np.any(y < 0):
    raise ValueError("Data must satisfy x <= loc_known")


# Log-likelihood for y ~ Weibull_min(c, scale)
def weibull_min_loglik(y, c, scale):
    '''Log-likelihood of non-negative data y under Weibull_min(c, scale).

    Evaluates sum_i [ log(c/scale) + (c-1) log(y_i/scale) - (y_i/scale)^c ].

    Parameters
    ----------
    y : array_like
        Observations; expected to be >= 0.
    c : float
        Shape parameter.
    scale : float
        Scale parameter.

    Returns
    -------
    float
        The log-likelihood. Returns -inf (instead of raising) when c <= 0,
        scale <= 0 or any y < 0. If some y == 0 the result is +inf for c < 1,
        -inf for c > 1, and finite for c == 1.
    '''
    y = np.asarray(y, dtype=float)
    c = float(c)
    scale = float(scale)
    if c <= 0 or scale <= 0:
        return -np.inf
    if np.any(y < 0):
        return -np.inf

    # Take logs of strictly positive observations only: log(0) would emit a
    # warning, and for c == 1 the product (c - 1) * log(0) = 0 * -inf is NaN
    # although zeros contribute nothing to this term in that case.
    positive = y > 0
    shape_term = (c - 1) * np.sum(np.log(y[positive]) - np.log(scale))

    # Exact zeros with c != 1: the density at 0 diverges (c < 1) or vanishes (c > 1).
    if c != 1.0 and np.any(y == 0):
        shape_term = np.inf if c < 1.0 else -np.inf

    return float(
        y.size * (np.log(c) - np.log(scale))
        + shape_term
        - np.sum((y / scale) ** c)
    )


# Priors: independent log-normal on c and scale (mildly informative)
def lognormal_logpdf(z, mu, sigma):
    '''Summed log-normal log-density of the entries of z.

    `mu` and `sigma` are the mean and standard deviation of log(z).
    Returns -inf when any entry of z is <= 0; otherwise a Python float.
    '''
    z = np.asarray(z, dtype=float)
    if np.any(z <= 0):
        return -np.inf

    log_z = np.log(z)
    jacobian = -np.sum(log_z)
    normalizer = z.size * np.log(sigma * np.sqrt(2 * np.pi))
    quadratic = 0.5 * np.sum(((log_z - mu) / sigma) ** 2)
    return float(jacobian - normalizer - quadratic)


# Evaluation grid for the posterior: 220 x 220 points over (c, scale).
c_grid = np.linspace(0.3, 4.0, 220)
scale_grid = np.linspace(0.2, 2.5, 220)

# log_post[i, j] = log-likelihood + log-prior at (c_grid[i], scale_grid[j]).
log_post = np.empty((c_grid.size, scale_grid.size))
for i, c_val in enumerate(c_grid):
    for j, s_val in enumerate(scale_grid):
        ll = weibull_min_loglik(y, c_val, s_val)
        # Independent log-normal priors on c and on scale, both with sigma=0.7.
        lp = lognormal_logpdf(np.array([c_val]), mu=np.log(1.2), sigma=0.7) + lognormal_logpdf(
            np.array([s_val]), mu=np.log(1.0), sigma=0.7
        )
        log_post[i, j] = ll + lp

# Stabilize and normalize
# Subtracting the maximum makes the largest exponent 0 before exponentiating;
# dividing by the sum makes the grid cells add up to 1.
log_post -= np.max(log_post)
post = np.exp(log_post)
post /= np.sum(post)

# MAP estimate
idx = np.unravel_index(np.argmax(post), post.shape)
c_map = float(c_grid[idx[0]])
scale_map = float(scale_grid[idx[1]])

# Posterior means (grid approximation)
# Broadcasting weights each cell's mass by its c (rows) or scale (columns) value.
c_mean = float(np.sum(c_grid[:, None] * post))
scale_mean = float(np.sum(scale_grid[None, :] * post))

# Rows of `post` follow c_grid (y axis), columns follow scale_grid (x axis).
fig = px.imshow(
    post,
    x=scale_grid,
    y=c_grid,
    origin="lower",
    aspect="auto",
    labels={"x": "scale", "y": "c", "color": "posterior mass"},
    title="Grid posterior over (c, scale) with known loc",
)
fig.show()

{
    "MAP (c, scale)": (c_map, scale_map),
    "posterior mean (c, scale)": (c_mean, scale_mean),
}

{'MAP (c, scale)': (1.3812785388127855, 0.9876712328767123),
 'posterior mean (c, scale)': (1.3877289041166823, 0.994075094693863)}

In [11]:
# (C) Generative example: one-sided errors bounded above

x = np.linspace(0, 10, 250)
true_curve = 2.0 + np.sin(x)

# noise <= 0 with tunable left tail
# With loc=0.0 the sampler returns values <= 0, so every observation lies on
# or below the true curve. Draws come from the notebook-wide `rng`.
noise = weibull_max_rvs_numpy(c=1.3, loc=0.0, scale=0.35, size=x.size, rng=rng)
obs = true_curve + noise

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=true_curve, mode="lines", name="true curve"))
fig.add_trace(
    go.Scatter(
        x=x,
        y=obs,
        mode="markers",
        name="observations",
        marker=dict(size=5, opacity=0.7),
    )
)
fig.update_layout(
    title="One-sided (upper-bounded) noise via weibull_max",
    xaxis_title="x",
    yaxis_title="y",
)
fig.show()