In [1]:
import os
import warnings

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from scipy import optimize, special, stats

# Plotly rendering (CKC convention): take the renderer name from the
# PLOTLY_RENDERER environment variable, falling back to "notebook" when it is unset.
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

# Reproducibility: a single seeded Generator that the sampling cells below pass as
# `random_state`. Because it is shared, printed results depend on running the cells in order.
rng = np.random.default_rng(7)

# Print arrays with 6 digits of precision and without scientific notation for small values.
np.set_printoptions(precision=6, suppress=True)

In [2]:
def genhalflogistic_support(c: float) -> tuple[float, float]:
    '''Return the (lower, upper) endpoints of the standardized support, i.e. (0, 1/c).

    Raises ValueError when c <= 0.
    '''
    if c <= 0:
        raise ValueError("c must be > 0")
    lower = 0.0
    upper = 1.0 / c
    return lower, upper


def genhalflogistic_logpdf(x: np.ndarray, c: float) -> np.ndarray:
    '''Log-PDF of the standardized genhalflogistic distribution.

    With u = 1 - c*x, the log-density on [0, 1/c] is
        log(2) + (1/c - 1) * log(u) - 2 * log(1 + u**(1/c)).

    Parameters
    ----------
    x : array_like
        Evaluation points.
    c : float
        Shape parameter; the support is [0, 1/c].

    Returns
    -------
    np.ndarray
        Array shaped like ``x``. Entries are -inf outside [0, 1/c], and every
        entry is -inf when c <= 0. At the upper endpoint x = 1/c the value is
        the limit of the log-density: -inf for c < 1, log(2) for c == 1 and
        +inf for c > 1.
    '''
    x = np.asarray(x, dtype=float)
    out = np.full_like(x, -np.inf)
    if c <= 0:
        return out

    inv_c = 1.0 / c
    inside = (x >= 0.0) & (x <= inv_c)
    u = 1.0 - c * x[inside]  # in [0, 1]; exactly 0 at the upper endpoint

    # Work in log-space for stability near u -> 0. u == 0 is a legitimate input
    # (the endpoint), so the divide-by-zero warning from log(0) is silenced.
    with np.errstate(divide='ignore'):
        log_t = inv_c * np.log(u)  # log of (1 - c x)^(1/c); -inf at u == 0
        # xlogy(a, u) == a * log(u) but returns 0 for a == 0, u == 0. A plain product
        # would give 0 * -inf = nan at x = 1/c when c == 1, where the density is 2.
        log_power = special.xlogy(inv_c - 1.0, u)

    out[inside] = np.log(2.0) + log_power - 2.0 * np.log1p(np.exp(log_t))
    return out


def genhalflogistic_pdf(x: np.ndarray, c: float) -> np.ndarray:
    '''PDF of the standardized genhalflogistic distribution (exponential of the log-PDF).'''
    log_density = genhalflogistic_logpdf(x, c)
    return np.exp(log_density)


def genhalflogistic_cdf(x: np.ndarray, c: float) -> np.ndarray:
    '''CDF of the standardized genhalflogistic distribution.

    On (0, 1/c) the CDF is (1 - t) / (1 + t) with t = (1 - c*x)**(1/c).

    Parameters
    ----------
    x : array_like
        Evaluation points.
    c : float
        Shape parameter; the support is [0, 1/c].

    Returns
    -------
    np.ndarray
        Array shaped like ``x``: 0 for x <= 0, 1 for x >= 1/c, the CDF in
        between. Every entry is NaN when c <= 0.
    '''
    x = np.asarray(x, dtype=float)
    if c <= 0:
        return np.full_like(x, np.nan)

    upper = 1.0 / c
    out = np.zeros_like(x)
    out[x >= upper] = 1.0

    inside = (x > 0.0) & (x < upper)

    # (1 - t) / (1 + t) == tanh(-log(t) / 2) with log(t) = log1p(-c x) / c.
    # Forming 1 - c*x and then 1 - t loses all precision for tiny x (the result
    # collapses to 0); log1p + tanh keeps full relative accuracy there.
    with np.errstate(divide='ignore'):  # c*x may round to exactly 1 just below 1/c
        log_t = np.log1p(-c * x[inside]) / c
    out[inside] = np.tanh(-0.5 * log_t)
    return out


def genhalflogistic_ppf(q: np.ndarray, c: float) -> np.ndarray:
    '''Quantile function (inverse CDF) of the standardized genhalflogistic distribution.

    Computes (1 - r**c) / c with r = (1 - q) / (1 + q).

    Parameters
    ----------
    q : array_like
        Probabilities in [0, 1].
    c : float
        Shape parameter.

    Returns
    -------
    np.ndarray
        Quantiles shaped like ``q``; every entry is NaN when c <= 0.

    Raises
    ------
    ValueError
        If c > 0 and any element of ``q`` lies outside [0, 1].
    '''
    q = np.asarray(q, dtype=float)
    if c <= 0:
        return np.full_like(q, np.nan)

    if np.any((q < 0) | (q > 1)):
        raise ValueError("q must be in [0, 1]")

    # log((1-q)/(1+q)) computed stably; -inf at q == 1 is the intended value,
    # so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        log_r = np.log1p(-q) - np.log1p(q)

    # 1 - r**c == -expm1(c * log_r). expm1 avoids the cancellation in 1 - exp(.)
    # that made tiny q map to exactly 0. Writing it as "0.0 - ..." keeps the
    # q == 0 result +0.0 rather than -0.0.
    return (0.0 - np.expm1(c * log_r)) / c


# Sanity check against SciPy at a handful of points.
# Columns: x, pdf (ours), pdf (SciPy), cdf (ours), cdf (SciPy).
c0 = 0.8
x_test = np.array([0.0, 0.05, 0.2, 0.8 / c0])

np.stack(
    (
        x_test,
        genhalflogistic_pdf(x_test, c0),
        stats.genhalflogistic.pdf(x_test, c0),
        genhalflogistic_cdf(x_test, c0),
        stats.genhalflogistic.cdf(x_test, c0),
    ),
    axis=1,
)

array([[0.      , 0.5     , 0.5     , 0.      , 0.      ],
       [0.05    , 0.520494, 0.520494, 0.025508, 0.025508],
       [0.2     , 0.588225, 0.588225, 0.108542, 0.108542],
       [1.      , 1.040529, 1.040529, 0.76406 , 0.76406 ]])

In [3]:
def Ey_pow(a: np.ndarray) -> np.ndarray:
    '''Moment E[Y^a] of a variable Y with density 2/(1+y)^2 on (0,1); requires a > 0.

    Evaluated in closed form as -1 + a * (digamma((a+1)/2) - digamma(a/2)).
    Raises ValueError if any element of a is <= 0.
    '''
    exponent = np.asarray(a, dtype=float)
    if np.any(exponent <= 0):
        raise ValueError("a must be > 0")
    psi_upper = special.digamma((exponent + 1.0) / 2.0)
    psi_lower = special.digamma(exponent / 2.0)
    return -1.0 + exponent * (psi_upper - psi_lower)


def genhalflogistic_raw_moment(n: int, c: float) -> float:
    '''Raw moment E[X^n] of the standardized genhalflogistic(c) variable X.

    Uses the binomial expansion
        E[X^n] = c**(-n) * sum_j (-1)**j * C(n, j) * E[Y**(c*j)],
    with E[Y**0] = 1 and the remaining terms supplied by Ey_pow.

    Raises ValueError when n < 0 or c <= 0.
    '''
    from math import comb

    if n < 0:
        raise ValueError("n must be >= 0")
    if c <= 0:
        raise ValueError("c must be > 0")

    orders = np.arange(n + 1)
    signed_binom = np.array([(-1) ** j * comb(n, j) for j in orders], dtype=float)

    # The j = 0 term is E[Y^0] = 1; Ey_pow only accepts strictly positive exponents.
    y_moments = np.ones(orders.shape, dtype=float)
    if n >= 1:
        y_moments[1:] = Ey_pow(c * orders[1:])

    expansion = signed_binom @ y_moments
    return float(expansion / (c**n))


def genhalflogistic_moments(c: float) -> dict:
    '''Summary statistics of the standardized genhalflogistic(c) distribution.

    Returns a dict with keys 'mean', 'var', 'skew', 'excess_kurtosis' (built from
    the first four raw moments) and 'entropy' (closed form 2 - (2c + 1) * log 2).
    '''
    m1, m2, m3, m4 = (genhalflogistic_raw_moment(order, c) for order in (1, 2, 3, 4))

    variance = m2 - m1**2
    sigma = np.sqrt(variance)

    # Central moments expressed through the raw moments.
    central3 = m3 - 3 * m1 * m2 + 2 * m1**3
    central4 = m4 - 4 * m1 * m3 + 6 * (m1**2) * m2 - 3 * m1**4

    return dict(
        mean=m1,
        var=variance,
        skew=central3 / (sigma**3),
        excess_kurtosis=central4 / (variance**2) - 3,
        entropy=2.0 - (2.0 * c + 1.0) * np.log(2.0),
    )


# One summary line per shape value (mean, variance, skewness, excess kurtosis).
for c in (0.2, 0.5, 1.0, 2.0):
    m = genhalflogistic_moments(c)
    print(f"c={c:>4}: mean={m['mean']:.6f}, var={m['var']:.6f}, skew={m['skew']:.4f}, ex.kurt={m['excess_kurtosis']:.4f}")

c= 0.2: mean=1.116864, var=0.611420, skew=0.7568, ex.kurt=0.1124
c= 0.5: mean=0.858407, var=0.241944, skew=0.1301, ex.kurt=-0.9674
c= 1.0: mean=0.613706, var=0.078188, skew=-0.4861, ex.kurt=-0.9072
c= 2.0: mean=0.386294, var=0.017443, skew=-1.2410, ex.kurt=0.4707


In [4]:
# Cross-check the closed-form mean/var/entropy against the values SciPy reports.

c_check = 0.8
m = genhalflogistic_moments(c_check)

dist = stats.genhalflogistic(c_check)
for label, value in (
    ('mean  (formula):', m['mean']),
    ('mean  (SciPy):  ', dist.mean()),
    ('var   (formula):', m['var']),
    ('var   (SciPy):  ', dist.var()),
    ('entropy (formula):', m['entropy']),
    ('entropy (SciPy):  ', dist.entropy()),
):
    print(label, value)

mean  (formula): 0.6935424053619349
mean  (SciPy):   0.6935424053619348
var   (formula): 0.11739913996796586
var   (SciPy):   0.11739913996796691
entropy (formula): 0.1978173305441422
entropy (SciPy):   0.1978173305441422


In [5]:
# Overlay the densities on a common axis using the rescaled variable z = c x in [0, 1].
# (The x-support changes with c, so the raw x-axes are not directly comparable.)

c_vals = [0.2, 0.5, 1.0, 2.0, 5.0]

z = np.linspace(0, 1 - 1e-6, 800)

fig = go.Figure()
for c in c_vals:
    x = z / c
    pdf_z = genhalflogistic_pdf(x, c) / c  # density of Z=cX (change of variables)
    curve = go.Scatter(x=z, y=pdf_z, mode='lines', name=f'c={c}')
    fig.add_trace(curve)

fig.update_layout(
    width=900,
    height=420,
    title='Shape comparison via Z = cX (support fixed to [0,1])',
    xaxis_title='z = c x',
    yaxis_title='density of Z',
)
fig

In [6]:
def genhalflogistic_loglikelihood(x: np.ndarray, c: float) -> float:
    '''Total log-likelihood of the sample x under the standardized model with shape c.

    Returns -inf when c <= 0 or when any observation lies outside [0, 1/c];
    otherwise the sum of the pointwise log-PDF values.
    '''
    data = np.asarray(x, dtype=float)
    if c <= 0 or np.any((data < 0) | (data > 1.0 / c)):
        return -np.inf
    return float(genhalflogistic_logpdf(data, c).sum())


def genhalflogistic_mle_c(x: np.ndarray) -> float:
    '''1D MLE for c in the standardized model (loc=0, scale=1).

    Minimizes the negative log-likelihood over c in (1e-6, (1 - 1e-6) / max(x))
    with SciPy's bounded scalar minimizer.

    Parameters
    ----------
    x : array_like
        Observations; must be non-empty, finite and non-negative.

    Returns
    -------
    float
        The shape value at which the bounded search stopped.

    Raises
    ------
    ValueError
        If x is empty, contains non-finite or negative values, consists only of
        zeros, or has a maximum so large that the feasible interval is empty.
    '''
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        raise ValueError("x must contain at least one observation")
    if not np.all(np.isfinite(x)):
        raise ValueError("x must contain only finite values")
    if np.any(x < 0):
        raise ValueError("x must be non-negative")

    x_max = float(np.max(x))
    if x_max == 0.0:
        # The log-density at x = 0 equals -log(2) for every c, so the likelihood is
        # flat and 1/x_max is undefined.
        raise ValueError("all observations are zero; c is not identifiable")

    # Feasible region: (0, 1/x_max). Keep away from the boundary for numerical stability.
    lower = 1e-6
    upper = (1.0 / x_max) * (1.0 - 1e-6)
    if not np.isfinite(upper) or upper <= lower:
        raise ValueError("max(x) leaves no feasible search interval for c")

    def nll(c: float) -> float:
        ll = genhalflogistic_loglikelihood(x, c)
        # The optimizer needs a comparable value even where the likelihood is -inf/nan.
        return np.inf if not np.isfinite(ll) else -ll

    res = optimize.minimize_scalar(nll, bounds=(lower, upper), method='bounded')
    return float(res.x)


# Demo: recover c from simulated data.
# The sample is drawn from the shared `rng`, so the printed estimate depends on the
# cells above having been run in order.
c_true = 0.8
x_sim = stats.genhalflogistic.rvs(c_true, size=2000, random_state=rng)

# Standardized-model MLE: only c is estimated.
c_hat = genhalflogistic_mle_c(x_sim)

print('c_true:', c_true)
print('c_hat (MLE, loc=0, scale=1):', c_hat)

c_true: 0.8
c_hat (MLE, loc=0, scale=1): 0.8009576229790984


In [7]:
def genhalflogistic_rvs_numpy(c: float, size: int, rng: np.random.Generator) -> np.ndarray:
    '''Inverse-transform sampler: pushes rng.random(size) uniforms through the PPF.

    Raises ValueError when c <= 0.
    '''
    if c <= 0:
        raise ValueError('c must be > 0')
    return genhalflogistic_ppf(rng.random(size), c)


# Compare NumPy-only sampler to SciPy sampler.
# Both draws consume the shared `rng` in this order; swapping them changes the printed numbers.
c0 = 0.8
n = 50_000

samples_numpy = genhalflogistic_rvs_numpy(c0, n, rng)
samples_scipy = stats.genhalflogistic.rvs(c0, size=n, random_state=rng)

# Quick distributional check: KS test against the known CDF (valid when parameters are fixed)
ks_numpy = stats.kstest(samples_numpy, stats.genhalflogistic(c0).cdf)
ks_scipy = stats.kstest(samples_scipy, stats.genhalflogistic(c0).cdf)

print('KS (NumPy sampler) :', ks_numpy)
print('KS (SciPy sampler) :', ks_scipy)

# First-moment check: Monte Carlo mean vs. the closed-form mean from genhalflogistic_moments.
print('Sample mean (NumPy):', samples_numpy.mean())
print('Theoretical mean   :', genhalflogistic_moments(c0)['mean'])

KS (NumPy sampler) : KstestResult(statistic=0.004323522460566553, pvalue=0.30646164988542945, statistic_location=0.8517412564229457, statistic_sign=-1)
KS (SciPy sampler) : KstestResult(statistic=0.0038695613076692448, pvalue=0.4413362972326398, statistic_location=1.0575559553964073, statistic_sign=1)
Sample mean (NumPy): 0.6955189103487112
Theoretical mean   : 0.6935424053619349


In [8]:
# Monte Carlo histogram of the NumPy-only samples with the analytic PDF drawn on top.
c0 = 0.8

# Stop one float short of x=1/c0, where the density may spike (c>1) or be sensitive numerically.
x_max = np.nextafter(1.0 / c0, 0.0)
x = np.linspace(0.0, x_max, 800)

fig = go.Figure(
    data=[
        go.Histogram(
            x=samples_numpy,
            nbinsx=70,
            histnorm='probability density',
            name='Monte Carlo (NumPy-only)',
            opacity=0.55,
        ),
        go.Scatter(
            x=x,
            y=genhalflogistic_pdf(x, c0),
            mode='lines',
            name='True PDF',
            line={'width': 3},
        ),
    ]
)

fig.update_layout(
    width=900,
    height=420,
    title=f"genhalflogistic(c={c0}): histogram vs PDF",
    xaxis_title='x',
    yaxis_title='density',
)
fig

In [9]:
# Theoretical CDF as a line, empirical CDF of the NumPy-only samples as markers.
c0 = 0.8
x = np.linspace(0.0, 1.0 / c0, 600)

# Empirical CDF: sorted sample against the step heights i/n, i = 1..n.
emp_x = np.sort(samples_numpy)
emp_cdf = np.arange(1, emp_x.size + 1) / emp_x.size

fig = go.Figure(
    data=[
        go.Scatter(x=x, y=genhalflogistic_cdf(x, c0), mode='lines', name='True CDF'),
        go.Scatter(
            x=emp_x[::400],  # every 400th point keeps the marker layer light
            y=emp_cdf[::400],
            mode='markers',
            name='Empirical CDF (subsampled)',
            marker={'size': 4, 'opacity': 0.6},
        ),
    ]
)

fig.update_layout(
    width=900,
    height=420,
    title=f"genhalflogistic(c={c0}): theoretical CDF vs empirical CDF",
    xaxis_title='x',
    yaxis_title='CDF',
)
fig

In [10]:
# Basic SciPy usage: freeze the distribution, then evaluate pdf/cdf/ppf and draw samples.
c0 = 0.8

# Frozen distribution with the standardized location and scale spelled out.
dist = stats.genhalflogistic(c0, loc=0, scale=1)

# Eight evenly spaced points covering the whole support, endpoints included.
x = np.linspace(0, 1.0 / c0, 8)
print('x:', x)
print('pdf:', dist.pdf(x))
print('cdf:', dist.cdf(x))
print('ppf:', dist.ppf([0.1, 0.5, 0.9]))

# Random variates (drawn from the shared `rng`, so the values depend on cell run order)
r = dist.rvs(size=5, random_state=rng)
print('rvs:', r)

x: [0.       0.178571 0.357143 0.535714 0.714286 0.892857 1.071429 1.25    ]
pdf: [0.5      0.577952 0.669933 0.77612  0.892186 1.000554 1.039049 0.      ]
cdf: [0.       0.096047 0.207249 0.336163 0.485046 0.654412 0.838528 1.      ]
ppf: [0.185392 0.730945 1.131448]
rvs: [0.997256 0.890909 1.170096 1.209137 0.418321]


In [11]:
# Fitting (MLE) with SciPy
# (`warnings` is imported with the other modules in the notebook's first cell.)

c_true = 0.8
x = stats.genhalflogistic.rvs(c_true, size=1500, random_state=rng)

# Unconstrained fit (estimates c, loc, scale)
# SciPy may emit RuntimeWarnings internally during optimization when trying invalid parameter values.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    c_hat, loc_hat, scale_hat = stats.genhalflogistic.fit(x)

# Fit with loc=0, scale=1 fixed to match the standardized model
c_hat_fixed, loc_fixed, scale_fixed = stats.genhalflogistic.fit(x, floc=0, fscale=1)

print('true params:', (c_true, 0.0, 1.0))
print('fit (free):', (c_hat, loc_hat, scale_hat))
print('fit (fixed loc=0, scale=1):', (c_hat_fixed, loc_fixed, scale_fixed))

true params: (0.8, 0.0, 1.0)
fit (free): (0.8303250105495901, 0.00041257333440255226, 1.036444080494296)
fit (fixed loc=0, scale=1): (0.8006713867187498, 0, 1)


In [12]:
# Example: Likelihood ratio test for H0: c = c0 (standardized model)
# The sample comes from the shared `rng`, so the printed numbers depend on cell run order.

c_true = 0.8
x = stats.genhalflogistic.rvs(c_true, size=1200, random_state=rng)

# Unrestricted maximum-likelihood estimate of c.
c_mle = genhalflogistic_mle_c(x)

# Null value must be feasible: c0 < 1/max(x) (because support is [0, 1/c0])
c_upper = 1.0 / float(np.max(x))

# If the requested null value is infeasible for this sample, fall back to 90% of the bound.
c0 = 0.6
if c0 >= c_upper:
    c0 = 0.9 * c_upper

ll_mle = genhalflogistic_loglikelihood(x, c_mle)
ll_0 = genhalflogistic_loglikelihood(x, c0)

# Twice the log-likelihood gap, referred to a chi-square with one degree of freedom.
lrt = 2 * (ll_mle - ll_0)
p_value = stats.chi2.sf(lrt, df=1)

print('c_true:', c_true)
print('c_mle :', c_mle)
print('c0 (null):', c0)
print('LRT statistic:', lrt)
print('Approx p-value (chi^2_1):', p_value)

# Note: because the support depends on c, small-sample behavior can deviate from the chi-square approximation.

c_true: 0.8
c_mle : 0.8003833488776558
c0 (null): 0.6
LRT statistic: 502.6391867823797
Approx p-value (chi^2_1): 2.5336085331253253e-111


In [13]:
# Example: Bayesian posterior over c via a simple grid (standardized model)

x = stats.genhalflogistic.rvs(0.8, size=400, random_state=rng)
x_max = float(np.max(x))

# Feasible c range: (0, 1/x_max)
c_grid = np.linspace(1e-3, (1.0 / x_max) * 0.999, 800)

# Prior: Gamma(shape=k, scale=theta)
k, theta = 2.0, 1.0
log_prior = stats.gamma(a=k, scale=theta).logpdf(c_grid)

log_like = np.array([genhalflogistic_loglikelihood(x, c) for c in c_grid])
log_post_unnorm = log_prior + log_like

# Normalize in a stable way
log_post_unnorm -= np.max(log_post_unnorm)
post_unnorm = np.exp(log_post_unnorm)

# NumPy 2.0 renamed np.trapz to np.trapezoid and deprecated the old name.
# Prefer the new function when it exists, otherwise fall back to np.trapz.
_trapezoid = getattr(np, 'trapezoid', None) or np.trapz

# Approximate continuous normalization using trapezoidal rule
Z = _trapezoid(post_unnorm, c_grid)
post = post_unnorm / Z

# Posterior summaries: cumulative trapezoid sums give the posterior CDF on the grid.
cdf_post = np.cumsum((post[:-1] + post[1:]) / 2 * np.diff(c_grid))
cdf_post = np.concatenate([[0.0], cdf_post])

# Credible bounds come from inverting the gridded CDF by linear interpolation.
post_mean = float(_trapezoid(c_grid * post, c_grid))
ci_low = float(np.interp(0.025, cdf_post, c_grid))
ci_high = float(np.interp(0.975, cdf_post, c_grid))

print('posterior mean:', post_mean)
print('95% credible interval:', (ci_low, ci_high))

fig = go.Figure()
fig.add_trace(go.Scatter(x=c_grid, y=post, mode='lines', name='posterior density'))
fig.add_vline(x=post_mean, line_dash='dash', line_color='black', annotation_text='mean')
fig.add_vrect(x0=ci_low, x1=ci_high, fillcolor='gray', opacity=0.15, line_width=0)
fig.update_layout(
    title='Posterior over c (grid approximation)',
    xaxis_title='c',
    yaxis_title='density',
    width=900,
    height=420,
)
fig

posterior mean: 0.8007104667025451
95% credible interval: (0.7937395928868959, 0.8035088739427898)


In [14]:
# Example: Generating bounded synthetic data
# Suppose we want a bounded positive feature in [0, B].

B = 3.0
c = 1.0  # standardized upper bound is 1/c = 1

# Use scale=B so support becomes [0, B] (the standardized bound 1/c = 1 is multiplied by the scale).
# The draw uses the shared `rng`, so the printed min/max depend on cell run order.
x_bounded = stats.genhalflogistic.rvs(c, loc=0, scale=B, size=10_000, random_state=rng)

# Quick range check: both extremes should fall inside [0, B].
print('min/max:', x_bounded.min(), x_bounded.max())

fig = px.histogram(x_bounded, nbins=60, histnorm='probability density', title='Bounded synthetic feature in [0, B]')
fig.update_layout(width=900, height=420, xaxis_title='x')
fig

min/max: 6.972138548466678e-05 2.9999955881114526
