In [1]:
import math
import os
import sys

import numpy as np

import scipy
from scipy import special, stats
from scipy.stats import kstest, norminvgauss

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Plotting defaults: light template; the renderer can be overridden through the
# PLOTLY_RENDERER environment variable and falls back to "notebook".
pio.templates.default = "plotly_white"
pio.renderers.default = os.environ.get("PLOTLY_RENDERER", "notebook")

# One seeded generator for the stochastic cells that take `rng`.
SEED = 7
rng = np.random.default_rng(SEED)

np.set_printoptions(precision=6, suppress=True)

# Record interpreter and library versions alongside the results.
print("python", sys.version.split()[0])
print("numpy ", np.__version__)
print("scipy ", scipy.__version__)
print("plotly", plotly.__version__)


python 3.12.9
numpy  1.26.2
scipy  1.15.0
plotly 6.5.2


In [2]:
def nig_validate(alpha: float, beta: float, delta: float) -> None:
    '''Check that (alpha, beta, delta) are admissible NIG parameters.

    Requires alpha > 0, delta > 0, |beta| < alpha, and finite alpha and delta.
    NaN inputs fail the comparisons and are therefore rejected too.

    Raises
    ------
    ValueError
        If any of the constraints is violated.
    '''
    if not (alpha > 0):
        raise ValueError("alpha must be > 0")
    if not (delta > 0):
        raise ValueError("delta must be > 0")
    if not (abs(beta) < alpha):
        raise ValueError("need |beta| < alpha so gamma = sqrt(alpha^2 - beta^2) is real")
    # +inf satisfies every comparison above, yet makes gamma infinite and the
    # density/moment formulas evaluate to inf or nan, so reject it explicitly.
    # (beta is already finite here whenever alpha is, because |beta| < alpha.)
    if not (math.isfinite(alpha) and math.isfinite(delta)):
        raise ValueError("alpha and delta must be finite")


def nig_gamma(alpha: float, beta: float) -> float:
    '''Return gamma = sqrt(alpha^2 - beta^2) after validating alpha and beta.

    delta plays no role in gamma, so a placeholder of 1.0 is passed to the validator.
    '''
    nig_validate(alpha, beta, delta=1.0)
    gamma_squared = alpha * alpha - beta * beta
    return float(math.sqrt(gamma_squared))


def nig_logpdf(x: np.ndarray, alpha: float, beta: float, delta: float, mu: float) -> np.ndarray:
    '''Evaluate the NIG log-density at x.

    The Bessel factor is taken from the exponentially scaled function
    scipy.special.kve(1, z) = exp(z) * K_1(z), i.e. log K_1(z) = log(kve(1, z)) - z,
    which avoids the underflow of K_1(z) itself for large z.

    Raises ValueError (via nig_validate) for inadmissible parameters.
    '''
    nig_validate(alpha, beta, delta)
    points = np.asarray(x, dtype=float)

    centered = points - mu
    radius = np.sqrt(delta * delta + centered * centered)
    bessel_arg = alpha * radius
    gamma = math.sqrt(alpha * alpha - beta * beta)

    # x-independent part of the normaliser: log(alpha * delta / pi)
    log_norm = math.log(alpha * delta) - math.log(math.pi)
    # log K1(z) = log(kve(1, z)) - z
    log_bessel = np.log(special.kve(1.0, bessel_arg)) - bessel_arg

    return log_norm - np.log(radius) + delta * gamma + beta * centered + log_bessel


def nig_pdf(x: np.ndarray, alpha: float, beta: float, delta: float, mu: float) -> np.ndarray:
    '''NIG density at x, obtained by exponentiating nig_logpdf.'''
    log_density = nig_logpdf(x, alpha, beta, delta, mu)
    return np.exp(log_density)


def to_scipy_params(alpha: float, beta: float, delta: float, mu: float) -> tuple[float, float, float, float]:
    '''Convert (alpha, beta, delta, mu) into SciPy's (a, b, loc, scale).

    The mapping is a = alpha * delta, b = beta * delta, loc = mu, scale = delta.
    Raises ValueError (via nig_validate) for inadmissible parameters.
    '''
    nig_validate(alpha, beta, delta)
    scale = delta
    return float(alpha * scale), float(beta * scale), float(mu), float(scale)


def from_scipy_params(a: float, b: float, loc: float, scale: float) -> tuple[float, float, float, float]:
    '''Convert SciPy's (a, b, loc, scale) into (alpha, beta, delta, mu).

    The mapping is alpha = a / scale, beta = b / scale, delta = scale, mu = loc.
    Raises ValueError when scale is not > 0 or the resulting parameters fail nig_validate.
    '''
    if not (scale > 0):
        raise ValueError("scale must be > 0")
    alpha, beta = a / scale, b / scale
    nig_validate(alpha, beta, scale)
    return float(alpha), float(beta), float(scale), float(loc)


# Sanity check: the hand-written PDF must agree with SciPy's norminvgauss once the
# parameters are mapped through to_scipy_params.
alpha, beta, delta, mu = 2.5, 0.8, 1.2, -0.3
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = norminvgauss(a, b, loc=loc, scale=scale)

# Compare on a grid spanning the 0.1% to 99.9% quantiles.
xg = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 400)
max_abs_diff = float(np.max(np.abs(rv.pdf(xg) - nig_pdf(xg, alpha, beta, delta, mu))))

# Fail loudly if the two implementations drift apart instead of only displaying the gap.
assert max_abs_diff < 1e-10, f"NIG pdf disagrees with SciPy: max |diff| = {max_abs_diff:.3e}"
max_abs_diff


7.771561172376096e-16

In [3]:
def nig_mean(alpha: float, beta: float, delta: float, mu: float) -> float:
    '''Mean of NIG(alpha, beta, delta, mu): mu + delta * beta / gamma.'''
    nig_validate(alpha, beta, delta)
    gamma_squared = alpha * alpha - beta * beta
    offset = delta * beta / math.sqrt(gamma_squared)
    return float(mu + offset)


def nig_var(alpha: float, beta: float, delta: float) -> float:
    '''Variance of the NIG distribution: delta * alpha^2 / gamma^3.'''
    nig_validate(alpha, beta, delta)
    gamma = math.sqrt(alpha * alpha - beta * beta)
    numerator = delta * alpha * alpha
    return float(numerator / gamma**3)


def nig_skew(alpha: float, beta: float, delta: float) -> float:
    '''Skewness of the NIG distribution: 3 * beta / (alpha * sqrt(delta * gamma)).'''
    nig_validate(alpha, beta, delta)
    gamma = math.sqrt(alpha * alpha - beta * beta)
    denominator = alpha * math.sqrt(delta * gamma)
    return float(3.0 * beta / denominator)


def nig_excess_kurt(alpha: float, beta: float, delta: float) -> float:
    '''Excess kurtosis of the NIG distribution.

    Computed as 3 * (1 + 4 * beta^2 / alpha^2) / (delta * gamma).
    '''
    nig_validate(alpha, beta, delta)
    gamma = math.sqrt(alpha * alpha - beta * beta)
    asymmetry_term = 4.0 * (beta * beta) / (alpha * alpha)
    return float(3.0 * (1.0 + asymmetry_term) / (delta * gamma))


def nig_mgf(t: np.ndarray, alpha: float, beta: float, delta: float, mu: float) -> np.ndarray:
    '''Moment generating function on an array of real t.

    Entries where alpha^2 - (beta + t)^2 is not strictly positive are returned as nan.
    '''
    nig_validate(alpha, beta, delta)
    t_vals = np.asarray(t, dtype=float)
    gamma = math.sqrt(alpha * alpha - beta * beta)

    radicand = alpha * alpha - (beta + t_vals) ** 2
    valid = radicand > 0

    # Start from all-nan and fill in only the entries inside the domain.
    result = np.full_like(t_vals, np.nan, dtype=float)
    exponent = mu * t_vals[valid] + delta * (gamma - np.sqrt(radicand[valid]))
    result[valid] = np.exp(exponent)
    return result


def nig_cf(u: np.ndarray, alpha: float, beta: float, delta: float, mu: float) -> np.ndarray:
    '''Characteristic function evaluated at real u; the result is complex.'''
    nig_validate(alpha, beta, delta)
    freq = np.asarray(u, dtype=float)
    gamma = math.sqrt(alpha * alpha - beta * beta)

    shifted = beta + 1j * freq
    root = np.sqrt(alpha * alpha - shifted ** 2)
    exponent = 1j * mu * freq + delta * (gamma - root)
    return np.exp(exponent)


# Closed-form moments for one example parameter set.
alpha, beta, delta, mu = 2.5, 0.8, 1.2, -0.3
dict(
    mean=nig_mean(alpha, beta, delta, mu),
    var=nig_var(alpha, beta, delta),
    skew=nig_skew(alpha, beta, delta),
    excess_kurt=nig_excess_kurt(alpha, beta, delta),
)


{'mean': 0.10531231768391913,
 'var': 0.5644389450812155,
 'skew': 0.569429411031021,
 'excess_kurt': 1.4878339661647202}

In [4]:
# Entropy (Monte Carlo estimate): H(X) = -E[log f(X)], estimated as minus the
# average log-density over draws from the distribution itself.
# Draws come from SciPy here; the NumPy-only sampler is defined in a later cell.
alpha, beta, delta, mu = 2.0, 0.5, 1.3, -0.2
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = norminvgauss(a, b, loc=loc, scale=scale)

x = rv.rvs(size=50_000, random_state=rng)
entropy_mc = float(-np.mean(nig_logpdf(x, alpha, beta, delta, mu)))
entropy_mc


1.2272073243850334

In [5]:
def nig_loglik(alpha: float, beta: float, delta: float, mu: float, x: np.ndarray) -> float:
    '''Log-likelihood of the sample x: the sum of nig_logpdf over its entries.'''
    pointwise = nig_logpdf(x, alpha, beta, delta, mu)
    return float(np.sum(pointwise))


# Log-likelihood at the generating parameters versus a misspecified beta.
alpha, beta, delta, mu = 2.5, 0.8, 1.2, -0.3
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = stats.norminvgauss(a, b, loc=loc, scale=scale)

x = rv.rvs(size=2_000, random_state=rng)

ll_true = nig_loglik(alpha, beta, delta, mu, x)
# Same data, with beta shrunk to 70% of the generating value.
ll_alt = nig_loglik(alpha, beta * 0.7, delta, mu, x)

dict(loglik_true=ll_true, loglik_alt=ll_alt, diff=ll_true - ll_alt)


{'loglik_true': -2185.389754329737,
 'loglik_alt': -2212.5723866013323,
 'diff': 27.18263227159514}

In [6]:
def sample_invgauss_msh(size: int, nu: float, lam: float, rng: np.random.Generator) -> np.ndarray:
    '''Sample IG(nu, lam) using the Michael–Schucany–Haas method.

    Parameterization: mean = nu, shape = lam.

    With y the square of a standard normal draw, the method picks between the two
    roots of a quadratic whose product is nu^2. The smaller root is computed here
    as nu^2 / (larger root) rather than by subtracting two nearly equal large
    numbers; the subtractive form loses all precision when nu / lam is large and
    can produce zero or negative "samples".

    Parameters
    ----------
    size : passed through to rng.normal / rng.uniform.
    nu : mean of the inverse Gaussian, must be > 0.
    lam : shape of the inverse Gaussian, must be > 0.
    rng : generator providing normal(size=...) and uniform(size=...).

    Raises
    ------
    ValueError
        If nu or lam is not > 0.
    '''
    if not (nu > 0):
        raise ValueError("nu must be > 0")
    if not (lam > 0):
        raise ValueError("lam must be > 0")

    v = rng.normal(size=size)
    y = v * v

    nu2 = nu * nu
    half_ratio = nu / (2.0 * lam)
    disc = np.sqrt(4.0 * nu * lam * y + nu2 * y * y)

    # Larger root is a sum of non-negative terms (>= nu), so no cancellation occurs.
    x_large = nu + half_ratio * (nu * y + disc)
    # Smaller root from the product of roots: x_small * x_large = nu^2.
    x_small = nu2 / x_large

    # Accept the smaller root with probability nu / (nu + x_small), else take the larger.
    u = rng.uniform(size=size)
    return np.where(u <= nu / (nu + x_small), x_small, x_large)


def sample_nig(size: int, alpha: float, beta: float, delta: float, mu: float, rng: np.random.Generator) -> np.ndarray:
    '''Draw NIG variates as a normal mean-variance mixture.

    A mixing variable V is drawn from sample_invgauss_msh with mean delta / gamma
    and shape delta^2; the result is mu + beta * V + sqrt(V) * Z with Z standard normal.
    Raises ValueError (via nig_validate) for inadmissible parameters.
    '''
    nig_validate(alpha, beta, delta)
    gamma = math.sqrt(alpha * alpha - beta * beta)

    # Generator consumption order: mixing draws first, then the normal noise.
    mixing = sample_invgauss_msh(size=size, nu=delta / gamma, lam=delta * delta, rng=rng)
    noise = rng.normal(size=size)
    return mu + beta * mixing + np.sqrt(mixing) * noise


# Simulation check: empirical mean/variance from sample_nig next to the closed forms.
alpha, beta, delta, mu = 2.0, 0.5, 1.3, -0.2
x = sample_nig(200_000, alpha, beta, delta, mu, rng)

dict(
    sample_mean=float(x.mean()),
    theory_mean=nig_mean(alpha, beta, delta, mu),
    sample_var=float(x.var(ddof=0)),
    theory_var=nig_var(alpha, beta, delta),
)


{'sample_mean': 0.13341510615123034,
 'theory_mean': 0.13565855667130944,
 'sample_var': 0.7147789033673897,
 'theory_var': 0.7160715875654602}

In [7]:
def plot_nig_pdfs(param_sets: list[dict], q_low: float = 0.001, q_high: float = 0.999) -> go.Figure:
    '''Overlay NIG density curves, one line trace per parameter set.

    Parameters
    ----------
    param_sets : list of dicts, each with keys "alpha", "beta", "delta", "mu".
    q_low, q_high : quantile levels; each curve is drawn on a 600-point grid
        between these two quantiles of its own distribution (from SciPy's ppf).

    Returns
    -------
    go.Figure with the traces added and title/axis labels set.
    '''
    fig = go.Figure()

    for ps in param_sets:
        alpha, beta, delta, mu = ps["alpha"], ps["beta"], ps["delta"], ps["mu"]
        a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
        # SciPy is used only for the quantile range; the density is nig_pdf.
        rv = norminvgauss(a, b, loc=loc, scale=scale)

        xs = np.linspace(rv.ppf(q_low), rv.ppf(q_high), 600)
        ys = nig_pdf(xs, alpha, beta, delta, mu)

        label = f"α={alpha:g}, β={beta:g}, δ={delta:g}, μ={mu:g}"
        fig.add_trace(go.Scatter(x=xs, y=ys, mode="lines", name=label))

    fig.update_layout(title="Normal Inverse Gaussian PDFs", xaxis_title="x", yaxis_title="f(x)")
    return fig


# Four contrasting shapes: two symmetric cases with different alpha, and a
# mirrored pair that differs only in the sign of beta.
param_sets = [
    dict(alpha=3.0, beta=0.0, delta=1.0, mu=0.0),  # symmetric, lighter tails
    dict(alpha=1.6, beta=0.0, delta=1.0, mu=0.0),  # symmetric, heavier tails
    dict(alpha=2.0, beta=0.7, delta=1.0, mu=0.0),  # right-skew
    dict(alpha=2.0, beta=-0.7, delta=1.0, mu=0.0),  # left-skew
]

fig = plot_nig_pdfs(param_sets)
fig.show()


In [8]:
# PDF + CDF for one parameter set (both curves evaluated with SciPy)
alpha, beta, delta, mu = 2.0, 0.7, 1.2, -0.3
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = norminvgauss(a, b, loc=loc, scale=scale)

# Shared x-grid between the 0.1% and 99.9% quantiles.
xs = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 700)

fig_pdf = go.Figure()
fig_pdf.add_trace(go.Scatter(x=xs, y=rv.pdf(xs), mode="lines", name="pdf"))
fig_pdf.update_layout(title="NIG PDF", xaxis_title="x", yaxis_title="f(x)")

fig_cdf = go.Figure()
fig_cdf.add_trace(go.Scatter(x=xs, y=rv.cdf(xs), mode="lines", name="cdf"))
fig_cdf.update_layout(title="NIG CDF", xaxis_title="x", yaxis_title="F(x)")

fig_pdf.show()
fig_cdf.show()


In [9]:
# Histogram of sample_nig draws with SciPy's density overlaid.
alpha, beta, delta, mu = 2.0, 0.7, 1.2, -0.3
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = stats.norminvgauss(a, b, loc=loc, scale=scale)

n = 80_000
x = sample_nig(n, alpha, beta, delta, mu, rng)

# Grid for the reference curve, between the 0.1% and 99.9% quantiles.
xs = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 600)

fig = px.histogram(
    x,
    title=f"Monte Carlo samples vs PDF (n={n:,})",
    histnorm="probability density",
    nbins=90,
    labels={"value": "x"},
)
fig.add_trace(
    go.Scatter(x=xs, y=rv.pdf(xs), mode="lines", name="theoretical pdf")
)
fig.update_layout(yaxis_title="density")
fig.show()


In [10]:
# Empirical CDF of sample_nig draws against SciPy's CDF.
alpha, beta, delta, mu = 2.0, 0.7, 1.2, -0.3
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)
rv = stats.norminvgauss(a, b, loc=loc, scale=scale)

n = 30_000
x = sample_nig(n, alpha, beta, delta, mu, rng)

# ECDF: the i-th smallest draw is paired with the level i / n.
xs = np.sort(x)
ys = np.arange(1, n + 1) / n

xg = np.linspace(rv.ppf(0.001), rv.ppf(0.999), 600)

fig = go.Figure()
for trace_x, trace_y, trace_name in (
    (xs, ys, "empirical CDF"),
    (xg, rv.cdf(xg), "theoretical CDF"),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode="lines", name=trace_name))
fig.update_layout(title="Empirical CDF vs theoretical CDF", xaxis_title="x", yaxis_title="F(x)")
fig.show()


In [11]:
# SciPy's frozen-distribution API on one parameter set.
alpha, beta, delta, mu = 2.2, 0.6, 1.4, -0.1
a, b, loc, scale = to_scipy_params(alpha, beta, delta, mu)

rv = norminvgauss(a, b, loc=loc, scale=scale)

# Basic API
x0 = np.array([-2.0, 0.0, 1.0])
out = {
    "pdf(x0)": rv.pdf(x0),
    "cdf(x0)": rv.cdf(x0),
    "mean": rv.mean(),
    "var": rv.var(),
}
out


{'pdf(x0)': array([0.008404, 0.519158, 0.28318 ]),
 'cdf(x0)': array([0.002845, 0.371315, 0.820179]),
 'mean': 0.29686269665968856,
 'var': 0.7145890817830703}

In [12]:
# Fit example: simulate from known parameters, then re-estimate them with
# norminvgauss.fit and show truth and estimate side by side (SciPy parameterization).
alpha, beta, delta, mu = 2.0, 0.5, 1.3, -0.2
a_true, b_true, loc_true, scale_true = to_scipy_params(alpha, beta, delta, mu)

rv_true = norminvgauss(a_true, b_true, loc=loc_true, scale=scale_true)
data = rv_true.rvs(size=3_000, random_state=rng)

a_hat, b_hat, loc_hat, scale_hat = norminvgauss.fit(data)

dict(
    true=(a_true, b_true, loc_true, scale_true),
    hat=(a_hat, b_hat, loc_hat, scale_hat),
)


{'true': (2.6, 0.65, -0.2, 1.3),
 'hat': (3.080842815399469,
  0.6938365831228384,
  -0.21570215913180396,
  1.4409291715704364)}

In [13]:
# A) Parametric bootstrap KS test demo (small B for speed)
# We'll generate data from a known NIG, fit it, and test goodness-of-fit.
# The KS statistic is computed against a CDF fitted to the same data, so its
# reference distribution is built by repeating "simulate, refit, recompute".

# Separate seeded generator: this cell does not draw from the notebook-level `rng`.
rng_local = np.random.default_rng(123)

alpha, beta, delta, mu = 2.0, 0.5, 1.3, -0.2
a_true, b_true, loc_true, scale_true = to_scipy_params(alpha, beta, delta, mu)
rv_true = norminvgauss(a_true, b_true, loc=loc_true, scale=scale_true)

n = 800
x = rv_true.rvs(size=n, random_state=rng_local)

# Fit the model
a_hat, b_hat, loc_hat, scale_hat = norminvgauss.fit(x)
rv_hat = norminvgauss(a_hat, b_hat, loc=loc_hat, scale=scale_hat)

# KS statistic on observed data against fitted CDF
ks_obs = kstest(x, rv_hat.cdf).statistic

# Parametric bootstrap: simulate from fitted, refit, recompute KS
B = 80
ks_boot = np.empty(B)
for i in range(B):
    xb = rv_hat.rvs(size=n, random_state=rng_local)
    a_b, b_b, loc_b, scale_b = norminvgauss.fit(xb)
    rv_b = norminvgauss(a_b, b_b, loc=loc_b, scale=scale_b)
    ks_boot[i] = kstest(xb, rv_b.cdf).statistic

# Share of bootstrap statistics at least as large as the observed one.
p_boot = float(np.mean(ks_boot >= ks_obs))

{"ks_obs": float(ks_obs), "p_boot": p_boot, "B": B}


{'ks_obs': 0.01620866142668531, 'p_boot': 0.75, 'B': 80}

In [14]:
# C) Generative modeling: random walk with NIG vs normal innovations (matched mean/var)

T = 300
alpha, beta, delta, mu = 2.0, 0.5, 1.0, 0.0

# NIG innovations are drawn first, then the normal ones, from the same generator.
eps_nig = sample_nig(T, alpha, beta, delta, mu, rng)

# Normal innovations share the NIG's theoretical mean and variance.
m = nig_mean(alpha, beta, delta, mu)
v = nig_var(alpha, beta, delta)
eps_norm = rng.normal(loc=m, scale=math.sqrt(v), size=T)

# Paths are cumulative sums of the innovations, started at s0.
s0 = 0.0
s_nig = s0 + np.cumsum(eps_nig)
s_norm = s0 + np.cumsum(eps_norm)

fig = go.Figure()
for path, path_name in (
    (s_nig, "NIG random walk"),
    (s_norm, "Normal random walk (matched mean/var)"),
):
    fig.add_trace(go.Scatter(y=path, mode="lines", name=path_name))
fig.update_layout(title="Random walk paths", xaxis_title="t", yaxis_title="S_t")
fig.show()

# Compare tail behavior of innovations
qs = [0.001, 0.01, 0.5, 0.99, 0.999]
dict(
    quantiles=qs,
    nig=np.quantile(eps_nig, qs),
    normal=np.quantile(eps_norm, qs),
)


{'quantiles': [0.001, 0.01, 0.5, 0.99, 0.999],
 'nig': array([-2.21934 , -1.787824,  0.211728,  2.341481,  2.964354]),
 'normal': array([-2.208528, -1.544096,  0.255063,  1.975488,  2.075406])}