Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
405 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
from empirical_distribution import ECDF, monotone_fn_inverter, StepFunction | ||
from edgeworth import ExpandedNormal | ||
|
||
from statsmodels import NoseWrapper as Tester | ||
test = Tester().test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
from __future__ import division, print_function, absolute_import | ||
|
||
import warnings | ||
|
||
import numpy as np | ||
from numpy.polynomial.hermite_e import HermiteE | ||
from scipy.misc import factorial | ||
from scipy.stats import rv_continuous | ||
import scipy.special as special | ||
|
||
# TODO: | ||
# * actually solve (31) of Blinnikov & Moessner | ||
# * numerical stability: multiply factorials in logspace? | ||
# * ppf & friends: Cornish & Fisher series, or tabulate/solve | ||
|
||
|
||
_faa_di_bruno_cache = { | ||
1: [[(1, 1)]], | ||
2: [[(1, 2)], [(2, 1)]], | ||
3: [[(1, 3)], [(2, 1), (1, 1)], [(3, 1)]], | ||
4: [[(1, 4)], [(1, 2), (2, 1)], [(2, 2)], [(3, 1), (1, 1)], [(4, 1)]]} | ||
|
||
|
||
def _faa_di_bruno_partitions(n): | ||
""" Return all non-negative integer solutions of the diophantine equation:: | ||
n*k_n + ... + 2*k_2 + 1*k_1 = n (1) | ||
Parameters | ||
---------- | ||
n: int | ||
the r.h.s. of Eq. (1) | ||
Returns | ||
------- | ||
partitions: a list of solutions of (1). Each solution is itself | ||
a list of the form `[(m, k_m), ...]` for non-zero `k_m`. | ||
Notice that the index `m` is 1-based. | ||
Examples: | ||
--------- | ||
>>> _faa_di_bruno_partitions(2) | ||
[[(1, 2)], [(2, 1)]] | ||
>>> for p in faa_di_bruno_partitions(4): | ||
... assert 4 == sum(m * k for (m, k) in p) | ||
""" | ||
if n < 1: | ||
raise ValueError("Expected a positive integer; got %s instead" % n) | ||
try: | ||
return _faa_di_bruno_cache[n] | ||
except KeyError: | ||
# TODO: higher order terms | ||
# solve Eq. (31) from Blinninkov & Moessner here | ||
raise NotImplementedError('Higher order terms not yet implemented.') | ||
|
||
|
||
def cumulant_from_moments(momt, n): | ||
"""Compute n-th cumulant given moments. | ||
Parameters | ||
---------- | ||
momt: array_like | ||
`momt[j]` contains `(j+1)`-th moment. | ||
These can be raw moments around zero, or central moments | ||
(in which case, `momt[0]` == 0). | ||
n: integer | ||
which cumulant to calculate (must be >1) | ||
Returns | ||
------- | ||
kappa: float | ||
n-th cumulant. | ||
""" | ||
if n < 1: | ||
raise ValueError("Expected a positive integer. Got %s instead." % n) | ||
if len(momt) < n: | ||
raise ValueError("%s-th cumulant requires %s moments, " | ||
"only got %s." % (n, n, len(momt))) | ||
kappa = 0. | ||
for p in _faa_di_bruno_partitions(n): | ||
r = sum(k for (m, k) in p) | ||
term = (-1)**(r - 1) * factorial(r - 1) | ||
for (m, k) in p: | ||
term *= np.power(momt[m - 1] / factorial(m), k) / factorial(k) | ||
kappa += term | ||
kappa *= factorial(n) | ||
return kappa | ||
|
||
## copied from scipy.stats.distributions to avoid the overhead of | ||
## the public methods | ||
_norm_pdf_C = np.sqrt(2*np.pi) | ||
def _norm_pdf(x): | ||
return np.exp(-x**2/2.0) / _norm_pdf_C | ||
|
||
def _norm_cdf(x): | ||
return special.ndtr(x) | ||
|
||
def _norm_sf(x): | ||
return special.ndtr(-x) | ||
|
||
|
||
class ExpandedNormal(rv_continuous): | ||
"""Construct the Edgeworth expansion pdf given cumulants. | ||
Parameters | ||
---------- | ||
cum: array_like | ||
`cum[j]` contains `(j+1)`-th cumulant: cum[0] is the mean, | ||
cum[1] is the variance and so on. | ||
Notes | ||
----- | ||
This is actually an asymptotic rather than convergent series, hence | ||
higher orders of the expansion may or may not improve the result. | ||
In a strongly non-Gaussian case, it is possible that the density | ||
becomes negative, especially far out in the tails. | ||
Examples | ||
-------- | ||
Construct the 4th order expansion for the chi-square distribution using | ||
the known values of the cumulants: | ||
>>> from scipy.misc import factorial | ||
>>> df = 12 | ||
>>> chi2_c = [2**(j-1) * factorial(j-1) * df for j in range(1, 5)] | ||
>>> edgw_chi2 = ExpandedNormal(chi2_c, name='edgw_chi2', momtype=0) | ||
Calculate several moments: | ||
>>> m, v = edgw_chi2.stats(moments='mv') | ||
>>> np.allclose([m, v], [df, 2 * df]) | ||
True | ||
Plot the density function: | ||
>>> mu, sigma = df, np.sqrt(2*df) | ||
>>> x = np.linspace(mu - 3*sigma, mu + 3*sigma) | ||
>>> plt.plot(x, stats.chi2.pdf(x, df=df), 'g-', lw=4, alpha=0.5) | ||
>>> plt.plot(x, stats.norm.pdf(x, mu, sigma), 'b--', lw=4, alpha=0.5) | ||
>>> plt.plot(x, edgw_chi2.pdf(x), 'r-', lw=2) | ||
>>> plt.show() | ||
References | ||
---------- | ||
.. [1] E.A. Cornish and R.A. Fisher, Moments and cumulants in the | ||
specification of distributions, Revue de l'Institut Internat. | ||
de Statistique. 5: 307 (1938), reprinted in | ||
R.A. Fisher, Contributions to Mathematical Statistics. Wiley, 1950. | ||
.. [2] http://en.wikipedia.org/wiki/Edgeworth_series | ||
.. [3] S. Blinnikov and R. Moessner, Expansions for nearly Gaussian | ||
distributions, Astron. Astrophys. Suppl. Ser. 130, 193 (1998) | ||
""" | ||
def __init__(self, cum, name='Edgeworth expanded normal', **kwds): | ||
if len(cum) < 2: | ||
raise ValueError("At least two cumulants are needed.") | ||
self._coef, self._mu, self._sigma = self._compute_coefs_pdf(cum) | ||
self._herm_pdf = HermiteE(self._coef) | ||
if self._coef.size > 2: | ||
self._herm_cdf = HermiteE(-self._coef[1:]) | ||
else: | ||
self._herm_cdf = lambda x: 0. | ||
|
||
# warn if pdf(x) < 0 for some values of x within 4 sigma | ||
r = np.real_if_close(self._herm_pdf.roots()) | ||
r = (r - self._mu) / self._sigma | ||
if r[(np.imag(r) == 0) & (np.abs(r) < 4)].any(): | ||
mesg = 'PDF has zeros at %s ' % r | ||
warnings.warn(mesg, UserWarning) | ||
|
||
kwds.update({'name': name, | ||
'momtype': 0}) # use pdf, not ppf in self.moment() | ||
super(ExpandedNormal, self).__init__(**kwds) | ||
|
||
def _pdf(self, x): | ||
y = (x - self._mu) / self._sigma | ||
return self._herm_pdf(y) * _norm_pdf(y) / self._sigma | ||
|
||
def _cdf(self, x): | ||
y = (x - self._mu) / self._sigma | ||
return (_norm_cdf(y) + | ||
self._herm_cdf(y) * _norm_pdf(y)) | ||
|
||
def _sf(self, x): | ||
y = (x - self._mu) / self._sigma | ||
return (_norm_sf(y) - | ||
self._herm_cdf(y) * _norm_pdf(y)) | ||
|
||
def _compute_coefs_pdf(self, cum): | ||
# scale cumulants by \sigma | ||
mu, sigma = cum[0], np.sqrt(cum[1]) | ||
lam = np.asarray(cum) | ||
for j, l in enumerate(lam): | ||
lam[j] /= cum[1]**j | ||
|
||
coef = np.zeros(lam.size * 3 - 5) | ||
coef[0] = 1. | ||
for s in range(lam.size - 2): | ||
for p in _faa_di_bruno_partitions(s+1): | ||
term = sigma**(s+1) | ||
for (m, k) in p: | ||
term *= np.power(lam[m+1] / factorial(m+2), k) / factorial(k) | ||
r = sum(k for (m, k) in p) | ||
coef[s + 1 + 2*r] += term | ||
return coef, mu, sigma | ||
|
||
|
||
if __name__ == "__main__": | ||
cum =[1, 1, 1, 1] | ||
en = ExpandedNormal(cum) | ||
|
Oops, something went wrong.