Fix variance and kurtosis functions for the Tukey Lambda distribution (ticket 1545) #106

Closed
wants to merge 2 commits
202 changes: 202 additions & 0 deletions scipy/stats/_tukeylambda_stats.py
@@ -0,0 +1,202 @@


import numpy as np
from numpy import poly1d
from scipy.special import beta


# The following code was used to generate the Pade coefficients for the
# Tukey Lambda variance function.  Version 0.17 of mpmath was used.
#---------------------------------------------------------------------------
# import mpmath as mp
#
# mp.mp.dps = 60
#
# one = mp.mpf(1)
# two = mp.mpf(2)
# three = mp.mpf(3)
#
# def mpvar(lam):
#     if lam == 0:
#         v = mp.pi**2 / three
#     else:
#         v = (two / lam**2) * (one / (one + two*lam) -
#                               mp.beta(lam + one, lam + one))
#     return v
#
# t = mp.taylor(mpvar, 0, 8)
# p, q = mp.pade(t, 4, 4)
# print "p =", [mp.fp.mpf(c) for c in p]
# print "q =", [mp.fp.mpf(c) for c in q]
#---------------------------------------------------------------------------

# Pade coefficients for the Tukey Lambda variance function.
_tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127,
                       -0.5370742306855439, 0.17292046290190008,
                       -0.02371146284628187]
_tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124,
                       1.7660926747377275, 0.2643989311168465]

# numpy.poly1d instances for the numerator and denominator of the
# Pade approximation to the Tukey Lambda variance.
_tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1])
_tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1])
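# Note: at lambda = 0 the [4,4] Pade approximant reduces to
# _tukeylambda_var_pc[0] / _tukeylambda_var_qc[0] = 3.289868... = pi**2/3,
# the variance of the logistic distribution, so the removable singularity
# of the closed-form expression is handled cleanly.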


def tukeylambda_variance(lam):
    """Variance of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the variance.

    Returns
    -------
    v : ndarray
        The variance.  For lam < -0.5, the variance is not defined, so
        np.nan is returned.  For lam = -0.5, np.inf is returned.

    Notes
    -----
    In an interval around lambda=0, this function uses the [4,4] Pade
    approximation to compute the variance.  Otherwise it uses the standard
    formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution).  The
    Pade approximation is used because the standard formula has a removable
    discontinuity at lambda = 0, and does not produce accurate numerical
    results near lambda = 0.
    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation.
    threshold = 0.075

    # Play games with masks to implement the conditional evaluation of
    # the distribution.
    # lambda < -0.5:  var = nan
    low_mask = lam < -0.5
    # lambda == -0.5: var = inf
    neghalf_mask = lam == -0.5
    # abs(lambda) < threshold:  use Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | neghalf_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    v = np.empty_like(lam)
    v[low_mask] = np.nan
    v[neghalf_mask] = np.inf
    if small.size > 0:
        # Use the Pade approximation near lambda = 0.
        v[small_mask] = _tukeylambda_var_p(small) / _tukeylambda_var_q(small)
    if reg.size > 0:
        v[reg_mask] = (2.0 / reg**2) * (1.0 / (1.0 + 2 * reg) -
                                        beta(reg + 1, reg + 1))
    v.shape = shp
    return v


# The following code was used to generate the Pade coefficients for the
# Tukey Lambda kurtosis function.  Version 0.17 of mpmath was used.
#---------------------------------------------------------------------------
# import mpmath as mp
#
# mp.mp.dps = 60
#
# one = mp.mpf(1)
# two = mp.mpf(2)
# three = mp.mpf(3)
# four = mp.mpf(4)
#
# def mpkurt(lam):
#     if lam == 0:
#         k = mp.mpf(6)/5
#     else:
#         numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) +
#                  three*mp.beta(two*lam+one, two*lam+one))
#         denom = two*(one/(two*lam+one) - mp.beta(lam+one, lam+one))**2
#         k = numer / denom - three
#     return k
#
# # There is a bug in mpmath 0.17: when we use the 'method' keyword of the
# # taylor function and we request a degree 9 Taylor polynomial, we actually
# # get degree 8.
# t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01)
# t = [mp.chop(c, tol=1e-15) for c in t]
# p, q = mp.pade(t, 4, 4)
# print "p =", [mp.fp.mpf(c) for c in p]
# print "q =", [mp.fp.mpf(c) for c in q]
#---------------------------------------------------------------------------

# Pade coefficients for the Tukey Lambda kurtosis function.
_tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077,
                        0.20601184383406815, 4.59796302262789]
_tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842,
                        0.43075235247853005, -2.789746758009912]

# numpy.poly1d instances for the numerator and denominator of the
# Pade approximation to the Tukey Lambda kurtosis.
_tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1])
_tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1])
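# Note: at lambda = 0 the approximant reduces to
# _tukeylambda_kurt_pc[0] / _tukeylambda_kurt_qc[0] = 1.2 = 6/5, the excess
# kurtosis of the logistic distribution (see mpkurt above).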


def tukeylambda_kurtosis(lam):
    """Kurtosis of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the kurtosis.

    Returns
    -------
    k : ndarray
        The (excess) kurtosis.  For lam < -0.25, the kurtosis is not
        defined, so np.nan is returned.  For lam = -0.25, np.inf is
        returned.

    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation.
    threshold = 0.055

    # Use masks to implement the conditional evaluation of the kurtosis.
    # lambda < -0.25:  kurtosis = nan
    low_mask = lam < -0.25
    # lambda == -0.25: kurtosis = inf
    negqrtr_mask = lam == -0.25
    # lambda near 0:  use Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | negqrtr_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    k = np.empty_like(lam)
    k[low_mask] = np.nan
    k[negqrtr_mask] = np.inf
    if small.size > 0:
        k[small_mask] = _tukeylambda_kurt_p(small) / _tukeylambda_kurt_q(small)
    if reg.size > 0:
        numer = (1.0 / (4 * reg + 1) - 4 * beta(3 * reg + 1, reg + 1) +
                 3 * beta(2 * reg + 1, 2 * reg + 1))
        denom = 2 * (1.0/(2 * reg + 1) - beta(reg + 1, reg + 1))**2
        k[reg_mask] = numer / denom - 3

    # The return value will be a numpy array; resetting the shape ensures that
    # if `lam` was a scalar, the return value is a 0-d array.
    k.shape = shp
    return k
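Reviewer note: a minimal sketch (not part of the patch) of why the [4,4] Pade approximation is used near lambda = 0. Assuming the patch is applied so that scipy.stats._tukeylambda_stats is importable, the naive closed-form variance loses precision through cancellation as lambda approaches 0, while the new helper stays close to the logistic limit pi**2/3.

import numpy as np
from scipy.special import beta
from scipy.stats._tukeylambda_stats import tukeylambda_variance

# Naive evaluation of var(lam) = (2/lam**2) * (1/(1 + 2*lam) - B(lam+1, lam+1))
# suffers catastrophic cancellation for small lam; the Pade-based helper does not.
for lam in [1e-3, 1e-6, 1e-9]:
    naive = (2.0 / lam**2) * (1.0 / (1.0 + 2 * lam) - beta(lam + 1, lam + 1))
    pade = float(tukeylambda_variance(lam))
    print("lam=%g  naive=%.12g  pade=%.12g  limit=%.12g" %
          (lam, naive, pade, np.pi**2 / 3))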
19 changes: 9 additions & 10 deletions scipy/stats/distributions.py
@@ -28,6 +28,8 @@
import numpy.random as mtrand
from numpy import flatnonzero as nonzero
import vonmises_cython
from _tukeylambda_stats import tukeylambda_variance as _tlvar, \
                               tukeylambda_kurtosis as _tlkurt

__all__ = [
    'rv_continuous',
@@ -5102,33 +5104,30 @@ class tukeylambda_gen(rv_continuous):
    def _argcheck(self, lam):
        # lam in RR.
        return np.ones(np.shape(lam), dtype=bool)

    def _pdf(self, x, lam):
        Fx = arr(special.tklmbda(x,lam))
        Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0)
        Px = 1.0/arr(Px)
        return where((lam <= 0) | (abs(x) < 1.0/arr(lam)), Px, 0.0)

    def _cdf(self, x, lam):
        return special.tklmbda(x, lam)

    def _ppf(self, q, lam):
        q = q*1.0
        vals1 = (q**lam - (1-q)**lam)/lam
        vals2 = log(q/(1-q))
        return where((lam == 0)&(q==q), vals2, vals1)

    def _stats(self, lam):
        mu2 = 2*gam(lam+1.5)-lam*pow(4,-lam)*sqrt(pi)*gam(lam)*(1-2*lam)
        mu2 /= lam*lam*(1+2*lam)*gam(1+1.5)
        mu4 = 3*gam(lam)*gam(lam+0.5)*pow(2,-2*lam) / lam**3 / gam(2*lam+1.5)
        mu4 += 2.0/lam**4 / (1+4*lam)
        mu4 -= 2*sqrt(3)*gam(lam)*pow(2,-6*lam)*pow(3,3*lam) * \
               gam(lam+1.0/3)*gam(lam+2.0/3) / (lam**3.0 * gam(2*lam+1.5) * \
               gam(lam+0.5))
        g2 = mu4 / mu2 / mu2 - 3.0

        return 0, mu2, 0, g2
        return 0, _tlvar(lam), 0, _tlkurt(lam)

    def _entropy(self, lam):
        def integ(p):
            return log(pow(p,lam-1)+pow(1-p,lam-1))
        return integrate.quad(integ,0,1)[0]

tukeylambda = tukeylambda_gen(name='tukeylambda', shapes="lam")


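Reviewer note: a small sanity check (illustrative only, not part of the patch) that the public distribution object now routes through the new helpers via _stats. lambda = 1 corresponds to the uniform distribution on (-1, 1), so the exact moments are known.

import numpy as np
from scipy import stats

# With the patch applied, rv_continuous.stats() calls tukeylambda_gen._stats,
# which delegates to tukeylambda_variance and tukeylambda_kurtosis.
m, v, s, k = stats.tukeylambda.stats(1.0, moments='mvsk')
assert np.allclose([m, v, s, k], [0.0, 1.0 / 3, 0.0, -1.2])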
22 changes: 21 additions & 1 deletion scipy/stats/tests/test_distributions.py
@@ -791,6 +791,26 @@ def test_regression_ticket_1530():
    assert_almost_equal(params, expected, decimal=1)


def test_tukeylambda_stats_ticket_1545():
    """Some tests for the variance and kurtosis of the Tukey Lambda distr."""

    # See test_tukeylambda_stats.py for more tests.

    mv = stats.tukeylambda.stats(0, moments='mvsk')
    # Known exact values:
    expected = [0, np.pi**2/3, 0, 1.2]
    assert_almost_equal(mv, expected, decimal=10)

    mv = stats.tukeylambda.stats(3.13, moments='mvsk')
    # 'expected' computed with mpmath.
    expected = [0, 0.0269220858861465102, 0, -0.898062386219224104]
    assert_almost_equal(mv, expected, decimal=10)

    mv = stats.tukeylambda.stats(0.14, moments='mvsk')
    # 'expected' computed with mpmath.
    expected = [0, 2.11029702221450250, 0, -0.02708377353223019456]
    assert_almost_equal(mv, expected, decimal=10)


if __name__ == "__main__":
    run_module_suite()
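The reference values labelled "computed with mpmath" above (and in the new test file below) can be reproduced with a short script along the lines of the generator comments in _tukeylambda_stats.py. A sketch, assuming mpmath is available; mp_var and mp_kurt are helper names introduced here only for illustration.

import mpmath as mp

mp.mp.dps = 30

def mp_var(lam):
    # Variance of the Tukey Lambda distribution for lam != 0.
    lam = mp.mpf(lam)
    return (2 / lam**2) * (1 / (1 + 2*lam) - mp.beta(lam + 1, lam + 1))

def mp_kurt(lam):
    # Excess kurtosis of the Tukey Lambda distribution for lam != 0.
    lam = mp.mpf(lam)
    numer = (1 / (4*lam + 1) - 4*mp.beta(3*lam + 1, lam + 1) +
             3*mp.beta(2*lam + 1, 2*lam + 1))
    denom = 2 * (1 / (2*lam + 1) - mp.beta(lam + 1, lam + 1))**2
    return numer / denom - 3

# Should agree with the lam = 3.13 expectations used in the test above.
print("%s %s" % (mp_var('3.13'), mp_kurt('3.13')))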

86 changes: 86 additions & 0 deletions scipy/stats/tests/test_tukeylambda_stats.py
@@ -0,0 +1,86 @@

import numpy as np
from numpy.testing import assert_allclose, assert_equal

from scipy.stats._tukeylambda_stats import tukeylambda_variance, \
                                            tukeylambda_kurtosis


def test_tukeylambda_stats_known_exact():
    """Compare results with some known exact formulas."""
    # Some exact values of the Tukey Lambda variance and kurtosis:
    #   lambda   var      kurtosis
    #    0     pi**2/3     6/5                  (logistic distribution)
    #   0.5    4 - pi    (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #    1      1/3       -6/5                  (uniform distribution on (-1,1))
    #    2      1/12      -6/5                  (uniform distribution on (-1/2, 1/2))

    # lambda = 0
    var = tukeylambda_variance(0)
    assert_allclose(var, np.pi**2 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(0)
    assert_allclose(kurt, 1.2, atol=1e-10)

    # lambda = 0.5
    var = tukeylambda_variance(0.5)
    assert_allclose(var, 4 - np.pi, atol=1e-12)
    kurt = tukeylambda_kurtosis(0.5)
    desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3
    assert_allclose(kurt, desired, atol=1e-10)

    # lambda = 1
    var = tukeylambda_variance(1)
    assert_allclose(var, 1.0 / 3, atol=1e-12)
    kurt = tukeylambda_kurtosis(1)
    assert_allclose(kurt, -1.2, atol=1e-10)

    # lambda = 2
    var = tukeylambda_variance(2)
    assert_allclose(var, 1.0 / 12, atol=1e-12)
    kurt = tukeylambda_kurtosis(2)
    assert_allclose(kurt, -1.2, atol=1e-10)


def test_tukeylambda_stats_mpmath():
    """Compare results with some values that were computed using mpmath."""
    a10 = dict(atol=1e-10, rtol=0)
    a12 = dict(atol=1e-12, rtol=0)
    data = [
        # lambda        variance               kurtosis
        [-0.1,     4.78050217874253547,    3.78559520346454510],
        [-0.0649,  4.16428023599895777,    2.52019675947435718],
        [-0.05,    3.93672267890775277,    2.13129793057777277],
        [-0.001,   3.30128380390964882,    1.21452460083542988],
        [ 0.001,   3.27850775649572176,    1.18560634779287585],
        [ 0.03125, 2.95927803254615800,    0.804487555161819980],
        [ 0.05,    2.78281053405464501,    0.611604043886644327],
        [ 0.0649,  2.65282386754100551,    0.476834119532774540],
        [ 1.2,     0.242153920578588346,  -1.23428047169049726],
        [ 10.0,    0.00095237579757703597, 2.37810697355144933],
        [ 20.0,    0.00012195121951131043, 7.37654321002709531],
    ]

    for lam, var_expected, kurt_expected in data:
        var = tukeylambda_variance(lam)
        assert_allclose(var, var_expected, **a12)
        kurt = tukeylambda_kurtosis(lam)
        assert_allclose(kurt, kurt_expected, **a10)

    # Test with vector arguments (most of the other tests are for single
    # values).
    lam, var_expected, kurt_expected = zip(*data)
    var = tukeylambda_variance(lam)
    assert_allclose(var, var_expected, **a12)
    kurt = tukeylambda_kurtosis(lam)
    assert_allclose(kurt, kurt_expected, **a10)


def test_tukeylambda_stats_invalid():
    """Test values of lambda outside the domains of the functions."""
    lam = [-1.0, -0.5]
    var = tukeylambda_variance(lam)
    assert_equal(var, np.array([np.nan, np.inf]))

    lam = [-1.0, -0.25]
    kurt = tukeylambda_kurtosis(lam)
    assert_equal(kurt, np.array([np.nan, np.inf]))
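For reference, the lambda = 0.5 entries in test_tukeylambda_stats_known_exact follow from the explicit formulas with B(3/2, 3/2) = pi/8, B(5/2, 3/2) = pi/16 and B(2, 2) = 1/6. A short numerical check of that algebra (a sketch for the review, not part of the patch):

import numpy as np

lam = 0.5
# Variance: (2/lam**2) * (1/(1 + 2*lam) - B(lam+1, lam+1)), with B(3/2, 3/2) = pi/8.
var_exact = (2 / lam**2) * (1 / (1 + 2*lam) - np.pi / 8)
assert np.allclose(var_exact, 4 - np.pi)

# Excess kurtosis: numer/denom - 3, with B(5/2, 3/2) = pi/16 and B(2, 2) = 1/6.
numer = 1 / (4*lam + 1) - 4 * (np.pi / 16) + 3 * (1.0 / 6)
denom = 2 * (1 / (2*lam + 1) - np.pi / 8)**2
kurt_exact = numer / denom - 3
assert np.allclose(kurt_exact, (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3)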