Browse files

BUG: stats: fixed calculation of the variance and kurtosis of the Tukey Lambda distribution (ticket 1545)
  • Loading branch information...
1 parent fc1dc71 commit 0cac0b5b324603f942df56ced31f10b560674037 Warren Weckesser committed Nov 6, 2011
View
202 scipy/stats/_tukeylambda_stats.py
@@ -0,0 +1,202 @@
+
+
+import numpy as np
+from numpy import array, poly1d
+from scipy.interpolate import interp1d
+from scipy.special import beta
+
+
+# The following code was used to generate the Pade coefficients for the
+# Tukey Lambda variance function. Version 0.17 of mpmath was used.
+#---------------------------------------------------------------------------
+# import mpmath as mp
+#
+# mp.mp.dps = 60
+#
+# one = mp.mpf(1)
+# two = mp.mpf(2)
+# three = mp.mpf(3)
+#
+# def mpvar(lam):
+# if lam == 0:
+# v = mp.pi**2 / three
+# else:
+# v = (two / lam**2) * (one / (one + two*lam) -
+# mp.beta(lam + one, lam + one))
+# return v
+#
+# t = mp.taylor(mpvar, 0, 8)
+# p, q = mp.pade(t, 4, 4)
+# print "p =", [mp.fp.mpf(c) for c in p]
+# print "q =", [mp.fp.mpf(c) for c in q]
+#---------------------------------------------------------------------------
+
+# Pade coefficients for the Tukey Lambda variance function.
+_tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127,
+ -0.5370742306855439, 0.17292046290190008,
+ -0.02371146284628187]
+_tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124,
+ 1.7660926747377275, 0.2643989311168465]
+
+# numpy.poly1d instances for the numerator and denominator of the
+# Pade approximation to the Tukey Lambda variance.
+_tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1])
+_tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1])
+
+
def tukeylambda_variance(lam):
    """Variance of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the variance.

    Returns
    -------
    v : ndarray
        The variance.  For lam < -0.5, the variance is not defined, so
        np.nan is returned.  For lam = -0.5, np.inf is returned.

    Notes
    -----
    In an interval around lambda=0, this function uses the [4,4] Pade
    approximation to compute the variance.  Otherwise it uses the standard
    formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution).  The
    Pade approximation is used because the standard formula has a removable
    discontinuity at lambda = 0, and does not produce accurate numerical
    results near lambda = 0.
    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation instead of the explicit formula.
    threshold = 0.075

    # Use masks to implement the conditional evaluation of the variance.
    # lambda < -0.5:  var = nan
    low_mask = lam < -0.5
    # lambda == -0.5:  var = inf
    neghalf_mask = lam == -0.5
    # abs(lambda) < threshold:  use the Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | neghalf_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    v = np.empty_like(lam)
    v[low_mask] = np.nan
    v[neghalf_mask] = np.inf
    if small.size > 0:
        # Use the Pade approximation near lambda = 0.
        v[small_mask] = _tukeylambda_var_p(small) / _tukeylambda_var_q(small)
    if reg.size > 0:
        v[reg_mask] = (2.0 / reg**2) * (1.0 / (1.0 + 2 * reg) -
                                        beta(reg + 1, reg + 1))

    # The return value will be a numpy array; resetting the shape ensures
    # that if `lam` was a scalar, the return value is a 0-d array.
    v.shape = shp
    return v
+
+
+# The following code was used to generate the Pade coefficients for the
+# Tukey Lambda kurtosis function. Version 0.17 of mpmath was used.
+#---------------------------------------------------------------------------
+# import mpmath as mp
+#
+# mp.mp.dps = 60
+#
+# one = mp.mpf(1)
+# two = mp.mpf(2)
+# three = mp.mpf(3)
+# four = mp.mpf(4)
+#
+# def mpkurt(lam):
+# if lam == 0:
+# k = mp.mpf(6)/5
+# else:
+# numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) +
+# three*mp.beta(two*lam+one, two*lam+one))
+# denom = two*(one/(two*lam+one) - mp.beta(lam+one,lam+one))**2
+# k = numer / denom - three
+# return k
+#
+# # There is a bug in mpmath 0.17: when we use the 'method' keyword of the
+# # taylor function and we request a degree 9 Taylor polynomial, we actually
+# # get degree 8.
+# t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01)
+# t = [mp.chop(c, tol=1e-15) for c in t]
+# p, q = mp.pade(t, 4, 4)
+# print "p =", [mp.fp.mpf(c) for c in p]
+# print "q =", [mp.fp.mpf(c) for c in q]
+#---------------------------------------------------------------------------
+
# Pade coefficients for the Tukey Lambda kurtosis function.  These are the
# coefficients (constant term first) of the numerator and denominator of the
# [4, 4] Pade approximation generated by the mpmath code shown above.
_tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077,
                        0.20601184383406815, 4.59796302262789]
_tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842,
                        0.43075235247853005, -2.789746758009912]

# numpy.poly1d instances for the numerator and denominator of the
# Pade approximation to the Tukey Lambda kurtosis.  poly1d expects the
# highest-order coefficient first, hence the [::-1] reversal.
_tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1])
_tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1])
+
+
def tukeylambda_kurtosis(lam):
    """Kurtosis of the Tukey Lambda distribution.

    Parameters
    ----------
    lam : array_like
        The lambda values at which to compute the kurtosis.

    Returns
    -------
    k : ndarray
        The (excess) kurtosis.  For lam < -0.25, the kurtosis is not
        defined, so np.nan is returned.  For lam = -0.25, np.inf is
        returned.

    Notes
    -----
    In an interval around lambda=0, this function uses a [4,4] Pade
    approximation, because the explicit formula has a removable
    discontinuity at lambda = 0 and is numerically inaccurate there.
    """
    lam = np.asarray(lam)
    shp = lam.shape
    lam = np.atleast_1d(lam).astype(np.float64)

    # For absolute values of lam less than threshold, use the Pade
    # approximation instead of the explicit formula.
    threshold = 0.055

    # Use masks to implement the conditional evaluation of the kurtosis.
    # lambda < -0.25:  kurtosis = nan
    low_mask = lam < -0.25
    # lambda == -0.25:  kurtosis = inf
    negqrtr_mask = lam == -0.25
    # lambda near 0:  use the Pade approximation
    small_mask = np.abs(lam) < threshold
    # else the "regular" case:  use the explicit formula.
    reg_mask = ~(low_mask | negqrtr_mask | small_mask)

    # Get the 'lam' values for the cases where they are needed.
    small = lam[small_mask]
    reg = lam[reg_mask]

    # Compute the function for each case.
    k = np.empty_like(lam)
    k[low_mask] = np.nan
    k[negqrtr_mask] = np.inf
    if small.size > 0:
        # Use the Pade approximation near lambda = 0.
        k[small_mask] = _tukeylambda_kurt_p(small) / _tukeylambda_kurt_q(small)
    if reg.size > 0:
        numer = (1.0 / (4 * reg + 1) - 4 * beta(3 * reg + 1, reg + 1) +
                 3 * beta(2 * reg + 1, 2 * reg + 1))
        denom = 2 * (1.0 / (2 * reg + 1) - beta(reg + 1, reg + 1))**2
        k[reg_mask] = numer / denom - 3

    # The return value will be a numpy array; resetting the shape ensures
    # that if `lam` was a scalar, the return value is a 0-d array.
    k.shape = shp
    return k
View
19 scipy/stats/distributions.py
@@ -28,6 +28,8 @@
import numpy.random as mtrand
from numpy import flatnonzero as nonzero
import vonmises_cython
+from _tukeylambda_stats import tukeylambda_variance as _tlvar, \
+ tukeylambda_kurtosis as _tlkurt
__all__ = [
'rv_continuous',
@@ -5102,33 +5104,30 @@ class tukeylambda_gen(rv_continuous):
def _argcheck(self, lam):
# lam in RR.
return np.ones(np.shape(lam), dtype=bool)
+
    def _pdf(self, x, lam):
        # The density is the reciprocal of the derivative of the quantile
        # function evaluated at p = F(x):
        #     pdf(x) = 1 / (F(x)**(lam-1) + (1-F(x))**(lam-1)).
        Fx = arr(special.tklmbda(x,lam))
        Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0)
        Px = 1.0/arr(Px)
        # For lam > 0 the support is |x| < 1/lam; outside it the pdf is 0.
        # For lam <= 0 the support is the whole real line.
        return where((lam <= 0) | (abs(x) < 1.0/arr(lam)), Px, 0.0)
+
    def _cdf(self, x, lam):
        # scipy.special.tklmbda evaluates the Tukey Lambda CDF directly.
        return special.tklmbda(x, lam)
+
    def _ppf(self, q, lam):
        # Quantile function: (q**lam - (1-q)**lam) / lam, with the
        # lam -> 0 limit log(q/(1-q)) (the logistic quantile) used
        # when lam == 0.
        q = q*1.0  # force float arithmetic
        vals1 = (q**lam - (1-q)**lam)/lam
        vals2 = log(q/(1-q))
        # The (q == q) term is False for nan, so nan values of q fall
        # through to vals1 rather than selecting the lam == 0 branch.
        return where((lam == 0)&(q==q), vals2, vals1)
+
    def _stats(self, lam):
        # Mean and skewness are 0 by symmetry.  Variance and kurtosis are
        # delegated to the helpers in _tukeylambda_stats, which use Pade
        # approximations near lam = 0 for numerical accuracy; the previous
        # closed-form expressions here were incorrect (ticket 1545).
        return 0, _tlvar(lam), 0, _tlkurt(lam)
+
    def _entropy(self, lam):
        # Entropy computed by numerical integration over p in (0, 1);
        # the integrand is log of the reciprocal pdf expressed in terms
        # of p = F(x) (see _pdf).
        def integ(p):
            return log(pow(p,lam-1)+pow(1-p,lam-1))
        return integrate.quad(integ,0,1)[0]
+
tukeylambda = tukeylambda_gen(name='tukeylambda', shapes="lam")
View
22 scipy/stats/tests/test_distributions.py
@@ -791,6 +791,26 @@ def test_regression_ticket_1530():
assert_almost_equal(params, expected, decimal=1)
def test_tukeylambda_stats_ticket_1545():
    """Check the variance and kurtosis of the Tukey Lambda distribution."""
    # See test_tukeylambda_stats.py for more thorough tests.
    # Expected (m, v, s, k) values:  lam = 0 is known exactly (logistic
    # distribution); the other rows were computed with mpmath.
    cases = [
        (0, [0, np.pi**2/3, 0, 1.2]),
        (3.13, [0, 0.0269220858861465102, 0, -0.898062386219224104]),
        (0.14, [0, 2.11029702221450250, 0, -0.02708377353223019456]),
    ]
    for lam, expected in cases:
        mvsk = stats.tukeylambda.stats(lam, moments='mvsk')
        assert_almost_equal(mvsk, expected, decimal=10)
+
+
if __name__ == "__main__":
run_module_suite()
-
View
86 scipy/stats/tests/test_tukeylambda_stats.py
@@ -0,0 +1,86 @@
+
+import numpy as np
+from numpy.testing import assert_allclose, assert_equal
+
+from scipy.stats._tukeylambda_stats import tukeylambda_variance, \
+ tukeylambda_kurtosis
+
+
def test_tukeylambda_stats_known_exact():
    """Compare the results with some known exact formulas."""
    # Exact values of the Tukey Lambda variance and kurtosis:
    #   lambda = 0:    var = pi**2/3,  kurt = 6/5   (logistic distribution)
    #   lambda = 0.5:  var = 4 - pi,
    #                  kurt = (5/3 - pi/2)/(pi/4 - 1)**2 - 3
    #   lambda = 1:    var = 1/3,   kurt = -6/5  (uniform on (-1, 1))
    #   lambda = 2:    var = 1/12,  kurt = -6/5  (uniform on (-1/2, 1/2))
    kurt_half = (5. / 3 - np.pi / 2) / (np.pi / 4 - 1)**2 - 3
    exact = [
        (0.0, np.pi**2 / 3, 1.2),
        (0.5, 4 - np.pi, kurt_half),
        (1.0, 1.0 / 3, -1.2),
        (2.0, 1.0 / 12, -1.2),
    ]
    for lam, var_expected, kurt_expected in exact:
        assert_allclose(tukeylambda_variance(lam), var_expected, atol=1e-12)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_expected, atol=1e-10)
+
+
def test_tukeylambda_stats_mpmath():
    """Compare the results with values that were precomputed using mpmath."""
    # Columns: lambda, variance, kurtosis.
    data = [
        [-0.1, 4.78050217874253547, 3.78559520346454510],
        [-0.0649, 4.16428023599895777, 2.52019675947435718],
        [-0.05, 3.93672267890775277, 2.13129793057777277],
        [-0.001, 3.30128380390964882, 1.21452460083542988],
        [0.001, 3.27850775649572176, 1.18560634779287585],
        [0.03125, 2.95927803254615800, 0.804487555161819980],
        [0.05, 2.78281053405464501, 0.611604043886644327],
        [0.0649, 2.65282386754100551, 0.476834119532774540],
        [1.2, 0.242153920578588346, -1.23428047169049726],
        [10.0, 0.00095237579757703597, 2.37810697355144933],
        [20.0, 0.00012195121951131043, 7.37654321002709531],
    ]

    # Scalar arguments.
    for lam, var_expected, kurt_expected in data:
        assert_allclose(tukeylambda_variance(lam), var_expected,
                        atol=1e-12, rtol=0)
        assert_allclose(tukeylambda_kurtosis(lam), kurt_expected,
                        atol=1e-10, rtol=0)

    # Vector arguments (most of the other tests use single values).
    lams, var_expected, kurt_expected = zip(*data)
    assert_allclose(tukeylambda_variance(lams), var_expected,
                    atol=1e-12, rtol=0)
    assert_allclose(tukeylambda_kurtosis(lams), kurt_expected,
                    atol=1e-10, rtol=0)
+
+
def test_tukeylambda_stats_invalid():
    """Check lambda values outside the domains of the functions."""
    # Variance: nan for lam < -0.5, inf at exactly lam == -0.5.
    assert_equal(tukeylambda_variance([-1.0, -0.5]),
                 np.array([np.nan, np.inf]))
    # Kurtosis: nan for lam < -0.25, inf at exactly lam == -0.25.
    assert_equal(tukeylambda_kurtosis([-1.0, -0.25]),
                 np.array([np.nan, np.inf]))

0 comments on commit 0cac0b5

Please sign in to comment.