diff --git a/scipy/stats/_tukeylambda_stats.py b/scipy/stats/_tukeylambda_stats.py new file mode 100644 index 000000000000..312a1085fdf5 --- /dev/null +++ b/scipy/stats/_tukeylambda_stats.py @@ -0,0 +1,202 @@ + + +import numpy as np +from numpy import array, poly1d +from scipy.interpolate import interp1d +from scipy.special import beta + + +# The following code was used to generate the Pade coefficients for the +# Tukey Lambda variance function. Version 0.17 of mpmath was used. +#--------------------------------------------------------------------------- +# import mpmath as mp +# +# mp.mp.dps = 60 +# +# one = mp.mpf(1) +# two = mp.mpf(2) +# +# def mpvar(lam): +# if lam == 0: +# v = mp.pi**2 / three +# else: +# v = (two / lam**2) * (one / (one + two*lam) - +# mp.beta(lam + one, lam + one)) +# return v +# +# t = mp.taylor(mpvar, 0, 8) +# p, q = mp.pade(t, 4, 4) +# print "p =", [mp.fp.mpf(c) for c in p] +# print "q =", [mp.fp.mpf(c) for c in q] +#--------------------------------------------------------------------------- + +# Pade coefficients for the Tukey Lambda variance function. +_tukeylambda_var_pc = [3.289868133696453, 0.7306125098871127, + -0.5370742306855439, 0.17292046290190008, + -0.02371146284628187] +_tukeylambda_var_qc = [1.0, 3.683605511659861, 4.184152498888124, + 1.7660926747377275, 0.2643989311168465] + +# numpy.poly1d instances for the numerator and denominator of the +# Pade approximation to the Tukey Lambda variance. +_tukeylambda_var_p = poly1d(_tukeylambda_var_pc[::-1]) +_tukeylambda_var_q = poly1d(_tukeylambda_var_qc[::-1]) + + +def tukeylambda_variance(lam): + """Variance of the Tukey Lambda distribution. + + Parameters + ---------- + lam : array_like + The lambda values at which to compute the variance. + + Returns + ------- + v : ndarray + The variance. For lam < -0.5, the variance is not defined, so + np.nan is returned. For lam = 0.5, np.inf is returned. + + Notes + ----- + In an interval around lambda=0, this function uses the [4,4] Pade + approximation to compute the variance. Otherwise it uses the standard + formula (http://en.wikipedia.org/wiki/Tukey_lambda_distribution). The + Pade approximation is used because the standard formula has a removable + discontinuity at lambda = 0, and does not produce accurate numerical + results near lambda = 0. + """ + lam = np.asarray(lam) + shp = lam.shape + lam = np.atleast_1d(lam).astype(np.float64) + + # For absolute values of lam less than threshold, use the Pade + # approximation. + threshold = 0.075 + + # Play games with masks to implement the conditional evaluation of + # the distribution. + # lambda < -0.5: var = nan + low_mask = lam < -0.5 + # lambda == -0.5: var = inf + neghalf_mask = lam == -0.5 + # abs(lambda) < threshold: use Pade approximation + small_mask = np.abs(lam) < threshold + # else the "regular" case: use the explicit formula. + reg_mask = ~(low_mask | neghalf_mask | small_mask) + + # Get the 'lam' values for the cases where they are needed. + small = lam[small_mask] + reg = lam[reg_mask] + + # Compute the function for each case. + v = np.empty_like(lam) + v[low_mask] = np.nan + v[neghalf_mask] = np.inf + if small.size > 0: + # Use the Pade approximation near lambda = 0. + v[small_mask] = _tukeylambda_var_p(small) / _tukeylambda_var_q(small) + if reg.size > 0: + v[reg_mask] = (2.0 / reg**2) * (1.0 / (1.0 + 2 * reg) - + beta(reg + 1, reg + 1)) + v.shape = shp + return v + + +# The following code was used to generate the Pade coefficients for the +# Tukey Lambda kurtosis function. Version 0.17 of mpmath was used. +#--------------------------------------------------------------------------- +# import mpmath as mp +# +# mp.mp.dps = 60 +# +# one = mp.mpf(1) +# two = mp.mpf(2) +# three = mp.mpf(3) +# four = mp.mpf(4) +# +# def mpkurt(lam): +# if lam == 0: +# k = mp.mpf(6)/5 +# else: +# numer = (one/(four*lam+one) - four*mp.beta(three*lam+one, lam+one) + +# three*mp.beta(two*lam+one, two*lam+one)) +# denom = two*(one/(two*lam+one) - mp.beta(lam+one,lam+one))**2 +# k = numer / denom - three +# return k +# +# # There is a bug in mpmath 0.17: when we use the 'method' keyword of the +# # taylor function and we request a degree 9 Taylor polynomial, we actually +# # get degree 8. +# t = mp.taylor(mpkurt, 0, 9, method='quad', radius=0.01) +# t = [mp.chop(c, tol=1e-15) for c in t] +# p, q = mp.pade(t, 4, 4) +# print "p =", [mp.fp.mpf(c) for c in p] +# print "q =", [mp.fp.mpf(c) for c in q] +#--------------------------------------------------------------------------- + +# Pade coefficients for the Tukey Lambda kurtosis function. +_tukeylambda_kurt_pc = [1.2, -5.853465139719495, -22.653447381131077, + 0.20601184383406815, 4.59796302262789] +_tukeylambda_kurt_qc = [1.0, 7.171149192233599, 12.96663094361842, + 0.43075235247853005, -2.789746758009912] + +# numpy.poly1d instances for the numerator and denominator of the +# Pade approximation to the Tukey Lambda kurtosis. +_tukeylambda_kurt_p = poly1d(_tukeylambda_kurt_pc[::-1]) +_tukeylambda_kurt_q = poly1d(_tukeylambda_kurt_qc[::-1]) + + +def tukeylambda_kurtosis(lam): + """Kurtosis of the Tukey Lambda distribution. + + Parameters + ---------- + lam : array_like + The lambda values at which to compute the variance. + + Returns + ------- + v : ndarray + The variance. For lam < -0.25, the variance is not defined, so + np.nan is returned. For lam = 0.25, np.inf is returned. + + """ + lam = np.asarray(lam) + shp = lam.shape + lam = np.atleast_1d(lam).astype(np.float64) + + # For absolute values of lam less than threshold, use the Pade + # approximation. + threshold = 0.055 + + # Use masks to implement the conditional evaluation of the kurtosis. + # lambda < -0.25: kurtosis = nan + low_mask = lam < -0.25 + # lambda == -0.25: kurtosis = inf + negqrtr_mask = lam == -0.25 + # lambda near 0: use Pade approximation + small_mask = np.abs(lam) < threshold + # else the "regular" case: use the explicit formula. + reg_mask = ~(low_mask | negqrtr_mask | small_mask) + + # Get the 'lam' values for the cases where they are needed. + small = lam[small_mask] + reg = lam[reg_mask] + + # Compute the function for each case. + k = np.empty_like(lam) + k[low_mask] = np.nan + k[negqrtr_mask] = np.inf + if small.size > 0: + k[small_mask] = _tukeylambda_kurt_p(small) / _tukeylambda_kurt_q(small) + if reg.size > 0: + numer = (1.0 / (4 * reg + 1) - 4 * beta(3 * reg + 1, reg + 1) + + 3 * beta(2 * reg + 1, 2 * reg + 1)) + denom = 2 * (1.0/(2 * reg + 1) - beta(reg + 1, reg + 1))**2 + k[reg_mask] = numer / denom - 3 + + # The return value will be a numpy array; resetting the shape ensures that + # if `lam` was a scalar, the return value is a 0-d array. + k.shape = shp + return k diff --git a/scipy/stats/distributions.py b/scipy/stats/distributions.py index 7f70474511c2..6b48dd08a51b 100644 --- a/scipy/stats/distributions.py +++ b/scipy/stats/distributions.py @@ -28,6 +28,8 @@ import numpy.random as mtrand from numpy import flatnonzero as nonzero import vonmises_cython +from _tukeylambda_stats import tukeylambda_variance as _tlvar, \ + tukeylambda_kurtosis as _tlkurt __all__ = [ 'rv_continuous', @@ -5102,33 +5104,30 @@ class tukeylambda_gen(rv_continuous): def _argcheck(self, lam): # lam in RR. return np.ones(np.shape(lam), dtype=bool) + def _pdf(self, x, lam): Fx = arr(special.tklmbda(x,lam)) Px = Fx**(lam-1.0) + (arr(1-Fx))**(lam-1.0) Px = 1.0/arr(Px) return where((lam <= 0) | (abs(x) < 1.0/arr(lam)), Px, 0.0) + def _cdf(self, x, lam): return special.tklmbda(x, lam) + def _ppf(self, q, lam): q = q*1.0 vals1 = (q**lam - (1-q)**lam)/lam vals2 = log(q/(1-q)) return where((lam == 0)&(q==q), vals2, vals1) + def _stats(self, lam): - mu2 = 2*gam(lam+1.5)-lam*pow(4,-lam)*sqrt(pi)*gam(lam)*(1-2*lam) - mu2 /= lam*lam*(1+2*lam)*gam(1+1.5) - mu4 = 3*gam(lam)*gam(lam+0.5)*pow(2,-2*lam) / lam**3 / gam(2*lam+1.5) - mu4 += 2.0/lam**4 / (1+4*lam) - mu4 -= 2*sqrt(3)*gam(lam)*pow(2,-6*lam)*pow(3,3*lam) * \ - gam(lam+1.0/3)*gam(lam+2.0/3) / (lam**3.0 * gam(2*lam+1.5) * \ - gam(lam+0.5)) - g2 = mu4 / mu2 / mu2 - 3.0 - - return 0, mu2, 0, g2 + return 0, _tlvar(lam), 0, _tlkurt(lam) + def _entropy(self, lam): def integ(p): return log(pow(p,lam-1)+pow(1-p,lam-1)) return integrate.quad(integ,0,1)[0] + tukeylambda = tukeylambda_gen(name='tukeylambda', shapes="lam") diff --git a/scipy/stats/tests/test_distributions.py b/scipy/stats/tests/test_distributions.py index 0dbc0d914d2f..3632b640633d 100644 --- a/scipy/stats/tests/test_distributions.py +++ b/scipy/stats/tests/test_distributions.py @@ -791,6 +791,26 @@ def test_regression_ticket_1530(): assert_almost_equal(params, expected, decimal=1) +def test_tukeylambda_stats_ticket_1545(): + """Some test for the variance and kurtosis of the Tukey Lambda distr.""" + + # See test_tukeylamdba_stats.py for more tests. + + mv = stats.tukeylambda.stats(0, moments='mvsk') + # Known exact values: + expected = [0, np.pi**2/3, 0, 1.2] + assert_almost_equal(mv, expected, decimal=10) + + mv = stats.tukeylambda.stats(3.13, moments='mvsk') + # 'expected' computed with mpmath. + expected = [0, 0.0269220858861465102, 0, -0.898062386219224104] + assert_almost_equal(mv, expected, decimal=10) + + mv = stats.tukeylambda.stats(0.14, moments='mvsk') + # 'expected' computed with mpmath. + expected = [0, 2.11029702221450250, 0, -0.02708377353223019456] + assert_almost_equal(mv, expected, decimal=10) + + if __name__ == "__main__": run_module_suite() - diff --git a/scipy/stats/tests/test_tukeylambda_stats.py b/scipy/stats/tests/test_tukeylambda_stats.py new file mode 100644 index 000000000000..282f336dfe48 --- /dev/null +++ b/scipy/stats/tests/test_tukeylambda_stats.py @@ -0,0 +1,86 @@ + +import numpy as np +from numpy.testing import assert_allclose, assert_equal + +from scipy.stats._tukeylambda_stats import tukeylambda_variance, \ + tukeylambda_kurtosis + + +def test_tukeylambda_stats_known_exact(): + """Compare results with some known exact formulas.""" + # Some exact values of the Tukey Lambda variance and kurtosis: + # lambda var kurtosis + # 0 pi**2/3 6/5 (logistic distribution) + # 0.5 4 - pi (5/3 - pi/2)/(pi/4 - 1)**2 - 3 + # 1 1/3 -6/5 (uniform distribution on (-1,1)) + # 2 1/12 -6/5 (uniform distribution on (-1/2, 1/2)) + + # lambda = 0 + var = tukeylambda_variance(0) + assert_allclose(var, np.pi**2 / 3, atol=1e-12) + kurt = tukeylambda_kurtosis(0) + assert_allclose(kurt, 1.2, atol=1e-10) + + # lambda = 0.5 + var = tukeylambda_variance(0.5) + assert_allclose(var, 4 - np.pi, atol=1e-12) + kurt = tukeylambda_kurtosis(0.5) + desired = (5./3 - np.pi/2) / (np.pi/4 - 1)**2 - 3 + assert_allclose(kurt, desired, atol=1e-10) + + # lambda = 1 + var = tukeylambda_variance(1) + assert_allclose(var, 1.0 / 3, atol=1e-12) + kurt = tukeylambda_kurtosis(1) + assert_allclose(kurt, -1.2, atol=1e-10) + + # lambda = 2 + var = tukeylambda_variance(2) + assert_allclose(var, 1.0 / 12, atol=1e-12) + kurt = tukeylambda_kurtosis(2) + assert_allclose(kurt, -1.2, atol=1e-10) + + +def test_tukeylambda_stats_mpmath(): + """Compare results with some values that were computed using mpmath.""" + a10 = dict(atol=1e-10, rtol=0) + a12 = dict(atol=1e-12, rtol=0) + data = [ + # lambda variance kurtosis + [-0.1, 4.78050217874253547, 3.78559520346454510], + [-0.0649, 4.16428023599895777, 2.52019675947435718], + [-0.05, 3.93672267890775277, 2.13129793057777277], + [-0.001, 3.30128380390964882, 1.21452460083542988], + [ 0.001, 3.27850775649572176, 1.18560634779287585], + [ 0.03125, 2.95927803254615800, 0.804487555161819980], + [ 0.05, 2.78281053405464501, 0.611604043886644327], + [ 0.0649, 2.65282386754100551, 0.476834119532774540], + [ 1.2, 0.242153920578588346, -1.23428047169049726], + [ 10.0, 0.00095237579757703597, 2.37810697355144933], + [ 20.0, 0.00012195121951131043, 7.37654321002709531], + ] + + for lam, var_expected, kurt_expected in data: + var = tukeylambda_variance(lam) + assert_allclose(var, var_expected, **a12) + kurt = tukeylambda_kurtosis(lam) + assert_allclose(kurt, kurt_expected, **a10) + + # Test with vector arguments (most of the other tests are for single + # values). + lam, var_expected, kurt_expected = zip(*data) + var = tukeylambda_variance(lam) + assert_allclose(var, var_expected, **a12) + kurt = tukeylambda_kurtosis(lam) + assert_allclose(kurt, kurt_expected, **a10) + + +def test_tukeylambda_stats_invalid(): + """Test values of lambda outside the domains of the functions.""" + lam = [-1.0, -0.5] + var = tukeylambda_variance(lam) + assert_equal(var, np.array([np.nan, np.inf])) + + lam = [-1.0, -0.25] + kurt = tukeylambda_kurtosis(lam) + assert_equal(kurt, np.array([np.nan, np.inf]))