# scipy/scipy

Merge branch 'pull-391-mstats' into master. Closes #1798.

`Reviewed at #391`
2 parents 785491c + 54f74c1 commit b69fe1860220e83bbdd52f7a0c4606c600c4ce4b rgommers committed Dec 27, 2012
Showing with 79 additions and 34 deletions.
1. +16 −4 scipy/stats/mstats_basic.py
2. +7 −3 scipy/stats/stats.py
3. +56 −27 scipy/stats/tests/test_mstats_basic.py
20 scipy/stats/mstats_basic.py
 @@ -1434,7 +1434,12 @@ def skew(a, axis=0, bias=True): n = a.count(axis) m2 = moment(a, 2, axis) m3 = moment(a, 3, axis) - vals = ma.where(m2 == 0, 0, m3 / m2**1.5) + olderr = np.seterr(all='ignore') + try: + vals = ma.where(m2 == 0, 0, m3 / m2**1.5) + finally: + np.seterr(**olderr) + if not bias: can_correct = (n > 2) & (m2 > 0) if can_correct.any(): @@ -1448,13 +1453,19 @@ def skew(a, axis=0, bias=True): def kurtosis(a, axis=0, fisher=True, bias=True): a, axis = _chk_asarray(a, axis) - n = a.count(axis) m2 = moment(a,2,axis) m4 = moment(a,4,axis) - vals = ma.where(m2 == 0, 0, m4/ m2**2.0) + olderr = np.seterr(all='ignore') + try: + vals = ma.where(m2 == 0, 0, m4 / m2**2.0) + finally: + np.seterr(**olderr) + if not bias: - can_correct = (n > 3) & (m2 > 0) + n = a.count(axis) + can_correct = (n > 3) & (m2 is not ma.masked and m2 > 0) if can_correct.any(): + n = np.extract(can_correct, n) m2 = np.extract(can_correct, m2) m4 = np.extract(can_correct, m4) nval = 1.0/(n-2)/(n-3)*((n*n-1.0)*m4/m2**2.0-3*(n-1)**2.0) @@ -1465,6 +1476,7 @@ def kurtosis(a, axis=0, fisher=True, bias=True): return vals kurtosis.__doc__ = stats.kurtosis.__doc__ + def describe(a, axis=0): """ Computes several descriptive statistics of the passed array.
10 scipy/stats/stats.py
 @@ -1060,7 +1060,6 @@ def kurtosis(a, axis=0, fisher=True, bias=True): The kurtosis of values along an axis. If all values are equal, return -3 for Fisher's definition and 0 for Pearson's definition. - References ---------- [CRCProbStat2000]_ Section 2.2.25 @@ -1075,7 +1074,12 @@ def kurtosis(a, axis=0, fisher=True, bias=True): m2 = moment(a,2,axis) m4 = moment(a,4,axis) zero = (m2 == 0) - vals = np.where(zero, 0, m4/ m2**2.0) + olderr = np.seterr(all='ignore') + try: + vals = np.where(zero, 0, m4 / m2**2.0) + finally: + np.seterr(**olderr) + if not bias: can_correct = (n > 3) & (m2 > 0) if can_correct.any(): @@ -3034,7 +3038,7 @@ def ttest_ind(a, b, axis=0, equal_var=True): population variance [2]_. .. versionadded:: 0.11.0 - + Returns ------- t : float or array
83 scipy/stats/tests/test_mstats_basic.py
 @@ -9,9 +9,10 @@ from numpy.ma import masked, nomask import scipy.stats.mstats as mstats +from scipy import stats from numpy.testing import TestCase, run_module_suite from numpy.ma.testutils import assert_equal, assert_almost_equal, \ - assert_array_almost_equal, assert_ + assert_array_almost_equal, assert_array_almost_equal_nulp, assert_ class TestMquantiles(TestCase): @@ -36,7 +37,6 @@ def test_mquantiles_limit_keyword(self): assert_almost_equal(quants, desired) - class TestGMean(TestCase): def test_1D(self): a = (1,2,3,4) @@ -72,6 +72,7 @@ def test_2D(self): np.power(1*4,1./2.))) assert_array_almost_equal(actual, desired, decimal=14) + class TestHMean(TestCase): def test_1D(self): a = (1,2,3,4) @@ -277,21 +278,30 @@ def test_winsorization(self): class TestMoments(TestCase): - """ - Comparison numbers are found using R v.1.5.1 - note that length(testcase) = 4 - testmathworks comes from documentation for the - Statistics Toolbox for Matlab and can be found at both - http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml - http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml - Note that both test cases came from here. - """ + # Comparison numbers are found using R v.1.5.1 + # note that length(testcase) = 4 + # testmathworks comes from documentation for the + # Statistics Toolbox for Matlab and can be found at both + # http://www.mathworks.com/access/helpdesk/help/toolbox/stats/kurtosis.shtml + # http://www.mathworks.com/access/helpdesk/help/toolbox/stats/skewness.shtml + # Note that both test cases came from here. testcase = [1,2,3,4] testmathworks = ma.fix_invalid([1.165 , 0.6268, 0.0751, 0.3516, -0.6965, np.nan]) + testcase_2d = ma.array( + np.array([[ 0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149], + [ 0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407], + [ 0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733], + [ 0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998], + [ 0.64619526, 0.94894632, 0.27855892, 0.0706151 , 0.39962917]]), + mask = np.array([[ True, False, False, True, False], + [ True, True, True, False, True], + [False, False, False, False, False], + [True, True, True, True, True], + [False, False, True, False, False]], dtype=np.bool)) + def test_moment(self): - """ - mean((testcase-mean(testcase))**power,axis=0),axis=0))**power))""" + # mean((testcase-mean(testcase))**power,axis=0),axis=0))**power)) y = mstats.moment(self.testcase,1) assert_almost_equal(y,0.0,10) y = mstats.moment(self.testcase,2) @@ -300,17 +310,16 @@ def test_moment(self): assert_almost_equal(y,0.0) y = mstats.moment(self.testcase,4) assert_almost_equal(y,2.5625) + def test_variation(self): - """variation = samplestd/mean """ + #variation = samplestd/mean ## y = stats.variation(self.shoes[0]) ## assert_almost_equal(y,21.8770668) y = mstats.variation(self.testcase) assert_almost_equal(y,0.44721359549996, 10) def test_skewness(self): - """ - sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5 - """ + # sum((testmathworks-mean(testmathworks,axis=0))**3,axis=0)/((sqrt(var(testmathworks)*4/5))**3)/5 y = mstats.skew(self.testmathworks) assert_almost_equal(y,-0.29322304336607,10) y = mstats.skew(self.testmathworks,bias=0) @@ -319,26 +328,46 @@ def test_skewness(self): assert_almost_equal(y,0.0,10) def test_kurtosis(self): - """ - sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4 - sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5 - Set flags for axis = 0 and - fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab) - """ + # sum((testcase-mean(testcase,axis=0))**4,axis=0)/((sqrt(var(testcase)*3/4))**4)/4 + # sum((test2-mean(testmathworks,axis=0))**4,axis=0)/((sqrt(var(testmathworks)*4/5))**4)/5 + # Set flags for axis = 0 and + # fisher=0 (Pearson's definition of kurtosis for compatibility with Matlab) y = mstats.kurtosis(self.testmathworks,0,fisher=0,bias=1) assert_almost_equal(y, 2.1658856802973,10) # Note that MATLAB has confusing docs for the following case # kurtosis(x,0) gives an unbiased estimate of Pearson's skewness # kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3) # The MATLAB docs imply that both should give Fisher's - y = mstats.kurtosis(self.testmathworks,fisher=0,bias=0) + y = mstats.kurtosis(self.testmathworks,fisher=0, bias=0) assert_almost_equal(y, 3.663542721189047,10) y = mstats.kurtosis(self.testcase,0,0) assert_almost_equal(y,1.64) - # + + # test that kurtosis works on multidimensional masked arrays + correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0., + -1.26979517952]), + mask=np.array([False, False, False, True, + False], dtype=np.bool)) + assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1), + correct_2d) + for i, row in enumerate(self.testcase_2d): + assert_almost_equal(mstats.kurtosis(row), correct_2d[i]) + + correct_2d_bias_corrected = ma.array( + np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]), + mask=np.array([False, False, False, True, False], dtype=np.bool)) + assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1, + bias=False), + correct_2d_bias_corrected) + for i, row in enumerate(self.testcase_2d): + assert_almost_equal(mstats.kurtosis(row, bias=False), + correct_2d_bias_corrected[i]) + + # Check consistency between stats and mstats implementations + assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]), + stats.kurtosis(self.testcase_2d[2, :])) + def test_mode(self): - "Tests the mode" - # a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7] a2 = np.reshape(a1, (3,5)) ma1 = ma.masked_where(ma.array(a1)>2,a1)