# public scipy/scipy

### Subversion checkout URL

You can clone with HTTPS or Subversion.

BUG: stats: Merge pull request #87 (handle array_like in kruskal, improve code in kruskal and f_oneway)
commit ea3d301172f6cdf1f6703ec1ae1ffcedec7124ec 2 parents 0496569 + 9807ac1
authored

Showing 2 changed files with 92 additions and 32 deletions.

1. scipy/stats/stats.py
2. scipy/stats/tests/test_stats.py
64  scipy/stats/stats.py
 `@@ -2152,26 +2152,25 @@ def f_oneway(*args):` 2152 2152 ` .. [2] Heiman, G.W. Research Methods in Statistics. 2002.` 2153 2153 ` ` 2154 2154 ` """` 2155 `- na = len(args) # ANOVA on 'na' groups, each in it's own array` 2156 `- tmp = map(np.array,args)` 2155 `+ args = map(np.asarray, args) # convert to an numpy array` 2156 `+ na = len(args) # ANOVA on 'na' groups, each in it's own array` 2157 2157 ` alldata = np.concatenate(args)` 2158 2158 ` bign = len(alldata)` 2159 `- sstot = ss(alldata)-(square_of_sums(alldata)/float(bign))` 2159 `+ sstot = ss(alldata) - (square_of_sums(alldata) / float(bign))` 2160 2160 ` ssbn = 0` 2161 2161 ` for a in args:` 2162 `- ssbn = ssbn + square_of_sums(array(a))/float(len(a))` 2163 `- ssbn = ssbn - (square_of_sums(alldata)/float(bign))` 2164 `- sswn = sstot-ssbn` 2165 `- dfbn = na-1` 2162 `+ ssbn += square_of_sums(a) / float(len(a))` 2163 `+ ssbn -= (square_of_sums(alldata) / float(bign))` 2164 `+ sswn = sstot - ssbn` 2165 `+ dfbn = na - 1` 2166 2166 ` dfwn = bign - na` 2167 `- msb = ssbn/float(dfbn)` 2168 `- msw = sswn/float(dfwn)` 2169 `- f = msb/msw` 2170 `- prob = fprob(dfbn,dfwn,f)` 2167 `+ msb = ssbn / float(dfbn)` 2168 `+ msw = sswn / float(dfwn)` 2169 `+ f = msb / msw` 2170 `+ prob = fprob(dfbn, dfwn, f)` 2171 2171 ` return f, prob` 2172 2172 ` ` 2173 2173 ` ` 2174 `-` 2175 2174 ` def pearsonr(x, y):` 2176 2175 ` """Calculates a Pearson correlation coefficient and the p-value for testing` 2177 2176 ` non-correlation.` `@@ -3548,30 +3547,31 @@ def kruskal(*args):` 3548 3547 ` .. 
[1] http://en.wikipedia.org/wiki/Kruskal-Wallis_one-way_analysis_of_variance` 3549 3548 ` ` 3550 3549 ` """` 3551 `- if len(args) < 2:` 3550 `+ args = map(np.asarray, args) # convert to a numpy array` 3551 `+ na = len(args) # Kruskal-Wallis on 'na' groups, each in it's own array` 3552 `+ if na < 2:` 3552 3553 ` raise ValueError("Need at least two groups in stats.kruskal()")` 3553 `- n = map(len,args)` 3554 `- all = []` 3555 `- for i in range(len(args)):` 3556 `- all.extend(args[i].tolist())` 3557 `- ranked = list(rankdata(all))` 3558 `- T = tiecorrect(ranked)` 3559 `- args = list(args)` 3560 `- for i in range(len(args)):` 3561 `- args[i] = ranked[0:n[i]]` 3562 `- del ranked[0:n[i]]` 3563 `- rsums = []` 3564 `- for i in range(len(args)):` 3565 `- rsums.append(np.sum(args[i],axis=0)**2)` 3566 `- rsums[i] = rsums[i] / float(n[i])` 3567 `- ssbn = np.sum(rsums,axis=0)` 3568 `- totaln = np.sum(n,axis=0)` 3569 `- h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)` 3570 `- df = len(args) - 1` 3554 `+ n = np.asarray(map(len, args))` 3555 `+ ` 3556 `+ alldata = np.concatenate(args)` 3557 `+` 3558 `+ ranked = rankdata(alldata) # Rank the data` 3559 `+ T = tiecorrect(ranked) # Correct for ties` 3571 3560 ` if T == 0:` 3572 3561 ` raise ValueError('All numbers are identical in kruskal')` 3562 `+ ` 3563 `+ # Compute sum^2/n for each group and sum` 3564 `+ j = np.insert(np.cumsum(n), 0, 0)` 3565 `+ ssbn = 0` 3566 `+ for i in range(na):` 3567 `+ ssbn += square_of_sums(ranked[j[i]:j[i+1]]) / float(n[i]) ` 3568 `+ ` 3569 `+ totaln = np.sum(n)` 3570 `+ h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1)` 3571 `+ df = na - 1` 3573 3572 ` h = h / float(T)` 3574 `- return h, chisqprob(h,df)` 3573 `+ return h, chisqprob(h, df)` 3574 `+` 3575 3575 ` ` 3576 3576 ` ` 3577 3577 ` def friedmanchisquare(*args):`
60  scipy/stats/tests/test_stats.py
 `@@ -1873,5 +1873,65 @@ def test_basic(self):` 1873 1873 ` # result in F being exactly 2.0.` 1874 1874 ` assert_equal(F, 2.0)` 1875 1875 ` ` 1876 `+` 1877 `+class TestKruskal(TestCase):` 1878 `+` 1879 `+ def test_simple(self):` 1880 `+ """A really simple case for stats.kruskal"""` 1881 `+ x = [1]` 1882 `+ y = [2]` 1883 `+ h, p = stats.kruskal(x, y)` 1884 `+ assert_equal(h, 1.0)` 1885 `+ assert_approx_equal(p, stats.chisqprob(h, 1))` 1886 `+ h, p = stats.kruskal(np.array(x), np.array(y))` 1887 `+ assert_equal(h, 1.0)` 1888 `+ assert_approx_equal(p, stats.chisqprob(h, 1))` 1889 `+` 1890 `+ def test_basic(self):` 1891 `+ """A basic test, with no ties."""` 1892 `+ x = [1, 3, 5, 7, 9]` 1893 `+ y = [2, 4, 6, 8, 10]` 1894 `+ h, p = stats.kruskal(x, y)` 1895 `+ assert_approx_equal(h, 3./11, significant=10)` 1896 `+ assert_approx_equal(p, stats.chisqprob(3./11, 1))` 1897 `+ h, p = stats.kruskal(np.array(x), np.array(y))` 1898 `+ assert_approx_equal(h, 3./11, significant=10)` 1899 `+ assert_approx_equal(p, stats.chisqprob(3./11, 1))` 1900 `+` 1901 `+ def test_simple_tie(self):` 1902 `+ """A simple case with a tie."""` 1903 `+ x = [1]` 1904 `+ y = [1, 2]` 1905 `+ h_uncorr = 1.5**2 + 2*2.25**2 - 12` 1906 `+ corr = 0.75` 1907 `+ expected = h_uncorr / corr # 0.5` 1908 `+ h, p = stats.kruskal(x, y)` 1909 `+ # Since the expression is simple and the exact answer is 0.5, it` 1910 `+ # should be safe to use assert_equal().` 1911 `+ assert_equal(h, expected)` 1912 `+` 1913 `+ def test_another_tie(self):` 1914 `+ """Another test of stats.kruskal with a tie."""` 1915 `+ x = [1, 1, 1, 2]` 1916 `+ y = [2, 2, 2, 2]` 1917 `+ h_uncorr = (12. / 8. / 9.) 
* 4 * (3**2 + 6**2) - 3 * 9` 1918 `+ corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)` 1919 `+ expected = h_uncorr / corr` 1920 `+ h, p = stats.kruskal(x, y)` 1921 `+ assert_approx_equal(h, expected)` 1922 `+` 1923 `+ def test_three_groups(self):` 1924 `+ """A test of stats.kruskal with three groups, with ties."""` 1925 `+ x = [1, 1, 1]` 1926 `+ y = [2, 2, 2]` 1927 `+ z = [2, 2]` 1928 `+ h_uncorr = (12. / 8. / 9.) * (3*2**2 + 3*6**2 + 2*6**2) - 3 * 9 # 5.0` 1929 `+ corr = 1 - float(3**3 - 3 + 5**3 - 5) / (8**3 - 8)` 1930 `+ expected = h_uncorr / corr # 7.0` 1931 `+ h, p = stats.kruskal(x, y, z)` 1932 `+ assert_approx_equal(h, expected)` 1933 `+ assert_approx_equal(p, stats.chisqprob(h, 2))` 1934 `+` 1935 `+` 1876 1936 ` if __name__ == "__main__":` 1877 1937 ` run_module_suite()`