Skip to content

Commit

Permalink
BUG: Fix mannwhitneyu to be backward compatible
Browse files (browse the repository at this point in the history)
  • Loading branch information
tavinathanson committed May 1, 2016
1 parent b4dfc3d commit dccd070
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 42 deletions.
56 changes: 37 additions & 19 deletions scipy/stats/stats.py
Expand Up @@ -4374,7 +4374,7 @@ def tiecorrect(rankvals):

MannwhitneyuResult = namedtuple('MannwhitneyuResult', ('statistic', 'pvalue'))

def mannwhitneyu(x, y, use_continuity=True, alternative='two-sided'):
def mannwhitneyu(x, y, use_continuity=True, alternative=None):
"""
Computes the Mann-Whitney rank test on samples x and y.
Expand All @@ -4385,17 +4385,23 @@ def mannwhitneyu(x, y, use_continuity=True, alternative='two-sided'):
use_continuity : bool, optional
Whether a continuity correction (1/2.) should be taken into
account. Default is True.
alternative : 'less', 'two-sided', or 'greater'
alternative : None (deprecated), 'less', 'two-sided', or 'greater'
Whether to get the p-value for the one-sided hypothesis ('less'
or 'greater'), or for the two-sided hypothesis ('two-sided', is
the default)
or 'greater') or for the two-sided hypothesis ('two-sided').
Defaults to None, which results in a p-value half the size of
the 'two-sided' p-value and a different U statistic. The
default behavior is not the same as using 'less' or 'greater':
it only exists for backward compatibility and is deprecated.
Returns
-------
statistic : float
The Mann-Whitney statistics.
The Mann-Whitney U statistic, equal to min(U for x, U for y) if
`alternative` is equal to None (deprecated; exists for backward
compatibility), and U for y otherwise.
pvalue : float
One-sided p-value assuming a asymptotic normal distribution.
p-value assuming an asymptotic normal distribution. One-sided or
two-sided, depending on the choice of `alternative`.
Notes
-----
Expand All @@ -4407,6 +4413,10 @@ def mannwhitneyu(x, y, use_continuity=True, alternative='two-sided'):
This test corrects for ties and by default uses a continuity correction.
"""
if alternative is None:
warnings.warn("Calling `mannwhitneyu` without specifying "
"`alternative` is deprecated.", DeprecationWarning)

x = np.asarray(x)
y = np.asarray(y)
n1 = len(x)
Expand All @@ -4417,27 +4427,35 @@ def mannwhitneyu(x, y, use_continuity=True, alternative='two-sided'):
u2 = n1*n2 - u1 # remainder is U for y
T = tiecorrect(ranked)
if T == 0:
raise ValueError('All numbers are identical in amannwhitneyu')
raise ValueError('All numbers are identical in mannwhitneyu')
sd = np.sqrt(T * n1 * n2 * (n1+n2+1) / 12.0)

fact2 = 1

meanrank = n1*n2/2.0 + 0.5 * use_continuity
if alternative == 'less':
z = u1 - meanrank
elif alternative == 'greater':
z = u2 - meanrank
elif alternative == 'two-sided':
if alternative is None or alternative == 'two-sided':
bigu = max(u1, u2)
z = np.abs(bigu - meanrank)
fact2 = 2.
elif alternative == 'less':
bigu = u1
elif alternative == 'greater':
bigu = u2
else:
raise ValueError("alternative should be 'less', 'greater'"
raise ValueError("alternative should be None, 'less', 'greater' "
"or 'two-sided'")

z = z / sd
z = (bigu - meanrank) / sd
if alternative is None:
# This behavior, equal to half the size of the two-sided
# p-value, is deprecated and kept only for backward compatibility.
p = distributions.norm.sf(abs(z))
elif alternative == 'two-sided':
p = 2 * distributions.norm.sf(abs(z))
else:
p = distributions.norm.sf(z)

return MannwhitneyuResult(u2, distributions.norm.sf(z) * fact2)
u = u2
# Returning min(u1, u2) is deprecated; kept only for backward compatibility.
if alternative is None:
u = min(u1, u2)
return MannwhitneyuResult(u, p)

RanksumsResult = namedtuple('RanksumsResult', ('statistic', 'pvalue'))

Expand Down
76 changes: 53 additions & 23 deletions scipy/stats/tests/test_stats.py
Expand Up @@ -2732,65 +2732,95 @@ class TestMannWhitneyU(TestCase):

significant = 14

def test_mannwhitneyu_less(self):
def test_mannwhitneyu_one_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='less')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='greater')
u3, p3 = stats.mannwhitneyu(self.X, self.Y, alternative='greater')
u4, p4 = stats.mannwhitneyu(self.Y, self.X, alternative='less')

assert_equal(p1, p2)
assert_equal(p3, p4)
assert_(p1 != p3)
assert_equal(u1, 498)
assert_equal(u2, 102)
assert_equal(u3, 498)
assert_equal(u4, 102)
assert_approx_equal(p1, 0.999957683256589, significant=self.significant)
assert_approx_equal(p3, 4.5941632666275e-05, significant=self.significant)

def test_mannwhitneyu_greater(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='greater')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='less')
def test_mannwhitneyu_two_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='two-sided')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, alternative='two-sided')

assert_equal(p1, p2)
assert_equal(u1, 498)
assert_equal(u2, 102)
assert_approx_equal(p1, 4.5941632666275e-05,
assert_approx_equal(p1, 9.188326533255e-05,
significant=self.significant)

def test_mannwhitneyu_two_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, alternative='two-sided')
u2, p2 = stats.mannwhitneyu(self.Y, self.X) # two-sided is default
def test_mannwhitneyu_default(self):
# The default value for alternative is None
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
u1, p1 = stats.mannwhitneyu(self.X, self.Y)
u2, p2 = stats.mannwhitneyu(self.Y, self.X)
u3, p3 = stats.mannwhitneyu(self.X, self.Y, alternative=None)

assert_equal(p1, p2)
assert_equal(u1, 498)
assert_equal(p1, p3)
assert_equal(u1, 102)
assert_equal(u2, 102)
assert_approx_equal(p1, 9.188326533255e-05,
assert_equal(u3, 102)
assert_approx_equal(p1, 4.5941632666275e-05,
significant=self.significant)

def test_mannwhitneyu_no_correct_less(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, False, alternative='less')
def test_mannwhitneyu_no_correct_one_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, False,
alternative='less')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, False,
alternative='greater')
u3, p3 = stats.mannwhitneyu(self.X, self.Y, False,
alternative='greater')
u4, p4 = stats.mannwhitneyu(self.Y, self.X, False,
alternative='less')

assert_equal(p1, p2)
assert_equal(p3, p4)
assert_(p1 != p3)
assert_equal(u1, 498)
assert_equal(u2, 102)
assert_equal(u3, 498)
assert_equal(u4, 102)
assert_approx_equal(p1, 0.999955905990004, significant=self.significant)
assert_approx_equal(p3, 4.40940099958089e-05, significant=self.significant)

def test_mannwhitneyu_no_correct_greater(self):
def test_mannwhitneyu_no_correct_two_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, False,
alternative='greater')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, False, alternative='less')
alternative='two-sided')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, False,
alternative='two-sided')

assert_equal(p1, p2)
assert_equal(u1, 498)
assert_equal(u2, 102)
assert_approx_equal(p1, 4.40940099958089e-05,
assert_approx_equal(p1, 8.81880199916178e-05,
significant=self.significant)

def test_mannwhitneyu_no_correct_two_sided(self):
u1, p1 = stats.mannwhitneyu(self.X, self.Y, False,
alternative='two-sided')
u2, p2 = stats.mannwhitneyu(self.Y, self.X, False,)
def test_mannwhitneyu_no_correct_default(self):
# The default value for alternative is None
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
u1, p1 = stats.mannwhitneyu(self.X, self.Y, False)
u2, p2 = stats.mannwhitneyu(self.Y, self.X, False)
u3, p3 = stats.mannwhitneyu(self.X, self.Y, False,
alternative=None)

assert_equal(p1, p2)
assert_equal(u1, 498)
assert_equal(p1, p3)
assert_equal(u1, 102)
assert_equal(u2, 102)
assert_approx_equal(p1, 8.81880199916178e-05,
assert_equal(u3, 102)
assert_approx_equal(p1, 4.40940099958089e-05,
significant=self.significant)

def test_mannwhitneyu_ones(self):
Expand Down Expand Up @@ -2832,7 +2862,7 @@ def test_mannwhitneyu_ones(self):
(16980.5, 2.8214327656317373e-005),
decimal=12)

def test_mannwhitneyu_result_attribuets(self):
def test_mannwhitneyu_result_attributes(self):
# test for namedtuple attribute results
attributes = ('statistic', 'pvalue')
res = stats.mannwhitneyu(self.X, self.Y)
Expand Down

0 comments on commit dccd070

Please sign in to comment.