ENH: add prob(x1 > x2) effect size measure, inference based on brunner munzel
josef-pkt committed Aug 18, 2020
1 parent 1c82e3a commit 1cbcdfb
Showing 2 changed files with 345 additions and 2 deletions.
252 changes: 252 additions & 0 deletions statsmodels/stats/nonparametric.py
@@ -0,0 +1,252 @@
# -*- coding: utf-8 -*-
"""
Rank-based methods for inferential statistics
Created on Sat Aug 15 10:18:53 2020
Author: Josef Perktold
License: BSD-3
"""


import numpy as np

from scipy import stats
from scipy.stats import rankdata

from statsmodels.stats.base import HolderTuple
from statsmodels.stats.weightstats import (
_zconfint_generic, _tconfint_generic, _zstat_generic, _tstat_generic)


class BrunnerMunzelResult(HolderTuple):
"""Results for rank comparison
"""

def conf_int(self, alpha=0.05, value=None, alternative="two-sided"):
"""Confidence interval for prob1 = P(x1 > x2) + 0.5 * P(x1 = x2)

If `value` is not None, the interval is for the difference
prob1 - value; the t distribution is used if `df` is available.
"""
p0 = value
if p0 is None:
p0 = 0
diff = self.prob1 - p0
std_diff = np.sqrt(self.var / self.nobs)

if self.df is None:
return _zconfint_generic(diff, std_diff, alpha, alternative)
else:
return _tconfint_generic(diff, std_diff, self.df, alpha,
alternative)

def test_prob_superior(self, value=0.5, alternative="two-sided"):
"""Test for the superiority probability

H0: p = P(x1 > x2) + 0.5 * P(x1 = x2) = value
"""

p0 = value # alias
diff = self.prob1 - p0
# TODO: use var_prob
std_diff = np.sqrt(self.var / self.nobs)

# TODO: return HolderTuple
# corresponds to a one-sample test and either p0 or diff could be used
if self.df is None:
return _zstat_generic(self.prob1, p0, std_diff, alternative,
diff=0)
else:
return _tstat_generic(self.prob1, p0, std_diff, self.df,
alternative, diff=0)

def tost_prob_superior(self, low, upp):
'''Test of stochastic (non-)equivalence of p = P(x1 > x2)

Null hypothesis: p < low or p > upp
Alternative hypothesis: low < p < upp

where p is the probability that a random draw from the population of
the first sample has a larger value than a random draw from the
population of the second sample, specifically

p = P(x1 > x2) + 0.5 * P(x1 = x2)

If the pvalue is smaller than a threshold, say 0.05, then we reject the
hypothesis that the probability p that distribution 1 is stochastically
superior to distribution 2 is outside of the interval given by the
thresholds low and upp.

Parameters
----------
low, upp : float
equivalence interval low < prob < upp

Returns
-------
pvalue : float
pvalue of the non-equivalence test, i.e. the larger of the two
one-sided pvalues
t1, pv1, df1 : tuple
test statistic, pvalue and degrees of freedom for lower threshold
test
t2, pv2, df2 : tuple
test statistic, pvalue and degrees of freedom for upper threshold
test
'''

t1, pv1 = self.test_prob_superior(low, alternative='larger')
t2, pv2 = self.test_prob_superior(upp, alternative='smaller')
df1 = df2 = None
# TODO: return HolderTuple
return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)


def brunnermunzel(x, y, alternative="two-sided", distribution="t",
nan_policy='propagate'):
"""
Compute the Brunner-Munzel test on samples x and y.
The Brunner-Munzel test is a nonparametric test of the null hypothesis that
when values are taken one by one from each group, the probabilities of
getting large values in both groups are equal.
Unlike the Wilcoxon-Mann-Whitney U test, this does not require the
assumption of equal variances in the two groups. Note that this does not
assume the distributions are the same. This test works on two independent
samples, which may have different sizes.
Parameters
----------
x, y : array_like
Array of samples, should be one-dimensional.
alternative : {'two-sided', 'less', 'greater'}, optional
Defines the alternative hypothesis.
The following options are available (default is 'two-sided'):
* 'two-sided'
* 'less': one-sided
* 'greater': one-sided
distribution : {'t', 'normal'}, optional
Defines how to get the p-value.
The following options are available (default is 't'):
* 't': get the p-value by t-distribution
* 'normal': get the p-value by standard normal distribution.
nan_policy : {'propagate', 'raise', 'omit'}, optional
Defines how to handle when input contains nan.
The following options are available (default is 'propagate'):
* 'propagate': returns nan
* 'raise': throws an error
* 'omit': performs the calculations ignoring nan values
Note: the nan checks are currently disabled (commented out below), so
this option has no effect yet.
Returns
-------
statistic : float
The Brunner-Munzel W statistic.
pvalue : float
p-value assuming a t or normal distribution, as selected by
`distribution`. One-sided or two-sided, depending on the choice of
`alternative`.
See Also
--------
mannwhitneyu : Mann-Whitney rank test on two samples.
Notes
-----
Brunner and Munzel recommended estimating the p-value using the
t-distribution when the sample size is 50 or less. If the size is lower
than 10, it is better to use the permuted Brunner-Munzel test (see [2]_).
References
----------
.. [1] Brunner, E. and Munzel, U. "The nonparametric Behrens-Fisher
problem: Asymptotic theory and a small-sample approximation".
Biometrical Journal. Vol. 42(2000): 17-25.
.. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the
non-parametric Behrens-Fisher problem". Computational Statistics and
Data Analysis. Vol. 51(2007): 5192-5204.
Examples
--------
>>> from statsmodels.stats.nonparametric import brunnermunzel
>>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1]
>>> x2 = [3,3,4,3,1,2,3,1,1,5,4]
>>> w, p_value = brunnermunzel(x1, x2)
>>> w
3.1374674823029505
>>> p_value
0.0057862086661515377
"""
x = np.asarray(x)
y = np.asarray(y)

# # check both x and y
# cnx, npx = _contains_nan(x, nan_policy)
# cny, npy = _contains_nan(y, nan_policy)
# contains_nan = cnx or cny
# if npx == "omit" or npy == "omit":
# nan_policy = "omit"

# if contains_nan and nan_policy == "propagate":
# return BrunnerMunzelResult(np.nan, np.nan)
# elif contains_nan and nan_policy == "omit":
# x = ma.masked_invalid(x)
# y = ma.masked_invalid(y)
# return mstats_basic.brunnermunzel(x, y, alternative, distribution)

nx = len(x)
ny = len(y)
nobs = nx + ny
if nx == 0 or ny == 0:
return BrunnerMunzelResult(np.nan, np.nan)
rankc = rankdata(np.concatenate((x, y)))
rankcx = rankc[0:nx]
rankcy = rankc[nx:nx+ny]
rankcx_mean = np.mean(rankcx)
rankcy_mean = np.mean(rankcy)
rankx = rankdata(x)
ranky = rankdata(y)
rankx_mean = np.mean(rankx)
ranky_mean = np.mean(ranky)

Sx = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0))
Sx /= nx - 1
Sy = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0))
Sy /= ny - 1

wbfn = nx * ny * (rankcy_mean - rankcx_mean)
wbfn /= (nx + ny) * np.sqrt(nx * Sx + ny * Sy)

if distribution == "t":
df_numer = np.power(nx * Sx + ny * Sy, 2.0)
df_denom = np.power(nx * Sx, 2.0) / (nx - 1)
df_denom += np.power(ny * Sy, 2.0) / (ny - 1)
df = df_numer / df_denom
p = stats.t.cdf(wbfn, df)
elif distribution == "normal":
p = stats.norm.cdf(wbfn)
df = None
else:
raise ValueError(
"distribution should be 't' or 'normal'")

if alternative == "greater":
pass
elif alternative == "less":
p = 1 - p
elif alternative == "two-sided":
p = 2 * np.min([p, 1-p])
else:
raise ValueError(
"alternative should be 'less', 'greater' or 'two-sided'")

# other info
nobs1, nobs2 = nx, ny # rename
mean1 = rankcx_mean
mean2 = rankcy_mean

var1 = Sx / (nobs - nx)**2
var2 = Sy / (nobs - ny)**2
var_prob = (var1 / nobs1 + var2 / nobs2)
var = nobs * (var1 / nobs1 + var2 / nobs2)
prob1 = (mean1 - (nobs1 + 1) / 2) / nobs2
prob2 = (mean2 - (nobs2 + 1) / 2) / nobs1

return BrunnerMunzelResult(statistic=wbfn, pvalue=p, s1=Sx, s2=Sy,
var1=var1, var2=var2, var=var,
var_prob=var_prob,
nobs1=nx, nobs2=ny, nobs=nobs,
mean1=mean1, mean2=mean2,
prob1=prob1, prob2=prob2,
somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1,
df=df
)
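
A minimal usage sketch of the new API, not part of the commit: it assumes the module is importable as statsmodels.stats.nonparametric and reuses the two samples from the docstring example; the equivalence margins 0.4 and 0.6 below are illustrative only.

from statsmodels.stats.nonparametric import brunnermunzel

x1 = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
x2 = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]

# defaults: t distribution, two-sided alternative
res = brunnermunzel(x1, x2)
print(res.statistic, res.pvalue)  # Brunner-Munzel W statistic and p-value
print(res.prob1)                  # estimate of P(x1 > x2) + 0.5 * P(x1 = x2)

# one-sample style test of H0: prob1 = 0.5, and the matching confidence interval
stat, pval = res.test_prob_superior(value=0.5)
low, upp = res.conf_int(alpha=0.05)

# TOST: a small pvalue rejects the null that prob1 lies outside (0.4, 0.6)
pval_tost, test_low, test_upp = res.tost_prob_superior(low=0.4, upp=0.6)
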
95 changes: 93 additions & 2 deletions statsmodels/stats/tests/test_nonparametric.py
@@ -2,18 +2,23 @@
"""
Created on Fri Jul 05 14:05:24 2013
Aug 15 2020: add brunnermunzel, rank_compare_2indep
Author: Josef Perktold
"""
from statsmodels.compat.python import lzip
import numpy as np
from numpy.testing import assert_allclose, assert_almost_equal
from numpy.testing import (assert_allclose, assert_almost_equal,
assert_approx_equal, assert_)
import pytest

from statsmodels.stats.contingency_tables import mcnemar, cochrans_q, SquareTable
from statsmodels.stats.contingency_tables import (
mcnemar, cochrans_q, SquareTable)
from statsmodels.sandbox.stats.runs import (Runs,
runstest_1samp, runstest_2samp)
from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar
from statsmodels.stats.nonparametric import brunnermunzel
from statsmodels.tools.testing import Holder


def _expand_table(table):
@@ -274,3 +279,89 @@ def test_runstest_2sample():
# check cutoff
res2_1s = runstest_1samp(xy, xy.mean())
assert_allclose(res2_1s, res_1s, rtol=1e-6)


def test_brunnermunzel_one_sided():
# copied from scipy with adjustment
x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
significant = 13

# Results are compared with R's lawstat package.
u1, p1 = brunnermunzel(x, y, alternative='less')
u2, p2 = brunnermunzel(y, x, alternative='greater')
u3, p3 = brunnermunzel(x, y, alternative='greater')
u4, p4 = brunnermunzel(y, x, alternative='less')

assert_approx_equal(p1, p2, significant=significant)
assert_approx_equal(p3, p4, significant=significant)
assert_(p1 != p3)
assert_approx_equal(u1, 3.1374674823029505,
significant=significant)
assert_approx_equal(u2, -3.1374674823029505,
significant=significant)
assert_approx_equal(u3, 3.1374674823029505,
significant=significant)
assert_approx_equal(u4, -3.1374674823029505,
significant=significant)
assert_approx_equal(p1, 0.0028931043330757342,
significant=significant)
assert_approx_equal(p3, 0.99710689566692423,
significant=significant)


def test_brunnermunzel_two_sided():
# copied from scipy with adjustment
x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
significant = 13

# Results are compared with R's lawstat package.
u1, p1 = brunnermunzel(x, y, alternative='two-sided')
u2, p2 = brunnermunzel(y, x, alternative='two-sided')

assert_approx_equal(p1, p2, significant=significant)
assert_approx_equal(u1, 3.1374674823029505,
significant=significant)
assert_approx_equal(u2, -3.1374674823029505,
significant=significant)
assert_approx_equal(p1, 0.0057862086661515377,
significant=significant)


def test_rank_compare1():
# Example from Munzel and Hauschke 2003
# data is given by counts, expand to observations
levels = [-2, -1, 0, 1, 2]
new = [24, 37, 21, 19, 6]
active = [11, 51, 22, 21, 7]
x1 = np.repeat(levels, new)
x2 = np.repeat(levels, active)

# using lawstat
# > brunner.munzel.test(xn, xa) #brunnermunzel.test(x, y)
res2_t = Holder(statistic=1.1757561456582,
df=204.2984239868,
pvalue=0.2410606649547,
ci=[0.4700629827705593, 0.6183882855872511],
prob=0.5442256341789052)

res = brunnermunzel(x1, x2, distribution="normal")
assert_allclose(res.statistic, res2_t.statistic)
tt = res.test_prob_superior()
# TODO: return HolderTuple
# assert_allclose(tt.statistic, res2_t.statistic)
# TODO: check sign/direction in lawstat
assert_allclose(tt[0], -res2_t.statistic)

ci = res.conf_int(alpha=0.05)
# test consistency of test and confint
res_lb = res.test_prob_superior(value=ci[0])
assert_allclose(res_lb[1], 0.05, rtol=1e-13)
res_ub = res.test_prob_superior(value=ci[1])
assert_allclose(res_ub[1], 0.05, rtol=1e-13)

# test consistency of tost and confint
res_tost = res.tost_prob_superior(*ci)
assert_allclose(res_tost[1][1], 0.025, rtol=1e-13)
assert_allclose(res_tost[2][1], 0.025, rtol=1e-13)
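

As a cross-check, not part of the commit: the prob1 effect size named in the commit message can be recomputed by brute force over all pairs; a sketch assuming the Munzel and Hauschke data from test_rank_compare1 above.

import numpy as np
from statsmodels.stats.nonparametric import brunnermunzel

levels = [-2, -1, 0, 1, 2]
new = [24, 37, 21, 19, 6]
active = [11, 51, 22, 21, 7]
x1 = np.repeat(levels, new)
x2 = np.repeat(levels, active)

res = brunnermunzel(x1, x2, distribution="normal")

# brute force over all nobs1 * nobs2 pairs:
# P(x1 > x2) + 0.5 * P(x1 = x2), ties counted with weight one half
d = x1[:, None] - x2[None, :]
prob_bf = ((d > 0) + 0.5 * (d == 0)).mean()

np.testing.assert_allclose(res.prob1, prob_bf, rtol=1e-12)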
