ENH: add prob(x1 > x2) effect size measure, inference based on brunner munzel
josef-pkt committed Aug 18, 2020
1 parent 1c82e3a commit 1cbcdfb
Showing 2 changed files with 345 additions and 2 deletions.
252 changes: 252 additions & 0 deletions statsmodels/stats/nonparametric.py
@@ -0,0 +1,252 @@
# -*- coding: utf-8 -*-
"""
Rank-based methods for inferential statistics
Created on Sat Aug 15 10:18:53 2020
Author: Josef Perktold
License: BSD-3
"""


import numpy as np

from scipy import stats
from scipy.stats import rankdata

from statsmodels.stats.base import HolderTuple
from statsmodels.stats.weightstats import (
_zconfint_generic, _tconfint_generic, _zstat_generic, _tstat_generic)


class BrunnerMunzelResult(HolderTuple):
"""Results for rank comparison
"""

def conf_int(self, alpha=0.05, value=None, alternative="two-sided"):
"""Confidence interval for prob1 = P(x1 > x2) + 0.5 * P(x1 = x2)

If `value` is not None, the interval is for the difference
prob1 - value; the t distribution is used if `df` is available.
"""
p0 = value
if p0 is None:
p0 = 0
diff = self.prob1 - p0
std_diff = np.sqrt(self.var / self.nobs)

if self.df is None:
return _zconfint_generic(diff, std_diff, alpha, alternative)
else:
return _tconfint_generic(diff, std_diff, self.df, alpha,
alternative)

def test_prob_superior(self, value=0.5, alternative="two-sided"):
"""Test for the superiority probability

H0: p = P(x1 > x2) + 0.5 * P(x1 = x2) = value
"""

p0 = value # alias
diff = self.prob1 - p0
# TODO: use var_prob
std_diff = np.sqrt(self.var / self.nobs)

# TODO: return HolderTuple
# corresponds to a one-sample test and either p0 or diff could be used
if self.df is None:
return _zstat_generic(self.prob1, p0, std_diff, alternative,
diff=0)
else:
return _tstat_generic(self.prob1, p0, std_diff, self.df,
alternative, diff=0)

def tost_prob_superior(self, low, upp):
'''Test of stochastic (non-)equivalence of p = P(x1 > x2)

Null hypothesis: p < low or p > upp
Alternative hypothesis: low < p < upp

where p is the probability that a random draw from the population of
the first sample has a larger value than a random draw from the
population of the second sample, specifically

p = P(x1 > x2) + 0.5 * P(x1 = x2)

If the pvalue is smaller than a threshold, say 0.05, then we reject the
hypothesis that the probability p that distribution 1 is stochastically
superior to distribution 2 is outside of the interval given by the
thresholds low and upp.

Parameters
----------
low, upp : float
equivalence interval low < prob < upp

Returns
-------
pvalue : float
pvalue of the non-equivalence test, i.e. the larger of the two
one-sided pvalues
t1, pv1, df1 : tuple
test statistic, pvalue and degrees of freedom for lower threshold
test
t2, pv2, df2 : tuple
test statistic, pvalue and degrees of freedom for upper threshold
test
'''

t1, pv1 = self.test_prob_superior(low, alternative='larger')
t2, pv2 = self.test_prob_superior(upp, alternative='smaller')
df1 = df2 = None
# TODO: return HolderTuple
return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2)


def brunnermunzel(x, y, alternative="two-sided", distribution="t",
nan_policy='propagate'):
"""
Compute the Brunner-Munzel test on samples x and y.
The Brunner-Munzel test is a nonparametric test of the null hypothesis that
when values are taken one by one from each group, the probabilities of
getting large values in both groups are equal.
Unlike the Wilcoxon-Mann-Whitney U test, this does not require the
assumption of equal variances in the two groups. Note that this does not
assume the distributions are the same. This test works on two independent
samples, which may have different sizes.
Parameters
----------
x, y : array_like
Array of samples, should be one-dimensional.
alternative : {'two-sided', 'less', 'greater'}, optional
Defines the alternative hypothesis.
The following options are available (default is 'two-sided'):
* 'two-sided'
* 'less': one-sided
* 'greater': one-sided
distribution : {'t', 'normal'}, optional
Defines how to get the p-value.
The following options are available (default is 't'):
* 't': get the p-value by t-distribution
* 'normal': get the p-value by standard normal distribution.
nan_policy : {'propagate', 'raise', 'omit'}, optional
Defines how to handle when input contains nan.
The following options are available (default is 'propagate'):
* 'propagate': returns nan
* 'raise': throws an error
* 'omit': performs the calculations ignoring nan values
Note: the nan checks are currently disabled (commented out below), so
this option has no effect yet.
Returns
-------
statistic : float
The Brunner-Munzel W statistic.
pvalue : float
p-value assuming a t or normal distribution, as selected by
`distribution`. One-sided or two-sided, depending on the choice of
`alternative`.
See Also
--------
mannwhitneyu : Mann-Whitney rank test on two samples.
Notes
-----
Brunner and Munzel recommended estimating the p-value using the
t-distribution when the sample size is 50 or less. If the size is lower
than 10, it is better to use the permuted Brunner-Munzel test (see [2]_).
References
----------
.. [1] Brunner, E. and Munzel, U. "The nonparametric Behrens-Fisher
problem: Asymptotic theory and a small-sample approximation".
Biometrical Journal. Vol. 42(2000): 17-25.
.. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the
non-parametric Behrens-Fisher problem". Computational Statistics and
Data Analysis. Vol. 51(2007): 5192-5204.
Examples
--------
>>> from statsmodels.stats.nonparametric import brunnermunzel
>>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1]
>>> x2 = [3,3,4,3,1,2,3,1,1,5,4]
>>> w, p_value = brunnermunzel(x1, x2)
>>> w
3.1374674823029505
>>> p_value
0.0057862086661515377
"""
x = np.asarray(x)
y = np.asarray(y)

# # check both x and y
# cnx, npx = _contains_nan(x, nan_policy)
# cny, npy = _contains_nan(y, nan_policy)
# contains_nan = cnx or cny
# if npx == "omit" or npy == "omit":
# nan_policy = "omit"

# if contains_nan and nan_policy == "propagate":
# return BrunnerMunzelResult(np.nan, np.nan)
# elif contains_nan and nan_policy == "omit":
# x = ma.masked_invalid(x)
# y = ma.masked_invalid(y)
# return mstats_basic.brunnermunzel(x, y, alternative, distribution)

nx = len(x)
ny = len(y)
nobs = nx + ny
if nx == 0 or ny == 0:
return BrunnerMunzelResult(np.nan, np.nan)
rankc = rankdata(np.concatenate((x, y)))
rankcx = rankc[0:nx]
rankcy = rankc[nx:nx+ny]
rankcx_mean = np.mean(rankcx)
rankcy_mean = np.mean(rankcy)
rankx = rankdata(x)
ranky = rankdata(y)
rankx_mean = np.mean(rankx)
ranky_mean = np.mean(ranky)

Sx = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0))
Sx /= nx - 1
Sy = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0))
Sy /= ny - 1

wbfn = nx * ny * (rankcy_mean - rankcx_mean)
wbfn /= (nx + ny) * np.sqrt(nx * Sx + ny * Sy)

if distribution == "t":
df_numer = np.power(nx * Sx + ny * Sy, 2.0)
df_denom = np.power(nx * Sx, 2.0) / (nx - 1)
df_denom += np.power(ny * Sy, 2.0) / (ny - 1)
df = df_numer / df_denom
p = stats.t.cdf(wbfn, df)
elif distribution == "normal":
p = stats.norm.cdf(wbfn)
df = None
else:
raise ValueError(
"distribution should be 't' or 'normal'")

if alternative == "greater":
pass
elif alternative == "less":
p = 1 - p
elif alternative == "two-sided":
p = 2 * np.min([p, 1-p])
else:
raise ValueError(
"alternative should be 'less', 'greater' or 'two-sided'")

# other info
nobs1, nobs2 = nx, ny # rename
mean1 = rankcx_mean
mean2 = rankcy_mean

var1 = Sx / (nobs - nx)**2
var2 = Sy / (nobs - ny)**2
var_prob = (var1 / nobs1 + var2 / nobs2)
var = nobs * (var1 / nobs1 + var2 / nobs2)
prob1 = (mean1 - (nobs1 + 1) / 2) / nobs2
prob2 = (mean2 - (nobs2 + 1) / 2) / nobs1

return BrunnerMunzelResult(statistic=wbfn, pvalue=p, s1=Sx, s2=Sy,
var1=var1, var2=var2, var=var,
var_prob=var_prob,
nobs1=nx, nobs2=ny, nobs=nobs,
mean1=mean1, mean2=mean2,
prob1=prob1, prob2=prob2,
somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1,
df=df
)
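
A minimal usage sketch of the new API, not part of the commit: it assumes the module is importable as statsmodels.stats.nonparametric and reuses the two samples from the docstring example; the equivalence margins 0.4 and 0.6 below are illustrative only.

from statsmodels.stats.nonparametric import brunnermunzel

x1 = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
x2 = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]

# defaults: t distribution, two-sided alternative
res = brunnermunzel(x1, x2)
print(res.statistic, res.pvalue)  # Brunner-Munzel W statistic and p-value
print(res.prob1)                  # estimate of P(x1 > x2) + 0.5 * P(x1 = x2)

# one-sample style test of H0: prob1 = 0.5, and the matching confidence interval
stat, pval = res.test_prob_superior(value=0.5)
low, upp = res.conf_int(alpha=0.05)

# TOST: a small pvalue rejects the null that prob1 lies outside (0.4, 0.6)
pval_tost, test_low, test_upp = res.tost_prob_superior(low=0.4, upp=0.6)
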
95 changes: 93 additions & 2 deletions statsmodels/stats/tests/test_nonparametric.py
@@ -2,18 +2,23 @@
"""
Created on Fri Jul 05 14:05:24 2013
Aug 15 2020: add brunnermunzel, rank_compare_2indep
Author: Josef Perktold
"""
from statsmodels.compat.python import lzip
import numpy as np
from numpy.testing import assert_allclose, assert_almost_equal
from numpy.testing import (assert_allclose, assert_almost_equal,
assert_approx_equal, assert_)
import pytest

from statsmodels.stats.contingency_tables import mcnemar, cochrans_q, SquareTable
from statsmodels.stats.contingency_tables import (
mcnemar, cochrans_q, SquareTable)
from statsmodels.sandbox.stats.runs import (Runs,
runstest_1samp, runstest_2samp)
from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar
from statsmodels.stats.nonparametric import brunnermunzel
from statsmodels.tools.testing import Holder


def _expand_table(table):
@@ -274,3 +279,89 @@ def test_runstest_2sample():
# check cutoff
res2_1s = runstest_1samp(xy, xy.mean())
assert_allclose(res2_1s, res_1s, rtol=1e-6)


def test_brunnermunzel_one_sided():
# copied from scipy with adjustment
x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
significant = 13

# Results are compared with R's lawstat package.
u1, p1 = brunnermunzel(x, y, alternative='less')
u2, p2 = brunnermunzel(y, x, alternative='greater')
u3, p3 = brunnermunzel(x, y, alternative='greater')
u4, p4 = brunnermunzel(y, x, alternative='less')

assert_approx_equal(p1, p2, significant=significant)
assert_approx_equal(p3, p4, significant=significant)
assert_(p1 != p3)
assert_approx_equal(u1, 3.1374674823029505,
significant=significant)
assert_approx_equal(u2, -3.1374674823029505,
significant=significant)
assert_approx_equal(u3, 3.1374674823029505,
significant=significant)
assert_approx_equal(u4, -3.1374674823029505,
significant=significant)
assert_approx_equal(p1, 0.0028931043330757342,
significant=significant)
assert_approx_equal(p3, 0.99710689566692423,
significant=significant)


def test_brunnermunzel_two_sided():
# copied from scipy with adjustment
x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1]
y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4]
significant = 13

# Results are compared with R's lawstat package.
u1, p1 = brunnermunzel(x, y, alternative='two-sided')
u2, p2 = brunnermunzel(y, x, alternative='two-sided')

assert_approx_equal(p1, p2, significant=significant)
assert_approx_equal(u1, 3.1374674823029505,
significant=significant)
assert_approx_equal(u2, -3.1374674823029505,
significant=significant)
assert_approx_equal(p1, 0.0057862086661515377,
significant=significant)


def test_rank_compare1():
# Example from Munzel and Hauschke 2003
# data is given by counts, expand to observations
levels = [-2, -1, 0, 1, 2]
new = [24, 37, 21, 19, 6]
active = [11, 51, 22, 21, 7]
x1 = np.repeat(levels, new)
x2 = np.repeat(levels, active)

# using lawstat
# > brunner.munzel.test(xn, xa) #brunnermunzel.test(x, y)
res2_t = Holder(statistic=1.1757561456582,
df=204.2984239868,
pvalue=0.2410606649547,
ci=[0.4700629827705593, 0.6183882855872511],
prob=0.5442256341789052)

res = brunnermunzel(x1, x2, distribution="normal")
assert_allclose(res.statistic, res2_t.statistic)
tt = res.test_prob_superior()
# TODO: return HolderTuple
# assert_allclose(tt.statistic, res2_t.statistic)
# TODO: check sign/direction in lawstat
assert_allclose(tt[0], -res2_t.statistic)

ci = res.conf_int(alpha=0.05)
# test consistency of test and confint
res_lb = res.test_prob_superior(value=ci[0])
assert_allclose(res_lb[1], 0.05, rtol=1e-13)
res_ub = res.test_prob_superior(value=ci[1])
assert_allclose(res_ub[1], 0.05, rtol=1e-13)

# test consistency of tost and confint
res_tost = res.tost_prob_superior(*ci)
assert_allclose(res_tost[1][1], 0.025, rtol=1e-13)
assert_allclose(res_tost[2][1], 0.025, rtol=1e-13)
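

As a cross-check, not part of the commit: the prob1 effect size named in the commit message can be recomputed by brute force over all pairs; a sketch assuming the Munzel and Hauschke data from test_rank_compare1 above.

import numpy as np
from statsmodels.stats.nonparametric import brunnermunzel

levels = [-2, -1, 0, 1, 2]
new = [24, 37, 21, 19, 6]
active = [11, 51, 22, 21, 7]
x1 = np.repeat(levels, new)
x2 = np.repeat(levels, active)

res = brunnermunzel(x1, x2, distribution="normal")

# brute force over all nobs1 * nobs2 pairs:
# P(x1 > x2) + 0.5 * P(x1 = x2), ties counted with weight one half
d = x1[:, None] - x2[None, :]
prob_bf = ((d > 0) + 0.5 * (d == 0)).mean()

np.testing.assert_allclose(res.prob1, prob_bf, rtol=1e-12)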
