From 6ecda5f88d93192ac1e217b7f9957ce7378a4e5b Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Wed, 19 Aug 2020 14:58:20 -0400 Subject: [PATCH 1/6] ENH: add prob(x1 > x2) effect size measure, inference based on brunner munzel --- statsmodels/stats/nonparametric.py | 252 ++++++++++++++++++ statsmodels/stats/tests/test_nonparametric.py | 124 ++++++++- 2 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 statsmodels/stats/nonparametric.py diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py new file mode 100644 index 00000000000..8513bd03967 --- /dev/null +++ b/statsmodels/stats/nonparametric.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +""" +Rank based methods for inferential statistics + +Created on Sat Aug 15 10:18:53 2020 + +Author: Josef Perktold +License: BSD-3 + +""" + + +import numpy as np + +from scipy import stats +from scipy.stats import rankdata + +from statsmodels.stats.base import HolderTuple +from statsmodels.stats.weightstats import ( + _zconfint_generic, _tconfint_generic, _zstat_generic, _tstat_generic) + + +class BrunnerMunzelResult(HolderTuple): + """Results for rank comparison + """ + + def conf_int(self, alpha=0.05, value=None, alternative="two-sided"): + + p0 = value + if p0 is None: + p0 = 0 + diff = self.prob1 - p0 + std_diff = np.sqrt(self.var / self.nobs) + + if self.df is None: + return _zconfint_generic(diff, std_diff, alpha, alternative) + else: + return _tconfint_generic(diff, std_diff, self.df, alpha, + alternative) + + def test_prob_superior(self, value=0.5, alternative="two-sided"): + """test for superiority probability + + H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value + """ + + p0 = value # alias + # diff = self.prob1 - p0 # for reporting, not used in computation + # TODO: use var_prob + std_diff = np.sqrt(self.var / self.nobs) + + # TODO: return HolderTuple + # corresponds to a one-sample test and either p0 or diff could be used + if self.df is None: + return _zstat_generic(self.prob1, p0, std_diff, alternative, + diff=0) + else: + return _tstat_generic(self.prob1, p0, std_diff, self.df, + alternative, diff=0) + + def tost_prob_superior(self, low, upp): + '''test of stochastic (non-)equivalence of p = P(x1 > x2) + + null hypothesis: p < low or p > upp + alternative hypothesis: low < p < upp + + where p is the probability that a random draw from the population of + the first sample has a larger value than a random draw from the + population of the second sample, specifically + + p = P(x1 > x2) + 0.5 * P(x1 = x2) + + If the pvalue is smaller than a threshold, say 0.05, then we reject the + hypothesis that the probability p that distribution 1 is stochastically + superior to distribution 2 is outside of the interval given by + thresholds low and upp. + + Parameters + ---------- + low, upp : float + equivalence interval low < mean < upp + + Returns + ------- + pvalue : float + pvalue of the non-equivalence test + t1, pv1, df1 : tuple + test statistic, pvalue and degrees of freedom for lower threshold + test + t2, pv2, df2 : tuple + test statistic, pvalue and degrees of freedom for upper threshold + test + + ''' + + t1, pv1 = self.test_prob_superior(low, alternative='larger') + t2, pv2 = self.test_prob_superior(upp, alternative='smaller') + df1 = df2 = None + # TODO: return HolderTuple + return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) + + +def brunnermunzel(x, y, alternative="two-sided", distribution="t", + nan_policy='propagate'): + """ + Compute the Brunner-Munzel test on samples x and y. 
+ The Brunner-Munzel test is a nonparametric test of the null hypothesis that + when values are taken one by one from each group, the probabilities of + getting large values in both groups are equal. + Unlike the Wilcoxon-Mann-Whitney's U test, this does not require the + assumption of equivariance of two groups. Note that this does not assume + the distributions are same. This test works on two independent samples, + which may have different sizes. + Parameters + ---------- + x, y : array_like + Array of samples, should be one-dimensional. + alternative : {'two-sided', 'less', 'greater'}, optional + Defines the alternative hypothesis. + The following options are available (default is 'two-sided'): + * 'two-sided' + * 'less': one-sided + * 'greater': one-sided + distribution : {'t', 'normal'}, optional + Defines how to get the p-value. + The following options are available (default is 't'): + * 't': get the p-value by t-distribution + * 'normal': get the p-value by standard normal distribution. + nan_policy : {'propagate', 'raise', 'omit'}, optional + Defines how to handle when input contains nan. + The following options are available (default is 'propagate'): + * 'propagate': returns nan + * 'raise': throws an error + * 'omit': performs the calculations ignoring nan values + Returns + ------- + statistic : float + The Brunner-Munzer W statistic. + pvalue : float + p-value assuming an t distribution. One-sided or + two-sided, depending on the choice of `alternative` and `distribution`. + See Also + -------- + mannwhitneyu : Mann-Whitney rank test on two samples. + Notes + ----- + Brunner and Munzel recommended to estimate the p-value by t-distribution + when the size of data is 50 or less. If the size is lower than 10, it would + be better to use permuted Brunner Munzel test (see [2]_). + References + ---------- + .. [1] Brunner, E. and Munzel, U. "The nonparametric Benhrens-Fisher + problem: Asymptotic theory and a small-sample approximation". + Biometrical Journal. Vol. 42(2000): 17-25. + .. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the + non-parametric Behrens-Fisher problem". Computational Statistics and + Data Analysis. Vol. 51(2007): 5192-5204. 
+ Examples + -------- + >>> from scipy import stats + >>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1] + >>> x2 = [3,3,4,3,1,2,3,1,1,5,4] + >>> w, p_value = stats.brunnermunzel(x1, x2) + >>> w + 3.1374674823029505 + >>> p_value + 0.0057862086661515377 + """ + x = np.asarray(x) + y = np.asarray(y) + +# # check both x and y +# cnx, npx = _contains_nan(x, nan_policy) +# cny, npy = _contains_nan(y, nan_policy) +# contains_nan = cnx or cny +# if npx == "omit" or npy == "omit": +# nan_policy = "omit" + +# if contains_nan and nan_policy == "propagate": +# return BrunnerMunzelResult(np.nan, np.nan) +# elif contains_nan and nan_policy == "omit": +# x = ma.masked_invalid(x) +# y = ma.masked_invalid(y) +# return mstats_basic.brunnermunzel(x, y, alternative, distribution) + + nx = len(x) + ny = len(y) + nobs = nx + ny + if nx == 0 or ny == 0: + return BrunnerMunzelResult(np.nan, np.nan) + rankc = rankdata(np.concatenate((x, y))) + rankcx = rankc[0:nx] + rankcy = rankc[nx:nx+ny] + rankcx_mean = np.mean(rankcx) + rankcy_mean = np.mean(rankcy) + rankx = rankdata(x) + ranky = rankdata(y) + rankx_mean = np.mean(rankx) + ranky_mean = np.mean(ranky) + + Sx = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0)) + Sx /= nx - 1 + Sy = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0)) + Sy /= ny - 1 + + wbfn = nx * ny * (rankcy_mean - rankcx_mean) + wbfn /= (nx + ny) * np.sqrt(nx * Sx + ny * Sy) + + if distribution == "t": + df_numer = np.power(nx * Sx + ny * Sy, 2.0) + df_denom = np.power(nx * Sx, 2.0) / (nx - 1) + df_denom += np.power(ny * Sy, 2.0) / (ny - 1) + df = df_numer / df_denom + p = stats.t.cdf(wbfn, df) + elif distribution == "normal": + p = stats.norm.cdf(wbfn) + df = None + else: + raise ValueError( + "distribution should be 't' or 'normal'") + + if alternative == "greater": + pass + elif alternative == "less": + p = 1 - p + elif alternative == "two-sided": + p = 2 * np.min([p, 1-p]) + else: + raise ValueError( + "alternative should be 'less', 'greater' or 'two-sided'") + + # other info + nobs1, nobs2 = nx, ny # rename + mean1 = rankcx_mean + mean2 = rankcy_mean + + var1 = Sx / (nobs - nx)**2 + var2 = Sy / (nobs - ny)**2 + var_prob = (var1 / nobs1 + var2 / nobs2) + var = nobs * (var1 / nobs1 + var2 / nobs2) + prob1 = (mean1 - (nobs1 + 1) / 2) / nobs2 + prob2 = (mean2 - (nobs2 + 1) / 2) / nobs1 + + return BrunnerMunzelResult(statistic=wbfn, pvalue=p, x1=Sx, s2=Sy, + var1=var1, var2=var2, var=var, + var_prob=var_prob, + nobs1=nx, nobs2=ny, nobs=nobs, + mean1=mean1, mean2=mean2, + prob1=prob1, prob2=prob2, + somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, + df=df + ) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index 65ac6a54cf9..6552692b554 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -2,18 +2,23 @@ """ Created on Fri Jul 05 14:05:24 2013 +Aug 15 2020: add brunnermunzel, rank_compare_2indep Author: Josef Perktold """ from statsmodels.compat.python import lzip import numpy as np -from numpy.testing import assert_allclose, assert_almost_equal +from numpy.testing import (assert_allclose, assert_almost_equal, + assert_approx_equal, assert_) import pytest -from statsmodels.stats.contingency_tables import mcnemar, cochrans_q, SquareTable +from statsmodels.stats.contingency_tables import ( + mcnemar, cochrans_q, SquareTable) from statsmodels.sandbox.stats.runs import (Runs, runstest_1samp, runstest_2samp) from statsmodels.sandbox.stats.runs import mcnemar as 
sbmcnemar +from statsmodels.stats.nonparametric import brunnermunzel +from statsmodels.tools.testing import Holder def _expand_table(table): @@ -274,3 +279,118 @@ def test_runstest_2sample(): # check cutoff res2_1s = runstest_1samp(xy, xy.mean()) assert_allclose(res2_1s, res_1s, rtol=1e-6) + + +def test_brunnermunzel_one_sided(): + # copied from scipy with adjustment + x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1] + y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] + significant = 13 + + # Results are compared with R's lawstat package. + u1, p1 = brunnermunzel(x, y, alternative='less') + u2, p2 = brunnermunzel(y, x, alternative='greater') + u3, p3 = brunnermunzel(x, y, alternative='greater') + u4, p4 = brunnermunzel(y, x, alternative='less') + + assert_approx_equal(p1, p2, significant=significant) + assert_approx_equal(p3, p4, significant=significant) + assert_(p1 != p3) + assert_approx_equal(u1, 3.1374674823029505, + significant=significant) + assert_approx_equal(u2, -3.1374674823029505, + significant=significant) + assert_approx_equal(u3, 3.1374674823029505, + significant=significant) + assert_approx_equal(u4, -3.1374674823029505, + significant=significant) + assert_approx_equal(p1, 0.0028931043330757342, + significant=significant) + assert_approx_equal(p3, 0.99710689566692423, + significant=significant) + + +def test_brunnermunzel_two_sided(): + # copied from scipy with adjustment + x = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1] + y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] + significant = 13 + + # Results are compared with R's lawstat package. + u1, p1 = brunnermunzel(x, y, alternative='two-sided') + u2, p2 = brunnermunzel(y, x, alternative='two-sided') + + assert_approx_equal(p1, p2, significant=significant) + assert_approx_equal(u1, 3.1374674823029505, + significant=significant) + assert_approx_equal(u2, -3.1374674823029505, + significant=significant) + assert_approx_equal(p1, 0.0057862086661515377, + significant=significant) + + +def test_rank_compare1(): + # Example from Munzel and Hauschke 2003 + # data is given by counts, expand to observations + levels = [-2, -1, 0, 1, 2] + new = [24, 37, 21, 19, 6] + active = [11, 51, 22, 21, 7] + x1 = np.repeat(levels, new) + x2 = np.repeat(levels, active) + + # using lawstat + # > brunner.munzel.test(xn, xa) #brunnermunzel.test(x, y) + res2_t = Holder(statistic=1.1757561456582, + df=204.2984239868, + pvalue=0.2410606649547, + ci=[0.4700629827705593, 0.6183882855872511], + prob=0.5442256341789052) + + res = brunnermunzel(x1, x2, distribution="normal") + assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13) + assert_allclose(res.prob1, 1 - res2_t.prob, rtol=1e-13) + assert_allclose(res.prob2, res2_t.prob, rtol=1e-13) + tt = res.test_prob_superior() + # TODO: return HolderTuple + # assert_allclose(tt.statistic, res2_t.statistic) + # TODO: check sign/direction in lawstat + assert_allclose(tt[0], -res2_t.statistic, rtol=1e-13) + + ci = res.conf_int(alpha=0.05) + # we compare normal confint with t confint, lower rtol + assert_allclose(ci, 1 - np.array(res2_t.ci)[::-1], rtol=0.005) + # test consistency of test and confint + res_lb = res.test_prob_superior(value=ci[0]) + assert_allclose(res_lb[1], 0.05, rtol=1e-13) + res_ub = res.test_prob_superior(value=ci[1]) + assert_allclose(res_ub[1], 0.05, rtol=1e-13) + + # test consistency of tost and confint + res_tost = res.tost_prob_superior(*ci) + assert_allclose(res_tost[1][1], 0.025, rtol=1e-13) + assert_allclose(res_tost[2][1], 0.025, rtol=1e-13) + + res = brunnermunzel(x1, x2, distribution="t") + 
assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13) + tt = res.test_prob_superior() + # TODO: return HolderTuple + # assert_allclose(tt.statistic, res2_t.statistic) + # TODO: check sign/direction in lawstat, reversed from ours + assert_allclose(tt[0], -res2_t.statistic, rtol=1e-13) + assert_allclose(tt[1], res2_t.pvalue, rtol=1e-13) + assert_allclose(res.pvalue, res2_t.pvalue, rtol=1e-13) + assert_allclose(res.df, res2_t.df, rtol=1e-13) + + ci = res.conf_int(alpha=0.05) + # our ranking is defined as reversed from lawstat, and BM article + assert_allclose(ci, 1 - np.array(res2_t.ci)[::-1], rtol=1e-11) + # test consistency of test and confint + res_lb = res.test_prob_superior(value=ci[0]) + assert_allclose(res_lb[1], 0.05, rtol=1e-11) + res_ub = res.test_prob_superior(value=ci[1]) + assert_allclose(res_ub[1], 0.05, rtol=1e-11) + + # test consistency of tost and confint + res_tost = res.tost_prob_superior(*ci) + assert_allclose(res_tost[1][1], 0.025, rtol=1e-11) + assert_allclose(res_tost[2][1], 0.025, rtol=1e-11) From 58d83059c13f5dd4a95f9b20e0f514b45862120b Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Fri, 21 Aug 2020 11:31:47 -0400 Subject: [PATCH 2/6] REF: rename function, adjust namings to statsmodels --- statsmodels/stats/nonparametric.py | 186 +++++++++--------- statsmodels/stats/tests/test_nonparametric.py | 35 ++-- 2 files changed, 118 insertions(+), 103 deletions(-) diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index 8513bd03967..b147c4fc143 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -20,7 +20,7 @@ _zconfint_generic, _tconfint_generic, _zstat_generic, _tstat_generic) -class BrunnerMunzelResult(HolderTuple): +class RankCompareResult(HolderTuple): """Results for rank comparison """ @@ -100,53 +100,76 @@ def tost_prob_superior(self, low, upp): return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) -def brunnermunzel(x, y, alternative="two-sided", distribution="t", - nan_policy='propagate'): +def rank_compare(x1, x2, alternative="two-sided", use_t=True): """ - Compute the Brunner-Munzel test on samples x and y. - The Brunner-Munzel test is a nonparametric test of the null hypothesis that - when values are taken one by one from each group, the probabilities of - getting large values in both groups are equal. - Unlike the Wilcoxon-Mann-Whitney's U test, this does not require the - assumption of equivariance of two groups. Note that this does not assume - the distributions are same. This test works on two independent samples, - which may have different sizes. + Statistics and tests for the probability that x1 has larger values than x2. + + p is the probability that a random draw from the population of + the first sample has a larger value than a random draw from the + population of the second sample, specifically + + p = P(x1 > x2) + 0.5 * P(x1 = x2) + + This is a measure underlying Wilcoxon-Mann-Whitney's U test, + Fligner-Policello test and Brunner-Munzel test, and + Inference is based on the asymptotic distribution of the Brunner-Munzel + test. + + The Null hypothesis for stochastic equality is p = 0.5, which corresponds + to the Brunner-Munzel test. + Parameters ---------- - x, y : array_like + x1, x2 : array_like Array of samples, should be one-dimensional. alternative : {'two-sided', 'less', 'greater'}, optional Defines the alternative hypothesis. 
The following options are available (default is 'two-sided'): * 'two-sided' - * 'less': one-sided - * 'greater': one-sided - distribution : {'t', 'normal'}, optional - Defines how to get the p-value. - The following options are available (default is 't'): - * 't': get the p-value by t-distribution - * 'normal': get the p-value by standard normal distribution. - nan_policy : {'propagate', 'raise', 'omit'}, optional - Defines how to handle when input contains nan. - The following options are available (default is 'propagate'): - * 'propagate': returns nan - * 'raise': throws an error - * 'omit': performs the calculations ignoring nan values + * 'smaller': one-sided + * 'larger': one-sided + use_t : poolean + If use_t is true, the t distribution with Welch-Satterthwaite type + degrees of freedom is used for p-value and confidence interval. + If use_t is false, then the normal distribution is used. + Returns ------- - statistic : float - The Brunner-Munzer W statistic. - pvalue : float - p-value assuming an t distribution. One-sided or - two-sided, depending on the choice of `alternative` and `distribution`. + res : RankCompareResult + + statistic : float + The Brunner-Munzer W statistic. + pvalue : float + p-value assuming an t distribution. One-sided or + two-sided, depending on the choice of `alternative` and `use_t`. + + See Also -------- - mannwhitneyu : Mann-Whitney rank test on two samples. + scipy.stats.brunnermunzel : Brunner-Munzel test for stochastic equality + scipy.stats.mannwhitneyu : Mann-Whitney rank test on two samples. + Notes ----- + Wilcoxon-Mann-Whitney assumes equal variance or equal distribution under + the Null hypothesis. Fligner-Policello test allows for unequal variances + but assumes continuous distribution, i.e. no ties. + Brunner-Munzel extend the test to allow for unequal variance and discrete + or ordered categorical random variables. + Brunner and Munzel recommended to estimate the p-value by t-distribution when the size of data is 50 or less. If the size is lower than 10, it would be better to use permuted Brunner Munzel test (see [2]_). + + This measure has been introduced in the literature under many different + names relying on a variety of assumptions. + In psychology, ... introduced it as Common Language effect size for the + continuous, normal distribution case, ... extended it to the nonparameteric + continuous distribution case as in Fligner-Policello. + + Note: Brunner-Munzel define the probability for x1 to be stochastically + smaller than x2, while here we use stochastically larger. + References ---------- .. [1] Brunner, E. and Munzel, U. 
"The nonparametric Benhrens-Fisher @@ -166,87 +189,70 @@ def brunnermunzel(x, y, alternative="two-sided", distribution="t", >>> p_value 0.0057862086661515377 """ - x = np.asarray(x) - y = np.asarray(y) - -# # check both x and y -# cnx, npx = _contains_nan(x, nan_policy) -# cny, npy = _contains_nan(y, nan_policy) -# contains_nan = cnx or cny -# if npx == "omit" or npy == "omit": -# nan_policy = "omit" - -# if contains_nan and nan_policy == "propagate": -# return BrunnerMunzelResult(np.nan, np.nan) -# elif contains_nan and nan_policy == "omit": -# x = ma.masked_invalid(x) -# y = ma.masked_invalid(y) -# return mstats_basic.brunnermunzel(x, y, alternative, distribution) - - nx = len(x) - ny = len(y) - nobs = nx + ny - if nx == 0 or ny == 0: - return BrunnerMunzelResult(np.nan, np.nan) - rankc = rankdata(np.concatenate((x, y))) - rankcx = rankc[0:nx] - rankcy = rankc[nx:nx+ny] + x1 = np.asarray(x1) + x2 = np.asarray(x2) + + nobs1 = len(x1) + nobs2 = len(x2) + nobs = nobs1 + nobs2 + if nobs1 == 0 or nobs2 == 0: + return RankCompareResult(statistic=np.nan, pvalue=np.nan) + rankc = rankdata(np.concatenate((x1, x2))) + rankcx = rankc[0:nobs1] + rankcy = rankc[nobs1:nobs1+nobs2] rankcx_mean = np.mean(rankcx) rankcy_mean = np.mean(rankcy) - rankx = rankdata(x) - ranky = rankdata(y) + rankx = rankdata(x1) + ranky = rankdata(x2) rankx_mean = np.mean(rankx) ranky_mean = np.mean(ranky) - Sx = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0)) - Sx /= nx - 1 - Sy = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0)) - Sy /= ny - 1 + S1 = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0)) + S1 /= nobs1 - 1 + S2 = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0)) + S2 /= nobs2 - 1 - wbfn = nx * ny * (rankcy_mean - rankcx_mean) - wbfn /= (nx + ny) * np.sqrt(nx * Sx + ny * Sy) + wbfn = nobs1 * nobs2 * (rankcx_mean - rankcy_mean) + wbfn /= (nobs1 + nobs2) * np.sqrt(nobs1 * S1 + nobs2 * S2) - if distribution == "t": - df_numer = np.power(nx * Sx + ny * Sy, 2.0) - df_denom = np.power(nx * Sx, 2.0) / (nx - 1) - df_denom += np.power(ny * Sy, 2.0) / (ny - 1) + if use_t: + df_numer = np.power(nobs1 * S1 + nobs2 * S2, 2.0) + df_denom = np.power(nobs1 * S1, 2.0) / (nobs1 - 1) + df_denom += np.power(nobs2 * S2, 2.0) / (nobs2 - 1) df = df_numer / df_denom - p = stats.t.cdf(wbfn, df) - elif distribution == "normal": - p = stats.norm.cdf(wbfn) - df = None + pvalue = stats.t.cdf(wbfn, df) else: - raise ValueError( - "distribution should be 't' or 'normal'") + pvalue = stats.norm.cdf(wbfn) + df = None - if alternative == "greater": + if alternative == "larger": pass - elif alternative == "less": - p = 1 - p + elif alternative == "smaller": + pvalue = 1 - pvalue elif alternative == "two-sided": - p = 2 * np.min([p, 1-p]) + pvalue = 2 * np.min([pvalue, 1-pvalue]) else: raise ValueError( "alternative should be 'less', 'greater' or 'two-sided'") # other info - nobs1, nobs2 = nx, ny # rename + nobs1, nobs2 = nobs1, nobs2 # rename mean1 = rankcx_mean mean2 = rankcy_mean - var1 = Sx / (nobs - nx)**2 - var2 = Sy / (nobs - ny)**2 + var1 = S1 / (nobs - nobs1)**2 + var2 = S2 / (nobs - nobs2)**2 var_prob = (var1 / nobs1 + var2 / nobs2) var = nobs * (var1 / nobs1 + var2 / nobs2) prob1 = (mean1 - (nobs1 + 1) / 2) / nobs2 prob2 = (mean2 - (nobs2 + 1) / 2) / nobs1 - return BrunnerMunzelResult(statistic=wbfn, pvalue=p, x1=Sx, s2=Sy, - var1=var1, var2=var2, var=var, - var_prob=var_prob, - nobs1=nx, nobs2=ny, nobs=nobs, - mean1=mean1, mean2=mean2, - prob1=prob1, prob2=prob2, - 
somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, - df=df - ) + return RankCompareResult(statistic=wbfn, pvalue=pvalue, s1=S1, s2=S2, + var1=var1, var2=var2, var=var, + var_prob=var_prob, + nobs1=nobs1, nobs2=nobs2, nobs=nobs, + mean1=mean1, mean2=mean2, + prob1=prob1, prob2=prob2, + somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, + df=df + ) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index 6552692b554..f6ecf548667 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -17,7 +17,7 @@ from statsmodels.sandbox.stats.runs import (Runs, runstest_1samp, runstest_2samp) from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar -from statsmodels.stats.nonparametric import brunnermunzel +from statsmodels.stats.nonparametric import rank_compare from statsmodels.tools.testing import Holder @@ -287,11 +287,14 @@ def test_brunnermunzel_one_sided(): y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] significant = 13 + # revere direction to match our definition + x, y = y, x + # Results are compared with R's lawstat package. - u1, p1 = brunnermunzel(x, y, alternative='less') - u2, p2 = brunnermunzel(y, x, alternative='greater') - u3, p3 = brunnermunzel(x, y, alternative='greater') - u4, p4 = brunnermunzel(y, x, alternative='less') + u1, p1 = rank_compare(x, y, alternative='smaller') + u2, p2 = rank_compare(y, x, alternative='larger') + u3, p3 = rank_compare(x, y, alternative='larger') + u4, p4 = rank_compare(y, x, alternative='smaller') assert_approx_equal(p1, p2, significant=significant) assert_approx_equal(p3, p4, significant=significant) @@ -316,9 +319,12 @@ def test_brunnermunzel_two_sided(): y = [3, 3, 4, 3, 1, 2, 3, 1, 1, 5, 4] significant = 13 + # revere direction to match our definition + x, y = y, x + # Results are compared with R's lawstat package. 
- u1, p1 = brunnermunzel(x, y, alternative='two-sided') - u2, p2 = brunnermunzel(y, x, alternative='two-sided') + u1, p1 = rank_compare(x, y, alternative='two-sided') + u2, p2 = rank_compare(y, x, alternative='two-sided') assert_approx_equal(p1, p2, significant=significant) assert_approx_equal(u1, 3.1374674823029505, @@ -346,8 +352,8 @@ def test_rank_compare1(): ci=[0.4700629827705593, 0.6183882855872511], prob=0.5442256341789052) - res = brunnermunzel(x1, x2, distribution="normal") - assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13) + res = rank_compare(x1, x2, use_t=False) + assert_allclose(res.statistic, -res2_t.statistic, rtol=1e-13) assert_allclose(res.prob1, 1 - res2_t.prob, rtol=1e-13) assert_allclose(res.prob2, res2_t.prob, rtol=1e-13) tt = res.test_prob_superior() @@ -370,20 +376,23 @@ def test_rank_compare1(): assert_allclose(res_tost[1][1], 0.025, rtol=1e-13) assert_allclose(res_tost[2][1], 0.025, rtol=1e-13) - res = brunnermunzel(x1, x2, distribution="t") + # use t-distribution + # our ranking is defined as reversed from lawstat, and BM article + # revere direction to match our definition + x1, x2 = x2, x1 + res = rank_compare(x1, x2, use_t=True) assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13) tt = res.test_prob_superior() # TODO: return HolderTuple # assert_allclose(tt.statistic, res2_t.statistic) # TODO: check sign/direction in lawstat, reversed from ours - assert_allclose(tt[0], -res2_t.statistic, rtol=1e-13) + assert_allclose(tt[0], res2_t.statistic, rtol=1e-13) assert_allclose(tt[1], res2_t.pvalue, rtol=1e-13) assert_allclose(res.pvalue, res2_t.pvalue, rtol=1e-13) assert_allclose(res.df, res2_t.df, rtol=1e-13) ci = res.conf_int(alpha=0.05) - # our ranking is defined as reversed from lawstat, and BM article - assert_allclose(ci, 1 - np.array(res2_t.ci)[::-1], rtol=1e-11) + assert_allclose(ci, res2_t.ci, rtol=1e-11) # test consistency of test and confint res_lb = res.test_prob_superior(value=ci[0]) assert_allclose(res_lb[1], 0.05, rtol=1e-11) From 4bddf82f3dffeeb181d71ed02b47fde9d608b718 Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Sun, 23 Aug 2020 14:25:59 -0400 Subject: [PATCH 3/6] REF: more cleanup refactoring and renaming, add rankdata_2samp function --- statsmodels/stats/nonparametric.py | 98 +++++++++++-------- statsmodels/stats/tests/test_nonparametric.py | 37 ++++--- 2 files changed, 82 insertions(+), 53 deletions(-) diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index b147c4fc143..5430c757dde 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -20,6 +20,23 @@ _zconfint_generic, _tconfint_generic, _zstat_generic, _tstat_generic) +def rankdata_2samp(x1, x2): + x1 = np.asarray(x1) + x2 = np.asarray(x2) + + nobs1 = len(x1) + nobs2 = len(x2) + if nobs1 == 0 or nobs2 == 0: + raise ValueError("one sample has zero length") + + rank = rankdata(np.concatenate((x1, x2))) + rank1 = rank[:nobs1] + rank2 = rank[nobs1:] + ranki1 = rankdata(x1) + ranki2 = rankdata(x2) + return rank1, rank2, ranki1, ranki2 + + class RankCompareResult(HolderTuple): """Results for rank comparison """ @@ -99,8 +116,30 @@ def tost_prob_superior(self, low, upp): # TODO: return HolderTuple return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) + def summary(self, alpha=0.05): + + xname = ["prob(x1>x2)"] + yname = "None" + effect = np.atleast_1d(self.prob1) + pvalues = np.atleast_1d(self.pvalue) + ci = np.array(self.conf_int(alpha))[None, :] + use_t = (self.df is not None) + sd = 
np.atleast_1d(np.sqrt(self.var_prob)) + statistic = np.atleast_1d(self.statistic) + if xname is None: + xname = ['c%d' % ii for ii in range(len(self.effect))] -def rank_compare(x1, x2, alternative="two-sided", use_t=True): + title = "Probability sample 1 is stochastically larger" + from statsmodels.iolib.summary import summary_params + + summ = summary_params((self, effect, sd, statistic, + pvalues, ci), + yname=yname, xname=xname, use_t=use_t, + title=title, alpha=alpha) + return summ + + +def rank_compare_2indep(x1, x2, use_t=True): """ Statistics and tests for the probability that x1 has larger values than x2. @@ -122,12 +161,6 @@ def rank_compare(x1, x2, alternative="two-sided", use_t=True): ---------- x1, x2 : array_like Array of samples, should be one-dimensional. - alternative : {'two-sided', 'less', 'greater'}, optional - Defines the alternative hypothesis. - The following options are available (default is 'two-sided'): - * 'two-sided' - * 'smaller': one-sided - * 'larger': one-sided use_t : poolean If use_t is true, the t distribution with Welch-Satterthwaite type degrees of freedom is used for p-value and confidence interval. @@ -196,62 +229,47 @@ def rank_compare(x1, x2, alternative="two-sided", use_t=True): nobs2 = len(x2) nobs = nobs1 + nobs2 if nobs1 == 0 or nobs2 == 0: - return RankCompareResult(statistic=np.nan, pvalue=np.nan) - rankc = rankdata(np.concatenate((x1, x2))) - rankcx = rankc[0:nobs1] - rankcy = rankc[nobs1:nobs1+nobs2] - rankcx_mean = np.mean(rankcx) - rankcy_mean = np.mean(rankcy) - rankx = rankdata(x1) - ranky = rankdata(x2) - rankx_mean = np.mean(rankx) - ranky_mean = np.mean(ranky) - - S1 = np.sum(np.power(rankcx - rankx - rankcx_mean + rankx_mean, 2.0)) + raise ValueError("one sample has zero length") + + rank1, rank2, ranki1, ranki2 = rankdata_2samp(x1, x2) + + meanr1 = np.mean(rank1) + meanr2 = np.mean(rank2) + meanri1 = np.mean(ranki1) + meanri2 = np.mean(ranki2) + + S1 = np.sum(np.power(rank1 - ranki1 - meanr1 + meanri1, 2.0)) S1 /= nobs1 - 1 - S2 = np.sum(np.power(rankcy - ranky - rankcy_mean + ranky_mean, 2.0)) + S2 = np.sum(np.power(rank2 - ranki2 - meanr2 + meanri2, 2.0)) S2 /= nobs2 - 1 - wbfn = nobs1 * nobs2 * (rankcx_mean - rankcy_mean) + wbfn = nobs1 * nobs2 * (meanr1 - meanr2) wbfn /= (nobs1 + nobs2) * np.sqrt(nobs1 * S1 + nobs2 * S2) + # Here we only use alternative == "two-sided" if use_t: df_numer = np.power(nobs1 * S1 + nobs2 * S2, 2.0) df_denom = np.power(nobs1 * S1, 2.0) / (nobs1 - 1) df_denom += np.power(nobs2 * S2, 2.0) / (nobs2 - 1) df = df_numer / df_denom - pvalue = stats.t.cdf(wbfn, df) + pvalue = 2 * stats.t.sf(np.abs(wbfn), df) else: - pvalue = stats.norm.cdf(wbfn) + pvalue = 2 * stats.norm.sf(np.abs(wbfn)) df = None - if alternative == "larger": - pass - elif alternative == "smaller": - pvalue = 1 - pvalue - elif alternative == "two-sided": - pvalue = 2 * np.min([pvalue, 1-pvalue]) - else: - raise ValueError( - "alternative should be 'less', 'greater' or 'two-sided'") - # other info - nobs1, nobs2 = nobs1, nobs2 # rename - mean1 = rankcx_mean - mean2 = rankcy_mean - var1 = S1 / (nobs - nobs1)**2 var2 = S2 / (nobs - nobs2)**2 var_prob = (var1 / nobs1 + var2 / nobs2) var = nobs * (var1 / nobs1 + var2 / nobs2) - prob1 = (mean1 - (nobs1 + 1) / 2) / nobs2 - prob2 = (mean2 - (nobs2 + 1) / 2) / nobs1 + prob1 = (meanr1 - (nobs1 + 1) / 2) / nobs2 + prob2 = (meanr2 - (nobs2 + 1) / 2) / nobs1 return RankCompareResult(statistic=wbfn, pvalue=pvalue, s1=S1, s2=S2, var1=var1, var2=var2, var=var, var_prob=var_prob, nobs1=nobs1, nobs2=nobs2, 
nobs=nobs, - mean1=mean1, mean2=mean2, + mean1=meanr1, mean2=meanr2, prob1=prob1, prob2=prob2, somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, df=df diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index f6ecf548667..2b25e5c27eb 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -17,7 +17,7 @@ from statsmodels.sandbox.stats.runs import (Runs, runstest_1samp, runstest_2samp) from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar -from statsmodels.stats.nonparametric import rank_compare +from statsmodels.stats.nonparametric import rank_compare_2indep from statsmodels.tools.testing import Holder @@ -291,10 +291,10 @@ def test_brunnermunzel_one_sided(): x, y = y, x # Results are compared with R's lawstat package. - u1, p1 = rank_compare(x, y, alternative='smaller') - u2, p2 = rank_compare(y, x, alternative='larger') - u3, p3 = rank_compare(x, y, alternative='larger') - u4, p4 = rank_compare(y, x, alternative='smaller') + u1, p1 = rank_compare_2indep(x, y).test_prob_superior(alternative='smaller') + u2, p2 = rank_compare_2indep(y, x).test_prob_superior(alternative='larger') + u3, p3 = rank_compare_2indep(x, y).test_prob_superior(alternative='larger') + u4, p4 = rank_compare_2indep(y, x).test_prob_superior(alternative='smaller') assert_approx_equal(p1, p2, significant=significant) assert_approx_equal(p3, p4, significant=significant) @@ -307,9 +307,11 @@ def test_brunnermunzel_one_sided(): significant=significant) assert_approx_equal(u4, -3.1374674823029505, significant=significant) - assert_approx_equal(p1, 0.0028931043330757342, + + # Note: scipy and lawstat tail is reversed compared to test statistic + assert_approx_equal(p3, 0.0028931043330757342, significant=significant) - assert_approx_equal(p3, 0.99710689566692423, + assert_approx_equal(p1, 0.99710689566692423, significant=significant) @@ -323,19 +325,28 @@ def test_brunnermunzel_two_sided(): x, y = y, x # Results are compared with R's lawstat package. 
- u1, p1 = rank_compare(x, y, alternative='two-sided') - u2, p2 = rank_compare(y, x, alternative='two-sided') + res1 = rank_compare_2indep(x, y) + u1, p1 = res1 + t1 = res1.test_prob_superior(alternative='two-sided') + res2 = rank_compare_2indep(y, x) + u2, p2 = res2 + t2 = res2.test_prob_superior(alternative='two-sided') assert_approx_equal(p1, p2, significant=significant) assert_approx_equal(u1, 3.1374674823029505, significant=significant) assert_approx_equal(u2, -3.1374674823029505, significant=significant) - assert_approx_equal(p1, 0.0057862086661515377, + assert_approx_equal(p2, 0.0057862086661515377, significant=significant) + assert_allclose(t1[0], u1, rtol=1e-13) + assert_allclose(t2[0], u2, rtol=1e-13) + assert_allclose(t1[1], p1, rtol=1e-13) + assert_allclose(t2[1], p2, rtol=1e-13) + -def test_rank_compare1(): +def test_rank_compare_2indep1(): # Example from Munzel and Hauschke 2003 # data is given by counts, expand to observations levels = [-2, -1, 0, 1, 2] @@ -352,7 +363,7 @@ def test_rank_compare1(): ci=[0.4700629827705593, 0.6183882855872511], prob=0.5442256341789052) - res = rank_compare(x1, x2, use_t=False) + res = rank_compare_2indep(x1, x2, use_t=False) assert_allclose(res.statistic, -res2_t.statistic, rtol=1e-13) assert_allclose(res.prob1, 1 - res2_t.prob, rtol=1e-13) assert_allclose(res.prob2, res2_t.prob, rtol=1e-13) @@ -380,7 +391,7 @@ def test_rank_compare1(): # our ranking is defined as reversed from lawstat, and BM article # revere direction to match our definition x1, x2 = x2, x1 - res = rank_compare(x1, x2, use_t=True) + res = rank_compare_2indep(x1, x2, use_t=True) assert_allclose(res.statistic, res2_t.statistic, rtol=1e-13) tt = res.test_prob_superior() # TODO: return HolderTuple From 772279cf9e887cf66d9812b0fcb8b51ce3556d6d Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Mon, 24 Aug 2020 16:22:27 -0400 Subject: [PATCH 4/6] ENH: allow vectorized, return HolderTuple in test and tost --- statsmodels/stats/nonparametric.py | 79 +++++++++++++------ statsmodels/stats/tests/test_nonparametric.py | 34 +++++++- statsmodels/tools/testing.py | 8 +- 3 files changed, 91 insertions(+), 30 deletions(-) diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index 5430c757dde..0ea9d17f892 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -29,11 +29,19 @@ def rankdata_2samp(x1, x2): if nobs1 == 0 or nobs2 == 0: raise ValueError("one sample has zero length") - rank = rankdata(np.concatenate((x1, x2))) + x_combined = np.concatenate((x1, x2)) + if x_combined.ndim > 1: + rank = np.apply_along_axis(rankdata, 0, x_combined) + else: + rank = rankdata(x_combined) # no axis in older scipy rank1 = rank[:nobs1] rank2 = rank[nobs1:] - ranki1 = rankdata(x1) - ranki2 = rankdata(x2) + if x_combined.ndim > 1: + ranki1 = np.apply_along_axis(rankdata, 0, x1) + ranki2 = np.apply_along_axis(rankdata, 0, x2) + else: + ranki1 = rankdata(x1) + ranki2 = rankdata(x2) return rank1, rank2, ranki1, ranki2 @@ -69,11 +77,20 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): # TODO: return HolderTuple # corresponds to a one-sample test and either p0 or diff could be used if self.df is None: - return _zstat_generic(self.prob1, p0, std_diff, alternative, - diff=0) + stat, pv = _zstat_generic(self.prob1, p0, std_diff, alternative, + diff=0) + distr = "normal" else: - return _tstat_generic(self.prob1, p0, std_diff, self.df, - alternative, diff=0) + stat, pv = _tstat_generic(self.prob1, p0, std_diff, self.df, + 
alternative, diff=0) + distr = "t" + + res = HolderTuple(statistic=stat, + pvalue=pv, + df=self.df, + distribution=distr + ) + return res def tost_prob_superior(self, low, upp): '''test of stochastic (non-)equivalence of p = P(x1 > x2) @@ -110,31 +127,45 @@ def tost_prob_superior(self, low, upp): ''' - t1, pv1 = self.test_prob_superior(low, alternative='larger') - t2, pv2 = self.test_prob_superior(upp, alternative='smaller') - df1 = df2 = None - # TODO: return HolderTuple - return np.maximum(pv1, pv2), (t1, pv1, df1), (t2, pv2, df2) - - def summary(self, alpha=0.05): + t1 = self.test_prob_superior(low, alternative='larger') + t2 = self.test_prob_superior(upp, alternative='smaller') + + # idx_max = 0 if t1.pvalue < t2.pvalue else 1 + idx_max = np.asarray(t1.pvalue < t2.pvalue, int) + title = "Equivalence test for Prob(x1 > x2) + 0.5 Prob(x1 = x2) " + res = HolderTuple(statistic=np.choose(idx_max, + [t1.statistic, t2.statistic]), + # pvalue=[t1.pvalue, t2.pvalue][idx_max], # python + # use np.choose for vectorized selection + pvalue=np.choose(idx_max, [t1.pvalue, t2.pvalue]), + results_larger=t1, + results_smaller=t2, + title=title + ) + return res + + def summary(self, alpha=0.05, xname=None): - xname = ["prob(x1>x2)"] yname = "None" effect = np.atleast_1d(self.prob1) pvalues = np.atleast_1d(self.pvalue) - ci = np.array(self.conf_int(alpha))[None, :] + ci = np.atleast_2d(self.conf_int(alpha)) + if ci.shape[0] > 1: + ci = ci.T use_t = (self.df is not None) sd = np.atleast_1d(np.sqrt(self.var_prob)) statistic = np.atleast_1d(self.statistic) if xname is None: - xname = ['c%d' % ii for ii in range(len(self.effect))] + xname = ['c%d' % ii for ii in range(len(effect))] + + xname2 = ['prob(x1>x2) %s' % ii for ii in xname] title = "Probability sample 1 is stochastically larger" from statsmodels.iolib.summary import summary_params summ = summary_params((self, effect, sd, statistic, pvalues, ci), - yname=yname, xname=xname, use_t=use_t, + yname=yname, xname=xname2, use_t=use_t, title=title, alpha=alpha) return summ @@ -233,14 +264,14 @@ def rank_compare_2indep(x1, x2, use_t=True): rank1, rank2, ranki1, ranki2 = rankdata_2samp(x1, x2) - meanr1 = np.mean(rank1) - meanr2 = np.mean(rank2) - meanri1 = np.mean(ranki1) - meanri2 = np.mean(ranki2) + meanr1 = np.mean(rank1, axis=0) + meanr2 = np.mean(rank2, axis=0) + meanri1 = np.mean(ranki1, axis=0) + meanri2 = np.mean(ranki2, axis=0) - S1 = np.sum(np.power(rank1 - ranki1 - meanr1 + meanri1, 2.0)) + S1 = np.sum(np.power(rank1 - ranki1 - meanr1 + meanri1, 2.0), axis=0) S1 /= nobs1 - 1 - S2 = np.sum(np.power(rank2 - ranki2 - meanr2 + meanri2, 2.0)) + S2 = np.sum(np.power(rank2 - ranki2 - meanr2 + meanri2, 2.0), axis=0) S2 /= nobs2 - 1 wbfn = nobs1 * nobs2 * (meanr1 - meanr2) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index 2b25e5c27eb..d0d09b77ddf 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -384,8 +384,8 @@ def test_rank_compare_2indep1(): # test consistency of tost and confint res_tost = res.tost_prob_superior(*ci) - assert_allclose(res_tost[1][1], 0.025, rtol=1e-13) - assert_allclose(res_tost[2][1], 0.025, rtol=1e-13) + assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-13) + assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-13) # use t-distribution # our ranking is defined as reversed from lawstat, and BM article @@ -412,5 +412,31 @@ def test_rank_compare_2indep1(): # test consistency of tost and 
confint res_tost = res.tost_prob_superior(*ci) - assert_allclose(res_tost[1][1], 0.025, rtol=1e-11) - assert_allclose(res_tost[2][1], 0.025, rtol=1e-11) + assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-11) + assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-11) + + +def test_rank_compare_vectorized(): + np.random.seed(987126) + x1 = np.random.randint(0, 20, (50, 3)) + x2 = np.random.randint(5, 25, (50, 3)) + res = rank_compare_2indep(x1, x2) + tst = res.test_prob_superior(0.5) + tost = res.tost_prob_superior(0.4, 0.6) + + # smoke test for summary + res.summary() + + for i in range(3): + res_i = rank_compare_2indep(x1[:, i], x2[:, i]) + assert_allclose(res.statistic[i], res_i.statistic, rtol=1e-14) + assert_allclose(res.pvalue[i], res_i.pvalue, rtol=1e-14) + assert_allclose(res.prob1[i], res_i.prob1, rtol=1e-14) + + tst_i = res_i.test_prob_superior(0.5) + assert_allclose(tst.statistic[i], tst_i.statistic, rtol=1e-14) + assert_allclose(tst.pvalue[i], tst_i.pvalue, rtol=1e-14) + + tost_i = res_i.tost_prob_superior(0.4, 0.6) + assert_allclose(tost.statistic[i], tost_i.statistic, rtol=1e-14) + assert_allclose(tost.pvalue[i], tost_i.pvalue, rtol=1e-14) diff --git a/statsmodels/tools/testing.py b/statsmodels/tools/testing.py index ead4e16bfe1..bd6571668ca 100644 --- a/statsmodels/tools/testing.py +++ b/statsmodels/tools/testing.py @@ -54,11 +54,15 @@ def __init__(self, **kwds): self.__dict__.update(kwds) def __str__(self): - ss = "\n".join(str(k) + " = " + str(v) for k, v in vars(self).items()) + ss = "\n".join(str(k) + " = " + str(v).replace('\n', '\n ') + for k, v in vars(self).items()) return ss def __repr__(self): - ss = str(self.__class__) + "\n" + self.__str__() + # use repr for values including nested cases as in tost + ss = "\n".join(str(k) + " = " + repr(v).replace('\n', '\n ') + for k, v in vars(self).items()) + ss = str(self.__class__) + "\n" + ss return ss From 18a490acf7e1d562ee976b594d0e83636c60ab1f Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Thu, 27 Aug 2020 16:25:58 -0400 Subject: [PATCH 5/6] ENH: add function for ordinal counts, add normal reference --- statsmodels/stats/nonparametric.py | 171 +++++++++++++++++- statsmodels/stats/tests/test_nonparametric.py | 32 +++- 2 files changed, 196 insertions(+), 7 deletions(-) diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index 0ea9d17f892..0270c698290 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -57,7 +57,7 @@ def conf_int(self, alpha=0.05, value=None, alternative="two-sided"): diff = self.prob1 - p0 std_diff = np.sqrt(self.var / self.nobs) - if self.df is None: + if self.use_t is False: return _zconfint_generic(diff, std_diff, alpha, alternative) else: return _tconfint_generic(diff, std_diff, self.df, alpha, @@ -76,7 +76,7 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): # TODO: return HolderTuple # corresponds to a one-sample test and either p0 or diff could be used - if self.df is None: + if not self.use_t: stat, pv = _zstat_generic(self.prob1, p0, std_diff, alternative, diff=0) distr = "normal" @@ -144,17 +144,63 @@ def tost_prob_superior(self, low, upp): ) return res + def confint_lintransf(self, const=-1, slope=2, alpha=0.05, + alternative="two-sided"): + """confidence interval of a linear transformation of prob1 + + This computes the confidence interval for + + d = const + slope * prob1 + + Default values correspond to Somers' d. 
+ + Parameters + ---------- + const, slope : float + Constant and slope for linear transformation. + alpha : float in [0, 1] + alternative : + + """ + + low_p, upp_p = self.conf_int(alpha=alpha, alternative=alternative) + low = const + slope * low_p + upp = const + slope * upp_p + if slope < 0: + low, upp = upp, low + return low, upp + + def effectsize_normal(self): + """ + Cohen's d, standardized mean difference under normality assumption. + + This computes the standardized mean difference effect size that is + equivalent to the rank based probability of superiority estimate, + if we assume that the data is normally distributed. + + Returns + ------- + equivalent smd effect size + + """ + return stats.norm.ppf(self.prob1) * np.sqrt(2) + def summary(self, alpha=0.05, xname=None): yname = "None" effect = np.atleast_1d(self.prob1) - pvalues = np.atleast_1d(self.pvalue) + if self.pvalue is None: + statistic, pvalue = self.test_prob_superior() + else: + pvalue = self.pvalue + statistic = self.statistic + pvalues = np.atleast_1d(pvalue) ci = np.atleast_2d(self.conf_int(alpha)) if ci.shape[0] > 1: ci = ci.T - use_t = (self.df is not None) + use_t = self.use_t sd = np.atleast_1d(np.sqrt(self.var_prob)) - statistic = np.atleast_1d(self.statistic) + statistic = np.atleast_1d(statistic) if xname is None: xname = ['c%d' % ii for ii in range(len(effect))] @@ -303,5 +349,118 @@ def rank_compare_2indep(x1, x2, use_t=True): mean1=meanr1, mean2=meanr2, prob1=prob1, prob2=prob2, somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, - df=df + df=df, use_t=use_t ) + + +def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True): + """stochastically larger probability for 2 independend ordinal sample + + This is a special case of `rank_compare_2indep` when the data are given as + counts of two independent ordinal, i.e. ordered multinomial samples. + + The statistic of interest is the probability that a random draw from the + population of the first sample has a larger value than a random draw from + the population of the second sample, specifically + + p = P(x1 > x2) + 0.5 * P(x1 = x2) + + Parameters + ---------- + count1 : array_like + counts of the first sample, categories are assumed to be ordered. + count1 : array_like + counts of the second sample, number of categories and ordering needs + to be the same as for sample 1 + ddof : scalar + Degrees of freedom correction for variance estimation. The default + ddof=1 corresponds to `rank_compare_2indep`. + use_t : bool + If use_t is true, the t distribution with Welch-Satterthwaite type + degrees of freedom is used for p-value and confidence interval. + If use_t is false, then the normal distribution is used. + + Returns + ------- + res : RankCompareResult + This includes methods for hypothesis tests and confidence intervals + for the probability that sample 1 is stochastically larger than + sample 2. 
+ """ + + count1 = np.asarray(count1) + count2 = np.asarray(count2) + nobs1, nobs2 = count1.sum(), count2.sum() + freq1 = count1 / nobs1 + freq2 = count2 / nobs2 + cdf1 = np.concatenate(([0], freq1)).cumsum(axis=0) + cdf2 = np.concatenate(([0], freq2)).cumsum(axis=0) + + # mid rank cdf + cdfm1 = (cdf1[1:] + cdf1[:-1]) / 2 + cdfm2 = (cdf2[1:] + cdf2[:-1]) / 2 + prob1 = (cdfm2 * freq1).sum() + prob2 = (cdfm1 * freq2).sum() + + var1 = (cdfm2**2 * freq1).sum() - prob1**2 + var2 = (cdfm1**2 * freq2).sum() - prob2**2 + + var_prob = (var1 / (nobs1 - ddof) + var2 / (nobs2 - ddof)) + nobs = nobs1 + nobs2 + var = nobs * var_prob + vn1 = var1 * nobs2 * nobs1 / (nobs1 - ddof) + vn2 = var2 * nobs1 * nobs2 / (nobs2 - ddof) + df = (vn1 + vn2)**2 / (vn1**2 / (nobs1 - 1) + vn2**2 / (nobs2 - 1)) + res = RankCompareResult(statistic=None, pvalue=None, s1=None, s2=None, + var1=var1, var2=var2, var=var, + var_prob=var_prob, + nobs1=nobs1, nobs2=nobs2, nobs=nobs, + mean1=None, mean2=None, + prob1=prob1, prob2=prob2, + somersd1=prob1 * 2 - 1, somersd2=prob2 * 2 - 1, + df=df, use_t=use_t + ) + + return res + + +def prob_larger_continuous(distr1, distr2): + """probability that distr1 is stochastically larger than distr2 + + This computes + + p = P(x1 > x2) + + for two distributions. + + Parameters + ---------- + distr1, distr2 : distributions + Two instances of scipy.stats.distributions. Methods that are required + are cdf, pdf and expect. + + Returns + ------- + p : probability x1 is larger than xw + + + Notes + ----- + This is a one-liner that is added mainly as reference. + + Examples + -------- + >>> from scipy import stats + >>> prob_larger_continuous(stats.norm, stats.t(5)) + 0.4999999999999999 + + # which is the same as + >>> stats.norm.expect(stats.t(5).cdf) + 0.4999999999999999 + + >>> prob_larger_continuous(stats.norm, stats.norm(loc=1)) + 0.23975006109347669 + + """ + + return distr1.expect(distr2.cdf) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index d0d09b77ddf..5c1d36b68bc 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -10,6 +10,8 @@ import numpy as np from numpy.testing import (assert_allclose, assert_almost_equal, assert_approx_equal, assert_) + +from scipy import stats import pytest from statsmodels.stats.contingency_tables import ( @@ -17,7 +19,8 @@ from statsmodels.sandbox.stats.runs import (Runs, runstest_1samp, runstest_2samp) from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar -from statsmodels.stats.nonparametric import rank_compare_2indep +from statsmodels.stats.nonparametric import ( + rank_compare_2indep, rank_compare_2ordinal, prob_larger_continuous) from statsmodels.tools.testing import Holder @@ -415,6 +418,33 @@ def test_rank_compare_2indep1(): assert_allclose(res_tost.results_smaller.pvalue, 0.025, rtol=1e-11) assert_allclose(res_tost.results_larger.pvalue, 0.025, rtol=1e-11) + # extras + # cohen's d + esd = res.effectsize_normal() + p = prob_larger_continuous(stats.norm(loc=esd), stats.norm) + # round trip + assert_allclose(p, res.prob1, rtol=1e-13) + + +def test_rank_compare_ord(): + # compare ordinal count version with full version + # Example from Munzel and Hauschke 2003 + # data is given by counts, expand to observations + levels = [-2, -1, 0, 1, 2] + new = [24, 37, 21, 19, 6] + active = [11, 51, 22, 21, 7] + x1 = np.repeat(levels, new) + x2 = np.repeat(levels, active) + + for use_t in [False, True]: + res2 = rank_compare_2indep(x1, x2, 
use_t=use_t) + res1 = rank_compare_2ordinal(new, active, use_t=use_t) + assert_allclose(res2.prob1, res1.prob1, rtol=1e-13) + assert_allclose(res2.var_prob, res1.var_prob, rtol=1e-13) + s1 = str(res1.summary()) + s2 = str(res2.summary()) + assert s1 == s2 + def test_rank_compare_vectorized(): np.random.seed(987126) From c33d27c1481325733e4d1ca4ee1a2ad17d15ea0a Mon Sep 17 00:00:00 2001 From: Josef Perktold Date: Tue, 1 Sep 2020 11:52:26 -0400 Subject: [PATCH 6/6] DOC: improve docstrings, add to stats.rst, unit test for cohensd2problarger --- docs/source/stats.rst | 11 + statsmodels/stats/nonparametric.py | 280 +++++++++++++++--- statsmodels/stats/tests/test_nonparametric.py | 22 +- 3 files changed, 261 insertions(+), 52 deletions(-) diff --git a/docs/source/stats.rst b/docs/source/stats.rst index ea7deb8ffd3..aa7859ba4c9 100644 --- a/docs/source/stats.rst +++ b/docs/source/stats.rst @@ -181,6 +181,17 @@ Non-Parametric Tests sign_test +.. currentmodule:: statsmodels.stats.nonparametric + +.. autosummary:: + :toctree: generated/ + + rank_compare_2indep + rank_compare_2ordinal + cohensd2problarger + prob_larger_continuous + rankdata_2samp + Descriptive Statistics ---------------------- diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index 0270c698290..ff814ec3eb6 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -21,6 +21,25 @@ def rankdata_2samp(x1, x2): + """Compute midranks for two samples + + Parameters + ---------- + x1, x2 : array_like + Original data for two samples that will be converted to midranks. + + Returns + ------- + rank1 : ndarray + Midranks of the first sample in the pooled sample. + rank2 : ndarray + Midranks of the second sample in the pooled sample. + ranki1 : ndarray + Internal midranks of the first sample. + ranki2 : ndarray + Internal midranks of the second sample. + + """ x1 = np.asarray(x1) x2 = np.asarray(x2) @@ -47,9 +66,45 @@ def rankdata_2samp(x1, x2): class RankCompareResult(HolderTuple): """Results for rank comparison + + This is a subclass of HolderTuple that includes results from intermediate + computations, as well as methods for hypothesis tests, confidence intervals + ans summary. """ - def conf_int(self, alpha=0.05, value=None, alternative="two-sided"): + def conf_int(self, value=None, alpha=0.05, alternative="two-sided"): + """ + Confidence interval for probability that sample 1 has larger values + + Confidence interval is for the shifted probability + + P(x1 > x2) + 0.5 * P(x1 = x2) - value + + Parameters + ---------- + value : float + Value, default 0, shifts the confidence interval, + e.g. ``value=0.5`` centers the confidence interval at zero. + alpha : float + Significance level for the confidence interval, coverage is + ``1-alpha`` + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + lower : float or ndarray + Lower confidence limit. This is -inf for the one-sided alternative + "smaller". + upper : float or ndarray + Upper confidence limit. This is inf for the one-sided alternative + "larger". 
+ + """ p0 = value if p0 is None: @@ -67,6 +122,31 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): """test for superiority probability H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value + + The alternative is that the probability is either not equal, larger + or smaller than the null-value depending on the chosen alternative. + + Parameters + ---------- + value : float + Value of the probability under the Null hypothesis. + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + res : HolderTuple + HolderTuple instance with the following main attributes + + statistic : float + Test statistic for z- or t-test + pvalue : float + Pvalue of the test based on either normal or t distribution. + """ p0 = value # alias @@ -95,8 +175,8 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): def tost_prob_superior(self, low, upp): '''test of stochastic (non-)equivalence of p = P(x1 > x2) - null hypothesis: p < low or p > upp - alternative hypothesis: low < p < upp + Null hypothesis: p < low or p > upp + Alternative hypothesis: low < p < upp where p is the probability that a random draw from the population of the first sample has a larger value than a random draw from the @@ -116,14 +196,21 @@ def tost_prob_superior(self, low, upp): Returns ------- - pvalue : float - pvalue of the non-equivalence test - t1, pv1, df1 : tuple - test statistic, pvalue and degrees of freedom for lower threshold - test - t2, pv2, df2 : tuple - test statistic, pvalue and degrees of freedom for upper threshold - test + res : HolderTuple + HolderTuple instance with the following main attributes + + pvalue : float + Pvalue of the equivalence test given by the larger pvalue of + the two one-sided tests. + statistic : float + Test statistic of the one-sided test that has the larger + pvalue. + results_larger : HolderTuple + Results instanc with test statistic, pvalue and degrees of + freedom for lower threshold test. + results_smaller : HolderTuple + Results instanc with test statistic, pvalue and degrees of + freedom for upper threshold test. ''' @@ -157,9 +244,25 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05, Parameters ---------- const, slope : float - Constant and slope for linear transformation. - alpha : float in [0, 1] - alternative : + Constant and slope for linear (affine) transformation. + alpha : float + Significance level for the confidence interval, coverage is + ``1-alpha`` + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + lower : float or ndarray + Lower confidence limit. This is -inf for the one-sided alternative + "smaller". + upper : float or ndarray + Upper confidence limit. This is inf for the one-sided alternative + "larger". """ @@ -170,22 +273,52 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05, low, upp = upp, low return low, upp - def effectsize_normal(self): + def effectsize_normal(self, prob=None): """ Cohen's d, standardized mean difference under normality assumption. - This computes the standardized mean difference effect size that is - equivalent to the rank based probability of superiority estimate, - if we assume that the data is normally distributed. 
-    def effectsize_normal(self):
+    def effectsize_normal(self, prob=None):
         """
         Cohen's d, standardized mean difference under normality assumption.

-        This computes the standardized mean difference effect size that is
-        equivalent to the rank based probability of superiority estimate,
-        if we assume that the data is normally distributed.
+        This computes the standardized mean difference, Cohen's d, effect size
+        that is equivalent to the rank based probability ``p`` of being
+        stochastically larger if we assume that the data is normally
+        distributed, given by
+
+        :math:`d = F^{-1}(p) \\cdot \\sqrt{2}`
+
+        where :math:`F^{-1}` is the inverse of the cdf of the normal
+        distribution.
+
+        Parameters
+        ----------
+        prob : float in (0, 1)
+            Probability to be converted to Cohen's d effect size.
+            If prob is None, then the ``prob1`` attribute is used.

         Returns
         -------
-        equivalent smd effect size
+        equivalent Cohen's d effect size under normality assumption.

         """

-        return stats.norm.ppf(self.prob1) * np.sqrt(2)
+        if prob is None:
+            prob = self.prob1
+        return stats.norm.ppf(prob) * np.sqrt(2)

     def summary(self, alpha=0.05, xname=None):
+        """summary table for probability that random draw x1 is larger than x2
+
+        Parameters
+        ----------
+        alpha : float
+            Significance level for confidence intervals. Coverage is
+            ``1 - alpha``.
+        xname : None or list of str
+            If None, then each row has a name column with generic names.
+            If xname is a list of strings, then it will be included as part
+            of those names.
+
+        Returns
+        -------
+        SimpleTable instance with methods to convert to different output
+        formats.
+        """

         yname = "None"
         effect = np.atleast_1d(self.prob1)
@@ -229,7 +362,8 @@ def rank_compare_2indep(x1, x2, use_t=True):
     This is a measure underlying Wilcoxon-Mann-Whitney's U test,
     Fligner-Policello test and Brunner-Munzel test, and
     Inference is based on the asymptotic distribution of the Brunner-Munzel
-    test.
+    test. The half probability for ties corresponds to the use of midranks
+    and makes it valid for discrete variables.

     The Null hypothesis for stochastic equality is p = 0.5, which corresponds
     to the Brunner-Munzel test.
@@ -253,7 +387,6 @@ def rank_compare_2indep(x1, x2, use_t=True):
         p-value assuming an t distribution. One-sided or
         two-sided, depending on the choice of `alternative` and `use_t`.

-
     See Also
     --------
     scipy.stats.brunnermunzel : Brunner-Munzel test for stochastic equality
@@ -269,16 +402,28 @@ def rank_compare_2indep(x1, x2, use_t=True):

     Brunner and Munzel recommended to estimate the p-value by t-distribution
     when the size of data is 50 or less. If the size is lower than 10, it would
-    be better to use permuted Brunner Munzel test (see [2]_).
+    be better to use the permuted Brunner-Munzel test (see [2]_) for the test
+    of stochastic equality.

     This measure has been introduced in the literature under many different
     names relying on a variety of assumptions.
-    In psychology, ... introduced it as Common Language effect size for the
-    continuous, normal distribution case, ... extended it to the nonparameteric
+    In psychology, McGraw and Wong (1992) introduced it as Common Language
+    effect size for the continuous, normal distribution case;
+    Vargha and Delaney (2000) [3]_ extended it to the nonparametric
     continuous distribution case as in Fligner-Policello.

-    Note: Brunner-Munzel define the probability for x1 to be stochastically
-    smaller than x2, while here we use stochastically larger.
+    WMW and related tests can be interpreted as tests of medians or tests of
+    central location only under very restrictive additional assumptions, such
+    as that both distributions are identical under the equality null
+    hypothesis (assumed by Mann-Whitney) or that both distributions are
+    symmetric (shown by Fligner-Policello). If the distributions of the two
+    samples can differ in an arbitrary way, then the equality null hypothesis
+    corresponds to p = 0.5 against an alternative p != 0.5. See, for example,
+    Conroy (2012) [4]_ and Divine et al. (2018) [5]_.
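A small numerical check of the ``effectsize_normal`` conversion documented a few lines up (a sketch; the value 0.6 is arbitrary and the results are approximate):

>>> import numpy as np
>>> from scipy import stats
>>> p = 0.6
>>> d = stats.norm.ppf(p) * np.sqrt(2)  # formula used by effectsize_normal, roughly 0.358
>>> stats.norm.cdf(d / np.sqrt(2))      # cohensd2problarger inverts it, giving back 0.6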
+
+    Note: Brunner-Munzel and related literature define the probability that x1
+    is stochastically smaller than x2, while here we use stochastically larger.
+    This is equivalent to switching x1 and x2 in the two-sample case.

     References
     ----------
@@ -288,16 +433,19 @@ def rank_compare_2indep(x1, x2, use_t=True):
     .. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the
        non-parametric Behrens-Fisher problem". Computational Statistics and
        Data Analysis. Vol. 51(2007): 5192-5204.
-    Examples
-    --------
-    >>> from scipy import stats
-    >>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1]
-    >>> x2 = [3,3,4,3,1,2,3,1,1,5,4]
-    >>> w, p_value = stats.brunnermunzel(x1, x2)
-    >>> w
-    3.1374674823029505
-    >>> p_value
-    0.0057862086661515377
+    .. [3] Vargha, András, and Harold D. Delaney. 2000. “A Critique and
+       Improvement of the CL Common Language Effect Size Statistics of
+       McGraw and Wong.” Journal of Educational and Behavioral Statistics
+       25 (2): 101–32. https://doi.org/10.3102/10769986025002101.
+    .. [4] Conroy, Ronán M. 2012. “What Hypotheses Do ‘Nonparametric’ Two-Group
+       Tests Actually Test?” The Stata Journal: Promoting Communications on
+       Statistics and Stata 12 (2): 182–90.
+       https://doi.org/10.1177/1536867X1201200202.
+    .. [5] Divine, George W., H. James Norton, Anna E. Barón, and Elizabeth
+       Juarez-Colunga. 2018. “The Wilcoxon–Mann–Whitney Procedure Fails as
+       a Test of Medians.” The American Statistician 72 (3): 278–86.
+       https://doi.org/10.1080/00031305.2017.1305291.
+
     """
     x1 = np.asarray(x1)
     x2 = np.asarray(x2)
@@ -354,10 +502,10 @@ def rank_compare_2indep(x1, x2, use_t=True):

 def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True):
-    """stochastically larger probability for 2 independend ordinal sample
+    """stochastically larger probability for 2 independent ordinal samples

     This is a special case of `rank_compare_2indep` when the data are given as
-    counts of two independent ordinal, i.e. ordered multinomial samples.
+    counts of two independent ordinal, i.e. ordered multinomial, samples.

     The statistic of interest is the probability that a random draw from the
     population of the first sample has a larger value than a random draw from
     the population of the second sample, specifically

         p = P(x1 > x2) + 0.5 * P(x1 = x2)

     Parameters
     ----------
     count1 : array_like
-        counts of the first sample, categories are assumed to be ordered.
-    count1 : array_like
-        counts of the second sample, number of categories and ordering needs
+        Counts of the first sample, categories are assumed to be ordered.
+    count2 : array_like
+        Counts of the second sample, number of categories and ordering need
         to be the same as for sample 1
     ddof : scalar
         Degrees of freedom correction for variance estimation. The default
@@ -386,6 +534,13 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True):
         This includes methods for hypothesis tests and confidence intervals
         for the probability that sample 1 is stochastically larger than
         sample 2.
+
+    Notes
+    -----
+    The implementation is based on the appendix of Munzel and Hauschke (2003)
+    with the addition of ``ddof`` so that the results match the general
+    function `rank_compare_2indep`.
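A minimal sketch of the ordinal-count interface documented above (not from the patch; the counts are made up, and both samples must share the same ordered categories):

>>> from statsmodels.stats.nonparametric import rank_compare_2ordinal
>>> count1 = [10, 5, 3, 2]  # made-up frequencies over four ordered categories
>>> count2 = [3, 4, 6, 7]
>>> res = rank_compare_2ordinal(count1, count2, use_t=True)
>>> res.prob1               # P(x1 > x2) + 0.5 * P(x1 = x2) estimated from the counts
>>> res.conf_int(alpha=0.05)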
+ """ count1 = np.asarray(count1) @@ -425,23 +580,24 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True): def prob_larger_continuous(distr1, distr2): - """probability that distr1 is stochastically larger than distr2 + """probability indicating that distr1 is stochastically larger than distr2 This computes p = P(x1 > x2) - for two distributions. + for two continuous distributions, where `distr1` and `distr2` are the + distributions of random variables x1 and x2 respectively. Parameters ---------- distr1, distr2 : distributions - Two instances of scipy.stats.distributions. Methods that are required - are cdf, pdf and expect. + Two instances of scipy.stats.distributions. The required methods are + cdf of the second distribution and expect of the first distribution. Returns ------- - p : probability x1 is larger than xw + p : probability x1 is larger than x2 Notes @@ -458,9 +614,39 @@ def prob_larger_continuous(distr1, distr2): >>> stats.norm.expect(stats.t(5).cdf) 0.4999999999999999 + # distribution 1 with smaller mean (loc) than distribution 2 >>> prob_larger_continuous(stats.norm, stats.norm(loc=1)) 0.23975006109347669 """ return distr1.expect(distr2.cdf) + + +def cohensd2problarger(d): + """convert Cohen's d effect size to stochastically-larger-probability + + This assumes observations are normally distributed. + + Computed as + + p = Prob(x1 > x2) = F(d / sqrt(2)) + + where `F` is cdf of normal distribution. Cohen's d is defined as + + d = (mean1 - mean2) / std + + where ``std`` is the pooled within standard deviation. + + Parameters + ---------- + d : float or array_like + Cohen's d effect size for difference mean1 - mean2. + + Returns + ------- + prob : float or ndarray + Prob(x1 > x2) + """ + + return stats.norm.cdf(d / np.sqrt(2)) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index 5c1d36b68bc..c5bac26aee3 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -20,7 +20,8 @@ runstest_1samp, runstest_2samp) from statsmodels.sandbox.stats.runs import mcnemar as sbmcnemar from statsmodels.stats.nonparametric import ( - rank_compare_2indep, rank_compare_2ordinal, prob_larger_continuous) + rank_compare_2indep, rank_compare_2ordinal, prob_larger_continuous, + cohensd2problarger) from statsmodels.tools.testing import Holder @@ -294,10 +295,14 @@ def test_brunnermunzel_one_sided(): x, y = y, x # Results are compared with R's lawstat package. 
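The helpers ``prob_larger_continuous`` and ``cohensd2problarger`` documented above can be cross-checked against each other: for two unit-variance normal distributions whose means differ by d, both should give Phi(d / sqrt(2)). A sketch with an arbitrary d (approximate value noted in the comment):

>>> from scipy import stats
>>> from statsmodels.stats.nonparametric import (
...     cohensd2problarger, prob_larger_continuous)
>>> d = 0.5
>>> cohensd2problarger(d)                                  # roughly 0.638
>>> prob_larger_continuous(stats.norm(loc=d), stats.norm)  # same value from the distributions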
-    u1, p1 = rank_compare_2indep(x, y).test_prob_superior(alternative='smaller')
-    u2, p2 = rank_compare_2indep(y, x).test_prob_superior(alternative='larger')
-    u3, p3 = rank_compare_2indep(x, y).test_prob_superior(alternative='larger')
-    u4, p4 = rank_compare_2indep(y, x).test_prob_superior(alternative='smaller')
+    u1, p1 = rank_compare_2indep(x, y).test_prob_superior(
+        alternative='smaller')
+    u2, p2 = rank_compare_2indep(y, x).test_prob_superior(
+        alternative='larger')
+    u3, p3 = rank_compare_2indep(x, y).test_prob_superior(
+        alternative='larger')
+    u4, p4 = rank_compare_2indep(y, x).test_prob_superior(
+        alternative='smaller')

     assert_approx_equal(p1, p2, significant=significant)
     assert_approx_equal(p3, p4, significant=significant)
@@ -425,6 +430,13 @@ def test_rank_compare_2indep1():
     # round trip
     assert_allclose(p, res.prob1, rtol=1e-13)

+    # round trip with Cohen's d
+    pc = cohensd2problarger(esd)
+    assert_allclose(pc, res.prob1, rtol=1e-13)
+
+    ci_tr = res.confint_lintransf(1, -1)
+    assert_allclose(ci_tr, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)
+

 def test_rank_compare_ord():
     # compare ordinal count version with full version