diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py index 0270c698290..e9b6415c098 100644 --- a/statsmodels/stats/nonparametric.py +++ b/statsmodels/stats/nonparametric.py @@ -21,6 +21,25 @@ def rankdata_2samp(x1, x2): + """Compute midranks for two samples + + Parameters + ---------- + x1, x2 : array_like + Original data for two samples that will be converted to midranks. + + Returns + ------- + rank1 : ndarray + Midranks of the first sample in the pooled sample. + rank2 : ndarray + Midranks of the second sample in the pooled sample. + ranki1 : ndarray + Internal midranks of the first sample + ranki2 : ndarray + Internal midranks of the second sample + + """ x1 = np.asarray(x1) x2 = np.asarray(x2) @@ -47,9 +66,45 @@ def rankdata_2samp(x1, x2): class RankCompareResult(HolderTuple): """Results for rank comparison + + This is a subclass of HolderTuple that includes results from intermediate + computations, as well as methods for hypothesis tests, confidence intervals + ans summary. """ - def conf_int(self, alpha=0.05, value=None, alternative="two-sided"): + def conf_int(self, value=None, alpha=0.05, alternative="two-sided"): + """ + Confidence interval for probability that sample 1 has larger values + + Confidence interval is for the shifted probability + + P(x1 > x2) + 0.5 * P(x1 = x2) - value + + Parameters + ---------- + value : float + Value, default 0, shifts the confidence interval, + e.g. ``value=0.5`` centers the confidence interval at zero. + alpha : float + Significance level for the confidence interval, coverage is + ``1-alpha`` + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + lower : float or ndarray + Lower confidence limit. This is -inf for the one-sided alternative + "smaller". + upper : float or ndarray + Upper confidence limit. This is inf for the one-sided alternative + "larger". + + """ p0 = value if p0 is None: @@ -67,6 +122,31 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): """test for superiority probability H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value + + The alternative is that the probability is either not equal, larger + or smaller than the null-value depending on the chosen alternative. + + Parameters + ---------- + value : float + Value of the probability under the Null hypothesis. + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + res : HolderTuple + HolderTuple instance with the following main attributes + + statistic : float + Test statistic for z- or t-test + pvalue : float + Pvalue of the test based on either normal or t distribution. + """ p0 = value # alias @@ -95,8 +175,8 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"): def tost_prob_superior(self, low, upp): '''test of stochastic (non-)equivalence of p = P(x1 > x2) - null hypothesis: p < low or p > upp - alternative hypothesis: low < p < upp + Null hypothesis: p < low or p > upp + Alternative hypothesis: low < p < upp where p is the probability that a random draw from the population of the first sample has a larger value than a random draw from the @@ -116,14 +196,21 @@ def tost_prob_superior(self, low, upp): Returns ------- - pvalue : float - pvalue of the non-equivalence test - t1, pv1, df1 : tuple - test statistic, pvalue and degrees of freedom for lower threshold - test - t2, pv2, df2 : tuple - test statistic, pvalue and degrees of freedom for upper threshold - test + res : HolderTuple + HolderTuple instance with the following main attributes + + pvalue : float + Pvalue of the equivalence test given by the larger pvalue of + the two one-sided tests. + statistic : float + Test statistic of the one-sided test that has the larger + pvalue. + results_larger : HolderTuple + Results instanc with test statistic, pvalue and degrees of + freedom for lower threshold test. + results_smaller : HolderTuple + Results instanc with test statistic, pvalue and degrees of + freedom for upper threshold test. ''' @@ -157,9 +244,25 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05, Parameters ---------- const, slope : float - Constant and slope for linear transformation. - alpha : float in [0, 1] - alternative : + Constant and slope for linear (affine) transformation. + alpha : float + Significance level for the confidence interval, coverage is + ``1-alpha`` + alternative : str + The alternative hypothesis, H1, has to be one of the following + + * 'two-sided' : H1: ``prob - value`` not equal to 0. + * 'larger' : H1: ``prob - value > 0`` + * 'smaller' : H1: ``prob - value < 0`` + + Returns + ------- + lower : float or ndarray + Lower confidence limit. This is -inf for the one-sided alternative + "smaller". + upper : float or ndarray + Upper confidence limit. This is inf for the one-sided alternative + "larger". """ @@ -170,22 +273,52 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05, low, upp = upp, low return low, upp - def effectsize_normal(self): + def effectsize_normal(self, prob=None): """ Cohen's d, standardized mean difference under normality assumption. - This computes the standardized mean difference effect size that is - equivalent to the rank based probability of superiority estimate, - if we assume that the data is normally distributed. + This computes the standardized mean difference, Cohen's d, effect size + that is equivalent to the rank based probability ``p`` of being + stochastically larger if we assume that the data is normally + distributed, given by + + :math: `d = F^{-1}(p) * \sqrt{2}` + + where :math:`F^{-1}` is the inverse of the cdf of the normal + distribution. + + Parameters + ---------- + prob : float in (0, 1) + Probability to be converted to Cohen's d effect size. + If prob is None, then the ``prob1`` attribute is used. Returns ------- - equivalent smd effect size + equivalent Cohen's d effect size under normality assumption. """ - return stats.norm.ppf(self.prob1) * np.sqrt(2) + if prob is None: + prob = self.prob1 + return stats.norm.ppf(prob) * np.sqrt(2) def summary(self, alpha=0.05, xname=None): + """summary table for probability that random draw x1 is larger than x2 + + Parameters + ---------- + alpha : float + Significance level for confidence intervals. Coverage is 1 - alpha + xname : None or list of str + If None, then each row has a name column with generic names. + If xname is a list of strings, then it will be included as part + of those names. + + Returns + ------- + SimpleTable instance with methods to convert to different output + formats. + """ yname = "None" effect = np.atleast_1d(self.prob1) @@ -229,7 +362,8 @@ def rank_compare_2indep(x1, x2, use_t=True): This is a measure underlying Wilcoxon-Mann-Whitney's U test, Fligner-Policello test and Brunner-Munzel test, and Inference is based on the asymptotic distribution of the Brunner-Munzel - test. + test. The half probability for ties corresponds to the use of midranks + and make it valid for discrete variables. The Null hypothesis for stochastic equality is p = 0.5, which corresponds to the Brunner-Munzel test. @@ -269,7 +403,8 @@ def rank_compare_2indep(x1, x2, use_t=True): Brunner and Munzel recommended to estimate the p-value by t-distribution when the size of data is 50 or less. If the size is lower than 10, it would - be better to use permuted Brunner Munzel test (see [2]_). + be better to use permuted Brunner Munzel test (see [2]_) for the test + of stochastic equality. This measure has been introduced in the literature under many different names relying on a variety of assumptions. @@ -277,8 +412,17 @@ def rank_compare_2indep(x1, x2, use_t=True): continuous, normal distribution case, ... extended it to the nonparameteric continuous distribution case as in Fligner-Policello. - Note: Brunner-Munzel define the probability for x1 to be stochastically - smaller than x2, while here we use stochastically larger. + WMW and related tests can only be interpreted as test of medians or tests + of central location only under very restrictive additional assumptions + such as both distribution are identical under the equality null hypothesis + (assumed by Mann-Whitney) or both distributions are symmetric (shown by + Fligner-Policello). If the distribution of the two samples can differ in + an arbitrary way, then the equality Null hypothesis corresponds to p=0.5 + against an alternative p != 0.5. ee for example Divine et al (2018) [3]_. + + Note: Brunner-Munzel and related literature define the probability that x1 + is stochastically smaller than x2, while here we use stochastically larger. + This equivalent to switching x1 and x2 in the two sample case. References ---------- @@ -288,16 +432,12 @@ def rank_compare_2indep(x1, x2, use_t=True): .. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the non-parametric Behrens-Fisher problem". Computational Statistics and Data Analysis. Vol. 51(2007): 5192-5204. - Examples - -------- - >>> from scipy import stats - >>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1] - >>> x2 = [3,3,4,3,1,2,3,1,1,5,4] - >>> w, p_value = stats.brunnermunzel(x1, x2) - >>> w - 3.1374674823029505 - >>> p_value - 0.0057862086661515377 + .. [3] Divine, George W., H. James Norton, Anna E. Barón, and Elizabeth + Juarez-Colunga. 2018. “The Wilcoxon–Mann–Whitney Procedure Fails as + a Test of Medians.” The American Statistician 72 (3): 278–86. + https://doi.org/10.1080/00031305.2017.1305291. + + """ x1 = np.asarray(x1) x2 = np.asarray(x2) @@ -354,10 +494,10 @@ def rank_compare_2indep(x1, x2, use_t=True): def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True): - """stochastically larger probability for 2 independend ordinal sample + """stochastically larger probability for 2 independend ordinal samples This is a special case of `rank_compare_2indep` when the data are given as - counts of two independent ordinal, i.e. ordered multinomial samples. + counts of two independent ordinal, i.e. ordered multinomial, samples. The statistic of interest is the probability that a random draw from the population of the first sample has a larger value than a random draw from @@ -386,6 +526,13 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True): This includes methods for hypothesis tests and confidence intervals for the probability that sample 1 is stochastically larger than sample 2. + + Notes + ----- + The implementation is based on the appendix of Munzel and Hauschke (2003) + with the addition of ``ddof`` so that the results match the general + function `rank_compare_2indep`. + """ count1 = np.asarray(count1) @@ -425,23 +572,23 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True): def prob_larger_continuous(distr1, distr2): - """probability that distr1 is stochastically larger than distr2 + """probability indicating that distr1 is stochastically larger than distr2 This computes p = P(x1 > x2) - for two distributions. + for two continuous distributions. Parameters ---------- distr1, distr2 : distributions - Two instances of scipy.stats.distributions. Methods that are required - are cdf, pdf and expect. + Two instances of scipy.stats.distributions. The required methods are + cdf of the second distribution and expect of the first distribution. Returns ------- - p : probability x1 is larger than xw + p : probability x1 is larger than x2 Notes @@ -458,9 +605,38 @@ def prob_larger_continuous(distr1, distr2): >>> stats.norm.expect(stats.t(5).cdf) 0.4999999999999999 + # distribution 1 with smaller mean (loc) than distribution 2 >>> prob_larger_continuous(stats.norm, stats.norm(loc=1)) 0.23975006109347669 """ return distr1.expect(distr2.cdf) + + +def cohensd2problarger(d): + """convert Cohen's d effect size to stochastically-larger-probability + + This assumes observations are normally distributed. + + Computed as + + p = Prob(x1 > x2) = F(d / sqrt(2)) + + where `F` is cdf of normal distribution. Cohen's d is defined as + + d = (mean1 - mean2) / std + + where ``std`` is the pooled within standard deviation. + + Parameters + ---------- + d : float or array_like + Cohen's d effect size for difference mean1 - mean2. + + Returns + ------- + prob : Prob(x1 > x2) + """ + + return stats.norm.cdf(d / np.sqrt(2)) diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py index 5c1d36b68bc..b6f38764941 100644 --- a/statsmodels/stats/tests/test_nonparametric.py +++ b/statsmodels/stats/tests/test_nonparametric.py @@ -294,10 +294,14 @@ def test_brunnermunzel_one_sided(): x, y = y, x # Results are compared with R's lawstat package. - u1, p1 = rank_compare_2indep(x, y).test_prob_superior(alternative='smaller') - u2, p2 = rank_compare_2indep(y, x).test_prob_superior(alternative='larger') - u3, p3 = rank_compare_2indep(x, y).test_prob_superior(alternative='larger') - u4, p4 = rank_compare_2indep(y, x).test_prob_superior(alternative='smaller') + u1, p1 = rank_compare_2indep(x, y + ).test_prob_superior(alternative='smaller') + u2, p2 = rank_compare_2indep(y, x + ).test_prob_superior(alternative='larger') + u3, p3 = rank_compare_2indep(x, y + ).test_prob_superior(alternative='larger') + u4, p4 = rank_compare_2indep(y, x + ).test_prob_superior(alternative='smaller') assert_approx_equal(p1, p2, significant=significant) assert_approx_equal(p3, p4, significant=significant) @@ -425,6 +429,9 @@ def test_rank_compare_2indep1(): # round trip assert_allclose(p, res.prob1, rtol=1e-13) + ci_tr = res.confint_lintransf(1, -1) + assert_allclose(ci_tr, 1 - np.array(res2_t.ci)[::-1], rtol=0.005) + def test_rank_compare_ord(): # compare ordinal count version with full version