diff --git a/statsmodels/stats/nonparametric.py b/statsmodels/stats/nonparametric.py
index 0270c698290..e9b6415c098 100644
--- a/statsmodels/stats/nonparametric.py
+++ b/statsmodels/stats/nonparametric.py
@@ -21,6 +21,25 @@
 
 
 def rankdata_2samp(x1, x2):
+    """Compute midranks for two samples
+
+    Parameters
+    ----------
+    x1, x2 : array_like
+        Original data for two samples that will be converted to midranks.
+
+    Returns
+    -------
+    rank1 : ndarray
+        Midranks of the first sample in the pooled sample.
+    rank2 : ndarray
+        Midranks of the second sample in the pooled sample.
+    ranki1 : ndarray
+        Internal midranks of the first sample
+    ranki2 : ndarray
+        Internal midranks of the second sample
+
+    """
     x1 = np.asarray(x1)
     x2 = np.asarray(x2)
 
@@ -47,9 +66,45 @@ def rankdata_2samp(x1, x2):
 
 class RankCompareResult(HolderTuple):
     """Results for rank comparison
+
+    This is a subclass of HolderTuple that includes results from intermediate
+    computations, as well as methods for hypothesis tests, confidence intervals
+    ans summary.
     """
 
-    def conf_int(self, alpha=0.05, value=None, alternative="two-sided"):
+    def conf_int(self, value=None, alpha=0.05, alternative="two-sided"):
+        """
+        Confidence interval for probability that sample 1 has larger values
+
+        Confidence interval is for the shifted probability
+
+            P(x1 > x2) + 0.5 * P(x1 = x2) - value
+
+        Parameters
+        ----------
+        value : float
+            Value, default 0, shifts the confidence interval,
+            e.g. ``value=0.5`` centers the confidence interval at zero.
+        alpha : float
+            Significance level for the confidence interval, coverage is
+            ``1-alpha``
+        alternative : str
+            The alternative hypothesis, H1, has to be one of the following
+
+               * 'two-sided' : H1: ``prob - value`` not equal to 0.
+               * 'larger' :   H1: ``prob - value > 0``
+               * 'smaller' :  H1: ``prob - value < 0``
+
+        Returns
+        -------
+        lower : float or ndarray
+            Lower confidence limit. This is -inf for the one-sided alternative
+            "smaller".
+        upper : float or ndarray
+            Upper confidence limit. This is inf for the one-sided alternative
+            "larger".
+
+        """
 
         p0 = value
         if p0 is None:
@@ -67,6 +122,31 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"):
         """test for superiority probability
 
         H0: P(x1 > x2) + 0.5 * P(x1 = x2) = value
+
+        The alternative is that the probability is either not equal, larger
+        or smaller than the null-value depending on the chosen alternative.
+
+        Parameters
+        ----------
+        value : float
+            Value of the probability under the Null hypothesis.
+        alternative : str
+            The alternative hypothesis, H1, has to be one of the following
+
+               * 'two-sided' : H1: ``prob - value`` not equal to 0.
+               * 'larger' :   H1: ``prob - value > 0``
+               * 'smaller' :  H1: ``prob - value < 0``
+
+        Returns
+        -------
+        res : HolderTuple
+            HolderTuple instance with the following main attributes
+
+            statistic : float
+                Test statistic for z- or t-test
+            pvalue : float
+                Pvalue of the test based on either normal or t distribution.
+
         """
 
         p0 = value  # alias
@@ -95,8 +175,8 @@ def test_prob_superior(self, value=0.5, alternative="two-sided"):
     def tost_prob_superior(self, low, upp):
         '''test of stochastic (non-)equivalence of p = P(x1 > x2)
 
-        null hypothesis:  p < low or p > upp
-        alternative hypothesis:  low < p < upp
+        Null hypothesis:  p < low or p > upp
+        Alternative hypothesis:  low < p < upp
 
         where p is the probability that a random draw from the population of
         the first sample has a larger value than a random draw from the
@@ -116,14 +196,21 @@ def tost_prob_superior(self, low, upp):
 
         Returns
         -------
-        pvalue : float
-            pvalue of the non-equivalence test
-        t1, pv1, df1 : tuple
-            test statistic, pvalue and degrees of freedom for lower threshold
-            test
-        t2, pv2, df2 : tuple
-            test statistic, pvalue and degrees of freedom for upper threshold
-            test
+        res : HolderTuple
+            HolderTuple instance with the following main attributes
+
+            pvalue : float
+                Pvalue of the equivalence test given by the larger pvalue of
+                the two one-sided tests.
+            statistic : float
+                Test statistic of the one-sided test that has the larger
+                pvalue.
+            results_larger : HolderTuple
+                Results instanc with test statistic, pvalue and degrees of
+                freedom for lower threshold test.
+            results_smaller : HolderTuple
+                Results instanc with test statistic, pvalue and degrees of
+                freedom for upper threshold test.
 
         '''
 
@@ -157,9 +244,25 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05,
         Parameters
         ----------
         const, slope : float
-            Constant and slope for linear transformation.
-        alpha : float in [0, 1]
-        alternative :
+            Constant and slope for linear (affine) transformation.
+        alpha : float
+            Significance level for the confidence interval, coverage is
+            ``1-alpha``
+        alternative : str
+            The alternative hypothesis, H1, has to be one of the following
+
+               * 'two-sided' : H1: ``prob - value`` not equal to 0.
+               * 'larger' :   H1: ``prob - value > 0``
+               * 'smaller' :  H1: ``prob - value < 0``
+
+        Returns
+        -------
+        lower : float or ndarray
+            Lower confidence limit. This is -inf for the one-sided alternative
+            "smaller".
+        upper : float or ndarray
+            Upper confidence limit. This is inf for the one-sided alternative
+            "larger".
 
         """
 
@@ -170,22 +273,52 @@ def confint_lintransf(self, const=-1, slope=2, alpha=0.05,
             low, upp = upp, low
         return low, upp
 
-    def effectsize_normal(self):
+    def effectsize_normal(self, prob=None):
         """
         Cohen's d, standardized mean difference under normality assumption.
 
-        This computes the standardized mean difference effect size that is
-        equivalent to the rank based probability of superiority estimate,
-        if we assume that the data is normally distributed.
+        This computes the standardized mean difference, Cohen's d, effect size
+        that is equivalent to the rank based probability ``p`` of being
+        stochastically larger if we assume that the data is normally
+        distributed, given by
+
+        :math: `d = F^{-1}(p) * \sqrt{2}`
+
+        where :math:`F^{-1}` is the inverse of the cdf of the normal
+        distribution.
+
+        Parameters
+        ----------
+        prob : float in (0, 1)
+            Probability to be converted to Cohen's d effect size.
+            If prob is None, then the ``prob1`` attribute is used.
 
         Returns
         -------
-        equivalent smd effect size
+        equivalent Cohen's d effect size under normality assumption.
 
         """
-        return stats.norm.ppf(self.prob1) * np.sqrt(2)
+        if prob is None:
+            prob = self.prob1
+        return stats.norm.ppf(prob) * np.sqrt(2)
 
     def summary(self, alpha=0.05, xname=None):
+        """summary table for probability that random draw x1 is larger than x2
+
+        Parameters
+        ----------
+        alpha : float
+            Significance level for confidence intervals. Coverage is 1 - alpha
+        xname : None or list of str
+            If None, then each row has a name column with generic names.
+            If xname is a list of strings, then it will be included as part
+            of those names.
+
+        Returns
+        -------
+        SimpleTable instance with methods to convert to different output
+        formats.
+        """
 
         yname = "None"
         effect = np.atleast_1d(self.prob1)
@@ -229,7 +362,8 @@ def rank_compare_2indep(x1, x2, use_t=True):
     This is a measure underlying Wilcoxon-Mann-Whitney's U test,
     Fligner-Policello test and Brunner-Munzel test, and
     Inference is based on the asymptotic distribution of the Brunner-Munzel
-    test.
+    test. The half probability for ties corresponds to the use of midranks
+    and make it valid for discrete variables.
 
     The Null hypothesis for stochastic equality is p = 0.5, which corresponds
     to the Brunner-Munzel test.
@@ -269,7 +403,8 @@ def rank_compare_2indep(x1, x2, use_t=True):
 
     Brunner and Munzel recommended to estimate the p-value by t-distribution
     when the size of data is 50 or less. If the size is lower than 10, it would
-    be better to use permuted Brunner Munzel test (see [2]_).
+    be better to use permuted Brunner Munzel test (see [2]_) for the test
+    of stochastic equality.
 
     This measure has been introduced in the literature under many different
     names relying on a variety of assumptions.
@@ -277,8 +412,17 @@ def rank_compare_2indep(x1, x2, use_t=True):
     continuous, normal distribution case, ... extended it to the nonparameteric
     continuous distribution case as in Fligner-Policello.
 
-    Note: Brunner-Munzel define the probability for x1 to be stochastically
-    smaller than x2, while here we use stochastically larger.
+    WMW and related tests can only be interpreted as test of medians or tests
+    of central location only under very restrictive additional assumptions
+    such as both distribution are identical under the equality null hypothesis
+    (assumed by Mann-Whitney) or both distributions are symmetric (shown by
+    Fligner-Policello). If the distribution of the two samples can differ in
+    an arbitrary way, then the equality Null hypothesis corresponds to p=0.5
+    against an alternative p != 0.5.  ee for example Divine et al (2018) [3]_.
+
+    Note: Brunner-Munzel and related literature define the probability that x1
+    is stochastically smaller than x2, while here we use stochastically larger.
+    This equivalent to switching x1 and x2 in the two sample case.
 
     References
     ----------
@@ -288,16 +432,12 @@ def rank_compare_2indep(x1, x2, use_t=True):
     .. [2] Neubert, K. and Brunner, E. "A studentized permutation test for the
            non-parametric Behrens-Fisher problem". Computational Statistics and
            Data Analysis. Vol. 51(2007): 5192-5204.
-    Examples
-    --------
-    >>> from scipy import stats
-    >>> x1 = [1,2,1,1,1,1,1,1,1,1,2,4,1,1]
-    >>> x2 = [3,3,4,3,1,2,3,1,1,5,4]
-    >>> w, p_value = stats.brunnermunzel(x1, x2)
-    >>> w
-    3.1374674823029505
-    >>> p_value
-    0.0057862086661515377
+    .. [3] Divine, George W., H. James Norton, Anna E. Barón, and Elizabeth
+           Juarez-Colunga. 2018. “The Wilcoxon–Mann–Whitney Procedure Fails as
+           a Test of Medians.” The American Statistician 72 (3): 278–86.
+           https://doi.org/10.1080/00031305.2017.1305291.
+
+
     """
     x1 = np.asarray(x1)
     x2 = np.asarray(x2)
@@ -354,10 +494,10 @@ def rank_compare_2indep(x1, x2, use_t=True):
 
 
 def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True):
-    """stochastically larger probability for 2 independend ordinal sample
+    """stochastically larger probability for 2 independend ordinal samples
 
     This is a special case of `rank_compare_2indep` when the data are given as
-    counts of two independent ordinal, i.e. ordered multinomial samples.
+    counts of two independent ordinal, i.e. ordered multinomial, samples.
 
     The statistic of interest is the probability that a random draw from the
     population of the first sample has a larger value than a random draw from
@@ -386,6 +526,13 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True):
         This includes methods for hypothesis tests and confidence intervals
         for the probability that sample 1 is stochastically larger than
         sample 2.
+
+    Notes
+    -----
+    The implementation is based on the appendix of Munzel and Hauschke (2003)
+    with the addition of ``ddof`` so that the results match the general
+    function `rank_compare_2indep`.
+
     """
 
     count1 = np.asarray(count1)
@@ -425,23 +572,23 @@ def rank_compare_2ordinal(count1, count2, ddof=1, use_t=True):
 
 
 def prob_larger_continuous(distr1, distr2):
-    """probability that distr1 is stochastically larger than distr2
+    """probability indicating that distr1 is stochastically larger than distr2
 
     This computes
 
         p = P(x1 > x2)
 
-    for two distributions.
+    for two continuous distributions.
 
     Parameters
     ----------
     distr1, distr2 : distributions
-        Two instances of scipy.stats.distributions. Methods that are required
-        are cdf, pdf and expect.
+        Two instances of scipy.stats.distributions. The required methods are
+        cdf of the second distribution and expect of the first distribution.
 
     Returns
     -------
-    p : probability x1 is larger than xw
+    p : probability x1 is larger than x2
 
 
     Notes
@@ -458,9 +605,38 @@ def prob_larger_continuous(distr1, distr2):
     >>> stats.norm.expect(stats.t(5).cdf)
     0.4999999999999999
 
+    # distribution 1 with smaller mean (loc) than distribution 2
     >>> prob_larger_continuous(stats.norm, stats.norm(loc=1))
     0.23975006109347669
 
     """
 
     return distr1.expect(distr2.cdf)
+
+
+def cohensd2problarger(d):
+    """convert Cohen's d effect size to stochastically-larger-probability
+
+    This assumes observations are normally distributed.
+
+    Computed as
+
+    p = Prob(x1 > x2) = F(d / sqrt(2))
+
+    where `F` is cdf of normal distribution. Cohen's d is defined as
+
+    d = (mean1 - mean2) / std
+
+    where ``std`` is the pooled within standard deviation.
+
+    Parameters
+    ----------
+    d : float or array_like
+        Cohen's d effect size for difference mean1 - mean2.
+
+    Returns
+    -------
+    prob : Prob(x1 > x2)
+    """
+
+    return stats.norm.cdf(d / np.sqrt(2))
diff --git a/statsmodels/stats/tests/test_nonparametric.py b/statsmodels/stats/tests/test_nonparametric.py
index 5c1d36b68bc..b6f38764941 100644
--- a/statsmodels/stats/tests/test_nonparametric.py
+++ b/statsmodels/stats/tests/test_nonparametric.py
@@ -294,10 +294,14 @@ def test_brunnermunzel_one_sided():
     x, y = y, x
 
     # Results are compared with R's lawstat package.
-    u1, p1 = rank_compare_2indep(x, y).test_prob_superior(alternative='smaller')
-    u2, p2 = rank_compare_2indep(y, x).test_prob_superior(alternative='larger')
-    u3, p3 = rank_compare_2indep(x, y).test_prob_superior(alternative='larger')
-    u4, p4 = rank_compare_2indep(y, x).test_prob_superior(alternative='smaller')
+    u1, p1 = rank_compare_2indep(x, y
+                                 ).test_prob_superior(alternative='smaller')
+    u2, p2 = rank_compare_2indep(y, x
+                                 ).test_prob_superior(alternative='larger')
+    u3, p3 = rank_compare_2indep(x, y
+                                 ).test_prob_superior(alternative='larger')
+    u4, p4 = rank_compare_2indep(y, x
+                                 ).test_prob_superior(alternative='smaller')
 
     assert_approx_equal(p1, p2, significant=significant)
     assert_approx_equal(p3, p4, significant=significant)
@@ -425,6 +429,9 @@ def test_rank_compare_2indep1():
     # round trip
     assert_allclose(p, res.prob1, rtol=1e-13)
 
+    ci_tr = res.confint_lintransf(1, -1)
+    assert_allclose(ci_tr, 1 - np.array(res2_t.ci)[::-1], rtol=0.005)
+
 
 def test_rank_compare_ord():
     # compare ordinal count version with full version