Merge 360129e into 008d5fe

statsmodels · Jul 10, 2020 · 43c64f3 · 43c64f3
2 parents 008d5fe + 360129e
commit 43c64f3
Show file tree

Hide file tree

Showing 7 changed files with 371 additions and 117 deletions.
diff --git a/docs/source/stats.rst b/docs/source/stats.rst
@@ -421,13 +421,16 @@ proportions that can be used with NormalIndPower.
 Statistics for two independent samples
 Status: experimental, API might change, added in 0.12
 
+.. autosummary::
+   :toctree: generated
+
    test_proportions_2indep
    confint_proportions_2indep
    power_proportions_2indep
    tost_proportions_2indep
    samplesize_proportions_2indep_onetail
-   score_confint_inversion
    score_test_proportions_2indep
+   _score_confint_inversion
 
 
 Rates
@@ -470,14 +473,15 @@ Status: experimental, API might change, added in 0.12
 .. autosummary::
    :toctree: generated
 
+   test_mvmean
    confint_mvmean
    confint_mvmean_fromstats
-   cov_test
-   cov_test_blockdiagonal
-   cov_test_diagonal
-   cov_test_oneway
-   cov_test_spherical
-   test_mvmean
+   test_mvmean_2indep
+   test_cov
+   test_cov_blockdiagonal
+   test_cov_diagonal
+   test_cov_oneway
+   test_cov_spherical
 
 
 .. _oneway_stats:
@@ -516,6 +520,7 @@ Status: experimental, API might change, added in 0.12
    f2_to_wellek
    fstat_to_wellek
    wellek_to_f2
+   _fstat2effectsize
 
    scale_transform
    simulate_power_equivalence_oneway
@@ -666,6 +671,10 @@ Meta-Analysis
 
 Functions for basic meta-analysis of a collection of sample statistics.
 
+Examples can be found in the notebook
+
+ * `Meta-Analysis <examples/notebooks/generated/metaanalysis1.html>`__
+
 Status: experimental, API might change, added in 0.12
 
 .. module:: statsmodels.stats.meta_analysis
@@ -681,7 +690,8 @@ Status: experimental, API might change, added in 0.12
    effectsize_smd
 
 The module also includes internal functions to compute random effects
-variance
+variance.
+
 
 .. autosummary::
    :toctree: generated/

diff --git a/statsmodels/stats/meta_analysis.py b/statsmodels/stats/meta_analysis.py
@@ -11,6 +11,8 @@
 import pandas as pd
 from scipy import stats
 
+from statsmodels.stats.base import HolderTuple
+
 
 class CombineResults(object):
     """Results from combined estimate of means or effect sizes
@@ -38,7 +40,7 @@ def __init__(self, **kwds):
 
     def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
                          ci_func=None):
-        """confidence interval for the overall mean estimate
+        """confidence intervals for the effect size estimate of samples
 
         Additional information needs to be provided for confidence intervals
         that are not based on normal distribution using available variance.
@@ -47,15 +49,33 @@ def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
         Parameters
         ----------
         alpha : float in (0, 1)
+            Significance level for confidence interval. Nominal coverage is
+            ``1 - alpha``.
         use_t : None or bool
+            If use_t is None, then the attribute `use_t` determines whether
+            normal or t-distribution is used for confidence intervals.
+            Specifying use_t overrides the attribute.
+            If use_t is false, then confidence intervals are based on the
+            normal distribution. If it is true, then the t-distribution is
+            used.
         nobs : None or float
+            Number of observations used for degrees of freedom computation.
+            Only used if use_t is true.
         ci_func : None or callable
+            User provided function to compute confidende intervals.
+            This is not used yet and will allow using non-standard confidence
+            intervals.
 
         Returns
         -------
         ci_eff : tuple of ndarrays
             Tuple (ci_low, ci_upp) with confidence interval computed for each
             sample.
+
+        Notes
+        -----
+        CombineResults currently only has information from the combine_effects
+        function, which does not provide details about individual samples.
         """
         # this is a bit messy, we don't have enough information about
         # computing conf_int already in results for other than normal
@@ -101,6 +121,35 @@ def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
     def conf_int(self, alpha=0.05, use_t=None):
         """confidence interval for the overall mean estimate
 
+        Parameters
+        ----------
+        alpha : float in (0, 1)
+            Significance level for confidence interval. Nominal coverage is
+            ``1 - alpha``.
+        use_t : None or bool
+            If use_t is None, then the attribute `use_t` determines whether
+            normal or t-distribution is used for confidence intervals.
+            Specifying use_t overrides the attribute.
+            If use_t is false, then confidence intervals are based on the
+            normal distribution. If it is true, then the t-distribution is
+            used.
+
+        Returns
+        -------
+        ci_eff_fe : tuple of floats
+            Confidence interval for mean effects size based on fixed effects
+            model with scale=1.
+        ci_eff_re : tuple of floats
+            Confidence interval for mean effects size based on random effects
+            model with scale=1
+        ci_eff_fe_wls : tuple of floats
+            Confidence interval for mean effects size based on fixed effects
+            model with estimated scale corresponding to WLS, ie. HKSJ.
+        ci_eff_re_wls : tuple of floats
+            Confidence interval for mean effects size based on random effects
+            model with estimated scale corresponding to WLS, ie. HKSJ.
+            If random effects method is fully iterated, i.e. Paule-Mandel, then
+            the estimated scale is 1.
 
         """
         if use_t is None:
@@ -123,26 +172,56 @@ def conf_int(self, alpha=0.05, use_t=None):
         return ci_eff_fe, ci_eff_re, ci_eff_fe_wls, ci_eff_re_wls
 
     def test_homogeneity(self):
-        """test whether the means of all samples are the same
+        """Test whether the means of all samples are the same
 
         currently no options, test uses chisquare distribution
         default might change depending on `use_t`
 
         Returns
         -------
-        statistic : float
-            test statistic, ``q`` in meta-analysis, this is the pearson_chi2
-            statistic for the fixed effects model.
-        pvalue : float
-        df : float
-            degrees of freedom, equal to number of studies or sample minus 1.
+        res : HolderTuple instance
+            The results include the following attributes:
+
+            - statistic : float
+                Test statistic, ``q`` in meta-analysis, this is the
+                pearson_chi2 statistic for the fixed effects model.
+            - pvalue : float
+                P-value based on chisquare distribution.
+            - df : float
+                Degrees of freedom, equal to number of studies or samples
+                minus 1.
         """
         pvalue = stats.chi2.sf(self.q, self.k - 1)
-        return self.q, pvalue, self.k - 1
+        res = HolderTuple(statistic=self.q,
+                          pvalue=pvalue,
+                          df=self.k - 1,
+                          distr="chi2")
+        return res
 
     def summary_array(self, alpha=0.05, use_t=None):
         """Create array with sample statistics and mean estimates
 
+        Parameters
+        ----------
+        alpha : float in (0, 1)
+            Significance level for confidence interval. Nominal coverage is
+            ``1 - alpha``.
+        use_t : None or bool
+            If use_t is None, then the attribute `use_t` determines whether
+            normal or t-distribution is used for confidence intervals.
+            Specifying use_t overrides the attribute.
+            If use_t is false, then confidence intervals are based on the
+            normal distribution. If it is true, then the t-distribution is
+            used.
+
+        Returns
+        -------
+        res : ndarray
+            Array with columns
+            ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
+            Rows include statistics for samples and estimates of overall mean.
+        column_names : list of str
+            The names for the columns, used when creating summary DataFrame.
         """
 
         ci_low, ci_upp = self.conf_int_samples(alpha=alpha, use_t=use_t)
@@ -168,6 +247,26 @@ def summary_array(self, alpha=0.05, use_t=None):
     def summary_frame(self, alpha=0.05, use_t=None):
         """Create DataFrame with sample statistics and mean estimates
 
+        Parameters
+        ----------
+        alpha : float in (0, 1)
+            Significance level for confidence interval. Nominal coverage is
+            ``1 - alpha``.
+        use_t : None or bool
+            If use_t is None, then the attribute `use_t` determines whether
+            normal or t-distribution is used for confidence intervals.
+            Specifying use_t overrides the attribute.
+            If use_t is false, then confidence intervals are based on the
+            normal distribution. If it is true, then the t-distribution is
+            used.
+
+        Returns
+        -------
+        res : DataFrame
+            pandas DataFrame instance with columns
+            ['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
+            Rows include statistics for samples and estimates of overall mean.
+
         """
         if use_t is None:
             use_t = self.use_t
@@ -179,6 +278,25 @@ def summary_frame(self, alpha=0.05, use_t=None):
         return results
 
     def plot_forest(self, ax=None, **kwds):
+        """Forest plot with means and confidence intervals
+
+        Parameters
+        ----------
+        ax : None or matplotlib axis instance
+            If ax is provided, then the plot will be added to it.
+        kwds : optional keyword arguments
+            Keywords are forwarded to the dot_plot function that creates the
+            plot.
+
+        Returns
+        -------
+        fig : Matplotlib figure instance
+
+        See Also
+        --------
+        dot_plot
+
+        """
         from statsmodels.graphics.dotplots import dot_plot
         res_df = self.summary_frame()
         hw = np.abs(res_df[["ci_low", "ci_upp"]] - res_df[["eff"]].values)
@@ -190,8 +308,8 @@ def plot_forest(self, ax=None, **kwds):
 def effectsize_smd(mean1, sd1, nobs1, mean2, sd2, nobs2):
     """effect sizes for mean difference for use in meta-analysis
 
-    mean2, sd2, nobs2 are for treatment
-    mean1, sd1, nobs1 are for control
+    mean1, sd1, nobs1 are for treatment
+    mean2, sd2, nobs2 are for control
 
     Effect sizes are computed for the mean difference ``mean1 - mean2``
     standardized by an estimate of the within variance.
@@ -282,9 +400,9 @@ def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff",
     Returns
     -------
     effect size : array
-        effect size for each sample
-    var_smdbc : array
-        estimate of variance of smd_bc
+        Effect size for each sample.
+    var_es : array
+        Estimate of variance of the effect size
 
     Notes
     -----
@@ -295,10 +413,10 @@ def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff",
 
     The statistics are defined as:
 
-    risk difference = p1 - p2
-    log risk ratio = log(p1 / p2)
-    log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
-    arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))
+     - risk difference = p1 - p2
+     - log risk ratio = log(p1 / p2)
+     - log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
+     - arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))
 
     where p1 and p2 are the estimated proportions in sample 1 (treatment) and
     sample 2 (control).