Skip to content

Commit

Permalink
Merge 360129e into 008d5fe
Browse files Browse the repository at this point in the history
  • Loading branch information
josef-pkt committed Jul 10, 2020
2 parents 008d5fe + 360129e commit 43c64f3
Show file tree
Hide file tree
Showing 7 changed files with 371 additions and 117 deletions.
26 changes: 18 additions & 8 deletions docs/source/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -421,13 +421,16 @@ proportions that can be used with NormalIndPower.
Statistics for two independent samples
Status: experimental, API might change, added in 0.12

.. autosummary::
:toctree: generated

test_proportions_2indep
confint_proportions_2indep
power_proportions_2indep
tost_proportions_2indep
samplesize_proportions_2indep_onetail
score_confint_inversion
score_test_proportions_2indep
_score_confint_inversion


Rates
Expand Down Expand Up @@ -470,14 +473,15 @@ Status: experimental, API might change, added in 0.12
.. autosummary::
:toctree: generated

test_mvmean
confint_mvmean
confint_mvmean_fromstats
cov_test
cov_test_blockdiagonal
cov_test_diagonal
cov_test_oneway
cov_test_spherical
test_mvmean
test_mvmean_2indep
test_cov
test_cov_blockdiagonal
test_cov_diagonal
test_cov_oneway
test_cov_spherical


.. _oneway_stats:
Expand Down Expand Up @@ -516,6 +520,7 @@ Status: experimental, API might change, added in 0.12
f2_to_wellek
fstat_to_wellek
wellek_to_f2
_fstat2effectsize

scale_transform
simulate_power_equivalence_oneway
Expand Down Expand Up @@ -666,6 +671,10 @@ Meta-Analysis

Functions for basic meta-analysis of a collection of sample statistics.

Examples can be found in the notebook

* `Meta-Analysis <examples/notebooks/generated/metaanalysis1.html>`__

Status: experimental, API might change, added in 0.12

.. module:: statsmodels.stats.meta_analysis
Expand All @@ -681,7 +690,8 @@ Status: experimental, API might change, added in 0.12
effectsize_smd

The module also includes internal functions to compute random effects
variance
variance.


.. autosummary::
:toctree: generated/
Expand Down
154 changes: 136 additions & 18 deletions statsmodels/stats/meta_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import pandas as pd
from scipy import stats

from statsmodels.stats.base import HolderTuple


class CombineResults(object):
"""Results from combined estimate of means or effect sizes
Expand Down Expand Up @@ -38,7 +40,7 @@ def __init__(self, **kwds):

def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
ci_func=None):
"""confidence interval for the overall mean estimate
"""confidence intervals for the effect size estimate of samples
Additional information needs to be provided for confidence intervals
that are not based on normal distribution using available variance.
Expand All @@ -47,15 +49,33 @@ def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
Parameters
----------
alpha : float in (0, 1)
Significance level for confidence interval. Nominal coverage is
``1 - alpha``.
use_t : None or bool
If use_t is None, then the attribute `use_t` determines whether
normal or t-distribution is used for confidence intervals.
Specifying use_t overrides the attribute.
If use_t is false, then confidence intervals are based on the
normal distribution. If it is true, then the t-distribution is
used.
nobs : None or float
Number of observations used for degrees of freedom computation.
Only used if use_t is true.
ci_func : None or callable
User provided function to compute confidende intervals.
This is not used yet and will allow using non-standard confidence
intervals.
Returns
-------
ci_eff : tuple of ndarrays
Tuple (ci_low, ci_upp) with confidence interval computed for each
sample.
Notes
-----
CombineResults currently only has information from the combine_effects
function, which does not provide details about individual samples.
"""
# this is a bit messy, we don't have enough information about
# computing conf_int already in results for other than normal
Expand Down Expand Up @@ -101,6 +121,35 @@ def conf_int_samples(self, alpha=0.05, use_t=None, nobs=None,
def conf_int(self, alpha=0.05, use_t=None):
"""confidence interval for the overall mean estimate
Parameters
----------
alpha : float in (0, 1)
Significance level for confidence interval. Nominal coverage is
``1 - alpha``.
use_t : None or bool
If use_t is None, then the attribute `use_t` determines whether
normal or t-distribution is used for confidence intervals.
Specifying use_t overrides the attribute.
If use_t is false, then confidence intervals are based on the
normal distribution. If it is true, then the t-distribution is
used.
Returns
-------
ci_eff_fe : tuple of floats
Confidence interval for mean effects size based on fixed effects
model with scale=1.
ci_eff_re : tuple of floats
Confidence interval for mean effects size based on random effects
model with scale=1
ci_eff_fe_wls : tuple of floats
Confidence interval for mean effects size based on fixed effects
model with estimated scale corresponding to WLS, ie. HKSJ.
ci_eff_re_wls : tuple of floats
Confidence interval for mean effects size based on random effects
model with estimated scale corresponding to WLS, ie. HKSJ.
If random effects method is fully iterated, i.e. Paule-Mandel, then
the estimated scale is 1.
"""
if use_t is None:
Expand All @@ -123,26 +172,56 @@ def conf_int(self, alpha=0.05, use_t=None):
return ci_eff_fe, ci_eff_re, ci_eff_fe_wls, ci_eff_re_wls

def test_homogeneity(self):
"""test whether the means of all samples are the same
"""Test whether the means of all samples are the same
currently no options, test uses chisquare distribution
default might change depending on `use_t`
Returns
-------
statistic : float
test statistic, ``q`` in meta-analysis, this is the pearson_chi2
statistic for the fixed effects model.
pvalue : float
df : float
degrees of freedom, equal to number of studies or sample minus 1.
res : HolderTuple instance
The results include the following attributes:
- statistic : float
Test statistic, ``q`` in meta-analysis, this is the
pearson_chi2 statistic for the fixed effects model.
- pvalue : float
P-value based on chisquare distribution.
- df : float
Degrees of freedom, equal to number of studies or samples
minus 1.
"""
pvalue = stats.chi2.sf(self.q, self.k - 1)
return self.q, pvalue, self.k - 1
res = HolderTuple(statistic=self.q,
pvalue=pvalue,
df=self.k - 1,
distr="chi2")
return res

def summary_array(self, alpha=0.05, use_t=None):
"""Create array with sample statistics and mean estimates
Parameters
----------
alpha : float in (0, 1)
Significance level for confidence interval. Nominal coverage is
``1 - alpha``.
use_t : None or bool
If use_t is None, then the attribute `use_t` determines whether
normal or t-distribution is used for confidence intervals.
Specifying use_t overrides the attribute.
If use_t is false, then confidence intervals are based on the
normal distribution. If it is true, then the t-distribution is
used.
Returns
-------
res : ndarray
Array with columns
['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
Rows include statistics for samples and estimates of overall mean.
column_names : list of str
The names for the columns, used when creating summary DataFrame.
"""

ci_low, ci_upp = self.conf_int_samples(alpha=alpha, use_t=use_t)
Expand All @@ -168,6 +247,26 @@ def summary_array(self, alpha=0.05, use_t=None):
def summary_frame(self, alpha=0.05, use_t=None):
"""Create DataFrame with sample statistics and mean estimates
Parameters
----------
alpha : float in (0, 1)
Significance level for confidence interval. Nominal coverage is
``1 - alpha``.
use_t : None or bool
If use_t is None, then the attribute `use_t` determines whether
normal or t-distribution is used for confidence intervals.
Specifying use_t overrides the attribute.
If use_t is false, then confidence intervals are based on the
normal distribution. If it is true, then the t-distribution is
used.
Returns
-------
res : DataFrame
pandas DataFrame instance with columns
['eff', "sd_eff", "ci_low", "ci_upp", "w_fe","w_re"].
Rows include statistics for samples and estimates of overall mean.
"""
if use_t is None:
use_t = self.use_t
Expand All @@ -179,6 +278,25 @@ def summary_frame(self, alpha=0.05, use_t=None):
return results

def plot_forest(self, ax=None, **kwds):
"""Forest plot with means and confidence intervals
Parameters
----------
ax : None or matplotlib axis instance
If ax is provided, then the plot will be added to it.
kwds : optional keyword arguments
Keywords are forwarded to the dot_plot function that creates the
plot.
Returns
-------
fig : Matplotlib figure instance
See Also
--------
dot_plot
"""
from statsmodels.graphics.dotplots import dot_plot
res_df = self.summary_frame()
hw = np.abs(res_df[["ci_low", "ci_upp"]] - res_df[["eff"]].values)
Expand All @@ -190,8 +308,8 @@ def plot_forest(self, ax=None, **kwds):
def effectsize_smd(mean1, sd1, nobs1, mean2, sd2, nobs2):
"""effect sizes for mean difference for use in meta-analysis
mean2, sd2, nobs2 are for treatment
mean1, sd1, nobs1 are for control
mean1, sd1, nobs1 are for treatment
mean2, sd2, nobs2 are for control
Effect sizes are computed for the mean difference ``mean1 - mean2``
standardized by an estimate of the within variance.
Expand Down Expand Up @@ -282,9 +400,9 @@ def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff",
Returns
-------
effect size : array
effect size for each sample
var_smdbc : array
estimate of variance of smd_bc
Effect size for each sample.
var_es : array
Estimate of variance of the effect size
Notes
-----
Expand All @@ -295,10 +413,10 @@ def effectsize_2proportions(count1, nobs1, count2, nobs2, statistic="diff",
The statistics are defined as:
risk difference = p1 - p2
log risk ratio = log(p1 / p2)
log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))
- risk difference = p1 - p2
- log risk ratio = log(p1 / p2)
- log odds_ratio = log(p1 / (1 - p1) * (1 - p2) / p2)
- arcsine-sqrt = arcsin(sqrt(p1)) - arcsin(sqrt(p2))
where p1 and p2 are the estimated proportions in sample 1 (treatment) and
sample 2 (control).
Expand Down

0 comments on commit 43c64f3

Please sign in to comment.