diff --git a/statsmodels/sandbox/stats/multicomp.py b/statsmodels/sandbox/stats/multicomp.py index d12edacdc9d..65277997193 100644 --- a/statsmodels/sandbox/stats/multicomp.py +++ b/statsmodels/sandbox/stats/multicomp.py @@ -967,7 +967,7 @@ def allpairtest(self, testfunc, alpha=0.05, method='bonf', pvalidx=1): return results_table, (res, reject, pvals_corrected, alphacSidak, alphacBonf), resarr - def tukeyhsd(self, alpha=0.05): + def tukeyhsd(self, alpha=0.05, use_var='equal'): """ Tukey's range test to compare means of all pairs of groups @@ -975,6 +975,9 @@ def tukeyhsd(self, alpha=0.05): ---------- alpha : float, optional Value of FWER at which to calculate HSD. + use_var : {"unequal", "equal"} + If ``use_var`` is "unequal", then degrees of freedom is + scalar, also known as Games-Howell Test. Returns ------- @@ -988,9 +991,13 @@ def tukeyhsd(self, alpha=0.05): gmeans = self.groupstats.groupmean gnobs = self.groupstats.groupnobs - # var_ = self.groupstats.groupvarwithin() - # #possibly an error in varcorrection in this case - var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans)) + if use_var == 'unequal': + var_ = self.groupstats.groupvarwithin() + elif use_var == 'equal': + var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans)) + else: + raise ValueError('use_var should be "unequal" or "equal"') + # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs, # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None) @@ -1230,8 +1237,6 @@ def varcorrection_pairs_unequal(var_all, nobs_all, df_all): This needs to be multiplies by the joint variance estimate, means square error, MSE. To obtain the correction factor for the standard deviation, square root needs to be taken. - - TODO: something looks wrong with dfjoint, is formula from SPSS ''' #TODO: test and replace with broadcasting v1, v2 = np.meshgrid(var_all, var_all) @@ -1240,7 +1245,7 @@ def varcorrection_pairs_unequal(var_all, nobs_all, df_all): varjoint = v1/n1 + v2/n2 - dfjoint = varjoint**2 / (df1 * (v1/n1)**2 + df2 * (v2/n2)**2) + dfjoint = varjoint**2 / ((v1/n1)**2 / df1 + (v2/n2)**2 / df2) return varjoint, dfjoint @@ -1272,6 +1277,7 @@ def tukeyhsd(mean_all, nobs_all, var_all, df=None, alpha=0.05, q_crit=None): else: df_total = np.sum(df) + df_pairs_ = None if (np.size(nobs_all) == 1) and (np.size(var_all) == 1): #balanced sample sizes and homogenous variance var_pairs = 1. * var_all / nobs_all * np.ones((n_means, n_means)) @@ -1281,7 +1287,7 @@ def tukeyhsd(mean_all, nobs_all, var_all, df=None, alpha=0.05, q_crit=None): var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all, srange=True) elif np.size(var_all) > 1: - var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df) + var_pairs, df_pairs_ = varcorrection_pairs_unequal(var_all, nobs_all, df) var_pairs /= 2. #check division by two for studentized range @@ -1296,10 +1302,12 @@ def tukeyhsd(mean_all, nobs_all, var_all, df=None, alpha=0.05, q_crit=None): idx1, idx2 = np.triu_indices(n_means, 1) meandiffs = meandiffs_[idx1, idx2] std_pairs = std_pairs_[idx1, idx2] + if df_pairs_ is not None: + df_total = df_pairs_[idx1, idx2] st_range = np.abs(meandiffs) / std_pairs #studentized range statistic - df_total_ = max(df_total, 5) #TODO: smallest df in table + df_total_ = np.maximum(df_total, 5) #TODO: smallest df in table if q_crit is None: q_crit = get_tukeyQcrit2(n_means, df_total, alpha=alpha) @@ -1414,7 +1422,7 @@ def distance_st_range(mean_all, nobs_all, var_all, df=None, triu=False): var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all, srange=True) elif np.size(var_all) > 1: - var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df) + var_pairs, df_sum = varcorrection_pairs_unequal(var_all, nobs_all, df) var_pairs /= 2. #check division by two for studentized range diff --git a/statsmodels/stats/multicomp.py b/statsmodels/stats/multicomp.py index 79ba99c1ecf..8b67bc197e8 100644 --- a/statsmodels/stats/multicomp.py +++ b/statsmodels/stats/multicomp.py @@ -10,7 +10,7 @@ __all__ = ['tukeyhsd', 'MultiComparison'] -def pairwise_tukeyhsd(endog, groups, alpha=0.05): +def pairwise_tukeyhsd(endog, groups, alpha=0.05, use_var='equal'): """ Calculate all pairwise comparisons with TukeyHSD confidence intervals @@ -22,6 +22,9 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05): array with groups, can be string or integers alpha : float significance level for the test + use_var : {"unequal", "equal"} + If ``use_var`` is "unequal", then degrees of freedom is + scalar, also known as Games-Howell Test. Returns ------- @@ -40,4 +43,5 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05): statsmodels.sandbox.stats.multicomp.TukeyHSDResults """ - return MultiComparison(endog, groups).tukeyhsd(alpha=alpha) + return MultiComparison(endog, groups).tukeyhsd(alpha=alpha, + use_var=use_var) diff --git a/statsmodels/stats/tests/test_pairwise.py b/statsmodels/stats/tests/test_pairwise.py index be4ac4fd7bf..97d0f1a4546 100644 --- a/statsmodels/stats/tests/test_pairwise.py +++ b/statsmodels/stats/tests/test_pairwise.py @@ -120,6 +120,14 @@ 1 - 3\t-0.260\t-3.909\t3.389\t- ''' +# result in R: library(rstatix) +# games_howell_test(df, StressReduction ~ Treatment, conf.level = 0.99) +ss2_unequal = '''\ +1\tStressReduction\tmedical\tmental\t1.8888888888888888\t0.7123347940930316\t3.0654429836847461\t0.000196\t*** +2\tStressReduction\tmedical\tphysical\t0.8888888888888888\t-0.8105797509636128\t2.5883575287413905\t0.206000\tns +3\tStressReduction\tmental\tphysical\t-1.0000000000000000\t-2.6647460755237473\t0.6647460755237473\t0.127000\tns +''' + cylinders = np.array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, 4, 4, 4, 4, 4, 6, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 4, 4, 4, 4, 4, 8, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -138,6 +146,7 @@ ss2 = asbytes(ss2) ss3 = asbytes(ss3) ss5 = asbytes(ss5) +ss2_unequal = asbytes(ss2_unequal) dta = pd.read_csv(BytesIO(ss), sep=r'\s+', header=None, engine='python') dta.columns = "Rust", "Brand", "Replication" @@ -151,7 +160,10 @@ for col in ('pair', 'sig'): dta5[col] = dta5[col].map(lambda v: v.encode('utf-8')) sas_ = dta5.iloc[[1, 3, 2]] - +games_howell_r_result = pd.read_csv(BytesIO(ss2_unequal), sep=r'\t', header=None, engine='python') +games_howell_r_result.columns = ['idx', 'y', 'group1', 'group2', 'meandiff', 'lower', 'upper', 'pvalue', 'sig'] +for col in ('y', 'group1', 'group2', 'sig'): + games_howell_r_result[col] = games_howell_r_result[col].map(lambda v: v.encode('utf-8')) def get_thsd(mci, alpha=0.05): var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique)) @@ -172,7 +184,10 @@ class CheckTuckeyHSDMixin: @classmethod def setup_class_(cls): cls.mc = MultiComparison(cls.endog, cls.groups) - cls.res = cls.mc.tukeyhsd(alpha=cls.alpha) + if hasattr(cls, 'use_var'): + cls.res = cls.mc.tukeyhsd(alpha=cls.alpha, use_var=cls.use_var) + else: + cls.res = cls.mc.tukeyhsd(alpha=cls.alpha) def test_multicomptukey(self): assert_almost_equal(self.res.meandiffs, self.meandiff2, decimal=14) @@ -180,12 +195,18 @@ def test_multicomptukey(self): assert_equal(self.res.reject, self.reject2) def test_group_tukey(self): + if hasattr(self, 'use_var') and self.use_var == 'unequal': + # in unequal variance case, we feed groupvarwithin, no need to test total variance + return res_t = get_thsd(self.mc, alpha=self.alpha) assert_almost_equal(res_t[4], self.confint2, decimal=2) def test_shortcut_function(self): #check wrapper function - res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha) + if hasattr(self, 'use_var'): + res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha, use_var=self.use_var) + else: + res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha) assert_almost_equal(res.confint, self.res.confint, decimal=14) @pytest.mark.smoke @@ -323,6 +344,23 @@ def setup_class(cls): cls.reject2 = pvals < 0.01 +class TestTukeyHSD2sUnequal(CheckTuckeyHSDMixin): + + @classmethod + def setup_class(cls): + # Games-Howell test + cls.endog = dta2['StressReduction'][3:29] + cls.groups = dta2['Treatment'][3:29] + cls.alpha = 0.01 + cls.use_var = 'unequal' + cls.setup_class_() + + #from R: library(rstatix) + cls.meandiff2 = games_howell_r_result['meandiff'] + cls.confint2 = games_howell_r_result[['lower', 'upper']].astype(float).values.reshape((3, 2)) + cls.reject2 = games_howell_r_result['sig'] == asbytes('***') + + class TestTuckeyHSD3(CheckTuckeyHSDMixin): @classmethod