statsmodels · kuangzhexiang · Mar 22, 2024 · Mar 25, 2024 · Mar 25, 2024 · Mar 25, 2024
diff --git a/statsmodels/sandbox/stats/multicomp.py b/statsmodels/sandbox/stats/multicomp.py
@@ -967,14 +967,17 @@
         return results_table, (res, reject, pvals_corrected,
                                alphacSidak, alphacBonf), resarr
 
-    def tukeyhsd(self, alpha=0.05):
+    def tukeyhsd(self, alpha=0.05, use_var='equal'):
         """
         Tukey's range test to compare means of all pairs of groups
 
         Parameters
         ----------
         alpha : float, optional
             Value of FWER at which to calculate HSD.
+        use_var : {"unequal", "equal"}
+            If ``use_var`` is "unequal", then degrees of freedom is
+            scalar, also known as Games-Howell Test.
 
         Returns
         -------
@@ -988,9 +991,13 @@
 
         gmeans = self.groupstats.groupmean
         gnobs = self.groupstats.groupnobs
-        # var_ = self.groupstats.groupvarwithin()
-        # #possibly an error in varcorrection in this case
-        var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
+        if use_var == 'unequal':
+            var_ = self.groupstats.groupvarwithin()
+        elif use_var == 'equal':
+            var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
+        else:
+            raise ValueError('use_var should be "unequal" or "equal"')
+
         # res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
         # 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
         res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
@@ -1230,8 +1237,6 @@
     This needs to be multiplies by the joint variance estimate, means square
     error, MSE. To obtain the correction factor for the standard deviation,
     square root needs to be taken.
-
-    TODO: something looks wrong with dfjoint, is formula from SPSS
     '''
     #TODO: test and replace with broadcasting
     v1, v2 = np.meshgrid(var_all, var_all)
@@ -1240,7 +1245,7 @@
 
     varjoint = v1/n1 + v2/n2
 
-    dfjoint = varjoint**2 / (df1 * (v1/n1)**2 + df2 * (v2/n2)**2)
+    dfjoint = varjoint**2 / ((v1/n1)**2 / df1 + (v2/n2)**2 / df2)
 
     return varjoint, dfjoint
 
@@ -1272,6 +1277,7 @@
     else:
         df_total = np.sum(df)
 
+    df_pairs_ = None
     if (np.size(nobs_all) == 1) and (np.size(var_all) == 1):
         #balanced sample sizes and homogenous variance
         var_pairs = 1. * var_all / nobs_all * np.ones((n_means, n_means))
@@ -1281,7 +1287,7 @@
         var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all,
                                                              srange=True)
     elif np.size(var_all) > 1:
-        var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df)
+        var_pairs, df_pairs_ = varcorrection_pairs_unequal(var_all, nobs_all, df)
         var_pairs /= 2.
         #check division by two for studentized range
 
@@ -1296,10 +1302,12 @@
     idx1, idx2 = np.triu_indices(n_means, 1)
     meandiffs = meandiffs_[idx1, idx2]
     std_pairs = std_pairs_[idx1, idx2]
+    if df_pairs_ is not None:
+        df_total = df_pairs_[idx1, idx2]
 
     st_range = np.abs(meandiffs) / std_pairs #studentized range statistic
 
-    df_total_ = max(df_total, 5)  #TODO: smallest df in table
+    df_total_ = np.maximum(df_total, 5)  #TODO: smallest df in table
     if q_crit is None:
         q_crit = get_tukeyQcrit2(n_means, df_total, alpha=alpha)
 
@@ -1414,7 +1422,7 @@
         var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all,
                                                              srange=True)
     elif np.size(var_all) > 1:
-        var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df)
+        var_pairs, df_sum = varcorrection_pairs_unequal(var_all, nobs_all, df)
         var_pairs /= 2.
         #check division by two for studentized range
 

diff --git a/statsmodels/stats/multicomp.py b/statsmodels/stats/multicomp.py
@@ -10,7 +10,7 @@
 __all__ = ['tukeyhsd', 'MultiComparison']
 
 
-def pairwise_tukeyhsd(endog, groups, alpha=0.05):
+def pairwise_tukeyhsd(endog, groups, alpha=0.05, use_var='equal'):
     """
     Calculate all pairwise comparisons with TukeyHSD confidence intervals
 
@@ -22,6 +22,9 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05):
         array with groups, can be string or integers
     alpha : float
         significance level for the test
+    use_var : {"unequal", "equal"}
+        If ``use_var`` is "unequal", then degrees of freedom is
+        scalar, also known as Games-Howell Test.
 
     Returns
     -------
@@ -40,4 +43,5 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05):
     statsmodels.sandbox.stats.multicomp.TukeyHSDResults
     """
 
-    return MultiComparison(endog, groups).tukeyhsd(alpha=alpha)
+    return MultiComparison(endog, groups).tukeyhsd(alpha=alpha,
+                                                   use_var=use_var)
diff --git a/statsmodels/stats/tests/test_pairwise.py b/statsmodels/stats/tests/test_pairwise.py
@@ -120,6 +120,14 @@
 1 - 3\t-0.260\t-3.909\t3.389\t-
 '''
 
+# result in R: library(rstatix)
+# games_howell_test(df, StressReduction ~ Treatment, conf.level = 0.99)
+ss2_unequal = '''\
+1\tStressReduction\tmedical\tmental\t1.8888888888888888\t0.7123347940930316\t3.0654429836847461\t0.000196\t***
+2\tStressReduction\tmedical\tphysical\t0.8888888888888888\t-0.8105797509636128\t2.5883575287413905\t0.206000\tns
+3\tStressReduction\tmental\tphysical\t-1.0000000000000000\t-2.6647460755237473\t0.6647460755237473\t0.127000\tns
+'''
+
 cylinders = np.array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, 4,
                     4, 4, 4, 4, 6, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6,
                     6, 6, 4, 4, 4, 4, 4, 8, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -138,6 +146,7 @@
 ss2 = asbytes(ss2)
 ss3 = asbytes(ss3)
 ss5 = asbytes(ss5)
+ss2_unequal = asbytes(ss2_unequal)
 
 dta = pd.read_csv(BytesIO(ss), sep=r'\s+', header=None, engine='python')
 dta.columns = "Rust", "Brand", "Replication"
@@ -151,7 +160,10 @@
 for col in ('pair', 'sig'):
     dta5[col] = dta5[col].map(lambda v: v.encode('utf-8'))
 sas_ = dta5.iloc[[1, 3, 2]]
-
+games_howell_r_result = pd.read_csv(BytesIO(ss2_unequal), sep=r'\t', header=None, engine='python')
+games_howell_r_result.columns = ['idx', 'y', 'group1', 'group2', 'meandiff', 'lower', 'upper', 'pvalue', 'sig']
+for col in ('y', 'group1', 'group2', 'sig'):
+    games_howell_r_result[col] = games_howell_r_result[col].map(lambda v: v.encode('utf-8'))
 
 def get_thsd(mci, alpha=0.05):
     var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
@@ -172,20 +184,29 @@ class CheckTuckeyHSDMixin:
     @classmethod
     def setup_class_(cls):
         cls.mc = MultiComparison(cls.endog, cls.groups)
-        cls.res = cls.mc.tukeyhsd(alpha=cls.alpha)
+        if hasattr(cls, 'use_var'):
+            cls.res = cls.mc.tukeyhsd(alpha=cls.alpha, use_var=cls.use_var)
+        else:
+            cls.res = cls.mc.tukeyhsd(alpha=cls.alpha)
 
     def test_multicomptukey(self):
         assert_almost_equal(self.res.meandiffs, self.meandiff2, decimal=14)
         assert_almost_equal(self.res.confint, self.confint2, decimal=2)
         assert_equal(self.res.reject, self.reject2)
 
     def test_group_tukey(self):
+        if hasattr(self, 'use_var') and self.use_var == 'unequal':
+            # in unequal variance case, we feed groupvarwithin, no need to test total variance
+            return
         res_t = get_thsd(self.mc, alpha=self.alpha)
         assert_almost_equal(res_t[4], self.confint2, decimal=2)
 
     def test_shortcut_function(self):
         #check wrapper function
-        res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha)
+        if hasattr(self, 'use_var'):
+            res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha, use_var=self.use_var)
+        else:
+            res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha)
         assert_almost_equal(res.confint, self.res.confint, decimal=14)
 
     @pytest.mark.smoke
@@ -323,6 +344,23 @@ def setup_class(cls):
         cls.reject2 = pvals < 0.01
 
 
+class TestTukeyHSD2sUnequal(CheckTuckeyHSDMixin):
+
+    @classmethod
+    def setup_class(cls):
+        # Games-Howell test
+        cls.endog = dta2['StressReduction'][3:29]
+        cls.groups = dta2['Treatment'][3:29]
+        cls.alpha = 0.01
+        cls.use_var = 'unequal'
+        cls.setup_class_()
+
+        #from R: library(rstatix)
+        cls.meandiff2 = games_howell_r_result['meandiff']
+        cls.confint2 = games_howell_r_result[['lower', 'upper']].astype(float).values.reshape((3, 2))
+        cls.reject2 = games_howell_r_result['sig'] == asbytes('***')
+
+
 class TestTuckeyHSD3(CheckTuckeyHSDMixin):
 
     @classmethod