Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix unequal varcorrection in tukeyhsd #9175

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 18 additions & 10 deletions statsmodels/sandbox/stats/multicomp.py
Expand Up @@ -967,14 +967,17 @@
return results_table, (res, reject, pvals_corrected,
alphacSidak, alphacBonf), resarr

def tukeyhsd(self, alpha=0.05):
def tukeyhsd(self, alpha=0.05, use_var='equal'):
"""
Tukey's range test to compare means of all pairs of groups

Parameters
----------
alpha : float, optional
Value of FWER at which to calculate HSD.
use_var : {"unequal", "equal"}
If ``use_var`` is "unequal", then degrees of freedom is
scalar, also known as Games-Howell Test.

Returns
-------
Expand All @@ -988,9 +991,13 @@

gmeans = self.groupstats.groupmean
gnobs = self.groupstats.groupnobs
# var_ = self.groupstats.groupvarwithin()
# #possibly an error in varcorrection in this case
var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
if use_var == 'unequal':
var_ = self.groupstats.groupvarwithin()
elif use_var == 'equal':
var_ = np.var(self.groupstats.groupdemean(), ddof=len(gmeans))
else:
raise ValueError('use_var should be "unequal" or "equal"')

Check warning on line 999 in statsmodels/sandbox/stats/multicomp.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/sandbox/stats/multicomp.py#L999

Added line #L999 was not covered by tests

# res contains: 0:(idx1, idx2), 1:reject, 2:meandiffs, 3: std_pairs,
# 4:confint, 5:q_crit, 6:df_total, 7:reject2, 8: pvals
res = tukeyhsd(gmeans, gnobs, var_, df=None, alpha=alpha, q_crit=None)
Expand Down Expand Up @@ -1230,8 +1237,6 @@
This needs to be multiplies by the joint variance estimate, means square
error, MSE. To obtain the correction factor for the standard deviation,
square root needs to be taken.

TODO: something looks wrong with dfjoint, is formula from SPSS
'''
#TODO: test and replace with broadcasting
v1, v2 = np.meshgrid(var_all, var_all)
Expand All @@ -1240,7 +1245,7 @@

varjoint = v1/n1 + v2/n2

dfjoint = varjoint**2 / (df1 * (v1/n1)**2 + df2 * (v2/n2)**2)
dfjoint = varjoint**2 / ((v1/n1)**2 / df1 + (v2/n2)**2 / df2)

return varjoint, dfjoint

Expand Down Expand Up @@ -1272,6 +1277,7 @@
else:
df_total = np.sum(df)

df_pairs_ = None
if (np.size(nobs_all) == 1) and (np.size(var_all) == 1):
#balanced sample sizes and homogenous variance
var_pairs = 1. * var_all / nobs_all * np.ones((n_means, n_means))
Expand All @@ -1281,7 +1287,7 @@
var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all,
srange=True)
elif np.size(var_all) > 1:
var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df)
var_pairs, df_pairs_ = varcorrection_pairs_unequal(var_all, nobs_all, df)
var_pairs /= 2.
#check division by two for studentized range

Expand All @@ -1296,10 +1302,12 @@
idx1, idx2 = np.triu_indices(n_means, 1)
meandiffs = meandiffs_[idx1, idx2]
std_pairs = std_pairs_[idx1, idx2]
if df_pairs_ is not None:
df_total = df_pairs_[idx1, idx2]

st_range = np.abs(meandiffs) / std_pairs #studentized range statistic

df_total_ = max(df_total, 5) #TODO: smallest df in table
df_total_ = np.maximum(df_total, 5) #TODO: smallest df in table
if q_crit is None:
q_crit = get_tukeyQcrit2(n_means, df_total, alpha=alpha)

Expand Down Expand Up @@ -1414,7 +1422,7 @@
var_pairs = var_all * varcorrection_pairs_unbalanced(nobs_all,
srange=True)
elif np.size(var_all) > 1:
var_pairs, df_sum = varcorrection_pairs_unequal(nobs_all, var_all, df)
var_pairs, df_sum = varcorrection_pairs_unequal(var_all, nobs_all, df)

Check warning on line 1425 in statsmodels/sandbox/stats/multicomp.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/sandbox/stats/multicomp.py#L1425

Added line #L1425 was not covered by tests
var_pairs /= 2.
#check division by two for studentized range

Expand Down
8 changes: 6 additions & 2 deletions statsmodels/stats/multicomp.py
Expand Up @@ -10,7 +10,7 @@
__all__ = ['tukeyhsd', 'MultiComparison']


def pairwise_tukeyhsd(endog, groups, alpha=0.05):
def pairwise_tukeyhsd(endog, groups, alpha=0.05, use_var='equal'):
"""
Calculate all pairwise comparisons with TukeyHSD confidence intervals

Expand All @@ -22,6 +22,9 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05):
array with groups, can be string or integers
alpha : float
significance level for the test
use_var : {"unequal", "equal"}
If ``use_var`` is "unequal", then degrees of freedom is
scalar, also known as Games-Howell Test.

Returns
-------
Expand All @@ -40,4 +43,5 @@ def pairwise_tukeyhsd(endog, groups, alpha=0.05):
statsmodels.sandbox.stats.multicomp.TukeyHSDResults
"""

return MultiComparison(endog, groups).tukeyhsd(alpha=alpha)
return MultiComparison(endog, groups).tukeyhsd(alpha=alpha,
use_var=use_var)
44 changes: 41 additions & 3 deletions statsmodels/stats/tests/test_pairwise.py
Expand Up @@ -120,6 +120,14 @@
1 - 3\t-0.260\t-3.909\t3.389\t-
'''

# result in R: library(rstatix)
# games_howell_test(df, StressReduction ~ Treatment, conf.level = 0.99)
ss2_unequal = '''\
1\tStressReduction\tmedical\tmental\t1.8888888888888888\t0.7123347940930316\t3.0654429836847461\t0.000196\t***
2\tStressReduction\tmedical\tphysical\t0.8888888888888888\t-0.8105797509636128\t2.5883575287413905\t0.206000\tns
3\tStressReduction\tmental\tphysical\t-1.0000000000000000\t-2.6647460755237473\t0.6647460755237473\t0.127000\tns
'''

cylinders = np.array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, 4,
4, 4, 4, 4, 6, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6,
6, 6, 4, 4, 4, 4, 4, 8, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
Expand All @@ -138,6 +146,7 @@
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)
ss2_unequal = asbytes(ss2_unequal)

dta = pd.read_csv(BytesIO(ss), sep=r'\s+', header=None, engine='python')
dta.columns = "Rust", "Brand", "Replication"
Expand All @@ -151,7 +160,10 @@
for col in ('pair', 'sig'):
dta5[col] = dta5[col].map(lambda v: v.encode('utf-8'))
sas_ = dta5.iloc[[1, 3, 2]]

games_howell_r_result = pd.read_csv(BytesIO(ss2_unequal), sep=r'\t', header=None, engine='python')
games_howell_r_result.columns = ['idx', 'y', 'group1', 'group2', 'meandiff', 'lower', 'upper', 'pvalue', 'sig']
for col in ('y', 'group1', 'group2', 'sig'):
games_howell_r_result[col] = games_howell_r_result[col].map(lambda v: v.encode('utf-8'))

def get_thsd(mci, alpha=0.05):
var_ = np.var(mci.groupstats.groupdemean(), ddof=len(mci.groupsunique))
Expand All @@ -172,20 +184,29 @@ class CheckTuckeyHSDMixin:
@classmethod
def setup_class_(cls):
cls.mc = MultiComparison(cls.endog, cls.groups)
cls.res = cls.mc.tukeyhsd(alpha=cls.alpha)
if hasattr(cls, 'use_var'):
cls.res = cls.mc.tukeyhsd(alpha=cls.alpha, use_var=cls.use_var)
else:
cls.res = cls.mc.tukeyhsd(alpha=cls.alpha)

def test_multicomptukey(self):
assert_almost_equal(self.res.meandiffs, self.meandiff2, decimal=14)
assert_almost_equal(self.res.confint, self.confint2, decimal=2)
assert_equal(self.res.reject, self.reject2)

def test_group_tukey(self):
if hasattr(self, 'use_var') and self.use_var == 'unequal':
# in unequal variance case, we feed groupvarwithin, no need to test total variance
return
res_t = get_thsd(self.mc, alpha=self.alpha)
assert_almost_equal(res_t[4], self.confint2, decimal=2)

def test_shortcut_function(self):
#check wrapper function
res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha)
if hasattr(self, 'use_var'):
res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha, use_var=self.use_var)
else:
res = pairwise_tukeyhsd(self.endog, self.groups, alpha=self.alpha)
assert_almost_equal(res.confint, self.res.confint, decimal=14)

@pytest.mark.smoke
Expand Down Expand Up @@ -323,6 +344,23 @@ def setup_class(cls):
cls.reject2 = pvals < 0.01


class TestTukeyHSD2sUnequal(CheckTuckeyHSDMixin):

@classmethod
def setup_class(cls):
# Games-Howell test
cls.endog = dta2['StressReduction'][3:29]
cls.groups = dta2['Treatment'][3:29]
cls.alpha = 0.01
cls.use_var = 'unequal'
cls.setup_class_()

#from R: library(rstatix)
cls.meandiff2 = games_howell_r_result['meandiff']
cls.confint2 = games_howell_r_result[['lower', 'upper']].astype(float).values.reshape((3, 2))
cls.reject2 = games_howell_r_result['sig'] == asbytes('***')


class TestTuckeyHSD3(CheckTuckeyHSDMixin):

@classmethod
Expand Down