In [55]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multicomp as multicomp

# 1G (left)

In [56]:
# Load the source-data sheet for the left panel of Figure 1G.
data_df = pd.read_excel(
    '../data/42255_2025_1311_MOESM4_ESM.xlsx',
    sheet_name='1G-L',
)

In [57]:
# One array of non-missing values per treatment column.
# Note: the 'HFD+SANA ' key is looked up with a trailing space here
# (the right-panel cell indexes 'HFD+SANA' without it).
grp1, grp2, grp3 = (
    data_df[column].dropna().values
    for column in ('HFD', 'HFD+SAL', 'HFD+SANA ')
)

In [58]:
# Total number of observations pooled across the three groups.
np.concatenate((grp1, grp2, grp3)).size

31

In [59]:
# One-way ANOVA across the three treatment groups. Reuses the grp1..grp3
# arrays already extracted from data_df (same columns, same dropna) so the
# omnibus test and the post-hoc tests are guaranteed to see identical data.
scipy.stats.f_oneway(grp1, grp2, grp3)

F_onewayResult(statistic=2.874378247889131, pvalue=0.0732155926371584)

In [60]:
# Pairwise comparisons: a two-sample t-test for every pair of groups, with
# Bonferroni correction of the p-values at alpha = 0.05.
# NOTE(review): this runs a separate t-test per pair; some statistics packages
# instead base Bonferroni post-hoc tests on the pooled ANOVA error term —
# confirm which variant is intended.
comp = multicomp.MultiComparison(
    data=np.concatenate((grp1, grp2, grp3)),
    # One label per observation, in the same order as `data`.
    groups=np.repeat(
        ['HFD', 'HFD+SAL', 'HFD+SANA'],
        [len(grp1), len(grp2), len(grp3)],
    ),
)
# a1 and a2 are not used in the cells visible here; only tbl is displayed.
tbl, a1, a2 = comp.allpairtest(scipy.stats.ttest_ind, method="bonf", alpha=0.05)

In [61]:
# Display the pairwise-comparison table (raw and Bonferroni-corrected p-values).
tbl

group1,group2,stat,pval,pval_corr,reject
HFD,HFD+SAL,1.9001,0.0736,0.2207,False
HFD,HFD+SANA,-0.1524,0.8806,1.0,False
HFD+SAL,HFD+SANA,-2.5059,0.021,0.0629,False


In [62]:
# Pairwise Tukey HSD p-values for the three groups, returned as a matrix.
scipy.stats.tukey_hsd(grp1, grp2, grp3).pvalue

array([[1.        , 0.15648264, 0.9839871 ],
       [0.15648264, 1.        , 0.09107853],
       [0.9839871 , 0.09107853, 1.        ]])

![](../img/pub_fig_1g_l.png)

Here is the left panel of Figure 1G:

![file](https://pubpeer.com/storage/image-1750972850982.png)

This is described as involving a one-way ANOVA followed by a Bonferroni post-hoc test. Using the [source data](https://www.nature.com/articles/s42255-025-01311-z#Sec38) for this figure, we obtain F(2, 28) = 2.874, p = 0.07, which supports the authors' statement that treatment did not affect stool mass. However, the published post-hoc p-values do not match those we obtain from a Bonferroni post-hoc comparison (i.e., independent t-tests between each pair of groups, with Bonferroni FWER correction applied to the p-value of each pairwise test). Instead, the published p-values are much closer to the expected output of Tukey's post-hoc test.

| Comparison | Published p-value | Bonferroni p-value | Tukey p-value |
| -- | --- | --- | --- |
| HFD vs HFD+SANA | 0.98 | 1.000 | 0.984 |
| HFD vs HFD+SAL | 0.16 | 0.221 | 0.156 |
| HFD+SAL vs HFD+SANA | 0.09 | 0.063 | 0.091 |

Could the authors clarify which post-hoc test was actually used for this panel?


# 1G (right)

In [63]:
# Load the source-data sheet for the right panel of Figure 1G.
data_df = pd.read_excel(
    '../data/42255_2025_1311_MOESM4_ESM.xlsx',
    sheet_name='1G-R',
)

In [64]:
# One array of non-missing values per treatment column of the 1G-R sheet.
grp1, grp2, grp3 = (
    data_df[column].dropna().values
    for column in ('HFD', 'HFD+SAL', 'HFD+SANA')
)

In [65]:
# Show all observations pooled in group order (grp1, then grp2, then grp3).
np.concatenate((grp1, grp2, grp3))

array([0.64784531, 1.01854481, 0.43320781, 0.68435827, 1.03398685,
       0.72202543, 0.70547562, 0.66898151, 0.40121503, 0.39080251,
       0.46569904, 0.56799336, 1.11847481, 0.49026001, 0.47385096,
       0.76841862, 1.16442579, 0.85188218, 0.89924941])

In [66]:
# One-way ANOVA across the three treatment groups of the right panel.
scipy.stats.f_oneway(grp1, grp2, grp3)

F_onewayResult(statistic=0.8526603246699854, pvalue=0.4447638594390247)

In [67]:
# Pairwise two-sample t-tests between all group pairs for the right panel,
# with Bonferroni-corrected p-values at alpha = 0.05.
comp = multicomp.MultiComparison(
    data=np.concatenate((grp1, grp2, grp3)),
    # One label per observation, in the same order as `data`.
    groups=np.repeat(
        ['HFD', 'HFD+SAL', 'HFD+SANA'],
        [len(grp1), len(grp2), len(grp3)],
    ),
)
# a1 and a2 are not used in the cells visible here; only tbl is displayed.
tbl, a1, a2 = comp.allpairtest(scipy.stats.ttest_ind, method="bonf", alpha=0.05)

In [68]:
# Display the pairwise-comparison table (raw and Bonferroni-corrected p-values).
tbl

group1,group2,stat,pval,pval_corr,reject
HFD,HFD+SAL,1.091,0.2986,0.8958,False
HFD,HFD+SANA,-0.1925,0.8509,1.0,False
HFD+SAL,HFD+SANA,-1.1129,0.2918,0.8754,False


In [69]:
# Pairwise Tukey HSD p-values for the three right-panel groups, as a matrix.
scipy.stats.tukey_hsd(grp1, grp2, grp3).pvalue

array([[1.        , 0.54985981, 0.981743  ],
       [0.54985981, 1.        , 0.46999669],
       [0.981743  , 0.46999669, 1.        ]])