In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multicomp as multicomp

In [4]:
# Read sheet '2C' of the supplementary workbook into a DataFrame.
source_path = '../data/42255_2025_1311_MOESM5_ESM.xlsx'
data_df = pd.read_excel(source_path, sheet_name='2C')

In [10]:
# Show the column labels as read from the sheet. Note that 'HFD+SAL ' and
# 'HFD+SANA ' carry a trailing space, so later lookups must include it.
data_df.columns

Index(['ND', 'HFD', 'HFD+SAL ', 'HFD+SANA '], dtype='object')

In [11]:
# Pull each group's column out as a NumPy array with missing values removed.
# Two of the column labels end in a space; they are spelled here exactly as
# they appear in data_df.columns.
grp1, grp2, grp3, grp4 = (
    data_df[column_label].dropna().values
    for column_label in ['ND', 'HFD', 'HFD+SAL ', 'HFD+SANA ']
)

In [15]:
# Group arrays in a fixed order (ND, HFD, HFD+SAL, HFD+SANA). The analysis
# cell indexes the Tukey p-value matrix by position in this list, so the
# order here matters.
collect = [grp1, grp2, grp3, grp4]

In [28]:
def reorder_label(label):
    """Rewrite a comparison label so that 'ND' comes first.

    If `label` ends with 'ND' (e.g. 'HFD vs ND'), return 'ND vs <left side>',
    where <left side> is the text before the first 'vs', stripped of
    surrounding whitespace. Any other label is returned unchanged.
    """
    if not label.endswith('ND'):
        return label
    left_side = label.split('vs')[0].strip()
    return 'ND vs ' + left_side

In [36]:
# One-way ANOVA across the groups in `collect`, followed by two sets of
# post-hoc pairwise p-values (Bonferroni-corrected independent t-tests and
# Tukey HSD), merged into one table and printed as markdown.
#
# The code runs at cell level rather than inside a one-iteration
# `for _ in [0]:` loop: the loop only added indentation and rebound `_`,
# the name IPython uses for the previous cell output.

# Display labels, in the same order as the arrays in `collect`. This single
# list drives both the statsmodels group vector and the Tukey table so the
# two cannot drift apart.
group_names = ['ND', 'HFD', 'HFD+SAL', 'HFD+SANA']
pooled = np.concatenate(collect)
n_total = len(pooled)
n_groups = len(collect)
assert len(group_names) == n_groups, 'one label is required per group in `collect`'

print('N samples: ' + str(n_total))
print('ANOVA:')
f = scipy.stats.f_oneway(*collect)
# Degrees of freedom: (k - 1, N - k) for k groups and N pooled observations.
print('F(' + str(n_groups - 1) + ', ' + str(n_total - n_groups) + ') = ' + str(f.statistic) + ', p = ' + str(f.pvalue))

# Bonferroni: every pairwise independent t-test, Bonferroni-corrected.
comp = multicomp.MultiComparison(
    data=pooled,
    # One label per observation, repeated to match each group's length.
    groups=np.repeat(group_names, [len(grp_values) for grp_values in collect]),
)
tbl, a1, a2 = comp.allpairtest(scipy.stats.ttest_ind, method="bonf", alpha=0.05)
bonf = pd.DataFrame(tbl)
# Row 0 of the table holds the header cells; promote it to column names.
bonf.columns = bonf.loc[0].astype(str)
# Explicit copy so the column assignments below do not write into a slice.
bonf = bonf.iloc[1:].copy()
# Overwrite the table's corrected p-values with a1[2]; the displayed table
# shows 4-decimal values, whereas a1[2] carries more digits.
bonf['pval_corr'] = a1[2]
bonf['g1'] = bonf['group1']
bonf['g2'] = bonf['group2']
bonf_df = bonf.copy()
bonf_df['Bonferroni p-value'] = bonf_df['pval_corr']
bonf_df['Comparison'] = bonf_df['g1'].astype(str) + ' vs ' + bonf_df['g2'].astype(str)
# Put 'ND' first in the label so the labels line up with the Tukey table.
bonf_df['Comparison'] = bonf_df['Comparison'].apply(reorder_label)

# Tukey: the p-value matrix is indexed by position in `collect`; take the
# upper triangle (i < j) in row-major order.
tukey = scipy.stats.tukey_hsd(*collect).pvalue
pairs = [(i, j) for i in range(n_groups) for j in range(i + 1, n_groups)]
tukey_df = pd.DataFrame({'g1': [group_names[i] for i, j in pairs],
                         'g2': [group_names[j] for i, j in pairs],
                         'Tukey p-value': [tukey[i][j] for i, j in pairs]})
tukey_df['Comparison'] = tukey_df['g1'].astype(str) + ' vs ' + tukey_df['g2'].astype(str)

combo_df = pd.merge(bonf_df, tukey_df, on=['Comparison'], how='outer')
# Left blank here; the published values are filled in by hand in the write-up.
combo_df['Published p-value'] = ''
combo_df = combo_df[['Comparison', 'Published p-value', 'Bonferroni p-value', 'Tukey p-value']].set_index('Comparison')
print(combo_df.to_markdown())

N samples: 45
ANOVA:
F(3, 41) = 10.409125673977996, p = 3.202301065084262e-05
| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value |
|:--------------------|:--------------------|---------------------:|----------------:|
| HFD vs HFD+SAL      |                     |          1           |     0.355918    |
| HFD vs HFD+SANA     |                     |          0.000654853 |     2.73412e-05 |
| ND vs HFD           |                     |          0.030796    |     0.00946174  |
| HFD+SAL vs HFD+SANA |                     |          0.00381308  |     0.00271201  |
| ND vs HFD+SAL       |                     |          0.359079    |     0.294993    |
| ND vs HFD+SANA      |                     |          0.0499569   |     0.238884    |


In [38]:
# Display the pairwise-comparison table returned by comp.allpairtest(...)
# in the analysis cell (raw and Bonferroni-corrected t-test p-values).
tbl

group1,group2,stat,pval,pval_corr,reject
HFD,HFD+SAL,1.3159,0.2031,1.0,False
HFD,HFD+SANA,4.7995,0.0001,0.0007,True
HFD,ND,3.162,0.0051,0.0308,True
HFD+SAL,HFD+SANA,3.9785,0.0006,0.0038,True
HFD+SAL,ND,1.9893,0.0598,0.3591,False
HFD+SANA,ND,-2.9125,0.0083,0.05,True


In [19]:
# Bare tuple of the four group labels; nothing is assigned, the cell only
# echoes the tuple.
'ND', 'HFD', 'HFD+SAL', 'HFD+SANA'

('ND', 'HFD', 'HFD+SAL', 'HFD+SANA')

Here is Figure 2C:

![file](../img/pub_fig_2c.png)

This is described as involving a one-way ANOVA followed by Bonferroni post-hoc. Using the [source data](https://www.nature.com/articles/s42255-025-01311-z#Sec38) for this figure, we arrive at F(3, 41) = 10.4, p = 3.20E-5. This implies that there was a significant difference among the group means. However, the p-values for the post-hoc test appear to be taken selectively from both Bonferroni post-hoc comparisons (i.e., independent t-tests between groups and Bonferroni FWER correction on p-values for each pairwise test) and from Tukey's post-hoc test.

| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value | Matches which? |
|:--------------------|:--------------------|---------------------:|----------------:|----:|
| HFD vs HFD+SAL      |           0.36          |          1.00           |     0.356    | Tukey |
| HFD vs HFD+SANA     |            0.0006         |          0.000655 |     2.73e-05 | Bonferroni |
| ND vs HFD           |               0.009      |          0.0308    |     0.00946  | Tukey |
| HFD+SAL vs HFD+SANA |            0.003         |          0.00381  |     0.00271  | Tukey |
| ND vs HFD+SAL       |            Not shown         |          0.359    |     0.295    | - |
| ND vs HFD+SANA      |            0.24         |          0.0500   |     0.239    | Tukey |

Could the authors clarify? 

The code for this analysis is available at [github.com/reeserich/cal_et_al_2025](https://github.com/reeserich/cal_et_al_2025).

In [37]:
# Print the arithmetic mean of each group, one per line, in `collect` order.
for grp in collect:
    group_mean = np.mean(grp)
    print(group_mean)

4.238545454545454
5.4565
4.8613333333333335
3.5714166666666665
