In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multicomp as multicomp

In [2]:
# Load the '2D' worksheet of the supplementary workbook into a DataFrame.
data_df = pd.read_excel(
    io='../data/42255_2025_1311_MOESM5_ESM.xlsx',
    sheet_name='2D',
)

In [4]:
# Index the rows by the 'Hepatic Transaminases (U/L)' column so that each
# measurement row can be looked up by its label.
# Guarded so that re-running this cell is a no-op: after the first run the column
# has already become the index, and an unguarded set_index would raise KeyError.
if data_df.index.name != 'Hepatic Transaminases (U/L)':
    data_df = data_df.set_index('Hepatic Transaminases (U/L)')

In [5]:
# Display the column labels; the per-group column lists defined in the next cell
# are written out from these names.
data_df.columns

Index(['ND', 'ND.1', 'ND.2', 'ND.3', 'ND.4', 'HFD', 'HFD.1', 'HFD.2', 'HFD.3',
       'HFD.4', 'HFD.5', 'HFD.6', 'HFD.7', 'HFD.8', 'HFD+SAL', 'HFD+SAL.1',
       'HFD+SAL.2', 'HFD+SAL.3', 'HFD+SAL.4', 'HFD+SAL.5', 'HFD+SAL.6',
       'HFD+SAL.7', 'HFD+SAL.8', 'HFD+SANA', 'HFD+SANA.1', 'HFD+SANA.2',
       'HFD+SANA.3', 'HFD+SANA.4', 'HFD+SANA.5', 'HFD+SANA.6'],
      dtype='object')

In [6]:
# Column labels belonging to each experimental group. Every column after the first
# in a group carries a '.1', '.2', ... suffix, so each list is the base name followed
# by its numbered copies.
nd = ['ND'] + ['ND.' + str(i) for i in range(1, 5)]                    # 5 columns
hfd = ['HFD'] + ['HFD.' + str(i) for i in range(1, 9)]                 # 9 columns
hfd_sal = ['HFD+SAL'] + ['HFD+SAL.' + str(i) for i in range(1, 9)]     # 9 columns
hfd_sana = ['HFD+SANA'] + ['HFD+SANA.' + str(i) for i in range(1, 7)]  # 7 columns

In [8]:
def reorder_label(label):
    """Move 'ND' to the front of a comparison label that ends with it.

    A label whose last two characters are 'ND' (e.g. 'HFD vs ND') is rewritten as
    'ND vs <text before the first "vs">' (e.g. 'ND vs HFD'). Any other label is
    returned unchanged.
    """
    if not label.endswith('ND'):
        return label
    other_group = label.partition('vs')[0].strip()
    return 'ND vs ' + other_group

In [11]:
# For every row label in data_df.index (the recorded output shows ALT and AST):
#   1. run a one-way ANOVA across the four groups,
#   2. compute pairwise t-tests with Bonferroni correction,
#   3. compute Tukey HSD pairwise p-values,
#   4. print both sets of p-values side by side as a Markdown table,
#   5. print each group's mean.
for cat in data_df.index:

    print(cat)

    # Non-missing values per group. Insertion order matters: it fixes the row/column
    # order of the Tukey p-value matrix and the order of the printed group means.
    samples = {
        'ND': data_df[nd].loc[cat].dropna().values,
        'HFD': data_df[hfd].loc[cat].dropna().values,
        'HFD+SAL': data_df[hfd_sal].loc[cat].dropna().values,
        'HFD+SANA': data_df[hfd_sana].loc[cat].dropna().values,
    }
    names = list(samples)
    collect = list(samples.values())
    pooled = np.concatenate(collect)
    print('N samples: ' + str(len(pooled)))

    print('ANOVA:')
    f = scipy.stats.f_oneway(*collect)
    # Degrees of freedom are (k - 1, N - k) for k groups and N pooled observations;
    # derived from the data rather than hard-coded.
    df_between = len(collect) - 1
    df_within = len(pooled) - len(collect)
    print('F(' + str(df_between) + ', ' + str(df_within) + ') = ' + str(f.statistic) + ', p = ' + str(f.pvalue))

    # Bonferroni: independent t-test for every pair of groups, p-values Bonferroni-corrected.
    comp = multicomp.MultiComparison(
        data=pooled,
        groups=np.repeat(names, [len(vals) for vals in collect]),
    )
    # Second return value: index 2 is the array of corrected p-values.
    # Third return value: per-pair record array with 'group1'/'group2' fields
    # NOTE(review): field names follow the statsmodels implementation; confirm
    # against the installed version.
    _, bonf_res, bonf_pairs = comp.allpairtest(scipy.stats.ttest_ind, method="bonf", alpha=0.05)
    bonf_df = pd.DataFrame({
        # reorder_label puts 'ND' first so the labels match the Tukey table below.
        'Comparison': [reorder_label(str(g1) + ' vs ' + str(g2))
                       for g1, g2 in zip(bonf_pairs['group1'], bonf_pairs['group2'])],
        'Bonferroni p-value': bonf_res[2],
    })

    # Tukey: pvalue is a square matrix indexed by the position of each group in `collect`;
    # take the upper triangle (i < j) in the same order as `names`.
    tukey = scipy.stats.tukey_hsd(*collect).pvalue
    pair_idx = [(i, j) for i in range(len(names)) for j in range(i + 1, len(names))]
    tukey_df = pd.DataFrame({
        'Comparison': [names[i] + ' vs ' + names[j] for i, j in pair_idx],
        'Tukey p-value': [tukey[i][j] for i, j in pair_idx],
    })

    # Left join keeps the Bonferroni row order regardless of pandas version
    # (an outer join sorts keys lexicographically on pandas >= 2.2). Both frames hold
    # the same set of comparisons, which validate='one_to_one' helps enforce.
    combo_df = pd.merge(bonf_df, tukey_df, on=['Comparison'], how='left', validate='one_to_one')
    # Placeholder column, filled in by hand in the write-up.
    combo_df['Published p-value'] = ''
    combo_df = combo_df[['Comparison', 'Published p-value', 'Bonferroni p-value', 'Tukey p-value']].set_index('Comparison')
    print(combo_df.to_markdown())

    # Group means, in the order ND, HFD, HFD+SAL, HFD+SANA.
    for grp in collect:
        print(np.mean(grp))

ALT
N samples: 30
ANOVA:
F(3, 26) = 36.41630455831537, p = 1.857143960468119e-09
| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value |
|:--------------------|:--------------------|---------------------:|----------------:|
| HFD vs HFD+SAL      |                     |          1           |     0.662792    |
| HFD vs HFD+SANA     |                     |          6.06159e-06 |     6.24178e-08 |
| ND vs HFD           |                     |          8.27776e-05 |     3.93916e-07 |
| HFD+SAL vs HFD+SANA |                     |          1.64104e-05 |     8.05942e-07 |
| ND vs HFD+SAL       |                     |          0.000185198 |     4.32789e-06 |
| ND vs HFD+SANA      |                     |          1           |     0.999999    |
52.0
581.5555555555555
512.3333333333334
52.857142857142854
AST
N samples: 30
ANOVA:
F(3, 26) = 25.636157776915564, p = 6.256739692291728e-08
| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-va

Here is Figure 2D:

![file](../img/pub_fig_2d.png)

This is described as involving a one-way ANOVA followed by a Bonferroni post-hoc test. I'll assume that a separate one-way ANOVA across the four groups (ND, HFD, HFD+SAL, HFD+SANA) was performed for ALT and for AST. Using the [source data](https://www.nature.com/articles/s42255-025-01311-z#Sec38) for this figure, most of the published p-values appear to be incorrect, whether they are compared against a Bonferroni post-hoc comparison (i.e., independent t-tests between each pair of groups, with Bonferroni FWER correction applied to the pairwise p-values) or against Tukey's post-hoc test. See the summary statistics below.

### ALT
- N samples: 30
- One-way ANOVA: F(3, 26) = 36.4, p = 1.86E-9

| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value |
|:--------------------|:--------------------|---------------------:|----------------:|
| HFD vs HFD+SAL      |              0.64       |          1.00           |     0.663    |
| HFD vs HFD+SANA     |             Not shown        |          6.06E-6 |     6.24E-8 |
| ND vs HFD           |             0.00007 (7E-5)        |          8.28E-5 |     3.94E-7 |
| HFD+SAL vs HFD+SANA |                0.00008 (8E-5)     |          1.64E-5 |     8.06E-7 |
| ND vs HFD+SAL       |            Not shown         |          0.000185 |     4.33E-6 |
| ND vs HFD+SANA      |              Not shown       |          1.00           |     1.00    |

### AST
- N samples: 30
- One-way ANOVA: F(3, 26) = 25.6, p = 6.26E-8

| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value |
|:--------------------|:--------------------|---------------------:|----------------:|
| HFD vs HFD+SAL      |           0.91          |          1.00           |     0.911    |
| HFD vs HFD+SANA     |            Not shown         |          6.88E-5 |     9.09E-7 |
| ND vs HFD           |          0.00009 (9E-5)           |          0.00118  |     3.18E-5 |
| HFD+SAL vs HFD+SANA |               0.00007 (7E-5)      |          3.94E-5 |     4.23E-6 |
| ND vs HFD+SAL       |              Not shown       |          0.000670 |     0.000136 |
| ND vs HFD+SANA      |             Not shown        |          1.00           |     0.900    |

Could the authors clarify? 

The code for this analysis is available at [github.com/reeserich/cal_et_al_2025](https://github.com/reeserich/cal_et_al_2025).