In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multicomp as multicomp

In [5]:
# Read sheet '1L' of the supplementary source-data workbook into a DataFrame.
# The path is relative to the notebook's working directory.
data_df = pd.read_excel(
    io='../data/42255_2025_1311_MOESM4_ESM.xlsx',
    sheet_name='1L',
)

In [6]:
# Pull each treatment column out as a NumPy array, dropping missing cells
# so that every array holds only the observed values of its group.
grp1, grp2, grp3 = (
    data_df[column_name].dropna().values
    for column_name in ('HFD', 'HFD+SAL', 'HFD+SANA')
)

In [7]:
# Bundle the three group arrays in a fixed order: grp1, grp2, grp3.
collect = [
    grp1,
    grp2,
    grp3,
]

In [8]:
# Total number of observations across the three groups (sanity check on N).
sum(len(group_values) for group_values in (grp1, grp2, grp3))

24

In [9]:
# One-way ANOVA across the three groups; `collect` holds [grp1, grp2, grp3].
scipy.stats.f_oneway(*collect)

F_onewayResult(statistic=3.225288230628737, pvalue=0.060049883564849756)

In [10]:
# Pairwise comparison of all groups: an independent two-sample t-test for
# every pair, with the p-values Bonferroni-corrected at alpha = 0.05.
# `data` stacks the observations; `groups` carries one label per observation,
# in the same order as the stacked values.
comp = multicomp.MultiComparison(
    data=np.concatenate((grp1, grp2, grp3)),
    groups=np.repeat(
        ['HFD', 'HFD+SAL', 'HFD+SANA'],
        [len(grp1), len(grp2), len(grp3)],
    ),
)
# allpairtest returns three objects: the summary table shown below, a tuple of
# raw results (`a1`, inspected in the next cell), and a third object (`a2`)
# that is not used further in this notebook.
tbl, a1, a2 = comp.allpairtest(
    scipy.stats.ttest_ind,
    alpha=0.05,
    method="bonf",
)
tbl

group1,group2,stat,pval,pval_corr,reject
HFD,HFD+SAL,0.0446,0.9651,1.0,False
HFD,HFD+SANA,-1.906,0.0774,0.2322,False
HFD+SAL,HFD+SANA,-2.3688,0.0328,0.0983,False


In [11]:
# Show the raw results tuple returned by allpairtest at full precision.
# Judging from the output: (statistic, p-value) per pair, the reject flags,
# the corrected p-values, and two per-comparison alpha levels. The last value
# equals 0.05 / 3; the one before it matches 1 - (1 - 0.05) ** (1 / 3).
a1

(array([[ 0.04455278,  0.96509302],
        [-1.90595357,  0.0773972 ],
        [-2.36879031,  0.03276748]]),
 array([False, False, False]),
 array([1.        , 0.23219159, 0.09830243]),
 0.016952427508441503,
 0.016666666666666666)

In [12]:
# Tukey's HSD on the same three groups. `.pvalue` is the matrix of pairwise
# p-values, with rows and columns in argument order (grp1, grp2, grp3).
scipy.stats.tukey_hsd(grp1, grp2, grp3).pvalue

array([[1.        , 0.99910857, 0.09826946],
       [0.99910857, 1.        , 0.09100813],
       [0.09826946, 0.09100813, 1.        ]])

Here is Figure 1L:

![file](../img/pub_fig_1l.png)

This figure is described as involving a one-way ANOVA followed by a Bonferroni post-hoc test. Using the [source data](https://www.nature.com/articles/s42255-025-01311-z#Sec38) for this figure, we arrive at F(2, 21) = 3.23, p = 0.0600. This implies that there was no significant difference between the group means at α = 0.05. Moreover, the published p-values for the post-hoc test appear to be incorrect: they match neither a Bonferroni post-hoc comparison (i.e., independent t-tests between groups, with Bonferroni FWER correction applied to the p-value of each pairwise test) nor Tukey's post-hoc test. **If performed correctly using either of these procedures, none of these comparisons shows a significant difference at p < 0.05**. Thus, the authors' statement that "Protection against DIO by SANA ... [was] parallelled by an increase in the percentage of lean mass in the SANA-treated mice" is not supported.

| Comparison | Published p-value | Bonferroni p-value | Tukey p-value |
| --- | --- | --- | --- |
| HFD vs HFD+SANA | 0.04 | 0.232 | 0.0983 |
| HFD vs HFD+SAL | 0.998 | 1.00 | 0.999 |
| HFD+SAL vs HFD+SANA | 0.02 | 0.0983 | 0.0910 |

Could the authors clarify? 

The code for this analysis is available at [github.com/reeserich/cal_et_al_2025](https://github.com/reeserich/cal_et_al_2025).