In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
import statsmodels.stats.multicomp as multicomp

In [4]:
# Load sheet '2E' of the source-data workbook; the path is relative to the
# notebook's working directory.
data_df = pd.read_excel(
    io='../data/42255_2025_1311_MOESM5_ESM.xlsx',
    sheet_name='2E',
)

In [6]:
# Show the loaded sheet as a sanity check: one column per group, one row per
# measurement.
data_df

Unnamed: 0,ND,HFD,HFD+SAL.,HFD+SANA
0,97.0,149.0,158.0,108.0
1,88.0,156.0,134.0,114.0
2,104.0,171.0,137.0,76.0
3,101.0,158.0,159.0,70.0
4,104.0,173.0,150.0,130.0
5,101.0,209.0,155.0,63.0
6,103.0,186.0,189.0,118.0
7,113.0,192.0,158.0,103.0
8,99.0,178.0,177.0,121.0
9,102.0,189.0,175.0,145.0


In [5]:
# Print the exact column labels. The repr exposes quirks that the column
# look-ups must reproduce verbatim: 'HFD+SAL.' ends with a period and
# 'HFD+SANA ' ends with a space.
data_df.columns

Index(['ND', 'HFD', 'HFD+SAL.', 'HFD+SANA '], dtype='object')

In [7]:
# Pull each group's measurements out as a NumPy array with missing cells
# removed. Column labels are copied verbatim from the sheet, including the
# trailing '.' in 'HFD+SAL.' and the trailing space in 'HFD+SANA '.
grp1, grp2, grp3, grp4 = (
    data_df[column_label].dropna().values
    for column_label in ('ND', 'HFD', 'HFD+SAL.', 'HFD+SANA ')
)

In [8]:
# Group arrays in a fixed order (ND, HFD, HFD+SAL., HFD+SANA); later cells
# index into this list positionally, so the order matters.
collect = [grp1, grp2, grp3, grp4]

In [9]:
def reorder_label(label):
    """Put the 'ND' reference group first in a '<g1> vs <g2>' comparison label.

    Parameters
    ----------
    label : str
        Comparison label of the form '<group1> vs <group2>'.

    Returns
    -------
    str
        'ND vs <group1>' when the second group is exactly 'ND' (surrounding
        whitespace ignored); otherwise `label` unchanged.
    """
    # Split on the full ' vs ' separator so a group name that merely contains
    # the letters "vs" is not cut in half.
    first, separator, second = label.partition(' vs ')
    # Compare the whole second group name rather than the last two characters,
    # so names that only end in "ND" (e.g. "BLEND") are left alone.
    if separator and second.strip() == 'ND':
        return 'ND vs ' + first.strip()
    return label

In [10]:
# One-way ANOVA across the groups in `collect`, followed by all-pairs post-hoc
# comparisons (Bonferroni-corrected t-tests and Tukey HSD) merged into a single
# markdown table keyed by 'Comparison'.

# Display labels, in the same order as the arrays in `collect`. Defined once so
# the Bonferroni and Tukey tables cannot drift apart.
group_labels = ['ND', 'HFD', 'HFD+SAL', 'HFD+SANA']
n_groups = len(collect)
assert len(group_labels) == n_groups, 'need exactly one label per group in `collect`'

all_values = np.concatenate(collect)
n_total = len(all_values)

print('N samples: ' + str(n_total))
print('ANOVA:')
f = scipy.stats.f_oneway(*collect)
# Degrees of freedom: (k - 1) between groups and (N - k) within groups.
print('F(' + str(n_groups - 1) + ', ' + str(n_total - n_groups) + ') = ' + str(f.statistic) + ', p = ' + str(f.pvalue))

# --- Bonferroni-corrected pairwise t-tests ---
# One label per observation, aligned with `all_values`.
group_tags = np.repeat(group_labels, [len(values) for values in collect])
comp = multicomp.MultiComparison(data=all_values, groups=group_tags)
tbl, a1, a2 = comp.allpairtest(scipy.stats.ttest_ind, method="bonf", alpha=0.05)
# `a2` is the per-pair result array with named fields (group1, group2, ...,
# pval_corr); building the frame from it avoids parsing the display table.
bonf = pd.DataFrame(a2)
# Use the corrected p-values from the results tuple rather than the table's own
# column (presumably rounded for display -- TODO confirm).
bonf['pval_corr'] = a1[2]
bonf['g1'] = bonf['group1']
bonf['g2'] = bonf['group2']
bonf_df = bonf.copy()
bonf_df['Bonferroni p-value'] = bonf_df['pval_corr']
# Pair order in the result array need not put 'ND' first; reorder_label
# normalises those labels so they match the Tukey table below.
bonf_df['Comparison'] = (
    bonf_df['g1'].astype(str) + ' vs ' + bonf_df['g2'].astype(str)
).apply(reorder_label)

# --- Tukey HSD ---
tukey = scipy.stats.tukey_hsd(*collect).pvalue
# Upper-triangle index pairs (i < j) in row-major order: every unordered pair
# of groups exactly once.
first_idx, second_idx = np.triu_indices(n_groups, k=1)
tukey_df = pd.DataFrame({
    'g1': [group_labels[i] for i in first_idx],
    'g2': [group_labels[j] for j in second_idx],
    'Tukey p-value': tukey[first_idx, second_idx],
})
tukey_df['Comparison'] = tukey_df['g1'].astype(str) + ' vs ' + tukey_df['g2'].astype(str)

# An outer merge on string labels would silently emit NaN rows if the two
# tables disagreed on a label, so fail loudly instead.
unmatched = set(bonf_df['Comparison']) ^ set(tukey_df['Comparison'])
assert not unmatched, 'comparison labels differ between tables: ' + str(sorted(unmatched))

combo_df = pd.merge(bonf_df, tukey_df, on=['Comparison'], how='outer', validate='one_to_one')
# Placeholder column, left blank here.
combo_df['Published p-value'] = ''
combo_df = combo_df[['Comparison', 'Published p-value', 'Bonferroni p-value', 'Tukey p-value']].set_index('Comparison')
print(combo_df.to_markdown())

N samples: 40
ANOVA:
F(3, 36) = 40.833292720312514, p = 1.120145238250933e-11
| Comparison          | Published p-value   |   Bonferroni p-value |   Tukey p-value |
|:--------------------|:--------------------|---------------------:|----------------:|
| HFD vs HFD+SAL      |                     |          0.296699    |     0.203163    |
| HFD vs HFD+SANA     |                     |          1.18411e-05 |     2.47246e-09 |
| ND vs HFD           |                     |          2.72389e-09 |     7.34025e-10 |
| HFD+SAL vs HFD+SANA |                     |          0.000255797 |     9.60083e-07 |
| ND vs HFD+SAL       |                     |          5.57464e-08 |     2.61851e-07 |
| ND vs HFD+SANA      |                     |          1           |     0.97326     |
