In [11]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [12]:
#%%capture
# Show every column whenever a DataFrame is displayed.
pd.options.display.max_columns = None

# Load the dataset and list its column names.
df = pd.read_csv('4-1-add-dx-lh_aparc.csv')
print(df.columns.tolist())

# Diagnosis codes (compared against 'syndrome_v2_v2' in the subsets sketched below):
#   1  Mild Cognitive Impairment
#   2  Dementia
#   3  Subjective Memory Complaint/Normal Cognition
#   4  Unknown - defer to record
#
# Per-group subsets (currently disabled):
# MCI_df = df[df['syndrome_v2_v2'] == 1]
# Dementia_df = df[df['syndrome_v2_v2'] == 2]
# CN_df = df[df['syndrome_v2_v2'] == 3]


['V1', 'Study_ID', 'patientlist_ethnicbackground_v2_v2', 'patientlist_ethnicgroup_v2_v2', 'age_at_onset_v2_v2', 'syndrome_v2_v2', 'syndromeseverityconfidence_v2_v2', 'Age', 'Edu', 'NPT_date', 'FAS_total_raw', 'FAS_total_T', 'Animals_raw', 'Animals_T', 'BNT_totalwstim_raw', 'BNT_totalwstim_T', 'bankssts', 'caudalanteriorcingulate', 'caudalmiddlefrontal', 'cuneus', 'entorhinal', 'fusiform', 'inferiorparietal', 'inferiortemporal', 'isthmuscingulate', 'lateraloccipital', 'lateralorbitofrontal', 'lingual', 'medialorbitofrontal', 'middletemporal', 'parahippocampal', 'paracentral', 'parsopercularis', 'parsorbitalis', 'parstriangularis', 'pericalcarine', 'postcentral', 'posteriorcingulate', 'precentral', 'precuneus', 'rostralanteriorcingulate', 'rostralmiddlefrontal', 'superiorfrontal', 'superiorparietal', 'superiortemporal', 'supramarginal', 'frontalpole', 'temporalpole', 'transversetemporal', 'insula', 'Handedness']


In [15]:
def perform_one_way_anova(df, group_col, test_col, exclude_groups=(4,), alpha=0.05):
    '''
    Perform a one-way ANOVA of one test column across the groups of a dataframe.

    Rows are ignored when their group label is missing or listed in
    ``exclude_groups``, or when their test value is missing or infinite.
    The input dataframe is never modified.

    Parameters:
        - df (pd.DataFrame): Input dataframe
        - group_col (str): Column name containing the group labels
        - test_col (str): Column name containing the test results
        - exclude_groups (iterable): Group labels to leave out of the analysis
          (default ``(4,)``, the value this function has always excluded)
        - alpha (float): Significance threshold below which the post hoc test
          is run (default 0.05)

    Returns:
        - f_statistic (float): F-statistic from the ANOVA (NaN if fewer than
          two groups remain after filtering)
        - p_value (float): p-value from the ANOVA (NaN if fewer than two groups
          remain after filtering)
        - posthoc_results: Tukey's HSD result object as returned by
          pairwise_tukeyhsd if p_value < alpha, otherwise None

    '''
    labels = df[group_col]

    # Treat +/-inf like a missing measurement. Working on a derived Series
    # (instead of assigning into a filtered frame) avoids pandas'
    # SettingWithCopyWarning and leaves the caller's dataframe untouched.
    scores = df[test_col].replace([np.inf, -np.inf], np.nan)

    # A row is usable only if it has a real, non-excluded group label and a
    # finite score. Rows with a missing label must go: otherwise NaN shows up
    # in unique(), produces an empty sample and turns the whole ANOVA into NaN.
    usable = labels.notna() & ~labels.isin(list(exclude_groups)) & scores.notna()
    labels = labels[usable]
    scores = scores[usable]

    groups = labels.unique()

    # f_oneway needs at least two samples; report "no result" instead of raising
    # so a column-by-column screening loop can keep going.
    if len(groups) < 2:
        return np.nan, np.nan, None

    # perform one-way ANOVA, one sample per group
    f_statistic, p_value = f_oneway(*[scores[labels == group] for group in groups])

    # perform post hoc test using Tukey's HSD test only when the omnibus test
    # is significant
    posthoc_results = None
    if p_value < alpha:
        posthoc_results = pairwise_tukeyhsd(scores, labels)

    return f_statistic, p_value, posthoc_results

In [39]:
# Screen every column from position 10 onward with a one-way ANOVA across the
# 'syndrome_v2_v2' groups and bucket the significant ones by p-value.
# NOTE(review): per the printed column list this slice starts at 'FAS_total_raw'
# and also takes in the trailing 'Handedness' column - confirm that is intended.
sig_05, sig_001 = {}, {}   # 0.001 <= p < 0.05  /  p < 0.001
for i in df.columns[10:]:
    print(i)
    f_statistic, p_value, posthoc_results = perform_one_way_anova(
        df, group_col='syndrome_v2_v2', test_col=i
    )

    # Record the column under the strictest threshold it meets.
    if p_value < 0.001:
        sig_001[i] = p_value
    elif p_value < 0.05:
        sig_05[i] = p_value

    # Omnibus test results
    print("ANOVA Results for", i)
    print("F-statistic :", f_statistic)
    print("p-value :", p_value)

    # Tukey HSD table, present only when the function ran the post hoc test.
    # A [lower, upper] interval that contains 0 suggests the two group means
    # being compared may not differ significantly.
    if posthoc_results is not None:
        print("Post hoc test results:", posthoc_results, sep="\n")


FAS_total_raw
ANOVA Results for FAS_total_raw
F-statistic : 30.108894935405942
p-value : 9.1417578191093e-13
Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
     1      2  -7.9768    0.0 -11.5986 -4.3549   True
     1      3   7.4226 0.0006   2.7913  12.054   True
     2      3  15.3994    0.0  10.5362 20.2626   True
-----------------------------------------------------
FAS_total_T
ANOVA Results for FAS_total_T
F-statistic : 23.56398473348934
p-value : 2.6719777408228393e-10
Post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj  lower   upper  reject
---------------------------------------------------
     1      2  -6.4206    0.0 -9.601 -3.2402   True
     1      3   5.3429 0.0057 1.3039  9.3819   True
     2      3  11.7635    0.0 7.5238 16.0032   True
------------------------------------------------

ANOVA Results for lateraloccipital
F-statistic : 20.21415649690674
p-value : 4.445915405989812e-09
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05    
group1 group2  meandiff p-adj    lower      upper   reject
----------------------------------------------------------
     1      2 -583.0419 0.0057 -1023.9143 -142.1694   True
     1      3  990.1726 0.0001   425.5724 1554.7727   True
     2      3 1573.2144    0.0   989.3468 2157.0821   True
----------------------------------------------------------
lateralorbitofrontal
ANOVA Results for lateralorbitofrontal
F-statistic : 19.538052280950467
p-value : 8.210425953097296e-09
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower     upper   reject
---------------------------------------------------------
     1      2 -374.8178 0.0002 -591.5142 -158.1214   True
     1      3  359.0545  0.007   81.5438  636.5652   True
     2      3  733.8723    

ANOVA Results for precuneus
F-statistic : 20.290979317022867
p-value : 4.147094052000126e-09
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower     upper   reject
---------------------------------------------------------
     1      2 -395.8039 0.0032 -679.5571 -112.0508   True
     1      3   615.868 0.0002  252.4816  979.2545   True
     2      3  1011.672    0.0  635.8847 1387.4593   True
---------------------------------------------------------
rostralanteriorcingulate
ANOVA Results for rostralanteriorcingulate
F-statistic : 5.088116452636285
p-value : 0.006586703357277027
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
     1      2 -99.8257 0.1476 -225.1505  25.4991  False
     1      3 120.9983 0.1798   -39.498 281.4946  False
     2      3  220.824 0.0053   54.8506 386.



In [45]:
# List the significant columns: first those with 0.001 <= p < 0.05, then p < 0.001.
for bucket in (sig_05, sig_001):
    for key, value in bucket.items():
        print(key, ":", value)

caudalanteriorcingulate : 0.010846226613697491
rostralanteriorcingulate : 0.006586703357277027
FAS_total_raw : 9.1417578191093e-13
FAS_total_T : 2.6719777408228393e-10
Animals_raw : 1.4102497458473314e-29
Animals_T : 5.112811463780468e-23
BNT_totalwstim_raw : 7.101818191194911e-10
BNT_totalwstim_T : 9.982408059642277e-09
bankssts : 1.2887927481580009e-11
caudalmiddlefrontal : 1.4251378572482885e-08
cuneus : 0.0002683616141001314
entorhinal : 2.0488484570062532e-09
fusiform : 5.226215359890584e-15
inferiorparietal : 8.325649629609764e-13
inferiortemporal : 2.42928852512253e-10
isthmuscingulate : 2.094335195721117e-06
lateraloccipital : 4.445915405989812e-09
lateralorbitofrontal : 8.210425953097296e-09
lingual : 1.194604303147026e-07
medialorbitofrontal : 8.272521263412245e-08
middletemporal : 2.8479452901285614e-19
parahippocampal : 2.2268493339283926e-11
paracentral : 0.00021110619059520782
parsopercularis : 4.055828431733781e-11
parsorbitalis : 5.099795933217719e-05
parstriangularis :

In [16]:
# Animals_raw: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='Animals_raw'
)

# Omnibus test results
print("ANOVA Results for Animals_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test.
# A [lower, upper] interval that contains 0 suggests the two group means
# being compared may not differ significantly.
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")


ANOVA Results for Animals_raw
F-statistic: 81.38074613037196
p-value: 1.4102497458473314e-29
Post hoc test results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj  lower   upper  reject
---------------------------------------------------
     1      2  -5.1893   0.0 -6.5883 -3.7904   True
     1      3   4.4737   0.0  2.7133   6.234   True
     2      3    9.663   0.0  7.8015 11.5246   True
---------------------------------------------------


In [17]:
# FAS_total_raw: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='FAS_total_raw'
)

# Omnibus test results
print("ANOVA Results for FAS_total_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for FAS_total_raw
F-statistic: 30.108894935405942
p-value: 9.1417578191093e-13
Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
     1      2  -7.9768    0.0 -11.5986 -4.3549   True
     1      3   7.4226 0.0006   2.7913  12.054   True
     2      3  15.3994    0.0  10.5362 20.2626   True
-----------------------------------------------------


In [18]:
# FAS_total_raw: one-way ANOVA across the 'syndrome_v2_v2' groups
# NOTE(review): this cell repeats the FAS_total_raw analysis of the previous
# cell verbatim - confirm whether another column (e.g. FAS_total_T) was intended.
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='FAS_total_raw'
)

# Omnibus test results
print("ANOVA Results for FAS_total_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for FAS_total_raw
F-statistic: 30.108894935405942
p-value: 9.1417578191093e-13
Post hoc test results:
 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
     1      2  -7.9768    0.0 -11.5986 -4.3549   True
     1      3   7.4226 0.0006   2.7913  12.054   True
     2      3  15.3994    0.0  10.5362 20.2626   True
-----------------------------------------------------


In [19]:
# Left bankssts: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='bankssts'
)

# Omnibus test results
print("ANOVA Results for the left bankssts")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for bankssts
F-statistic: 26.762788614515816
p-value: 1.2887927481580009e-11
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower    upper   reject
--------------------------------------------------------
     1      2 -168.8009 0.0001 -264.5912 -73.0106   True
     1      3   220.103 0.0001   97.4299 342.7762   True
     2      3  388.9039    0.0  262.0444 515.7634   True
--------------------------------------------------------


In [20]:
# Left caudalanteriorcingulate: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='caudalanteriorcingulate'
)

# Omnibus test results
print("ANOVA Results for the left caudalanteriorcingulate")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for the left caudalanteriorcingulate
F-statistic: 4.576960190112736
p-value: 0.010846226613697491
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
     1      2 -101.685 0.0537 -204.6285   1.2585  False
     1      3  56.7977 0.5687  -75.0361 188.6316  False
     2      3 158.4827 0.0179   22.1499 294.8155   True
-------------------------------------------------------


In [21]:
# Left caudalmiddlefrontal: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='caudalmiddlefrontal'
)

# Omnibus test results
print("ANOVA Results for Left caudalmiddlefrontal")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for Left caudalmiddlefrontal
F-statistic: 18.932068362972913
p-value: 1.4251378572482885e-08
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower     upper   reject
---------------------------------------------------------
     1      2 -285.8619 0.0111 -518.0305  -53.6933   True
     1      3   517.043 0.0002   219.718   814.368   True
     2      3  802.9049    0.0  495.4334 1110.3764   True
---------------------------------------------------------


In [22]:
# Left cuneus: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='cuneus'
)

# Omnibus test results
print("ANOVA Results for Left cuneus")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for Left cuneus
F-statistic: 8.399482982984619
p-value: 0.0002683616141001314
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower    upper   reject
--------------------------------------------------------
     1      2 -131.6128  0.048 -262.3106  -0.9149   True
     1      3  164.9278 0.0545   -2.4494 332.3051  False
     2      3  296.5406 0.0002  123.4515 469.6298   True
--------------------------------------------------------


In [23]:
# Left entorhinal: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='entorhinal'
)

# Omnibus test results
print("ANOVA Results for the left entorhinal")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for the left entorhinal
F-statistic: 21.071084328741936
p-value: 2.0488484570062532e-09
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower    upper   reject
--------------------------------------------------------
     1      2 -140.8885 0.0115 -255.7377 -26.0392   True
     1      3  278.6417    0.0  131.5608 425.7225   True
     2      3  419.5302    0.0    267.43 571.6303   True
--------------------------------------------------------


In [24]:
# Left fusiform: one-way ANOVA across the 'syndrome_v2_v2' groups
# (the analysed column is 'fusiform'; the earlier "Left-Pallidum" heading was stale)
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='fusiform'
)

# Omnibus test results
print("ANOVA Results for the fusiform")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for the fusiform
F-statistic: 35.828642820226634
p-value: 5.226215359890584e-15
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj   lower     upper   reject
--------------------------------------------------------
     1      2 -670.7954   0.0 -995.1125 -346.4783   True
     1      3  850.0427   0.0  434.7083  1265.377   True
     2      3 1520.8381   0.0 1091.3301 1950.3462   True
--------------------------------------------------------


In [25]:
# Left inferiorparietal: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='inferiorparietal'
)

# Omnibus test results
print("ANOVA Results for Left Inferiorparietal")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for Left Inferiorparietal
F-statistic: 29.901329193495993
p-value: 8.325649629609764e-13
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05    
group1 group2  meandiff p-adj    lower      upper   reject
----------------------------------------------------------
     1      2 -863.7503    0.0 -1263.1227 -464.3779   True
     1      3  805.0304 0.0007    293.577 1316.4838   True
     2      3 1668.7808    0.0  1139.8736  2197.688   True
----------------------------------------------------------


In [26]:
# Left inferiortemporal: one-way ANOVA across the 'syndrome_v2_v2' groups
# (the analysed column is 'inferiortemporal'; the earlier "Left-Accumbens-area" heading was stale)
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='inferiortemporal'
)

# Omnibus test results
print("ANOVA Results for inferiortemporal")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for inferiortemporal
F-statistic: 23.447348437815045
p-value: 2.42928852512253e-10
Post hoc test results:
   Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower     upper   reject
---------------------------------------------------------
     1      2 -834.9056    0.0 -1258.701 -411.1102   True
     1      3  721.1173 0.0054  178.3868 1263.8478   True
     2      3 1556.0229    0.0  994.7712 2117.2746   True
---------------------------------------------------------


In [28]:
# Left isthmuscingulate: one-way ANOVA across the 'syndrome_v2_v2' groups
f_statistic, p_value, posthoc_results = perform_one_way_anova(
    df, group_col='syndrome_v2_v2', test_col='isthmuscingulate'
)

# Omnibus test results
print("ANOVA Results for isthmuscingulate")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# Tukey HSD table, present only when the function ran the post hoc test
if posthoc_results is not None:
    print("Post hoc test results:", posthoc_results, sep="\n")

ANOVA Results for isthmuscingulate
F-statistic: 13.525853062397806
p-value: 2.094335195721117e-06
Post hoc test results:
  Multiple Comparison of Means - Tukey HSD, FWER=0.05   
group1 group2  meandiff p-adj    lower    upper   reject
--------------------------------------------------------
     1      2 -144.0112 0.0064 -254.2145 -33.8079   True
     1      3  172.3022 0.0119   31.1711 313.4334   True
     2      3  316.3134    0.0  170.3661 462.2608   True
--------------------------------------------------------
