In [1]:
import numpy as np
import pandas as pd
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
#%%capture
#load the dataset
df = pd.read_csv('data.csv')
pd.set_option('max_columns', None) # Display the DataFrame with all columns shown
print(list(df.columns))

# 1 Mild Cognitive Impairment
# 2 Dementia
# 3 Subjective Memory Complaint/Normal Cognition
# 4 Unknown - defer to record


# MCI_df = df[df['syndrome_v2_v2'] == 1]
# Dementia_df = df[df['syndrome_v2_v2'] == 2]
# CN_df = df[df['syndrome_v2_v2'] == 3]


FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'

In [3]:
def perform_one_way_anova(df, group_col, test_col):
    '''
    perfrom one-way ANOVA on multiple tests in a dataframe.
    
    Parameters:
        - df (pd.DataFrame): Input dataframe
        - group_col (str) : Column name containing the group labels
        - test_col (str): Column name containing the test results
        
    Returns:
        - f_statistic (float): F-statistic from the ANOVA
        - p_value (float): p_value from the ANOVA
        - posthoc_results (pd.DataFrame): Post hot test results using Tukey's HSD test if p-value < 0.05
        
    '''
    df = df[df[group_col] != 4]  
  # replace missing values in the test column with NaN
    df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)
    
  # drop rows with missing values in the test column
    df = df.dropna(subset = [test_col])

  # extract the 'score' column
    scores = df[test_col]

  # extract the 'syndrome_v2_v2' column
    groups = df[group_col].unique()
  # perform one-way ANOVA
    f_statistic, p_value = f_oneway(*[scores[df[group_col] == group] for group in groups])
 
    
    # perform post hoc test using Tukey's HSD test if p-value < 0.05
    posthoc_results = None
    if p_value < 0.05:
        posthoc_results = pairwaise_tukeyhsd(df[test_col],df[group_col])
        
    return f_statistic, p_value, posthoc_results

In [4]:
# Animals_raw

# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Animals_raw')

# print ANOVA results
print("ANOVA Results for Animals_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)


NameError: name 'np' is not defined

In [38]:
# FAS_total_raw  
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'FAS_total_raw')

# print ANOVA results
print("ANOVA Results for FAS_total_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for FAS_total_raw
F-statistic: 0.5629508223530136
p-value: 0.5700143626374399


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [39]:
# BNT_totalwstim_raw
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'FAS_total_raw')

# print ANOVA results
print("ANOVA Results for FAS_total_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for FAS_total_raw
F-statistic: 0.5629508223530136
p-value: 0.5700143626374399


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [41]:
# Left Caudate
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Left-Caudate')

# print ANOVA results
print("ANOVA Results for Left-Caduate")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Left-Caduate
F-statistic: 0.2549603246632527
p-value: 0.7750662476202127


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [42]:
# Right Caudate
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Right-Caudate')

# print ANOVA results
print("ANOVA Results for Right-Caduate")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Left-Caduate
F-statistic: 0.38680883783412534
p-value: 0.6794609237646184


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [43]:
# Left Putamen
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Left-Putamen')

# print ANOVA results
print("ANOVA Results for Left-Putamen")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Left-Putamen
F-statistic: 0.9713155038660651
p-value: 0.379427431455273


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [45]:
# Right Putamen
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Right-Putamen')

# print ANOVA results
print("ANOVA Results for Right-Putamen")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Right-Putamen
F-statistic: 0.2763074309447219
p-value: 0.7587164906523742


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [49]:
# Right-Pallidum
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Right-Pallidum')

# print ANOVA results
print("ANOVA Results for Right-Pallidum")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Right-Pallidum
F-statistic: 0.41647784613013683
p-value: 0.6596351977918318


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [51]:
# Left-Pallidum
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Left-Pallidum')

# print ANOVA results
print("ANOVA Results for Left-Pallidum")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Left-Pallidum
F-statistic: 0.6207819649034203
p-value: 0.5380129207198259


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [52]:
# Right-Accumbens-area
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Right-Accumbens-area')

# print ANOVA results
print("ANOVA Results for Right-Accumbens-area")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Right-Accumbens-area
F-statistic: 0.47975781090651315
p-value: 0.6192696364734006


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [53]:
# Left-Accumbens-area
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'Left-Accumbens-area')

# print ANOVA results
print("ANOVA Results for Left-Accumbens-area")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for Left-Accumbens-area
F-statistic: 1.0847539592356772
p-value: 0.338923229607166


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


In [54]:
#TOPF_raw
# perform an one-way ANOVA
f_statistic, p_value, posthoc_results = perform_one_way_anova(df,'syndrome_v2_v2', 'TOPF_raw')

# print ANOVA results
print("ANOVA Results for TOPF_raw")
print("F-statistic:", f_statistic)
print("p-value:", p_value)

# print post hoc test results if applicable
if posthoc_results is not None:
    print("Post hoc test results:")
    print(posthoc_results)

ANOVA Results for TOPF_raw
F-statistic: 0.32701575711758807
p-value: 0.7218642185083002


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)
