In [1]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from scipy.stats import zscore
from scipy.stats import f_oneway
from statsmodels.stats.multitest import multipletests

In [2]:
%%capture
df = pd.read_csv('data.csv')
pd.set_option('max_columns', None) # Display the DataFrame with all columns shown
print(list(df.columns))

# MCI_df = df[df['syndrome_v2_v2'] == 1]
# Dementia_df = df[df['syndrome_v2_v2'] == 2] eti_v2_v2
# CN_df = df[df['syndrome_v2_v2'] == 3]

In [3]:
# function to compare correlation coefficients
def compareCC(df, group_col, region_col, test_col):
    '''
    Objectives:
    to compare correaltion coefficients
    
    Parameters:
        - df (pd.DataFrame): Input dataframe
        - group_col (str) : Column name containing the group labels
        - test_col (str): Column name containing the test results
        - region_col (str): Column name containing the volumes of a region
        
    Returns:
        - 
        
    '''
  # Disregard the group not of focus
    df = df[df[group_col] != 4]

  # replace missing values in the test column with NaN
    df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)
    
  # drop rows with missing values in the test column
    df = df.dropna(subset = [test_col])

  # extract the 'score' column
    scores = df[test_col]


  # extract the 'syndrome_v2_v2' column
    groups = df[group_col].unique()
    
    
    correlation_coeffs = {}
    for group in groups:
        #print(group)
        group_data = df[df[group_col] == group]
        correlation_coeff, _ = pearsonr(group_data[region_col],group_data[test_col])
        print("region",len(group_data[region_col]),"test", len(group_data[test_col]))
        correlation_coeffs[group] = correlation_coeff
        # perform Fisher's z transformation
        z = 0.5 * np.log((1+correlation_coeff)/(1-correlation_coeff))
        print(group)
        print("correlation_coeff",correlation_coeff)
        print("z", z)

    # Convert correlation coefficients to a list or arrray
    correlation_coeffs_list = list(correlation_coeff.values())
    correlation_coeffs_z_list = list(z.values())
    print('hum', region_col,test_col,correaltion_coeffs_list,correlation_coeffs_z_list)
    
    # perform post hoc test using Bonferroni correction
    alpha = 0.05
    reject, p_values_corrected, _, _ = multipletests(correlation_coeffs_z_list, alpha = alpha, method = 'bonferroni')
    
 
    if reject[0]:
        print("---a significant---")
        
    else:
        print("no significant difference in correlation coefficient among the groups")


    

In [70]:
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'WTAR_raw')

compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'WTAR_raw')


compareCC(df, 'syndrome_v2_v2', 'Left-Putamen', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Putamen', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Putamen', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Putamen', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Putamen', 'WTAR_raw')

compareCC(df, 'syndrome_v2_v2', 'Right-Putamen', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Putamen', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Putamen', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Putamen', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Putamen', 'WTAR_raw')



compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Caudate', 'WTAR_raw')

compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Caudate', 'WTAR_raw')


compareCC(df, 'syndrome_v2_v2', 'Left-Pallidum', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Pallidum', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Pallidum', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Pallidum', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Pallidum', 'WTAR_raw')

compareCC(df, 'syndrome_v2_v2', 'Right-Pallidum', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Pallidum', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Pallidum', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Pallidum', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Pallidum', 'WTAR_raw')


compareCC(df, 'syndrome_v2_v2', 'Left-Accumbens-area', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Accumbens-area', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Accumbens-area', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Accumbens-area', 'TOPF_raw')
compareCC(df, 'syndrome_v2_v2', 'Left-Accumbens-area', 'WTAR_raw')

compareCC(df, 'syndrome_v2_v2', 'Right-Accumbens-area', 'Animals_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Accumbens-area', 'FAS_total_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Accumbens-area', 'BNT_totalwstim_raw')
compareCC(df, 'syndrome_v2_v2', 'Right-Accumbens-area', 'TOPF_raw')
ompareCC(df, 'syndrome_v2_v2', 'Right-Accumbens-area', 'WTAR_raw')

region 60 test 60
3
correlation_coeff 0.12129967909177913
z 0.12189990578205483
region 170 test 170
1
correlation_coeff 0.12523576562038238
z 0.12589672924757112
region 140 test 140
2
correlation_coeff 0.1431475119707137
z 0.14413746725719528


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[test_col] = df[test_col].replace([np.nan, np.inf, -np.inf], np.nan)


AttributeError: 'numpy.float64' object has no attribute 'values'