# Analyze phenotypic data
### Start by importing packages and loading the dataframe from the previous step

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pingouin
import seaborn as sns

# Load the dataframe that was pickled by the previous step of the analysis.
# NOTE: unpickling can execute arbitrary code, so only load a 'df.pkl' that
# you generated yourself.
df = pd.read_pickle('df.pkl')

### Check for group differences (Table 1)

In [None]:
# Summarize the whole cohort, then compare the two TR_RD groups (Table 1).
# Group coding as used throughout this cell: TR_RD == 0 is reported as
# "HIGH T" and TR_RD == 1 as "LOW T".

# Continuous variables reported both cohort-wide and per group
compare_cols = ['AGE', 'HAND', 'gFA', 'ICV', 'TBV']

# PRINT COHORT-WIDE STATISTICS
# Series.std() (ddof=1) is used instead of np.std() (ddof=0) so that the
# cohort-wide SDs follow the same sample-SD convention as the per-group SDs
# produced by groupby().std() below. ICV and TBV are each printed under
# their own label.
for col in compare_cols:
    print('AVERAGE ' + col + ':', df[col].mean(), '(', df[col].std(), ')')
print()

# Get the mean, sum, and std of stats broken down by groups.
# Only the reported columns are aggregated, so non-numeric columns in df
# cannot make the aggregation raise (pandas >= 2.0 no longer drops them).
grouped_ht_lt = df.groupby('TR_RD')[compare_cols]
mean_stats_ht_lt = grouped_ht_lt.mean()
sum_stats_ht_lt = grouped_ht_lt.sum()
sd_stats_ht_lt = grouped_ht_lt.std()

# Get boolean masks of high and low TOWRE participants
inds_low_t = (df['TR_RD'] == 1)
inds_high_t = (df['TR_RD'] == 0)

# Same masks under the RD / TR names used for the sex counts
inds_rd = inds_low_t
inds_tr = inds_high_t

# Get number of males and females in the low and high TOWRE groups.
# The arithmetic counts SEX == 1 rows as female and the remainder as male
# (assumes SEX is coded 0 = male, 1 = female -- TODO confirm).
f_rd = df.loc[inds_rd, 'SEX'].sum()
m_rd = inds_rd.sum() - f_rd
f_tr = df.loc[inds_tr, 'SEX'].sum()
m_tr = inds_tr.sum() - f_tr

# DIFFERENCES IN SEX (Chi-Square)
print('SEX: M HIGH T:', m_tr, ', F HIGH T:', f_tr, ', M LOW T', m_rd, ', F LOW T', f_rd, '\n')
# chi2_independence returns the expected table, the observed table and the
# test statistics; only the statistics are printed here.
sex_expected, sex_observed, sex_chi2_stats = pingouin.chi2_independence(df, x='TR_RD', y='SEX')
print(str(sex_chi2_stats) + '\n')

# DIFFERENCES IN AGE, HANDEDNESS, gFA, ICV AND TBV (Welch's t-test)
# correction=True forces Welch's correction; pingouin's default ('auto')
# only applies it when the two groups differ in size.
for col in compare_cols:
    print('COMPARE ' + col + ': HIGH T MEAN (SD):', mean_stats_ht_lt.loc[0, col],
          '(', sd_stats_ht_lt.loc[0, col], '), LOW T MEAN (SD):',
          mean_stats_ht_lt.loc[1, col], '(', sd_stats_ht_lt.loc[1, col], ')\n')
    print(str(pingouin.ttest(df.loc[inds_high_t, col], df.loc[inds_low_t, col],
                             correction=True)) + '\n')

### Plot TOWRE Distribution (Figure 1)

In [None]:
# Figure 1: stacked histogram of TOWRE scores, colored by group.
# NOTE(review): the hue column here is "RD_TR", whereas the group-statistics
# cell indexes "TR_RD" -- confirm that both columns exist in df.

# Apply the seaborn theme first, then override the matplotlib font settings
sns.set(font_scale=1.3)
sns.set_style("white")
plt.rc('font', family='Arial', weight='bold', size=22)

# 20-bin histogram with the two groups stacked on top of each other
t_plot = sns.displot(
    data=df,
    x="TOWRE",
    hue="RD_TR",
    bins=20,
    palette=['teal', 'red'],
    multiple="stack",
)
t_plot._legend.remove()  # drop the automatically generated legend

# Thick dotted black vertical line at a score of 85; the y-limit below clips it
plt.vlines(85, 0, 90, colors='k', linewidth=5, linestyle=':')
plt.ylim(0, 85)

# Axis labels share the same bold 20-pt styling
axis_label_style = {'fontsize': 20, 'fontweight': 'bold'}
plt.xlabel('TOWRE Total Score, Standardized', **axis_label_style)
plt.ylabel('Number of Participants', **axis_label_style)

# Use Adobe Illustrator to convert the PDF to EPS in order to keep the transparent background
plt.savefig('TOWRE.pdf')
plt.show()

### See how the different properties covary with each other (Figure 2)

In [None]:
# Figure 2: lower-triangle heatmap of pairwise Spearman correlations between
# the covariates, followed by pingouin's correlation table.

# Seaborn theme: doubled font scale on a plain white background
sns.set(font_scale=2)
sns.set_style("white")

# Keep only the covariate columns
df_covars = df[['AGE', 'SEX', 'HAND', 'gFA', 'ICV', 'TBV', 'TOWRE']]

# Spearman correlation for every pair of columns
corr = df_covars.corr(method='spearman')

# Boolean mask that hides the upper triangle (diagonal included)
upper_triangle = np.triu(np.ones(corr.shape, dtype=bool))

# Create the figure the heatmap is drawn into
fig, heatmap_ax = plt.subplots(figsize=(11, 9))

# Custom diverging colormap
diverging_cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the annotated heatmap with square cells and a color scale of +/- 0.45
sns.heatmap(
    corr,
    mask=upper_triangle,
    cmap=diverging_cmap,
    vmax=.45,
    vmin=-.45,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    annot=True,
    annot_kws={"fontsize": 15},
    ax=heatmap_ax,
)
#plt.savefig('covars_corr.eps',format='eps')
plt.show()

# Correlation table with Benjamini-Hochberg FDR adjustment; left as the final
# expression so the notebook displays it
pingouin.rcorr(df_covars, method='spearman', padjust='fdr_bh')