In [None]:
## Normative Modeling of ABCD Data (Thickness_SubcorticalVolume) using Rutherford Normed Data ##

# The purpose of this script is to essentially concatenate all ABCD data with relevant information, and then apply the pretrained 
# models from Rutherford (2022, 2023 at eLife) to derive normative estimates for each participant. All of the datasets were used in the
# training dataset, so while the "adaptation" code is included, it is irrelevant here.
# This is the version of the code used to derive data for a manuscript submission. The only "cleaning"
# was to replace file paths specific to our lab with a generic "[Path]" placeholder. Some
# commented-out sections remain from troubleshooting and may need to be uncommented.

In [None]:
# Set working and braincharts directory location
# '[Path]' is a placeholder (lab-specific paths were removed); replace it with a real
# location before running.
import os
WorkingDir = '/[Path]/BLR_Rutherford'
# Make WorkingDir the current directory. Later cells build other locations from
# WorkingDir (e.g. the 'braincharts' folder is joined onto it).
os.chdir(WorkingDir)


In [None]:
#Clone The GitHub Directory if we haven't already
#! git clone https://github.com/predictive-clinical-neuroscience/braincharts.git

In [None]:
# Activate the correct environment and import the toolkit
# BrainChartsDir is the 'braincharts' folder inside WorkingDir.
BrainChartsDir = os.path.join(WorkingDir,'braincharts')
# Step into braincharts/scripts before running the imports below.
# NOTE(review): presumably this is what lets `from nm_utils import ...` resolve
# (nm_utils looks like a helper script that lives in that folder) - confirm.
os.chdir(BrainChartsDir)
os.chdir('scripts')
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
import seaborn as sns
import pcntoolkit as pcn
from pcntoolkit.normative import estimate, predict, evaluate
from pcntoolkit.util.utils import compute_MSLL, create_design_matrix
from nm_utils import remove_bad_subjects, load_2d

In [None]:
# Move to the data location
# NOTE(review): the following cell immediately changes directory again (to
# '/[Path]/ABCD_Data'), so this chdir looks redundant - confirm before removing.
os.chdir('/[Path]/')

In [None]:
# Load the ABCD tables (demographics, longitudinal demographics, Aparc, Aseg, site,
# residential history); every table carries the NDAR 'subjectkey' column used for merging.
os.chdir('/[Path]/ABCD_Data')


def _read_abcd_table(filename):
    """Read one tab-separated ABCD text file from the current directory.

    The first line is used as the header and the line directly below it
    (file row index 1) is skipped.
    NOTE(review): presumably that skipped row holds the variable descriptions - confirm.
    """
    return pd.read_csv(filename, sep="\t", skiprows=[1])


ABCD_Dem = _read_abcd_table("pdem02.txt")
ABCD_LongDem = _read_abcd_table("abcd_lpds01.txt")
ABCD_Aparc = _read_abcd_table("abcd_mrisdp10201.txt")
ABCD_Aseg = _read_abcd_table("abcd_smrip10201.txt")
ABCD_Site = _read_abcd_table("abcd_lt01.txt")
ABCD_Res = _read_abcd_table("abcd_rhds01.txt")


#HCP_A_LH_Aparc=pd.read_csv("HCP_Aging_Destrieux_Thickness_LH.txt", sep="\t")
#HCP_A_LH_Aparc['src_subject_id']=HCP_A_LH_Aparc['lh.aparc.a2009s.thickness'].str.split('_').str[0]
#HCP_A_MRNames = HCP_A_LH_Aparc.pop('src_subject_id')
#HCP_A_LH_Aparc.insert(0, 'src_subject_id', HCP_A_MRNames)
#HCP_A_RH_Aparc=pd.read_csv("HCP_Aging_Destrieux_Thickness_RH.txt", sep="\t")
#HCP_A_RH_Aparc.insert(0, 'src_subject_id', HCP_A_MRNames)
#HCP_A_Aseg=pd.read_csv("HCP_Aging_AsegStats.txt", sep="\t")
#HCP_A_Aseg_Names=pd.read_csv("SubsFullFile.txt", sep="\t", header=None)
#HCP_A_Aseg_Names['src_subject_id']=HCP_A_Aseg_Names[0].str.split('_').str[0]
#HCP_A_ASeg_SubName=HCP_A_Aseg_Names.pop('src_subject_id')
#HCP_A_Aseg.insert(0, 'src_subject_id', HCP_A_ASeg_SubName)



In [None]:
# Get specific variables for concatenation
# Every subset keeps the two identifier columns first and is an explicit copy, so later
# edits never write back into the raw tables.
_ID_COLS = ['subjectkey', 'eventname']

# Residential-history measures
_RES_COLS = [
    'reshist_addr1_adi_perc',
    'reshist_addr1_no2',
    'reshist_addr1_pm25',
    'reshist_state_racism_factor',
    'reshist_addr1_coi_r_coi_nat',
]
ABCD_ResVars = ABCD_Res[_ID_COLS + _RES_COLS].copy()

# Site identifier
ABCD_SiteVars = ABCD_Site[_ID_COLS + ['site_id_l']].copy()

# Baseline demographics: age/sex, socio-economic items, then the race indicator columns
# demo_race_a_p___10 ... demo_race_a_p___25 followed by ___77 and ___99.
_RACE_COLS = ['demo_race_a_p___{}'.format(code) for code in list(range(10, 26)) + [77, 99]]
_DEM_COLS = [
    'interview_age',
    'sex',
    'demo_comb_income_v2',
    'demo_prnt_ed_v2',
    'demo_prtnr_ed_v2',
    'demo_prnt_empl_v2',
    'demo_prtnr_empl_v2',
    'demo_prnt_marital_v2',
]
ABCD_DemVars = ABCD_Dem[_ID_COLS + _DEM_COLS + _RACE_COLS].copy()

# Longitudinal demographics
_LONG_DEM_COLS = [
    'interview_age',
    'sex',
    'demo_comb_income_v2_l',
    'demo_prnt_ed_v2_2yr_l',
    'demo_prtnr_ed_v2_2yr_l',
    'demo_prnt_empl_v2_l',
    'demo_prtnr_empl_v2_l',
    'demo_prnt_marital_v2_l',
]
ABCD_LongDemVars = ABCD_LongDem[_ID_COLS + _LONG_DEM_COLS].copy()


In [None]:
# Get Aparc and Aseg Names #
# Region labels in the order of the ABCD 'mrisdp_N' thickness columns. The same 74 labels
# are used for both hemispheres: mrisdp_1..74 are left ('lh'), mrisdp_75..148 are right ('rh').
_APARC_REGIONS = [
    'G&S_frontomargin',
    'G&S_occipital_inf',
    'G&S_paracentral',
    'G&S_subcentral',
    'G&S_transv_frontopol',
    'G&S_cingul-Ant',
    'G&S_cingul-Mid-Ant',
    'G&S_cingul-Mid-Post',
    'G_cingul-Post-dorsal',
    'G_cingul-Post-ventral',
    'G_cuneus',
    'G_front_inf-Opercular',
    'G_front_inf-Orbital',
    'G_front_inf-Triangul',
    'G_front_middle',
    'G_front_sup',
    'G_Ins_lg&S_cent_ins',
    'G_insular_short',
    'G_occipital_middle',
    'G_occipital_sup',
    'G_oc-temp_lat-fusifor',
    'G_oc-temp_med-Lingual',
    'G_oc-temp_med-Parahip',
    'G_orbital',
    'G_pariet_inf-Angular',
    'G_pariet_inf-Supramar',
    'G_parietal_sup',
    'G_postcentral',
    'G_precentral',
    'G_precuneus',
    'G_rectus',
    'G_subcallosal',
    'G_temp_sup-G_T_transv',
    'G_temp_sup-Lateral',
    'G_temp_sup-Plan_polar',
    'G_temp_sup-Plan_tempo',
    'G_temporal_inf',
    'G_temporal_middle',
    'Lat_Fis-ant-Horizont',
    'Lat_Fis-ant-Vertical',
    'Lat_Fis-post',
    'Pole_occipital',
    'Pole_temporal',
    'S_calcarine',
    'S_central',
    'S_cingul-Marginalis',
    'S_circular_insula_ant',
    'S_circular_insula_inf',
    'S_circular_insula_sup',
    'S_collat_transv_ant',
    'S_collat_transv_post',
    'S_front_inf',
    'S_front_middle',
    'S_front_sup',
    'S_interm_prim-Jensen',
    'S_intrapariet&P_trans',
    'S_oc_middle&Lunatus',
    'S_oc_sup&transversal',
    'S_occipital_ant',
    'S_oc-temp_lat',
    'S_oc-temp_med&Lingual',
    'S_orbital_lateral',
    'S_orbital_med-olfact',
    'S_orbital-H_Shaped',
    'S_parieto_occipital',
    'S_pericallosal',
    'S_postcentral',
    'S_precentral-inf-part',
    'S_precentral-sup-part',
    'S_suborbital',
    'S_subparietal',
    'S_temporal_inf',
    'S_temporal_sup',
    'S_temporal_transverse',
]

# Map each ABCD column code ('mrisdp_1' ... 'mrisdp_148') to '<hemi>_<region>_thickness'.
# Entries are inserted in numeric order, left hemisphere first.
Aparc_Dict = {
    'mrisdp_{}'.format(number): '{}_{}_thickness'.format(hemi, region)
    for number, (hemi, region) in enumerate(
        ((hemi, region) for hemi in ('lh', 'rh') for region in _APARC_REGIONS),
        start=1,
    )
}
# Give the 'mrisdp_N' thickness columns their readable region names.
ABCD_AparcRename = ABCD_Aparc.rename(columns=Aparc_Dict)

# Keep the two identifier columns followed by every renamed thickness column, in the
# order Aparc_Dict lists them (left hemisphere first, then right).
_cortical_columns = ['subjectkey', 'eventname'] + list(Aparc_Dict.values())
ABCD_Cortical = ABCD_AparcRename[_cortical_columns]


# Readable names for the subcortical volume columns used here (hippocampus and amygdala
# per hemisphere, plus the intracranial-volume column, renamed to 'eICV').
Aseg_Dict = {
    'smri_vol_scs_hpuslh': 'Left-Hippocampus',
    'smri_vol_scs_amygdalalh': 'Left-Amygdala',
    'smri_vol_scs_hpusrh': 'Right-Hippocampus',
    'smri_vol_scs_amygdalarh': 'Right-Amygdala',
    'smri_vol_scs_intracranialv': 'eICV',
}
ABCD_AsegRename = ABCD_Aseg.rename(columns=Aseg_Dict)

# Identifier columns first, then the renamed volumes in the order Aseg_Dict lists them.
_subcortical_columns = ['subjectkey', 'eventname'] + list(Aseg_Dict.values())
ABCD_Subcortical = ABCD_AsegRename[_subcortical_columns]
             
             
             
             

In [None]:
#Find any duplicates and remove them
# DataFrame.drop_duplicates() returns a new frame and leaves the original untouched, so
# the result has to be assigned back; calling it without assignment removes nothing.
# Only rows that are identical in every column are dropped.
ABCD_Cortical = ABCD_Cortical.drop_duplicates()
ABCD_Subcortical = ABCD_Subcortical.drop_duplicates()
ABCD_DemVars = ABCD_DemVars.drop_duplicates()
# The longitudinal demographics are the base table of the later merge chain, so they are
# de-duplicated as well.
ABCD_LongDemVars = ABCD_LongDemVars.drop_duplicates()
ABCD_ResVars = ABCD_ResVars.drop_duplicates()
ABCD_SiteVars = ABCD_SiteVars.drop_duplicates()

In [None]:
# Separate Datasets by their follow-up points and rename
#
# Each table is split into its baseline and 2-year follow-up rows, the two waves are
# placed side by side by merging on 'subjectkey', and the per-table results are then
# left-merged into one wide frame, ABCD_Full.
#
# Column names produced by the merges (later cells select columns by these names):
#   * Longitudinal demographics: '<col>_x' is baseline, '<col>_y' is the 2-year follow-up
#     (e.g. 'interview_age_y', 'sex_y').
#   * Baseline demographics keep their plain names ('interview_age', 'sex', 'eventname', ...).
#   * Residential history: 'Base_*' / '2nd_*'; its event columns end up as
#     'eventname_x_y' (baseline) and 'eventname_y_y' (2-year follow-up), while the
#     longitudinal-demographic event columns become 'eventname_x_x' / 'eventname_y_x'.
#   * Site: 'Base_Site' / '2nd_Site'. Cortical and subcortical: plain name for baseline,
#     '<name>_2nd' for the 2-year follow-up.
#
# 'eventname' is removed from the site, cortical and subcortical tables before they are
# merged. Leaving it in makes the default '_x'/'_y' merge suffixes produce names that
# already exist in the left frame ('eventname_x_x', 'eventname_y_y', ...): recent pandas
# versions refuse that with a MergeError and older ones only warn and emit duplicate
# column names. The wave is already encoded in the other column names of those tables.


def _without_eventname(frame):
    """Return `frame` with every column except 'eventname' (original frame is not modified)."""
    return frame[[col for col in frame.columns if col != 'eventname']]


#ABCD_DemRes = ABCD_DemVars.merge(ABCD_ResVars, on=['subjectkey'], how='outer')
Base_ABCD_Dem = ABCD_DemVars[ABCD_DemVars['eventname'] == 'baseline_year_1_arm_1']
Base_ABCD_Long = ABCD_LongDemVars[ABCD_LongDemVars['eventname'] == 'baseline_year_1_arm_1']
Second_ABCD_Long = ABCD_LongDemVars[ABCD_LongDemVars['eventname'] == '2_year_follow_up_y_arm_1']

#Base_ABCD_Long = ABCD_LongDemVars[ABCD_LongDemVars['eventname'] == 'baseline_year_1_arm_1']
#BaseLongDict = {'subjectkey', 'eventname', 'interview_age', 'sex', 'demo_comb_income_v2_l', 'demo_prnt_ed_v2_2yr_l', 'demo_prtnr_ed_v2_2yr_l', 'demo_prnt_empl_v2_l', 'demo_prtnr_empl_v2_l', 'demo_prnt_marital_v2_l'}
#Base_ABCD_Long = Base_ABCD_Long.rename(columns=BaseLongDict)
#Second_ABCD_Long = ABCD_LongDemVars[ABCD_LongDemVars['eventname'] == '2_year_follow_up_y_arm_1']
#SecondLongDict = {'subjectkey', 'eventname', 'interview_age', 'sex', 'demo_comb_income_v2_l', 'demo_prnt_ed_v2_2yr_l', 'demo_prtnr_ed_v2_2yr_l', 'demo_prnt_empl_v2_l', 'demo_prtnr_empl_v2_l', 'demo_prnt_marital_v2_l''}
#Second_ABCD_Long = Second_ABCD_Long.rename(columns=SecondLongDict)

# Residential history: wave-specific column names
Base_ABCD_Res = ABCD_ResVars[ABCD_ResVars['eventname'] == 'baseline_year_1_arm_1']
BaseResDict = {'reshist_addr1_adi_perc' : 'Base_ADI', 'reshist_addr1_no2' : 'Base_N02', 'reshist_addr1_pm25' : 'Base_PM25', 'reshist_state_racism_factor' : 'Base_Racism',  'reshist_addr1_coi_r_coi_nat' : 'Base_COI'}
Base_ABCD_Res = Base_ABCD_Res.rename(columns=BaseResDict)
Second_ABCD_Res = ABCD_ResVars[ABCD_ResVars['eventname'] == '2_year_follow_up_y_arm_1']
SecondResDict = {'reshist_addr1_adi_perc' : '2nd_ADI', 'reshist_addr1_no2' : '2nd_N02', 'reshist_addr1_pm25' : '2nd_PM25', 'reshist_state_racism_factor' : '2nd_Racism',  'reshist_addr1_coi_r_coi_nat' : '2nd_COI'}
Second_ABCD_Res = Second_ABCD_Res.rename(columns=SecondResDict)

# Site: wave-specific column names
Base_ABCD_Site = ABCD_SiteVars[ABCD_SiteVars['eventname'] == 'baseline_year_1_arm_1']
Second_ABCD_Site = ABCD_SiteVars[ABCD_SiteVars['eventname'] == '2_year_follow_up_y_arm_1']
BaseSiteVar = {'site_id_l' : 'Base_Site'}
SecondSiteVar = {'site_id_l' : '2nd_Site'}
Base_ABCD_Site = Base_ABCD_Site.rename(columns=BaseSiteVar)
Second_ABCD_Site = Second_ABCD_Site.rename(columns=SecondSiteVar)

# Cortical thickness: follow-up columns get a '_2nd' suffix (identifier columns excepted)
Base_ABCD_Aparc = ABCD_Cortical[ABCD_Cortical['eventname'] == 'baseline_year_1_arm_1']
Second_ABCD_Aparc = ABCD_Cortical[ABCD_Cortical['eventname'] == '2_year_follow_up_y_arm_1']
Second_ABCD_Aparc = Second_ABCD_Aparc.add_suffix('_2nd')
Second_ABCD_Aparc = Second_ABCD_Aparc.rename(columns={'subjectkey_2nd' : 'subjectkey', 'eventname_2nd' : 'eventname'})

# Subcortical volumes: same '_2nd' convention
Base_ABCD_Aseg = ABCD_Subcortical[ABCD_Subcortical['eventname'] == 'baseline_year_1_arm_1']
Second_ABCD_Aseg = ABCD_Subcortical[ABCD_Subcortical['eventname'] == '2_year_follow_up_y_arm_1']
Second_ABCD_Aseg = Second_ABCD_Aseg.add_suffix('_2nd')
Second_ABCD_Aseg = Second_ABCD_Aseg.rename(columns={'subjectkey_2nd' : 'subjectkey', 'eventname_2nd' : 'eventname'})

# Put baseline and follow-up side by side for each table. Left merges keep every
# baseline row even when the participant has no follow-up row.
New_ABCD_Long = Base_ABCD_Long.merge(Second_ABCD_Long, on='subjectkey', how='left')
New_ABCD_Res = Base_ABCD_Res.merge(Second_ABCD_Res, on='subjectkey', how='left')
New_ABCD_Site = _without_eventname(Base_ABCD_Site).merge(_without_eventname(Second_ABCD_Site), on='subjectkey', how='left')
New_Cortical = _without_eventname(Base_ABCD_Aparc).merge(_without_eventname(Second_ABCD_Aparc), on='subjectkey', how='left')
New_Subcortical = _without_eventname(Base_ABCD_Aseg).merge(_without_eventname(Second_ABCD_Aseg), on='subjectkey', how='left')

# Chain everything onto the longitudinal-demographics frame.
Dem0 = New_ABCD_Long.merge(Base_ABCD_Dem, on='subjectkey', how='left')
Dem1 = Dem0.merge(New_ABCD_Res, on='subjectkey', how='left')
ABCD_SocioDem = Dem1.merge(New_ABCD_Site, on='subjectkey', how='left')
ABCD_PartFull = ABCD_SocioDem.merge(New_Cortical, on='subjectkey', how='left')
ABCD_Full = ABCD_PartFull.merge(New_Subcortical, on='subjectkey', how='left')

print(ABCD_Full)


In [None]:
#Prepare Datasets Specifically for BrainCharts
#NB: Only the year-2 imaging measures are used here, so the selection keeps the 2-year
# follow-up age/sex, demographics, site and brain measures (plus the race indicators and
# both waves of the residential-history variables) and leaves the baseline imaging out.

_race_columns = ['demo_race_a_p___{}'.format(code) for code in list(range(10, 26)) + [77, 99]]
_followup_demo_columns = [
    'demo_comb_income_v2_l_y',
    'demo_prnt_ed_v2_2yr_l_y',
    'demo_prtnr_ed_v2_2yr_l_y',
    'demo_prnt_empl_v2_l_y',
    'demo_prtnr_empl_v2_l_y',
    'demo_prnt_marital_v2_l_y',
]
_residential_site_columns = [
    'Base_ADI', 'Base_N02', 'Base_PM25', 'Base_Racism', 'Base_COI',
    'eventname_y_y',
    '2nd_ADI', '2nd_N02', '2nd_PM25', '2nd_Racism', '2nd_COI',
    '2nd_Site',
]
# Follow-up thickness columns: every region name from Aparc_Dict with the '_2nd' suffix,
# in Aparc_Dict order.
_followup_cortical_columns = [region + '_2nd' for region in Aparc_Dict.values()]
_followup_subcortical_columns = [
    'Left-Hippocampus_2nd',
    'Left-Amygdala_2nd',
    'Right-Hippocampus_2nd',
    'Right-Amygdala_2nd',
]
_model_columns = (
    ['subjectkey', 'interview_age_y', 'sex_y']
    + _race_columns
    + _followup_demo_columns
    + _residential_site_columns
    + _followup_cortical_columns
    + _followup_subcortical_columns
)

# Recode sex from 'M'/'F' to the strings '1'/'0'.
GenderDict = {"M" : '1', "F" : '0'}

# Select the columns, recode sex, then drop rows with no follow-up left-hippocampus
# value (rows where 'Left-Hippocampus_2nd' is missing).
BLR_Model_Data_2ndWave = (
    ABCD_Full[_model_columns]
    .replace({'sex_y' : GenderDict})
    .dropna(subset=['Left-Hippocampus_2nd'])
)



In [None]:
#Prep PCN Toolkit
os.chdir(WorkingDir)

# Add the covariate columns under the names 'age', 'sex' and 'site'.
# 'age' is interview_age_y divided by 12.
# NOTE(review): presumably interview_age is in months, making 'age' years - confirm.
BLR_Model_Data_2ndWave['age'] = BLR_Model_Data_2ndWave['interview_age_y'].astype(int)/12
BLR_Model_Data_2ndWave['sex'] = BLR_Model_Data_2ndWave['sex_y']
BLR_Model_Data_2ndWave['site'] = BLR_Model_Data_2ndWave['2nd_Site']

# Relabel 'site01' ... 'site21' as 'ABCD_01' ... 'ABCD_21'.
# NOTE(review): presumably these match the site labels in the pretrained model's site
# list - confirm against the site-id file.
SiteDict = {'site%02d' % number : 'ABCD_%02d' % number for number in range(1, 22)}
BLR_Model_Data_2ndWave = BLR_Model_Data_2ndWave.replace({'site' : SiteDict})

# Remove the literal '_2nd' suffix from column names.
# str.rstrip('_2nd') must not be used here: it strips any trailing run of the characters
# '_', '2', 'n', 'd', which also mangles unrelated names (e.g. 'Base_N02' -> 'Base_N0',
# 'demo_race_a_p___22' -> 'demo_race_a_p'). The anchored regex only touches a real suffix.
BLR_Model_Data_2ndWave.columns = BLR_Model_Data_2ndWave.columns.str.replace(r'_2nd$', '', regex=True)
print(BLR_Model_Data_2ndWave)

In [None]:
##Print Dataset
# Drop rows that are exact duplicates across all columns, keep the result under a new
# name (the input frame is left as-is), and print it for inspection.

BLR_Model_Data_2ndWave_unique = BLR_Model_Data_2ndWave.drop_duplicates()
print(BLR_Model_Data_2ndWave_unique)
# NOTE(review): the export below is commented out, yet a later cell reads
# 'BLR_Model_Data_2ndWave.csv' from WorkingDir. On a fresh run that file must already
# exist or this line needs to be uncommented - confirm which is intended.
#BLR_Model_Data_2ndWave_unique.to_csv('BLR_Model_Data_2ndWave.csv')



In [None]:
### Run the BrainCharts Transfer for the ABCD Portion of the Analysis ###

In [None]:
# Unzip Pretrained Models
# Change into the 'models' folder of the braincharts checkout so the archive below is
# extracted in place.
os.chdir(os.path.join(BrainChartsDir,'models/'))
# The unzip step is commented out.
# NOTE(review): presumably the archive was already extracted on the original machine;
# uncomment on a fresh checkout - confirm.
#! unzip lifespan_57K_82sites.zip

In [None]:
# Configure Model Variables
model_name = 'lifespan_57K_82sites'      # pretrained model to apply
site_names = 'site_ids_ct_82sites.txt'   # file listing that model's site ids

root_dir = WorkingDir                                          # where the analysis takes place
data_dir = os.path.join(BrainChartsDir,'docs')                 # where the data files live
out_dir = os.path.join(BrainChartsDir, 'models', model_name)   # where the models live

# Read the site ids belonging to this model, one per line. This must match the training data.
with open(os.path.join(data_dir, site_names)) as site_file:
    site_ids_tr = site_file.read().splitlines()

In [None]:
# Define Test Data
#
# The test set is the CSV named 'BLR_Model_Data_2ndWave.csv' inside WorkingDir.
#
# NOTE(review): earlier notes in this cell described the input as derived from the
# 'SRG_2023_HCP_PCN_Analyses_04_25_23' notebook, mentioned pulling HCP-A/D site info from
# the ndar_subject01 file, renaming interview_age to age and possibly swapping the 0/1
# coding of sex, and flagged the cell as not ready for publication. Confirm whether any
# of that still applies to this ABCD input before relying on the results.

test_data = os.path.join(WorkingDir,'BLR_Model_Data_2ndWave.csv')
df_te = pd.read_csv(test_data)

# Unique site ids present in the test set, in sorted order
site_ids_te = list(set(df_te['site'].to_list()))
site_ids_te.sort()



In [None]:
# Inspection only: show the follow-up sex column after its 'M'/'F' -> '1'/'0' recoding.
print(BLR_Model_Data_2ndWave['sex_y'])

In [None]:
# Adaptation Dataset 
#adaptation_data = os.path.join(data_dir, 'OpenNeuroTransfer_ct_ad.csv')
#
#df_ad = pd.read_csv(adaptation_data)
#
## extract a list of unique site ids from the test set
#site_ids_ad =  sorted(set(df_ad['site'].to_list()))
#
#if not all(elem in site_ids_ad for elem in site_ids_te):
#    print('Warning: some of the testing sites are not in the adaptation data')

In [None]:
# Identify IDPs and Covariates
# idp_ids lists the imaging-derived phenotypes (IDPs) to process. Each name is used both as a column
# of the test data and as the name of that IDP's sub-folder under the pretrained model directory.
# The list holds the left/right hemisphere '*_thickness' measures followed by four subcortical entries.
idp_ids = ['lh_G&S_frontomargin_thickness', 'lh_G&S_occipital_inf_thickness', 'lh_G&S_paracentral_thickness', 'lh_G&S_subcentral_thickness', 'lh_G&S_transv_frontopol_thickness', 'lh_G&S_cingul-Ant_thickness', 'lh_G&S_cingul-Mid-Ant_thickness', 'lh_G&S_cingul-Mid-Post_thickness', 'lh_G_cingul-Post-dorsal_thickness', 'lh_G_cingul-Post-ventral_thickness', 'lh_G_cuneus_thickness', 'lh_G_front_inf-Opercular_thickness', 'lh_G_front_inf-Orbital_thickness', 'lh_G_front_inf-Triangul_thickness', 'lh_G_front_middle_thickness', 'lh_G_front_sup_thickness', 'lh_G_Ins_lg&S_cent_ins_thickness', 'lh_G_insular_short_thickness', 'lh_G_occipital_middle_thickness', 'lh_G_occipital_sup_thickness', 'lh_G_oc-temp_lat-fusifor_thickness', 'lh_G_oc-temp_med-Lingual_thickness', 'lh_G_oc-temp_med-Parahip_thickness', 'lh_G_orbital_thickness', 'lh_G_pariet_inf-Angular_thickness', 'lh_G_pariet_inf-Supramar_thickness', 'lh_G_parietal_sup_thickness', 'lh_G_postcentral_thickness', 'lh_G_precentral_thickness', 'lh_G_precuneus_thickness', 'lh_G_rectus_thickness', 'lh_G_subcallosal_thickness', 'lh_G_temp_sup-G_T_transv_thickness', 'lh_G_temp_sup-Lateral_thickness', 'lh_G_temp_sup-Plan_polar_thickness', 'lh_G_temp_sup-Plan_tempo_thickness', 'lh_G_temporal_inf_thickness', 'lh_G_temporal_middle_thickness', 'lh_Lat_Fis-ant-Horizont_thickness', 'lh_Lat_Fis-ant-Vertical_thickness', 'lh_Lat_Fis-post_thickness', 'lh_Pole_occipital_thickness', 'lh_Pole_temporal_thickness', 'lh_S_calcarine_thickness', 'lh_S_central_thickness', 'lh_S_cingul-Marginalis_thickness', 'lh_S_circular_insula_ant_thickness', 'lh_S_circular_insula_inf_thickness', 'lh_S_circular_insula_sup_thickness', 'lh_S_collat_transv_ant_thickness', 'lh_S_collat_transv_post_thickness', 'lh_S_front_inf_thickness', 'lh_S_front_middle_thickness', 'lh_S_front_sup_thickness', 'lh_S_interm_prim-Jensen_thickness', 'lh_S_intrapariet&P_trans_thickness', 'lh_S_oc_middle&Lunatus_thickness', 'lh_S_oc_sup&transversal_thickness', 'lh_S_occipital_ant_thickness', 
'lh_S_oc-temp_lat_thickness', 'lh_S_oc-temp_med&Lingual_thickness', 'lh_S_orbital_lateral_thickness', 'lh_S_orbital_med-olfact_thickness', 'lh_S_orbital-H_Shaped_thickness', 'lh_S_parieto_occipital_thickness', 'lh_S_pericallosal_thickness', 'lh_S_postcentral_thickness', 'lh_S_precentral-inf-part_thickness', 'lh_S_precentral-sup-part_thickness', 'lh_S_suborbital_thickness', 'lh_S_subparietal_thickness', 'lh_S_temporal_inf_thickness', 'lh_S_temporal_sup_thickness', 'lh_S_temporal_transverse_thickness', 'rh_G&S_frontomargin_thickness', 'rh_G&S_occipital_inf_thickness', 'rh_G&S_paracentral_thickness', 'rh_G&S_subcentral_thickness', 'rh_G&S_transv_frontopol_thickness', 'rh_G&S_cingul-Ant_thickness', 'rh_G&S_cingul-Mid-Ant_thickness', 'rh_G&S_cingul-Mid-Post_thickness', 'rh_G_cingul-Post-dorsal_thickness', 'rh_G_cingul-Post-ventral_thickness', 'rh_G_cuneus_thickness', 'rh_G_front_inf-Opercular_thickness', 'rh_G_front_inf-Orbital_thickness', 'rh_G_front_inf-Triangul_thickness', 'rh_G_front_middle_thickness', 'rh_G_front_sup_thickness', 'rh_G_Ins_lg&S_cent_ins_thickness', 'rh_G_insular_short_thickness', 'rh_G_occipital_middle_thickness', 'rh_G_occipital_sup_thickness', 'rh_G_oc-temp_lat-fusifor_thickness', 'rh_G_oc-temp_med-Lingual_thickness', 'rh_G_oc-temp_med-Parahip_thickness', 'rh_G_orbital_thickness', 'rh_G_pariet_inf-Angular_thickness', 'rh_G_pariet_inf-Supramar_thickness', 'rh_G_parietal_sup_thickness', 'rh_G_postcentral_thickness', 'rh_G_precentral_thickness', 'rh_G_precuneus_thickness', 'rh_G_rectus_thickness', 'rh_G_subcallosal_thickness', 'rh_G_temp_sup-G_T_transv_thickness', 'rh_G_temp_sup-Lateral_thickness', 'rh_G_temp_sup-Plan_polar_thickness', 'rh_G_temp_sup-Plan_tempo_thickness', 'rh_G_temporal_inf_thickness', 'rh_G_temporal_middle_thickness', 'rh_Lat_Fis-ant-Horizont_thickness', 'rh_Lat_Fis-ant-Vertical_thickness', 'rh_Lat_Fis-post_thickness', 'rh_Pole_occipital_thickness', 'rh_Pole_temporal_thickness', 'rh_S_calcarine_thickness', 'rh_S_central_thickness', 
'rh_S_cingul-Marginalis_thickness', 'rh_S_circular_insula_ant_thickness', 'rh_S_circular_insula_inf_thickness', 'rh_S_circular_insula_sup_thickness', 'rh_S_collat_transv_ant_thickness', 'rh_S_collat_transv_post_thickness', 'rh_S_front_inf_thickness', 'rh_S_front_middle_thickness', 'rh_S_front_sup_thickness', 'rh_S_interm_prim-Jensen_thickness', 'rh_S_intrapariet&P_trans_thickness', 'rh_S_oc_middle&Lunatus_thickness', 'rh_S_oc_sup&transversal_thickness', 'rh_S_occipital_ant_thickness', 'rh_S_oc-temp_lat_thickness', 'rh_S_oc-temp_med&Lingual_thickness', 'rh_S_orbital_lateral_thickness', 'rh_S_orbital_med-olfact_thickness', 'rh_S_orbital-H_Shaped_thickness', 'rh_S_parieto_occipital_thickness', 'rh_S_pericallosal_thickness', 'rh_S_postcentral_thickness', 'rh_S_precentral-inf-part_thickness', 'rh_S_precentral-sup-part_thickness', 'rh_S_suborbital_thickness', 'rh_S_subparietal_thickness', 'rh_S_temporal_inf_thickness', 'rh_S_temporal_sup_thickness', 'rh_S_temporal_transverse_thickness', 'Left-Hippocampus', 'Left-Amygdala','Right-Hippocampus', 'Right-Amygdala', ]

# which data columns do we wish to use as covariates? 
# (the order matters: later cells index the design matrix by position, age first and sex second)
cols_cov = ['age','sex']

# limits for cubic B-spline basis 
# (also reused as the age range of the dummy data for the centile plots)
xmin = -5 
xmax = 110

# Absolute Z threshold above which a sample is considered to be an outlier (without fitting any model)
# NOTE(review): not referenced by any of the cells that follow - confirm it is still needed.
outlier_thresh = 7

In [None]:
# Predictions for each IDP
#
# For every IDP this writes the test responses and the test design matrix into
# that IDP's model folder and runs the pretrained BLR model on them. predict()
# is run from inside the IDP folder (hence the chdir); a later cell reads the
# resulting 'yhat_predict.txt', 'ys2_predict.txt' and 'Z_predict.txt' from there.

# The test design matrix depends only on the covariates and the site labels,
# never on the IDP, so it is built once here instead of once per IDP.
X_te = create_design_matrix(df_te[cols_cov], 
                            site_ids = df_te['site'],
                            all_sites = site_ids_tr,
                            basis = 'bspline', 
                            xmin = xmin, 
                            xmax = xmax)

# Whether every test site was part of the training data is also IDP-independent.
all_sites_in_training = all(elem in site_ids_tr for elem in site_ids_te)

# The adaptation branch below needs df_ad, whose loading cell is commented out in
# this notebook. Fail up front with an explicit message rather than part-way
# through the first IDP, after files have already been written.
if not all_sites_in_training and 'df_ad' not in globals():
    missing_sites = [elem for elem in site_ids_te if elem not in site_ids_tr]
    raise NameError('Test sites ' + str(missing_sites) + ' are not in the training data, '
                    'so the model must be adapted, but the adaptation data (df_ad) '
                    'has not been loaded.')

for idp_num, idp in enumerate(idp_ids): 
    print('Running IDP', idp_num, idp, ':')
    idp_dir = os.path.join(out_dir, idp)
    os.chdir(idp_dir)
    
    # extract and save the response variables for the test set
    y_te = df_te[idp].to_numpy()
    
    # save the variables
    resp_file_te = os.path.join(idp_dir, 'resp_te.txt') 
    np.savetxt(resp_file_te, y_te)
        
    # save the design matrix
    cov_file_te = os.path.join(idp_dir, 'cov_bspline_te.txt')
    # fmt='%s' is kept from the original notebook, which added it to work around
    # an error when saving with the default format.
    np.savetxt(cov_file_te, X_te, fmt='%s')
    
    # check whether all sites in the test set are represented in the training set
    if all_sites_in_training:
        print('All sites are present in the training data')
        
        # just make predictions
        yhat_te, s2_te, Z = predict(cov_file_te, 
                                    alg='blr', 
                                    respfile=resp_file_te, 
                                    model_path=os.path.join(idp_dir,'Models'))
    else:
        print('Some sites missing from the training data. Adapting model')
        
        # save the covariates for the adaptation data
        X_ad = create_design_matrix(df_ad[cols_cov], 
                                    site_ids = df_ad['site'],
                                    all_sites = site_ids_tr,
                                    basis = 'bspline', 
                                    xmin = xmin, 
                                    xmax = xmax)
        cov_file_ad = os.path.join(idp_dir, 'cov_bspline_ad.txt')          
        np.savetxt(cov_file_ad, X_ad)
        
        # save the responses for the adaptation data
        resp_file_ad = os.path.join(idp_dir, 'resp_ad.txt') 
        y_ad = df_ad[idp].to_numpy()
        np.savetxt(resp_file_ad, y_ad)
       
        # save the site ids for the adaptation data
        sitenum_file_ad = os.path.join(idp_dir, 'sitenum_ad.txt') 
        site_num_ad = df_ad['sitenum'].to_numpy(dtype=int)
        np.savetxt(sitenum_file_ad, site_num_ad)
        
        # save the site ids for the test data 
        sitenum_file_te = os.path.join(idp_dir, 'sitenum_te.txt')
        site_num_te = df_te['sitenum'].to_numpy(dtype=int)
        np.savetxt(sitenum_file_te, site_num_te)
         
        yhat_te, s2_te, Z = predict(cov_file_te, 
                                    alg = 'blr', 
                                    respfile = resp_file_te, 
                                    model_path = os.path.join(idp_dir,'Models'),
                                    adaptrespfile = resp_file_ad,
                                    adaptcovfile = cov_file_ad,
                                    adaptvargroupfile = sitenum_file_ad,
                                    testvargroupfile = sitenum_file_te)

In [None]:
# Make Centile Plots by Covariate
# Choose the sex to plot (1 = male, 0 = female); the plot colour follows from it
sex = 1
clr = 'blue' if sex == 1 else 'red'

# dummy covariates for visualisation: an evenly spaced age grid (column 0)
# with the chosen sex held constant (column 1)
print('configuring dummy data ...')
xx = np.arange(xmin, xmax, 0.5)
X0_dummy = np.column_stack((xx, np.full(len(xx), sex, dtype=float)))

# expand the dummy covariates into a design matrix (no site is assigned)
X_dummy = create_design_matrix(X0_dummy,
                               site_ids=None,
                               all_sites=site_ids_tr,
                               xmin=xmin,
                               xmax=xmax)

# write the dummy design matrix next to the models so predict() can read it
cov_file_dummy = os.path.join(out_dir, 'cov_bspline_dummy_mean.txt')
np.savetxt(cov_file_dummy, X_dummy)

In [None]:
# Centile plots: for every IDP, overlay the (site-adjusted) test data of the
# chosen sex on the normative centiles predicted for the dummy age grid, and
# save the figure as 'centiles_<sex>' in that IDP's model folder.
sns.set(style='whitegrid')

# Whether every test site was part of the training data does not depend on the
# IDP or on the site being plotted, so evaluate it once.
all_sites_in_training = all(elem in site_ids_tr for elem in site_ids_te)

# Rows of the test design matrix that belong to the sex being plotted
# (column 0 is the intercept, so age is column 1 and sex is column 2).
is_plotted_sex = X_te[:,2] == sex

for idp_num, idp in enumerate(idp_ids): 
    print('Running IDP', idp_num, idp, ':')
    idp_dir = os.path.join(out_dir, idp)
    os.chdir(idp_dir)
    
    # load the true data points
    yhat_te = load_2d(os.path.join(idp_dir, 'yhat_predict.txt'))
    s2_te = load_2d(os.path.join(idp_dir, 'ys2_predict.txt'))
    y_te = load_2d(os.path.join(idp_dir, 'resp_te.txt'))
            
    # set up the covariates for the dummy data
    print('Making predictions with dummy covariates (for visualisation)')
    yhat, s2 = predict(cov_file_dummy, 
                       alg = 'blr', 
                       respfile = None, 
                       model_path = os.path.join(idp_dir,'Models'), 
                       outputsuffix = '_dummy')
    
    # load the normative model
    # NOTE(security): pickle.load executes code embedded in the file; only use
    # model files obtained from a trusted source.
    with open(os.path.join(idp_dir,'Models', 'NM_0_0_estimate.pkl'), 'rb') as handle:
        nm = pickle.load(handle) 
    
    # get the warp and warp parameters
    W = nm.blr.warp
    warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1] 
        
    # first, we warp predictions for the true data and compute evaluation metrics
    med_te = W.warp_predictions(np.squeeze(yhat_te), np.squeeze(s2_te), warp_param)[0]
    med_te = med_te[:, np.newaxis]
    print('metrics:', evaluate(y_te, med_te))
    
    # then, we warp dummy predictions to create the plots
    med, pr_int = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param)
    
    # extract the different variance components to visualise
    beta, junk1, junk2 = nm.blr._parse_hyps(nm.blr.hyp, X_dummy)
    s2n = 1/beta # variation (aleatoric uncertainty)
    s2s = s2-s2n # modelling uncertainty (epistemic uncertainty)
    
    # plot the data points
    for sid, site in enumerate(site_ids_te):
        # Select this site's rows by site label. The position of a site in
        # site_ids_te need not match the position of its dummy column, because
        # the design matrix was built with all_sites=site_ids_tr.
        site_mask = (df_te['site'] == site).to_numpy()
        idx = np.where(np.bitwise_and(is_plotted_sex, site_mask))[0]
        
        # plot the true test data points 
        if all_sites_in_training:
            # all data in the test set are present in the training set
            if len(idx) == 0:
                print('No data for site', sid, site, 'skipping...')
                continue
            
            # then directly adjust the data: shift this site's median onto the
            # normative median over the age range the site covers
            idx_dummy = np.bitwise_and(X_dummy[:,1] > X_te[idx,1].min(), X_dummy[:,1] < X_te[idx,1].max())
            y_te_rescaled = y_te[idx] - np.median(y_te[idx]) + np.median(med[idx_dummy])
        else:
            # we need to adjust the data based on the adaptation dataset 
            
            # load the adaptation data
            y_ad = load_2d(os.path.join(idp_dir, 'resp_ad.txt'))
            X_ad = load_2d(os.path.join(idp_dir, 'cov_bspline_ad.txt'))
            idx_a = np.where(np.bitwise_and(X_ad[:,2] == sex, (df_ad['site'] == site).to_numpy()))[0]
            if len(idx) < 2 or len(idx_a) < 2:
                print('Insufficent data for site', sid, site, 'skipping...')
                continue
            
            # adjust and rescale the data
            y_te_rescaled, s2_rescaled = nm.blr.predict_and_adjust(nm.blr.hyp, 
                                                                   X_ad[idx_a,:], 
                                                                   np.squeeze(y_ad[idx_a]), 
                                                                   Xs=None, 
                                                                   ys=np.squeeze(y_te[idx]))
        # plot the (adjusted) data points
        plt.scatter(X_te[idx,1], y_te_rescaled, s=4, color=clr, alpha = 0.1)
       
    # plot the median of the dummy data
    plt.plot(xx, med, clr)
    
    # fill the gaps in between the centiles
    junk, pr_int25 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.25,0.75])
    junk, pr_int95 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.05,0.95])
    junk, pr_int99 = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2), warp_param, percentiles=[0.01,0.99])
    plt.fill_between(xx, pr_int25[:,0], pr_int25[:,1], alpha = 0.1,color=clr)
    plt.fill_between(xx, pr_int95[:,0], pr_int95[:,1], alpha = 0.1,color=clr)
    plt.fill_between(xx, pr_int99[:,0], pr_int99[:,1], alpha = 0.1,color=clr)
            
    # make the width of each centile proportional to the epistemic uncertainty
    junk, pr_int25l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.25,0.75])
    junk, pr_int95l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.05,0.95])
    junk, pr_int99l = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2-0.5*s2s), warp_param, percentiles=[0.01,0.99])
    junk, pr_int25u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.25,0.75])
    junk, pr_int95u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.05,0.95])
    junk, pr_int99u = W.warp_predictions(np.squeeze(yhat), np.squeeze(s2+0.5*s2s), warp_param, percentiles=[0.01,0.99])    
    plt.fill_between(xx, pr_int25l[:,0], pr_int25u[:,0], alpha = 0.3,color=clr)
    plt.fill_between(xx, pr_int95l[:,0], pr_int95u[:,0], alpha = 0.3,color=clr)
    plt.fill_between(xx, pr_int99l[:,0], pr_int99u[:,0], alpha = 0.3,color=clr)
    plt.fill_between(xx, pr_int25l[:,1], pr_int25u[:,1], alpha = 0.3,color=clr)
    plt.fill_between(xx, pr_int95l[:,1], pr_int95u[:,1], alpha = 0.3,color=clr)
    plt.fill_between(xx, pr_int99l[:,1], pr_int99u[:,1], alpha = 0.3,color=clr)

    # plot actual centile lines
    plt.plot(xx, pr_int25[:,0],color=clr, linewidth=0.5)
    plt.plot(xx, pr_int25[:,1],color=clr, linewidth=0.5)
    plt.plot(xx, pr_int95[:,0],color=clr, linewidth=0.5)
    plt.plot(xx, pr_int95[:,1],color=clr, linewidth=0.5)
    plt.plot(xx, pr_int99[:,0],color=clr, linewidth=0.5)
    plt.plot(xx, pr_int99[:,1],color=clr, linewidth=0.5)
    
    plt.xlabel('Age')
    plt.ylabel(idp) 
    plt.title(idp)
    plt.xlim((0,90))
    plt.savefig(os.path.join(idp_dir, 'centiles_' + str(sex)),  bbox_inches='tight')
    plt.show()
    # Release the figure so that, on backends where show() does not close it,
    # the next IDP is not drawn on top of this one and figures do not pile up.
    plt.close()
    
os.chdir(out_dir)

In [None]:
#Get the DeviationScores together
# Collect the per-IDP deviation (Z) scores written by predict() into one table:
# one column 'Z_<idp>' per IDP, one row per test-set row (in file order).
z_columns = {}
for idp in idp_ids:
    # read by full path so the working directory is left alone
    z_file = os.path.join(out_dir, idp, 'Z_predict.txt')
    # the file is read as a headerless single column; keep it as a Series
    z_columns['Z_' + idp] = pd.read_csv(z_file, header=None).iloc[:, 0]

# build the frame in one go rather than growing it one column at a time
Z_Spread = pd.DataFrame(z_columns)

print(Z_Spread)



In [None]:
#Properly Merge Data
# Attach the subject keys to the deviation scores by row position.
# NOTE(review): this assumes the rows of BLR_Model_Data_2ndWave_unique are in the
# same order as the rows of the test set the Z scores were computed from - confirm.
subject_keys = BLR_Model_Data_2ndWave_unique['subjectkey'].to_numpy()
if len(subject_keys) != len(Z_Spread):
    raise ValueError('Cannot attach subject keys: ' + str(len(subject_keys)) +
                     ' keys for ' + str(len(Z_Spread)) + ' rows of Z scores')

# assign() returns a new frame, so Z_Spread itself keeps only the Z_ columns
# (a plain `Z_Data = Z_Spread` would alias it and add the key column to both).
Z_Data = Z_Spread.assign(subjectkey=subject_keys)
print(Z_Data)


In [None]:
#Make Final Data
#os.chdir(WorkingDir)
#FinalDataset = BLR_Model_Data_2ndWave_unique.join(Z_Spread)
#FinalDataset.to_csv('BLR_Transfer_FinalDataset_05_31_23.csv')
#Z_Spread.to_csv('Z_Deviations_Full_02_23_24.csv')
#BLR_Model_Data_2ndWave_unique.to_csv('BLR_Model_Data_2ndWave_02_23_24.csv')

### NOTE: This section isn't actually used, because of a previous error where I accidentally merged things wrong.
### Removal of non-white/black participants happened offline for this section, which is why the files read later are named
### "_forRandSPSS": those are the files that are then used. The offline step is really just some logic to remove
### participants who did not report being only "white" or only "black." Sorry about that, potential reader. There IS a
### variable the ABCD team created that bins people into white/black/hispanic/other that you could use, but that isn't
### what we previously did, so some consistency is necessary I guess...

In [None]:
## Combine with Discrimination Data ##
# The final dataset was cleaned outside of Python to remove participants of non-white/black and multiracial
# identity. Here the Year 2 discrimination scores and the FreeSurfer T1 QC score are read in and merged onto
# that cleaned dataset, followed by the deviation (Z) scores.
year2_event = '2_year_follow_up_y_arm_1'
dis_columns = ['subjectkey', 'dim_yesno_q1', 'dim_matrix_q1', 'dim_matrix_q2', 'dim_matrix_q3',
               'dim_matrix_q4', 'dim_matrix_q5', 'dim_matrix_q6', 'dim_matrix_q7']
qc_columns = ['subjectkey', 'iqc_t1_1_qc_score']

# read the raw ABCD tables (the second line of each file is skipped) and keep the Year 2 rows
os.chdir('/[Path]/ABCD_Data')
ABCD_Dis = pd.read_csv("abcd_ydmes01.txt", sep="\t", skiprows=[1])
ABCD_DisYear2 = ABCD_Dis[ABCD_Dis['eventname'] == year2_event]
ABCD_DisYear2MergeData = ABCD_DisYear2[dis_columns]
ABCD_T1QC = pd.read_csv("mriqcrp10301.txt", sep="\t", skiprows=[1])
ABCD_T1QCYear2 = ABCD_T1QC[ABCD_T1QC['eventname'] == year2_event]
ABCD_T1QCYear2MergeData = ABCD_T1QCYear2[qc_columns]
os.chdir(WorkingDir)

# left-merge everything onto the cleaned dataset, keyed on subjectkey
Rdata = pd.read_csv("BLR_Model_Data_2ndWave_02_23_24_Redux_forRandSPSS.csv")
NewRdata = pd.merge(Rdata, ABCD_DisYear2MergeData, on='subjectkey', how='left')
New2Rdata = pd.merge(NewRdata, ABCD_T1QCYear2MergeData, on='subjectkey', how='left')
New3Rdata = pd.merge(New2Rdata, Z_Data, on='subjectkey', how='left')

New3Rdata.to_csv('BLR_Transfer_FinalDataset_withDisQC_02_23_24_Redux_forRandSPSS.csv')