In [1]:
import pandas as pd

import os

In [2]:
# directory (relative to the notebook) that the input .csv files are read from
load_path = "../input"

In [6]:
# load the lookup table; the columns used below are 'Description',
# 'ABCD Variable' and 'ABCD Table'
df_dict = pd.read_csv(os.path.join(load_path, 'genon_pheno_names_dict.csv'))

# variable description -> specific ABCD variable name
description_varname_dict = {
    description: varname
    for description, varname in zip(df_dict['Description'], df_dict['ABCD Variable'])
}

# variable description -> ABCD table name
description_table_dict = {
    description: table
    for description, table in zip(df_dict['Description'], df_dict['ABCD Table'])
}

In [37]:
# Build one wide table: one row per subject from race_data.csv, plus one
# column per variable listed in description_varname_dict, taken from the
# baseline rows of the table named in description_table_dict.

# get all variables to loop over
key_list = list(description_varname_dict.keys())

# initialize dataframe with race data, keeping only the 'Black' and 'White' rows
df_behav = pd.read_csv(os.path.join(load_path, 'race_data.csv'))
df_behav = df_behav.loc[
    df_behav['Race'].isin(['Black', 'White']),
    ['src_subject_id', 'Race'],
].reset_index(drop=True)

# adjust IDs: drop the first 11 characters and prepend 'NDAR_INV'
# (vectorised string slice instead of a row-wise lambda)
df_behav['src_subject_id'] = 'NDAR_INV' + df_behav['src_subject_id'].str[11:]

# collect the variables requested from each table, because several variables
# may share a table and each file should only be parsed once
varnames_by_table = {}
for key in key_list:
    varnames_by_table.setdefault(description_table_dict[key], []).append(
        description_varname_dict[key]
    )

# load every table once, reading only the columns that are needed, and
# restrict to baseline data only
baseline_by_table = {}
for table, table_varnames in varnames_by_table.items():

    # get filename
    fname = '{:s}.csv'.format(table)

    # dict.fromkeys removes repeated column names while keeping their order
    needed_columns = list(dict.fromkeys(['src_subject_id', 'eventname'] + table_varnames))

    # load in dataframe
    df_tmp = pd.read_csv(
        os.path.join(load_path, fname),
        usecols=needed_columns,
        low_memory=False,
    )

    baseline_by_table[table] = df_tmp[
        df_tmp['eventname'] == 'baseline_year_1_arm_1'
    ].reset_index(drop=True)

# merge variables one at a time, in key_list order, so the output columns
# keep the order of the lookup table
for key in key_list:

    # get variable name
    varname = description_varname_dict[key]

    # baseline rows of the table holding this variable
    df_tmp = baseline_by_table[description_table_dict[key]]

    # merge in data; validate raises a MergeError if a subject has more than
    # one baseline row, which would otherwise silently duplicate rows in
    # df_behav on every merge
    df_behav = df_behav.merge(
        df_tmp[['src_subject_id', varname]],
        on='src_subject_id',
        how='left',
        validate='many_to_one',
    )

# save data (create the output directory first so to_csv cannot fail on a
# fresh checkout)
output_path = '../output/combined_genon_behaviors.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_behav.to_csv(output_path, index=False)

df_behav.head()

Unnamed: 0,src_subject_id,Race,pea_ravlt_sd_trial_vi_tc,pea_ravlt_ld_trial_vii_tc,pea_wiscv_trs,nihtbx_flanker_uncorrected,nihtbx_list_uncorrected,nihtbx_cardsort_uncorrected,nihtbx_reading_uncorrected,nihtbx_pattern_uncorrected,...,pps_y_ss_severity_score,upps_y_ss_negative_urgency,upps_y_ss_positive_urgency,upps_y_ss_lack_of_planning,upps_y_ss_lack_of_perseverance,upps_y_ss_sensation_seeking,bis_y_ss_bis_sum,bis_y_ss_bas_rr,bis_y_ss_bas_drive,bis_y_ss_bas_fs
0,NDAR_INV00LJVZK2,Black,1.0,1.0,10.0,97.0,94.0,81.0,90.0,94.0,...,2.0,5.0,4.0,10.0,6.0,4.0,17.0,13.0,1.0,4.0
1,NDAR_INV052HU3CU,Black,13.0,12.0,23.0,107.0,105.0,91.0,103.0,82.0,...,0.0,8.0,4.0,7.0,5.0,7.0,17.0,15.0,10.0,4.0
2,NDAR_INV05ATJ1V1,Black,14.0,14.0,15.0,84.0,109.0,94.0,90.0,92.0,...,8.0,12.0,8.0,9.0,4.0,11.0,9.0,10.0,3.0,5.0
3,NDAR_INV0889M0JE,Black,13.0,14.0,20.0,105.0,120.0,104.0,99.0,111.0,...,59.0,11.0,13.0,8.0,8.0,10.0,7.0,8.0,3.0,6.0
4,NDAR_INV08FUB58A,Black,6.0,6.0,14.0,97.0,74.0,92.0,89.0,94.0,...,0.0,4.0,4.0,11.0,4.0,7.0,3.0,14.0,0.0,3.0
