This file calculates pairwise correlations among variables in the ABCD dataset and creates dataframes specifying an undirected weighted graph with edges representing correlations between variables, in the format that `graph_from_data_frame()` in the R library igraph expects.

More specifically, the hierarchy dataframe specifies the tree structure of the dendrogram (which determines how edges are bundled), the vertex dataframe specifies the vertices and corresponding data (group), and the connections dataframe specifies the edges connecting vertices.

In [10]:
#@title Variables

# Mapping of display-group name -> list of dataset column names belonging to
# that group. Insertion order of the groups is the order in which they are
# later emitted as children of the "origin" node in the hierarchy.
#
# NOTE: a few variable names are listed more than once (within a group and
# across groups). That is tolerated here; the de-duplication cell further down
# assigns each variable to the first group that lists it.
variable_groups = {  # T2 Variables
# *****Bio or Objective Child

        'Residential Characteristics': ['neighborhood_safety_ss_p', 'neighborhood_safe_y',  'resid_density', 'resid_walkability', 'resid_prox_roads', 'resid_crime_tot', 'resid_crime_violent',
            'resid_crime_drug', 'resid_crime_dui', 'resid_lead_risk_poverty', 'resid_lead_risk_houses_perc',
            'resid_lead_risk', 'resid_no2_avg', 'resid_pm25_avg', 'resid_sexism', 'resid_sex_orient_bias',
            'resid_immigrant_bias', 'resid_racism'], # 'L_site_id'],


        # The comma after 'desc_latin_B' is required: without it Python joins
        # the two adjacent string literals into 'desc_latin_Bpc_gene_aces1',
        # silently dropping both real variables.
        'Ethnicity/Nationality': ['desc_african_AFR_B', 'desc_native_american_AMR_B', 'desc_alaska_native_AMR_B',
            'desc_chinese_EAS_B', 'desc_japanese_EAS_B', 'desc_korean_EAS_B', 'desc_vietnamese_EAS_B',
            'desc_european_EUR_B', 'desc_asian_indian_SAS_B', 'desc_other_south_asian_SAS_B', 'desc_latin_B',
            'pc_gene_aces1', 'pc_gene_aces2', 'pc_gene_aces3', 'pc_gene_aces4',
            'pc_gene_aces5', 'pc_gene_aces6', 'pc_gene_aces7', 'pc_gene_aces8',
            'pc_gene_aces9', 'pc_gene_aces10', 'pc_gene_aces11', 'pc_gene_aces12',
            'pc_gene_aces13', 'pc_gene_aces14', 'pc_gene_aces15', 'pc_gene_aces16',
            'pc_gene_aces17', 'pc_gene_aces18', 'pc_gene_aces19', 'pc_gene_aces20',
            'pc_gene_aces21', 'pc_gene_aces22', 'pc_gene_aces23', 'pc_gene_aces24',
            'pc_gene_aces25', 'pc_gene_aces26', 'pc_gene_aces27', 'pc_gene_aces28',
            'pc_gene_aces29', 'pc_gene_aces30', 'pc_gene_aces31', 'pc_gene_aces32'],


        'Diet/Nutrition': ['fruit_intake', 'vegetable_intake', 'protein_sources_intake', 'legume_intake',
            'added_sugar', 'sugary_beverage_freq', 'dairy_intake', 'whole_grain_intake', 'total_calories',
            'protein_intake', 'carbohydrate_intake', 'fiber_intake', 'sodium_intake', 'potassium_intake',
            'total_sugar', 'saturated_fat', 'bad_diet_p'],

        'Physical Activity/Features': ['height', 'weight', 'waist', 'puberty_k', 'sex', 'no_sports_activities_p',
            'birth_weight_p', 'fitbit_resting_hr', 'fitbit_steps', 'fitbit_sedentary_mins', 'fitbit_lightlyactive_mins', 'fitbit_fairlyactive_mins', 'fitbit_veryactive_mins'],



# *****Social Parent/Child (Some Quasi Objective)


        'Technology Use': ['socialmedia_daysperweek_k', 'videogames_daysperweek_k', 'bullied_on_internet_k', 'vgame_thinking'],


        'Religion': [
            'child_religion', 'religious_service_frequency', 'relig_importance'],


        'Family Dynamics & Parenting': ['p_comm_cohesion_ss', 'p_comm_ctrl_ss', 'p_comm_collective_efficacy_ss',
            'fam_fight_often_k', 'fam_no_open_anger_k', 'fam_throw_things_k', 'fam_no_lose_temps_k',
            'fam_criticize_often_k', 'fam_hit_each_other_k', 'fam_keep_peace_k', 'fam_try_one_up_k',
            'fam_no_raise_voices_k', 'family_not_talk_aboutfeelings_p', 'family_peaceful_p',
            'family_open_discussing_anything_p', 'family_lose_temper_rare_p', 'family_believe_not_raise_voice_p',
            'frequent_family_conflict_p', 'family_conflict_ss_p', 'family_expression_ss_p', 'family_intellectual_ss_p',
            'family_activities_ss_p', 'family_organisation_ss_p', 'parents_argue_more_p', 'family_emotionprob_p',
            'parents_divorced_p', 'death_in_family_p', 'family_move_p', 'family_conflict_ss_k',
            'parent_monitoring_ss_k', 'marital_status', 'parent_age', 'sex_P', 'num_brothers_p', 'num_sisters_p',
            'religious_service_frequency', 'relig_importance',  'parent_family_responsibilities_p'],

        'Parent Social Functioning': [
            'parent_bad_relationships_p', 'parent_bad_family_relationship_p', 'parent_not_liked_by_others_p',
            'parent_friendship_trouble_p', 'parent_prefers_older_people_p', 'parent_associates_with_trouble_p',
            'parent_bad_opposite_sex_relationship_p', 'parent_meets_family_duties_p', 'parent_clowns_or_shows_off_p',
            'parent_teases_others_p', 'parent_stands_up_rights_p'
        ],


        'Social Relationship Quality': ['not_liked_p', 'doesnt_get_along_p', 'prosocial_ss_p', 'close_boy_friends_k',
            'close_girl_friends_k', 'peer_net_protective_ss_k', 'peers_beh_prosocial_ss_k', 'peers_beh_delinquent_ss_k',
            'feels_leftout_k', 'not_invited_k', 'excluded_k', 'otherkids_spreadneg_rumors_k', 'otherkids_gossip_k',
            'feels_threatned_k', 'saysmeanthings_others_k', 'otherkids_saymeanthings_k', 'discrimination_ss_k',
            'feels_discriminated_k', 'senses_racism_k', 'doesnt_feel_accepted_k', 'bullied_on_internet_k',
            'prosocial_ss_k', 'socialinfluence_meanfinal_k', 'relational_victimization_ss_k',
            'reputational_aggression_ss_k', 'reputational_victimization_ss_k', 'overt_aggression_ss_k',
            'overt_victimization_ss_k', 'relational_aggression_ss_k', 'peer_net_protective_ss_k',
            'relational_victimization_ss_k', 'overt_aggression_ss_k', 'relational_aggression_ss_k',
            'feels_discriminated_teachers_k', 'feels_discriminated_adults_not_school_k', 'feels_discriminated_students_k',
            'feels_unwanted_american_society_k', 'feels_discriminated_americans_k'],

        'School Dynamics': ['disobeys_at_school_k', 'getalong_teachers_k', 'feelsafe_at_school_k', 'feels_smart_k',
            'enjoys_school_k', 'grades_important_k', 'school_environment_ss_k', 'school_involvement_ss_k',
            'school_disengagement_ss_k', 'bad_grades', 'repeated_grade', 'grades_dropped', 'school_detension_suspension',
            'child_newschool_p', 'finds_schoolboring_k'],

        'Adverse Life Events': ['g_lifeevents_ss_k', 'b_lifeevents_ss_k', 'b_lifeevents_affected_ss_k',
            'experienced_crime_p', 'g_lifeevents_ss_p', 'b_lifeevents_ss_p', 'b_lifeevents_affected_ss_p',
            'car_accident_hurt_p', 'big_accident_need_treatment_p', 'fire_victim_p', 'natural_disaster_victim_p',
            'terrorism_victim_p', 'war_death_witness_p', 'stabbing_shooting_witness_p',
            'stabbing_shooting_victim_community_p', 'stabbing_shooting_victim_home_p', 'beating_victim_home_p',
            'stranger_threatened_child_victim_p', 'family_threatened_child_victim_p', 'adult_family_fighting_victim_p',
            'domestic_child_sexually_abuse_victim_p', 'foreign_child_sexually_abuse_victim_p',
            'peer_child_sexually_abuse_victim_p', 'sudden_death_in_family_p'],


# *****Psycho Child - Bridge in Psycho Parent


        'Cognitive Task Outcomes': ['tb_picvocab', 'tb_picture', 'tb_reading', 'tb_flanker', 'tb_list',
            'tb_cardsort', 'tb_pattern', 'gd_safebets', 'gd_riskybets',
            'ravlt_s_total', 'ravlt_s_repitition', 'ravlt_s_intrusions',
            'ravlt_l_total', 'ravlt_l_repitition', 'ravlt_l_intrusions',
            'nb_correct_nt', 'nb_correct_mrt', 'nb_correct_nt_2back', 'nb_correct_mrt_2back',
            'nb_correct_nt_pos', 'nb_correct_mrt_pos', 'nb_correct_nt_neg', 'nb_correct_mrt_neg',
            'nb2_accuracy_pos', 'nb2_resp_bias_pos', 'nb2_D_prime_pos',
            'nb2_accuracy_neg', 'nb2_resp_bias_neg', 'nb2_D_prime_neg',
            'sst_ssrt_mean_est', 'sst_ssrt_int_est', 'sst_acceptable_performance',
            'mid_mrt_smrw', 'mid_mrt_lgrw', 'mid_total_payout',
            'mid_acceptable_performance', 'mid_num_trials',
            'lmt_accuracy', 'lmt_correct_nt', 'lmt_mrt',
            'lmt_correct_mrt', 'lmt_efficiency'],

        'Sleep Problems': ['difficulty_goingtosleep_p', 'difficulty_wakingup_p', 'nightmares_p', 'fallsleeptime',
            'wakeuptime', 'wakesleepcalc', 'chronotype'],

        'Medical/Somatic Problems': ['medhx_p', 'medhx_doctorvisit_p', 'medhx_emergencyroom_p', 'pain_last_month_k',
            'seriously_sick_lastyear_k', 'body_aches_p', 'frequent_headaches_p', 'nausea_p', 'eye_problems_p',
            'skin_problems_p', 'frequent_stomachaches_p', 'vomiting_p', 'constipated_p', 'bad_toilet_habits_p',
            'wets_bed_p'],

        'Externalizing': ['argues_p', 'stubborn_p', 'temper_tantrums_p', 'bullies_others_p',
            'destroys_own_things_p', 'destroys_others_things_p', 'disobedient_home_p', 'disobedient_school_p',
            'breaks_rules_p', 'fights_p', 'lying_p', 'attacks_others_p', 'steals_home_p', 'steals_outside_p',
            'threatens_others_p', 'whines_p', 'demands_attention_p'],

        'ADHD': ['cant_concentrate_p', 'doesnt_finish_p', 'hyperactive_p', 'impulsive_p', 'easily_distracted_p'],


        'Anxiety': ['social_fear_present_PK', 'worries_p', 'clings_to_adults_p', 'nervous_general_p',
            'nervous_twitching_p', 'fears_excl_school_p', 'fears_school_p', 'fears_being_bad_p', 'paranoid_p'],

        'Other Personality Features': ['easily_offended_p', 'blames_others_p', 'sociable_p', 'school_excitement_p',
            'not_critical_others_p', 'scared_dark_p', 'disagreeable_p', 'goal_continuity_p', 'up_negative_urgency_ss_k',
            'up_lackofplanning_ss_k', 'up_sensationseeking_ss_k', 'up_positiveurgency_ss_k', 'up_lackperseverance_ss_k',
            'bis_behav_inhibition_ss_k', 'bis_reward_responsive_ss_k', 'bis_drive_ss_k', 'bis_funseeking_ss_k',
            'loquacious_p', 'bragadocious_p', 'easily_jealous_p', 'wishes_other_sex_p', 'easily_embarrassed_p',
            'secretive_p', 'perfectionist_p', 'sex_orient_y'],

        'Child Mood Issues': ['enjoys_little_p', 'sad_p', 'suicidal_p', 'guilty_p', 'withdrawn_p'],

        'Child Delta': ["delta_anxdisord_D_p",
            "delta_adhd_D_p", "delta_not_liked_p", "delta_doesnt_get_along_p",
            "delta_family_conflict_ss_p", "delta_family_conflict_ss_k", "delta_bad_grades", "delta_social_problems_D_p",
            "delta_somatic_problems_D_p", "delta_adhd_D_p",
            "delta_bad_diet_p", "delta_atschool_total_problems_ss_t", "delta_b_lifeevents_ss_p"],


# Parent Bridge>>>


        'Parent Delta': [
            'delta_parent_sleep_trouble_p', 'delta_parent_worries_about_family_p',
            'delta_parent_friendship_trouble_p', 'delta_parent_poor_work_performance_p',
            'delta_parent_aches_pains_p', 'delta_parent_not_liked_by_others_p',
            'delta_parent_feels_overwhelmed_p', 'delta_parent_feels_unloved_p',
            'delta_parent_bad_family_relationship_p', 'delta_parent_worries_about_future_p',
            'delta_parent_worries_a_lot_p', 'delta_parent_depressed_p',
            'delta_parent_concentration_trouble_p', 'delta_parent_stubborn_irritable_p',
            'delta_parent_drinks_too_much_p', 'delta_parent_financial_failures_p',
            'delta_parent_meets_family_duties_p', 'delta_parent_planning_trouble_p',
            'delta_parent_bad_relationships_p', 'delta_parent_drug_use_p'
        ],

        'Parent Mood Issues': [
            'parent_cries_a_lot_p', 'parent_lonely_p', 'parent_feels_unloved_p', 'parent_paranoid_p',
            'parent_feels_inferior_p', 'parent_depressed_p', 'parent_feels_unsuccessful_p',
            'parent_tired_no_reason_p', 'parent_low_energy_p', 'parent_sleep_trouble_p',
            'parent_enjoys_little_p', 'parent_sudden_mood_changes_p', 'parent_suicidal_thoughts_p', 'parent_happy_person_p'
        ],

        'Parent Anxiety': [
            'parent_fearful_or_anxious_p', 'parent_specific_fears_p', 'parent_fear_of_bad_thoughts_p',
            'parent_worries_about_future_p', 'parent_worries_about_family_p', 'parent_worries_a_lot_p',
            'parent_relationship_concerns_p'
        ],

        'Parent Cognitive and Attention Issues': [
            'parent_forgetful_p', 'parent_concentration_trouble_p', 'parent_confused_p', 'parent_planning_trouble_p',
            'parent_not_good_at_details_p', 'parent_obsessive_thoughts_p',
            'parent_repeats_acts_p', 'parent_max_effort_p', 'parent_disorganized_p', 'parent_loses_things_p',
            'parent_decision_trouble_p', 'parent_priority_trouble_p'
        ],

        'Parent Personality': [
            'parent_bragging_p', 'parent_honest_p', 'parent_secretive_p', 'parent_stubborn_irritable_p',
            'parent_clumsy_p', 'parent_strange_thoughts_p', 'parent_self_conscious_p', 'parent_uses_opportunities_p',
            'parent_louder_than_others_p', 'parent_yells_a_lot_p', 'parent_shy_or_timid_p', 'parent_restless_p',
            'parent_easily_bored_p', 'parent_hyperactive_p', 'parent_talks_too_much_p', 'parent_avoids_talking_p',
            'parent_prefers_to_be_alone_p', 'parent_no_guilt_p', 'parent_sense_of_fairness_p',
            'parent_high_sleep_duration_p'
        ],


        'Family Drug Use': [
          'hallucinogen_use_history_B_p', 'hallucinogen_current_B_p', 'sedative_hypnotic_anxiolytic_use_B_p',
          'father_alcohol', 'mother_alcohol', 'father_druguse', 'mother_druguse',
          'cigs_during_pregnancy_p', 'alcohol_during_pregnancy_p', 'weed_during_pregnancy_p',
          'cocaine_during_pregnancy_p', 'heroin_during_pregnancy_p',
          'prescriptionmed_pregnancy_p', 'cigs_before_pregnancy_p', 'alcohol_before_pregnancy_p',
          'weed_before_pregnancy_p', 'cocaine_before_pregnancy_p', 'heroin_before_pregnancy_p',
          'drugs_before_pregnancy_p', 'drinksperweek_during_pregnancy_p', 'drugs_during_pregnancy_p',
          'caffeine_during_pregnancy_p', 'parent_tobacco_use_frequency_p', 'parent_drug_use_p', 'parent_drinks_too_much_p',
          'parent_drinks_frequency_p', 'parent_drunk_days_p', 'parent_drug_days_nonmedical_p'],

        'SES & Mobility': ['parent_education', 'parent_income', 'struggle_food_expenses', 'positive_finance_p', 'parent_work_absences_p', 'parent_financial_trouble_p', 'parent_fails_to_pay_debts_p'],

        'Dynamic Cognitive Control Parameters': ['sst_mean_absdelta', 'sst_dG', 'sst_theta1', 'sst_tG', 'sst_mu', 'sst_aG1', 'sst_median_absdelta', 'sst_kappa0', 'sst_gamma0', 'sst_pp', 'sst_aG2', 'sst_aS', 'sst_dS', 'sst_absdeltaCV', 'sst_pdrgCV', 'sst_mean_CV', 'sst_mean_PDR', 'sst_sM', 'sst_absdeltaMax', 'sst_median_ssrt', 'sst_bG', 'sst_betasCV', 'sst_absdeltaRV'],

# ***** Bridge back into Bio Objective.

}

In [11]:
import pandas as pd
import numpy as np

# Read the cleaned/extracted panel CSV, then keep only the rows whose
# "time" column equals 2 (the time point analysed in this notebook).
data = pd.read_csv("/content/CLEAN_ABCD_5.1_panel_20250321.csv", low_memory=False)
is_time_point_2 = data["time"] == 2
data = data.loc[is_time_point_2]

In [12]:
# Check for duplicates in grouping

# Flat list of every variable name, in declaration order, repeats included.
# (A nested comprehension avoids the quadratic list copying of `sum(lists, [])`.)
with_dupes = [name for members in variable_groups.values() for name in members]

# Unique variable names in first-appearance order. `dict.fromkeys` is used
# instead of `set` because set iteration order for strings can differ between
# interpreter runs, which would reorder the correlation-matrix columns built
# from this list.
flattened = list(dict.fromkeys(with_dupes))

# Collect every name that occurs more than once anywhere in the grouping.
duplicate_set = set()
d_seen = set()

for x in with_dupes:
    if x in d_seen:
        duplicate_set.add(x)
    else:
        d_seen.add(x)

# Bare expression so the notebook displays the duplicates as the cell output.
duplicate_set

{'bullied_on_internet_k',
 'delta_adhd_D_p',
 'overt_aggression_ss_k',
 'peer_net_protective_ss_k',
 'relational_aggression_ss_k',
 'relational_victimization_ss_k',
 'relig_importance',
 'religious_service_frequency'}

In [13]:
# Restrict the data to numeric columns first, then keep only the grouped
# variables that actually exist among those numeric columns. Checking
# membership against the numeric subset (rather than all of `data.columns`)
# avoids a KeyError when a listed variable is present but non-numeric.
numeric_data = data.select_dtypes(include='number')
numeric = numeric_data[[f for f in flattened if f in numeric_data.columns]] # Get numeric columns

# Pairwise column correlations (pandas' default method, i.e. Pearson).
corr_matrix = numeric.corr() # Calculate correlation matrix

In [14]:
# Create variable grouping without duplicates

# Each variable is kept only in the first group (in declaration order) that
# lists it; groups left with no variables are omitted entirely.
no_dupes_vars = {}
seen_vars = set()

for group_name, members in variable_groups.items():
  # `dict.fromkeys` removes repeats within the group while preserving the
  # declared order, so the resulting vertex order is the same on every run
  # (a plain `set` difference would not guarantee that).
  new_vars = [name for name in dict.fromkeys(members) if name not in seen_vars]
  if new_vars:
    no_dupes_vars[group_name] = new_vars
    seen_vars.update(new_vars)

In [15]:
# Set of every variable name that appears in any group.
available = {name for members in variable_groups.values() for name in members}

In [16]:
# Build the edge list from the strictly-lower triangle of the correlation
# matrix, so each unordered pair is visited once and the diagonal is skipped.
# An edge (row variable, column variable, correlation) is emitted only when
# both variables belong to a group and |correlation| >= threshold.

df = corr_matrix
threshold = 0.25
columns = list(df.columns)
connections_list = []

for i, row_name in enumerate(columns):
  if row_name not in available:
    continue

  for j in range(i):
    col_name = columns[j]
    if col_name not in available:
      continue

    value = df.iloc[i, j]
    # A NaN correlation fails this comparison and is therefore dropped.
    if abs(value) >= threshold:
      connections_list.append((row_name, col_name, value))

In [17]:
# Build the hierarchy edge list and the vertex list.
# hierarchy_df holds (parent, child) pairs: origin -> group, then group -> variable.
# vertices holds (name, group) pairs; the origin and each group node use their
# own name as their group.

# Root plus one node per group, in group order.
hierarchy_df = [("origin", group) for group in no_dupes_vars]
vertices = [("origin", "origin")] + [(group, group) for group in no_dupes_vars]

# Then one leaf per variable, attached to its group.
for group, members in no_dupes_vars.items():
  hierarchy_df.extend((group, member) for member in members)
  vertices.extend((member, group) for member in members)

hierarchy_df

[('origin', 'Residential Characteristics'),
 ('origin', 'Ethnicity/Nationality'),
 ('origin', 'Diet/Nutrition'),
 ('origin', 'Physical Activity/Features'),
 ('origin', 'Technology Use'),
 ('origin', 'Religion'),
 ('origin', 'Family Dynamics & Parenting'),
 ('origin', 'Parent Social Functioning'),
 ('origin', 'Social Relationship Quality'),
 ('origin', 'School Dynamics'),
 ('origin', 'Adverse Life Events'),
 ('origin', 'Cognitive Task Outcomes'),
 ('origin', 'Sleep Problems'),
 ('origin', 'Medical/Somatic Problems'),
 ('origin', 'Externalizing'),
 ('origin', 'ADHD'),
 ('origin', 'Anxiety'),
 ('origin', 'Other Personality Features'),
 ('origin', 'Child Mood Issues'),
 ('origin', 'Child Delta'),
 ('origin', 'Parent Delta'),
 ('origin', 'Parent Mood Issues'),
 ('origin', 'Parent Anxiety'),
 ('origin', 'Parent Cognitive and Attention Issues'),
 ('origin', 'Parent Personality'),
 ('origin', 'Family Drug Use'),
 ('origin', 'SES & Mobility'),
 ('origin', 'Dynamic Cognitive Control Parameters')

In [18]:
# Convert the three lists to DataFrames and write each one to a CSV file in
# the working directory (no index column).

hierarchy_pandas_df = pd.DataFrame(hierarchy_df, columns=['from', 'to'])
connections_data_frame = pd.DataFrame(connections_list, columns=['from', 'to', 'value'])
vertex_df = pd.DataFrame(vertices, columns=['name', 'group'])

for frame, filename in (
    (hierarchy_pandas_df, 'hierarchy_df.csv'),
    (connections_data_frame, 'connections_df.csv'),
    (vertex_df, 'vertex_df.csv'),
):
  frame.to_csv(filename, index=False)