This file creates dataframes specifying an undirected weighted graph with edges representing calculated SHAP values between variables, in the format that `graph_from_data_frame()` in the R library igraph expects.

More specifically, the vertex dataframe specifies the vertices and their associated data (the group each variable belongs to), and the connections dataframe specifies the weighted edges connecting those vertices.

In [None]:
import pandas as pd

# Calculated SHAP values for parents.
# Each entry is (from_variable, to_variable, value); the three fields become
# the 'from', 'to' and 'value' columns of the connections dataframe.
edge_values = [
  ("parent_worries_a_lot_p", "parent_aggressive_D_p", 5.4972e-03),
  ("parent_attention_D_p", "parent_aggressive_D_p", 5.4311e-03),
  ("parent_worries_a_lot_p", "parent_avoidant_person_D_p", 4.8946e-03),
  ("parent_worries_a_lot_p", "parent_confused_p", 3.6692e-03),
  ("parent_obsessive_thoughts_p", "parent_aggressive_D_p", 3.3266e-03),
  ("parent_aggressive_D_p", "parent_somatic_problems_D_p", 3.2423e-03),
  ("parent_confused_p", "parent_somatic_problems_D_p", 3.2090e-03),
  ("parent_confused_p", "parent_avoidant_person_D_p", 3.0738e-03),
  ("parent_worries_a_lot_p", "parent_attention_D_p", 3.0641e-03),
  ("parent_obsessive_thoughts_p", "parent_attention_D_p", 3.0040e-03),
  ("parent_worries_a_lot_p", "parent_stubborn_irritable_p", 2.7371e-03),
  ("parent_worries_a_lot_p", "parent_somatic_problems_D_p", 2.7319e-03),
  ("parent_aggressive_D_p", "parent_avoidant_person_D_p", 2.7225e-03),
  ("parent_attention_D_p", "parent_somatic_problems_D_p", 2.3812e-03),
  ("parent_somatic_problems_D_p", "parent_avoidant_person_D_p", 2.2592e-03),
  ("parent_stubborn_irritable_p", "parent_somatic_problems_D_p", 2.1331e-03),
  ("delta_parent_feels_unloved_p", "parent_aggressive_D_p", 2.1303e-03),
  ("sad_p", "parent_somatic_problems_D_p", 2.1287e-03),
  ("parent_uses_opportunities_p", "parent_avoidant_person_D_p", 2.0250e-03),
  ("sad_p", "parent_confused_p", 2.0007e-03),
  ("delta_parent_stubborn_irritable_p", "parent_aggressive_D_p", 1.8830e-03),
  ("sad_p", "parent_aggressive_D_p", 1.8818e-03),
  ("parent_confused_p", "parent_aggressive_D_p", 1.8471e-03),
  ("sad_p", "delta_parent_feels_unloved_p", 1.8264e-03),
  ("sad_p", "parent_avoidant_person_D_p", 1.8094e-03),
  ("parent_obsessive_thoughts_p", "parent_avoidant_person_D_p", 1.7878e-03),
  ("parent_restless_p", "parent_avoidant_person_D_p", 1.7240e-03),
  ("parent_attention_D_p", "parent_avoidant_person_D_p", 1.6884e-03),
  ("parent_obsessive_thoughts_p", "parent_restless_p", 1.6586e-03),
  ("sad_p", "parent_obsessive_thoughts_p", 1.6395e-03),
]

# Flat list of the distinct variables that appear as an edge endpoint.
# dict.fromkeys de-duplicates while keeping first-appearance order, so the
# list (and every table derived from it) is identical on every run; a plain
# set() would yield an arbitrary order that can change between interpreter
# sessions.
shap_var_list = list(
    dict.fromkeys(
        endpoint
        for from_var, to_var, _ in edge_values
        for endpoint in (from_var, to_var)
    )
)

In [None]:
#@title Variables

# Mapping from a group label to the list of variable names assigned to it.
# A variable may be listed under more than one group (for example
# 'parent_priority_trouble_p'); the de-duplication step further down this
# file resolves such cases by keeping the first group in which it appears.
variable_groups = {
    'Family Dynamics & Parenting': ['p_comm_cohesion_ss', 'p_comm_ctrl_ss', 'p_comm_collective_efficacy_ss',
        'fam_fight_often_k', 'fam_no_open_anger_k', 'fam_throw_things_k', 'fam_no_lose_temps_k',
        'fam_criticize_often_k', 'fam_hit_each_other_k', 'fam_keep_peace_k', 'fam_try_one_up_k',
        'fam_no_raise_voices_k', 'family_not_talk_aboutfeelings_p', 'family_peaceful_p',
        'family_open_discussing_anything_p', 'family_lose_temper_rare_p', 'family_believe_not_raise_voice_p',
        'frequent_family_conflict_p', 'family_conflict_ss_p', 'family_expression_ss_p', 'family_intellectual_ss_p',
        'family_activities_ss_p', 'family_organisation_ss_p', 'parents_argue_more_p', 'family_emotionprob_p',
        'parents_divorced_p', 'death_in_family_p', 'family_move_p', 'family_conflict_ss_k',
        'parent_monitoring_ss_k', 'marital_status', 'parent_age', 'sex_P', 'num_brothers_p', 'num_sisters_p',
        'religious_service_frequency', 'relig_importance'],

    'SES & Mobility': ['parent_education', 'parent_income', 'struggle_food_expenses', 'couldnt_afford_phone',
        'couldnt_afford_rent_mortgage', 'evicted', 'gas_electric_oil_turned_off', 'parent_work_absences_p', 'parent_financial_trouble_p', 'parent_fails_to_pay_debts_p'
    ],

    'Residential Characteristics': ['neighborhood_safety_ss_p', 'neighborhood_safe_y', 'resid_density', 'resid_walkability', 'resid_prox_roads', 'resid_crime_tot', 'resid_crime_violent',
        'resid_crime_drug', 'resid_crime_dui', 'resid_lead_risk_poverty', 'resid_lead_risk_houses_perc',
        'resid_lead_risk', 'resid_no2_avg', 'resid_pm25_avg', 'resid_sexism', 'resid_sex_orient_bias',
        'resid_immigrant_bias', 'resid_racism', 'L_site_id'],

    'Child School Dynamics': ['disobeys_at_school_k', 'getalong_teachers_k', 'feelsafe_at_school_k', 'feels_smart_k',
        'enjoys_school_k', 'grades_important_k', 'school_environment_ss_k', 'school_involvement_ss_k',
        'school_disengagement_ss_k', 'bad_grades', 'repeated_grade', 'grades_dropped', 'school_detension_suspension',
        'child_newschool_p', 'finds_schoolboring_k'],

    'Child Mood': ['enjoys_little_p', 'sad_p', 'suicidal_p', 'guilty_p', 'withdrawn_p'],

    'Other Psychopathology': ['obsessions_present_B_p', 'poor_eye_contact_B_p','nightmares_B_p',
        'parent_elevated_mood_B_p', 'parent_excessive_worry_B_p', 'parent_lying_B_p', 'parent_social_anxiety_disorder_B_p',
        'parent_sleep_problem_B_p', 'parent_bulimia_B_p',
        'parent_anxdisord_D_p', 'parent_antisocial_D_p', 'parent_hyperactive_D_p',
        'parent_intrusive_thoughts_D_p', 'parent_avoidant_person_D_p',
        'd_grandfather_dep', 'd_grandmother_dep', 'm_grandfather_dep', 'm_grandmother_dep', 'father_mania', 'mother_mania',
        'father_trouble', 'parent_hospitalized_emo', 'parent_therapy_emo'],

    'Somatic Problems': [
        'parent_somatic_problems_D_p'
    ],

    'Personality': [
        'parent_bragging_p', 'parent_honest_p', 'parent_secretive_p', 'parent_stubborn_irritable_p',
        'parent_clumsy_p', 'parent_strange_thoughts_p', 'parent_self_conscious_p', 'parent_uses_opportunities_p',
        'parent_louder_than_others_p', 'parent_yells_a_lot_p', 'parent_shy_or_timid_p', 'parent_restless_p',
        'parent_easily_bored_p', 'parent_hyperactive_p', 'parent_talks_too_much_p', 'parent_avoids_talking_p',
        'parent_prefers_to_be_alone_p', 'parent_no_guilt_p', 'parent_sense_of_fairness_p',
        'parent_high_sleep_duration_p', 'parent_opposite_sex_wish_p'
    ],

    'Anxiety': [
        'parent_fearful_or_anxious_p', 'parent_specific_fears_p', 'parent_fear_of_bad_thoughts_p',
        'parent_worries_about_future_p', 'parent_worries_about_family_p', 'parent_worries_a_lot_p',
        'parent_relationship_concerns_p'
    ],

    'Impulsivity and Behavior Regulation': [
        'parent_impulsive_p', 'parent_risky_decisions_p', 'parent_drives_too_fast_p', 'parent_tardy_often_p',
        'parent_money_management_trouble_p', 'parent_priority_trouble_p', 'parent_behavior_changeable_p',
        'parent_hot_temper_p', 'parent_attention_seeking_p', 'parent_destroys_own_things_p',
        'parent_destroys_others_things_p', 'parent_doesnt_finish_tasks_p', 'parent_strange_behavior_p',
        'parent_illegal_behavior_p','parent_doesnt_eat_well_p', 'parent_self_harm_p',
    ],

    'Social Functioning': [
        'parent_bad_relationships_p', 'parent_bad_family_relationship_p', 'parent_not_liked_by_others_p',
        'parent_friendship_trouble_p', 'parent_prefers_older_people_p', 'parent_associates_with_trouble_p',
        'parent_bad_opposite_sex_relationship_p', 'parent_meets_family_duties_p', 'parent_clowns_or_shows_off_p',
        'parent_teases_others_p', 'parent_stands_up_rights_p'
    ],

    'Cognitive and Attention Issues': [
        'parent_attention_D_p',
        'parent_forgetful_p', 'parent_concentration_trouble_p', 'parent_confused_p', 'parent_planning_trouble_p',
        'parent_not_good_at_details_p', 'parent_obsessive_thoughts_p',
        'parent_repeats_acts_p', 'parent_max_effort_p', 'parent_disorganized_p', 'parent_loses_things_p',
        'parent_decision_trouble_p', 'parent_priority_trouble_p', 'parent_sees_things_p', 'parent_hears_voices_p', 'parent_speech_problems_p',
    ],

    'Delta Psychopathology': [
        'delta_parent_sleep_trouble_p', 'delta_parent_worries_about_family_p',
        'delta_parent_friendship_trouble_p', 'delta_parent_poor_work_performance_p',
        'delta_parent_aches_pains_p', 'delta_parent_not_liked_by_others_p',
        'delta_parent_feels_overwhelmed_p', 'delta_parent_feels_unloved_p',
        'delta_parent_bad_family_relationship_p', 'delta_parent_worries_about_future_p',
        'delta_parent_worries_a_lot_p',
        'delta_parent_concentration_trouble_p', 'delta_parent_stubborn_irritable_p',
        'delta_parent_drinks_too_much_p', 'delta_parent_financial_failures_p',
        'delta_parent_meets_family_duties_p', 'delta_parent_planning_trouble_p',
        'delta_parent_bad_relationships_p', 'delta_parent_drug_use_p'
    ],

    'Family Drug Use': [
        'hallucinogen_use_history_B_p', 'hallucinogen_current_B_p', 'sedative_hypnotic_anxiolytic_use_B_p',
        'father_alcohol', 'mother_alcohol', 'father_druguse', 'mother_druguse',
        'cigs_during_pregnancy_p', 'alcohol_during_pregnancy_p', 'weed_during_pregnancy_p',
        'cocaine_during_pregnancy_p', 'heroin_during_pregnancy_p',
        'prescriptionmed_pregnancy_p', 'cigs_before_pregnancy_p', 'alcohol_before_pregnancy_p',
        'weed_before_pregnancy_p', 'cocaine_before_pregnancy_p', 'heroin_before_pregnancy_p',
        'drugs_before_pregnancy_p', 'drinksperweek_during_pregnancy_p', 'drugs_during_pregnancy_p',
        'caffeine_during_pregnancy_p', 'parent_tobacco_use_p','parent_alcohol_excess_p', 'parent_alcohol_freq_p',
        'parent_alcohol_days_drunk_p', 'parent_drug_days_p'],

    # NOTE: every element needs its own trailing comma. Two adjacent string
    # literals without one are silently concatenated into a single name.
    'Ethnicity/Nationality': ['desc_african_AFR_B', 'desc_native_american_AMR_B', 'desc_alaska_native_AMR_B',
        'desc_chinese_EAS_B', 'desc_japanese_EAS_B', 'desc_korean_EAS_B', 'desc_vietnamese_EAS_B',
        'desc_european_EUR_B', 'desc_asian_indian_SAS_B', 'desc_other_south_asian_SAS_B', 'desc_latin_B',
        'pc_gene_aces1', 'pc_gene_aces2', 'pc_gene_aces3', 'pc_gene_aces4',
        'pc_gene_aces5', 'pc_gene_aces6', 'pc_gene_aces7', 'pc_gene_aces8',
        'pc_gene_aces9', 'pc_gene_aces10', 'pc_gene_aces11', 'pc_gene_aces12',
        'pc_gene_aces13', 'pc_gene_aces14', 'pc_gene_aces15', 'pc_gene_aces16',
        'pc_gene_aces17', 'pc_gene_aces18', 'pc_gene_aces19', 'pc_gene_aces20',
        'pc_gene_aces21', 'pc_gene_aces22', 'pc_gene_aces23', 'pc_gene_aces24',
        'pc_gene_aces25', 'pc_gene_aces26', 'pc_gene_aces27', 'pc_gene_aces28',
        'pc_gene_aces29', 'pc_gene_aces30', 'pc_gene_aces31', 'pc_gene_aces32'],

    'Child/Parent Adverse Life Events': ['g_lifeevents_ss_k', 'b_lifeevents_ss_k', 'b_lifeevents_affected_ss_k',
        'experienced_crime_p', 'g_lifeevents_ss_p', 'b_lifeevents_ss_p', 'b_lifeevents_affected_ss_p',
        'car_accident_hurt_p', 'big_accident_need_treatment_p', 'fire_victim_p', 'natural_disaster_victim_p',
        'terrorism_victim_p', 'war_death_witness_p', 'stabbing_shooting_witness_p',
        'stabbing_shooting_victim_community_p', 'stabbing_shooting_victim_home_p', 'beating_victim_home_p',
        'stranger_threatened_child_victim_p', 'family_threatened_child_victim_p', 'adult_family_fighting_victim_p',
        'domestic_child_sexually_abuse_victim_p', 'foreign_child_sexually_abuse_victim_p',
        'peer_child_sexually_abuse_victim_p', 'sudden_death_in_family_p'],

    'Child ADHD': ['cant_concentrate_p'],

    'Externalising': [
        'parent_aggressive_D_p', 'parent_external_D_p',
    ],

    'Child Other Personality Features': ['easily_offended_p', 'blames_others_p', 'sociable_p', 'school_excitement_p',
        'not_critical_others_p', 'scared_dark_p', 'disagreeable_p', 'goal_continuity_p', 'up_negative_urgency_ss_k',
        'up_lackofplanning_ss_k', 'up_sensationseeking_ss_k', 'up_positiveurgency_ss_k', 'up_lackperseverance_ss_k',
        'bis_behav_inhibition_ss_k', 'bis_reward_responsive_ss_k', 'bis_drive_ss_k', 'bis_funseeking_ss_k',
        'loquacious_p', 'bragadocious_p', 'easily_jealous_p', 'wishes_other_sex_p', 'easily_embarrassed_p',
        'secretive_p', 'perfectionist_p', 'sex_orient_y'
    ],

    'Child Diet/Nutrition': ['fruit_intake', 'vegetable_intake', 'protein_sources_intake', 'legume_intake',
        'added_sugar', 'sugary_beverage_freq', 'dairy_intake', 'whole_grain_intake', 'total_calories',
        'protein_intake', 'carbohydrate_intake', 'fiber_intake', 'sodium_intake', 'potassium_intake',
        'total_sugar', 'saturated_fat', 'bad_diet_p'],

}

In [None]:
# Create variable grouping without duplicates.
# A variable listed under several groups (or twice within one group) is kept
# only in the first group where it appears. The original listing order is
# preserved so that the result is the same on every run.
no_dupes_vars = {}
seen_vars = set()

for group_name, group_vars in variable_groups.items():
    fresh_vars = []
    for var_name in group_vars:
        if var_name not in seen_vars:
            seen_vars.add(var_name)
            fresh_vars.append(var_name)
    # Groups whose variables were all claimed by earlier groups are omitted.
    if fresh_vars:
        no_dupes_vars[group_name] = fresh_vars

# Create inverse mapping from each variable to its (single) group.
inv_map = {
    var_name: group_name
    for group_name, group_vars in no_dupes_vars.items()
    for var_name in group_vars
}

In [None]:
# Iterate over edge values to create the connections list.
# An edge is kept only when both of its endpoints have a group in `inv_map`;
# otherwise it is left out and the unknown endpoint(s) are recorded.

connections_list = []
missing = set()

for from_var, to_var, value in edge_values:
    # Inspect both endpoints before skipping the edge, so that an unknown
    # `to_var` is still reported when `from_var` is unknown as well.
    unknown = {name for name in (from_var, to_var) if name not in inv_map}
    if unknown:
        missing |= unknown
        continue
    connections_list.append((from_var, to_var, value))

print(missing) # Check to see if any variables were missed in the process

set()


In [None]:
# Pair every SHAP variable with the group it was assigned to in `inv_map`.
# A variable absent from `inv_map` raises KeyError here.

shap_var_map = [(var_name, inv_map[var_name]) for var_name in shap_var_list]

In [None]:
# Build the vertex rows: one (variable name, group) tuple per SHAP variable,
# in the same order as `shap_var_map`.
vertices = [(var_name, group_name) for var_name, group_name in shap_var_map]

In [None]:
# Save both tables as CSV files, without the pandas index column.

# Edge table: one row per kept edge, columns 'from', 'to', 'value'.
connections_data_frame = pd.DataFrame(
    connections_list,
    columns=['from', 'to', 'value'],
)
connections_data_frame.to_csv('connections_df.csv', index=False)

# Vertex table: one row per variable, columns 'name', 'group'.
vertex_df = pd.DataFrame(
    vertices,
    columns=['name', 'group'],
)
vertex_df.to_csv('vertex_df.csv', index=False)