In [1]:
import os
import seaborn as sns
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

# Reload imported modules before each cell executes, so edits to project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
## RUN THIS CELL ##
# Builds the merged dataframe `df` that every plotting cell below reads.
from hbn.constants import Defaults
from hbn.scripts import preprocess_phenotype, make_phenotype_specs
from hbn.data import make_dataset
from hbn.features.feature_selection import phenotype_features

# Preprocess the phenotype data
preprocess_phenotype.run()

# Build the phenotype specs
# (presumably writes the spec file loaded below from Defaults.FEATURE_DIR — TODO confirm)
make_phenotype_specs.run()

# HARDCODE PARTICIPANTS + FEATURE_SPEC
# NOTE(review): the first two labels are spelled with a space
# ('ADHD-Combined Type') while the last two use underscores; this cell's
# output lists 'ADHD-Combined_Type' / 'ADHD-Inattentive_Type'. Confirm that
# get_participants accepts both spellings.
participants = make_dataset.get_participants(
                            split='all', 
                            disorders=['ADHD-Combined Type', 
                                        'ADHD-Inattentive Type', 
                                        'ADHD-Hyperactive_Impulsive_Type', 
                                        'No_Diagnosis_Given']
                                        )

# Spec file for the parent intake interview features
feature_spec = 'features-Parent_Measures-Interview_of_Emotional_and_Psychological_Function-Intake_Interview-spec.json'

# Load the intake-interview features for the selected participants.
# drop_identifiers=False: the 'Identifiers' column is the merge key below.
df = phenotype_features(
                        target_spec=None,
                        feature_spec=os.path.join(Defaults.FEATURE_DIR, feature_spec),
                        participants=participants,
                        preprocess=False,
                        drop_identifiers=False
                        )

# get summary of clinical diagnosis + other demographics
dx = make_dataset.make_summary(save=False)
# NOTE(review): underscore-prefixed helper called from outside its module —
# consider exposing a public wrapper.
dx = make_dataset._add_race_ethnicity(dataframe=dx)

# get data from intake interview and merge with clinical summary
# (merge defaults to an inner join, so identifiers missing from either frame are dropped)
df = df.merge(dx, on='Identifiers')


phenotypic data have already been parsed...
phenotypic data have already been parsed...
phenotypic data have already been parsed...
phenotypic data have already been parsed...


created new clinical diagnosis file


writing train and test participants to file for ADHD
writing train and test participants to file for Anxiety_Disorders
writing train and test participants to file for Autism_Spectrum_Disorder
writing train and test participants to file for Bipolar_and_Related_Disorders
writing train and test participants to file for Depressive_Disorders
writing train and test participants to file for Disruptive__Impulse_Control_and_Conduct_Disorders
writing train and test participants to file for Elimination_Disorders
writing train and test participants to file for Feeding_and_Eating_Disorders
could not write out train and test participants for Gender_Dysphoria -- likely too few samples
could not write out train and test participants for Neurocognitive_Disorders -- likely too few samples
writing train and test participants to file for Neurodevelopmental_Disorders
writing train and test participants to file for No_Diagnosis_Given
writing train and test participants to file for No_Diagnosis_Given:_Incomp

writing train and test participants to file for Substance_Related_and_Addictive_Disorders
writing train and test participants to file for Trauma_and_Stressor_Related_Disorders
writing train and test participants to file for ADHD-Combined_Type
writing train and test participants to file for ADHD-Hyperactive_Impulsive_Type
writing train and test participants to file for ADHD-Inattentive_Type
writing train and test participants to file for Acute_Stress_Disorder
writing train and test participants to file for Adjustment_Disorders
writing train and test participants to file for Agoraphobia
could not write out train and test participants for Alcohol_Use_Disorder -- likely too few samples
writing train and test participants to file for Autism_Spectrum_Disorder
could not write out train and test participants for Avoidant_Restrictive_Food_Intake_Disorder -- likely too few samples
writing train and test participants to file for Bipolar_I_Disorder
could not write out train and test participants f

writing train and test participants to file for Encopresis
writing train and test participants to file for Enuresis
could not write out train and test participants for Excoriation_(Skin-Picking)_Disorder -- likely too few samples
could not write out train and test participants for Gender_Dysphoria_in_Adolescents_and_Adults -- likely too few samples
could not write out train and test participants for Gender_Dysphoria_in_Children -- likely too few samples
writing train and test participants to file for Generalized_Anxiety_Disorder
writing train and test participants to file for Intellectual_Disability-Mild
writing train and test participants to file for Intellectual_Disability-Moderate
could not write out train and test participants for Intellectual_Disability-Severe -- likely too few samples
writing train and test participants to file for Intermittent_Explosive_Disorder
writing train and test participants to file for Language_Disorder
writing train and test participants to file for Majo

writing train and test participants to file for Other_Specified_Trauma-_and_Stressor-Related_Disorder
writing train and test participants to file for Panic_Disorder
could not write out train and test participants for Parent-Child_Relational_Problem -- likely too few samples
writing train and test participants to file for Persistent_(Chronic)_Motor_or_Vocal_Tic_Disorder
writing train and test participants to file for Persistent_Depressive_Disorder_(Dysthymia)
writing train and test participants to file for Posttraumatic_Stress_Disorder
writing train and test participants to file for Provisional_Tic_Disorder
could not write out train and test participants for Reactive_Attachment_Disorder -- likely too few samples
could not write out train and test participants for Schizophrenia -- likely too few samples
could not write out train and test participants for Selective_Mutism -- likely too few samples
writing train and test participants to file for Separation_Anxiety
writing train and test pa

spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-NIH_Toolbox
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Adaptive_Cognitive_Evaluation
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Temporal_Discounting_Task
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Wechsler_Intelligence_Scale_for_Children_-_V
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Wechsler_Adult_Intelligence_Scale-IV
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Kaufman_Brief_Intelligence_Test-II
spec file and features saved to disk for features-Child_Measures-Cognitive_Testing-Wechsler_Individual_Achievement_Test_-_III
spec file and features saved to disk for features-Child_Measures-Language_Tasks-Clinical_Evaluation_of_Language_Fundamentals
spec file and features saved to disk for features-Child_Measures-L

### Basic Demographics

### What is the race breakdown of children with adhd? 

In [3]:
# Number of participants for each (DX_01 diagnosis, race category) pair.
demographics = (
    df.groupby(['DX_01', 'PreInt_Demos_Fam,Child_Race_cat'])['Identifiers']
    .count()
    .reset_index()
)

# One bar per diagnosis, coloured by race category.
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="PreInt_Demos_Fam,Child_Race_cat",
)
fig.show()

### What is the sex breakdown of children with adhd? 
#### largest M/F ratios are combined and hyperactive, but not inattentive

In [4]:
# Number of participants for each (DX_01 diagnosis, sex) pair.
demographics = (
    df.groupby(['DX_01', 'Sex'])['Identifiers']
    .count()
    .reset_index()
)

# One bar per diagnosis, coloured by sex.
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="Sex",
)
fig.show()

### What is the age breakdown of children with adhd? 

In [5]:
# Number of participants for each (DX_01 diagnosis, age bracket) pair.
demographics = (
    df.groupby(['DX_01', 'Age_bracket'])['Identifiers']
    .count()
    .reset_index()
)

# One bar per diagnosis, coloured by age bracket.
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="Age_bracket",
)
fig.show()

### How many comorbidities do children with adhd have?
#### Girls have more comorbidities on average than boys (except for impulsive type)

In [6]:
# Mean value of the 'comorbidities' column for each (DX_01 diagnosis, sex) pair.
demographics = (
    df.groupby(['DX_01', 'Sex'])['comorbidities']
    .mean()
    .reset_index()
)

# One bar per diagnosis, coloured by sex.
fig = px.bar(
    demographics,
    x="DX_01",
    y="comorbidities",
    color='Sex',
)
fig.show()

### How many comorbidities do children with adhd have?
#### Children over10 have more comorbidities on average than children under10

In [7]:
# Mean value of the 'comorbidities' column for each (DX_01 diagnosis, age bracket) pair.
demographics = (
    df.groupby(['DX_01', 'Age_bracket'])['comorbidities']
    .mean()
    .reset_index()
)

# One bar per diagnosis, coloured by age bracket.
fig = px.bar(
    demographics,
    x="DX_01",
    y="comorbidities",
    color='Age_bracket',
)
fig.show()

### get data from parent intake interview

### What % of children with adhd have parents with adhd? Very few!

### Previous diagnoses
#### Many children with adhd have been previously diagnosed with a psych/learning disorder

In [8]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed Past_DX values and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({'PreInt_TxHx,Past_DX': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup with a past diagnosis
    # (assumes Past_DX is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp['PreInt_TxHx,Past_DX'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### about 25% of children with adhd are currently taking psych medication

In [9]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed current/past medication values and
    # number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({
            'PreInt_TxHx,psych_meds_cur': 'sum',
            'PreInt_TxHx,psych_meds_past': 'sum',
            'Identifiers': 'count',
        })
        .reset_index()
    )
    # Shares of the subgroup (assumes both columns are coded 0/1 — TODO confirm).
    tmp['percent_curr'] = tmp['PreInt_TxHx,psych_meds_cur'].div(tmp['Identifiers'])
    # Computed for inspection only; the chart below shows current use.
    tmp['percent_past'] = tmp['PreInt_TxHx,psych_meds_past'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent_curr',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### few children had immunization reactions

In [10]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'immunization_reaction'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_TxHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_TxHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### 10-20% of children have had food allergies

In [11]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'food_allergy'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_TxHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_TxHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### Children have attended about 2 schools on average

In [12]:
colors = ['Sex', 'Age_bracket']
var = 'number_schools'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): mean of the measure; the 'Identifiers' count
    # is the number of participants in the subgroup.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    # No `percent` column here: dividing a group mean by the group size is not
    # a proportion, and the chart plots the mean directly.

    fig = px.bar(
        tmp,
        x="DX_01",
        y=f'PreInt_EduHx,{var}',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Zoom the y-axis to 1-4. NOTE(review): bars no longer start at zero, so
    # differences between groups look larger than they are.
    fig.update_yaxes(range=[1, 4])
    fig.show()

### 50-60% of children have an individualized education plan
#### more children over10 with hyperactive/impulsive have an IEP but more children under10 with inattentive have an IEP

In [13]:
colors = ['Sex', 'Age_bracket']
var = 'IEP'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the y-axis to the 0.1-0.7 window.
    fig.update_yaxes(range=[.1, .7])
    fig.show()

### learning disability?
#### few children with adhd diagnosed with a learning disability

In [14]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'learning_disability'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### neuropsych testing? pretty low numbers ...

In [15]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'NeuroPsych'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### Recent grades (1-excellent, 5-failing)

In [16]:
colors = ['Sex','Age_bracket']
var = 'recent_grades'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): mean of the measure; the 'Identifiers' count
    # is the number of participants in the subgroup.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    # No `percent` column here: dividing a group mean by the group size is not
    # a proportion, and the chart plots the mean directly.

    fig = px.bar(
        tmp,
        x="DX_01",
        y=f'PreInt_EduHx,{var}',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Zoom the y-axis to 1-3. NOTE(review): bars no longer start at zero, so
    # differences between groups look larger than they are.
    fig.update_yaxes(range=[1, 3])
    fig.show()

### number of friends

In [17]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'number_friends'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): mean of the measure; the 'Identifiers' count
    # is the number of participants in the subgroup.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    # No `percent` column here: dividing a group mean by the group size is not
    # a proportion, and the chart plots the mean directly.

    fig = px.bar(
        tmp,
        x="DX_01",
        y=f'PreInt_EduHx,{var}',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Zoom the y-axis to 1-4. NOTE(review): bars no longer start at zero, so
    # differences between groups look larger than they are.
    fig.update_yaxes(range=[1, 4])
    fig.show()

### outside school tutoring
#### 40% of children with inattentive type adhd have outside tutoring

In [18]:
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'tutor'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): summed values of the measure and number of participants.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_EduHx,{var}': 'sum', 'Identifiers': 'count'})
        .reset_index()
    )
    # Share of the subgroup (assumes the column is coded 0/1 — TODO confirm).
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'].div(tmp['Identifiers'])

    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()

### start of puberty
#### girls with adhd are starting puberty a lot earlier than boys - this tracks with children without a diagnosis. exception is boys with hyperactive adhd

In [19]:

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'puberty_age'

# One grouped bar chart per demographic split.
for color in colors:
    # Per (diagnosis, subgroup): mean of the measure; the 'Identifiers' count
    # is the number of participants in the subgroup.
    tmp = (
        df.groupby(['DX_01', color])
        .agg({f'PreInt_DevHx,{var}': 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    # No `percent` column here: dividing a group mean by the group size is not
    # a proportion, and the chart plots the mean directly.

    fig = px.bar(
        tmp,
        x="DX_01",
        y=f'PreInt_DevHx,{var}',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Zoom the y-axis to 8-12. NOTE(review): bars no longer start at zero, so
    # differences between groups look larger than they are.
    fig.update_yaxes(range=[8, 12])
    fig.show()

### girls with hyperactive/impulsive adhd are starting menstruation earlier than other subtypes

In [20]:
var = 'menstruation_age'

# Per diagnosis: mean of the measure and number of participants.
# NOTE(review): rows are not filtered by sex here; this presumably relies on
# the column being empty for boys — confirm.
tmp = (
    df.groupby(['DX_01'])
    .agg({f'PreInt_DevHx,{var}': 'mean', 'Identifiers': 'count'})
    .reset_index()
)

fig = px.bar(
    tmp,
    x="DX_01",
    y=f'PreInt_DevHx,{var}',
    orientation='v',
    barmode="group",
)
# Restrict the y-axis to the 10-12 window.
fig.update_yaxes(range=[10, 12])
fig.show()