In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

In [2]:
# Initialize file paths.
# Inputs are looked up in the current user's ~/Downloads folder instead of a
# hard-coded /Users/<name>/... path, so the notebook is not tied to one machine.
DATA_DIR = os.path.join(os.path.expanduser("~"), "Downloads")
metrics_file = os.path.join(DATA_DIR, "cons_stat.csv")
subjects_file = os.path.join(DATA_DIR, "abcd_p_demo.csv")

In [3]:
# Load the tract metrics and strip the literal 'sub-' text from the subject IDs.
# regex=False keeps this a plain substring replacement regardless of which
# default for `regex` the installed pandas version uses.
tract_metrics = pd.read_csv(metrics_file).assign(
    src_subject_id=lambda df: df["src_subject_id"].str.replace("sub-", "", regex=False)
)

# Load the demographics, rewrite 'NDAR_' to 'NDAR' in the subject IDs, and keep
# only rows whose eventname is 'baseline_year_1_arm_1'. The original row index
# is kept as-is (no reset).
demo_data = (
    pd.read_csv(subjects_file)
    .assign(
        src_subject_id=lambda df: df["src_subject_id"].str.replace("NDAR_", "NDAR", regex=False)
    )
    .loc[lambda df: df["eventname"] == "baseline_year_1_arm_1"]
)

# Inner join on subject ID, bringing over only the two demographic columns used
# downstream. validate="many_to_one" makes pandas raise a MergeError if a subject
# appears more than once on the demographics side, instead of silently
# duplicating tract rows.
merged_data = pd.merge(
    tract_metrics,
    demo_data[["src_subject_id", "demo_brthdat_v2", "demo_sex_v2"]],
    on="src_subject_id",
    validate="many_to_one",
).assign(
    # Recode sex: 1.0 -> 'M', 2.0 -> 'F'. Any other value (or a missing one)
    # becomes NaN because it has no entry in the mapping.
    demo_sex_v2=lambda df: df["demo_sex_v2"].map({1.0: "M", 2.0: "F"})
)


In [4]:
# Display the demographics table as it stands after the baseline-event filter.
demo_data

Unnamed: 0,src_subject_id,eventname,demoi_p_select_language___1,demo_prim,demo_brthdat_v2,demo_ed_v2,demo_adopt_agex_v2,demo_adopt_agex_v2_bl_dk,demo_sex_v2,demo_gender_id_v2,...,demo_nat_lang_3_yrs_other_p_14,demo_nat_lang_3_p,demo_nat_lang_3_yrs_eng_p___1,demo_nat_lang_3_yrs_eng_p___2,demo_nat_lang_3_yrs_eng_p___3,demo_nat_lang_3_yrs_eng_p___4,demo_nat_lang_3_yrs_eng_p___5,demo_nat_lang_3_yrs_eng_p___6,race_ethnicity,acs_raked_propensity_score
0,NDARINV003RTV85,baseline_year_1_arm_1,0,1.0,10.0,5.0,,,2.0,2.0,...,,,,,,,,,1.0,466.092707
4,NDARINV005V6D2C,baseline_year_1_arm_1,1,1.0,10.0,4.0,,,2.0,2.0,...,,,,,,,,,3.0,520.488325
8,NDARINV007W6H7B,baseline_year_1_arm_1,0,2.0,10.0,4.0,,,1.0,1.0,...,,,,,,,,,1.0,479.185338
11,NDARINV00BD7VDC,baseline_year_1_arm_1,0,1.0,9.0,4.0,,,1.0,1.0,...,,,,,,,,,1.0,414.643009
14,NDARINV00CY2MDM,baseline_year_1_arm_1,0,1.0,10.0,5.0,,,1.0,1.0,...,,,,,,,,,1.0,1433.061575
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48785,NDARINVZZNX6W2P,baseline_year_1_arm_1,0,1.0,10.0,5.0,,,1.0,1.0,...,,,,,,,,,1.0,593.431278
48790,NDARINVZZPKBDAC,baseline_year_1_arm_1,0,2.0,9.0,4.0,,,2.0,2.0,...,,,,,,,,,1.0,568.508011
48795,NDARINVZZZ2ALR6,baseline_year_1_arm_1,0,2.0,10.0,5.0,,,2.0,2.0,...,,,,,,,,,5.0,164.792483
48800,NDARINVZZZNB0XC,baseline_year_1_arm_1,0,1.0,9.0,3.0,,,2.0,2.0,...,,,,,,,,,3.0,1119.800572


In [None]:
# Display the merged tract-metrics / demographics table.
merged_data

In [None]:
# Save the merged table (without the index column) to the current user's
# ~/Downloads folder; the path is derived from the home directory rather than
# hard-coded to one machine.
merged_data.to_csv(
    os.path.join(os.path.expanduser("~"), "Downloads", "merged_data.csv"),
    index=False,
)

### Mean Tract Length and Total Tract Volume, separated by age and sex

In [None]:
# Two side-by-side panels; both group by demo_brthdat_v2 on the x axis and
# split each group by demo_sex_v2.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# One entry per panel: (axes, y column, panel title, y-axis label).
panel_specs = [
    (ax1, "mean length(mm)", "Mean Tract Length by Age and Sex", "Mean Length"),
    (ax2, "total volume(mm^3)", "Total Tract Volume by Age and Sex", "Total Volume"),
]

for panel_ax, y_column, panel_title, y_label in panel_specs:
    sns.boxplot(
        data=merged_data,
        x="demo_brthdat_v2",
        y=y_column,
        hue="demo_sex_v2",
        ax=panel_ax,
    )
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Age (years)")
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()

### Mean Tract Length, separated by sex

In [5]:
# Box plot of mean tract length for each sex, drawn with plotly.
# `go` and `make_subplots` come from the notebook's top import cell.

# Per-sex trace styling: (sex label, fill/marker colour, box outline colour).
sex_box_styles = [
    ("F", "rgb(229,134,6)", "rgb(50,50,50)"),
    ("M", "rgb(29,90,120)", "rgb(0,0,0)"),
]

fig = make_subplots(rows=1, cols=1)

for sex_label, fill_color, outline_color in sex_box_styles:
    # Select this sex's values once; rows whose demo_sex_v2 is neither 'F' nor 'M'
    # match no label and are therefore not plotted.
    lengths = merged_data.loc[merged_data["demo_sex_v2"] == sex_label, "mean length(mm)"]
    fig.add_trace(
        go.Box(
            # Every point of a trace shares one x category so each sex gets one box.
            x=[sex_label] * len(lengths),
            y=lengths,
            name=sex_label,
            marker_color=fill_color,
            fillcolor=fill_color,
            line=dict(color=outline_color),
            boxpoints="outliers",
            jitter=0.1,
            pointpos=0,
            width=0.5,
            showlegend=False,
        ),
        row=1,
        col=1,
    )

fig.update_layout(
    xaxis=dict(title="Sex", title_font=dict(size=18), tickfont=dict(size=18)),
    yaxis=dict(
        title="Mean Length (mm)",
        title_font=dict(size=18),
        gridcolor="lightgray",
        gridwidth=1,
        zerolinecolor="lightgray",
        tickfont=dict(size=18),
    ),
    plot_bgcolor="white",
    boxmode="group",
    boxgroupgap=0.1,
    height=400,
    width=300,
    margin=dict(l=50, r=50, t=10, b=50),
)

fig.show()
