# Generate paper figures

Run this notebook to reproduce all the figures presented in the paper.

### Library imports and data loading

In [None]:
# Import 
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Result tables used by the figures below. Each CSV path is declared and
# immediately loaded into its DataFrame, grouped by experiment.

# --- Sociodemographic data only ---
sociodemograph_SVC_sex = './results_paper/sociodemograph_SVC_sex.csv'
df_soc_SVC_sex = pd.read_csv(sociodemograph_SVC_sex)
sociodemograph_SVC_ADHD = './results_paper/sociodemograph_SVC_adhd.csv'
df_soc_SVC_ADHD = pd.read_csv(sociodemograph_SVC_ADHD)
sociodemograph_RF_sex = './results_paper/sociodemograph_RF_sex.csv'
df_soc_RF_sex = pd.read_csv(sociodemograph_RF_sex)
sociodemograph_RF_ADHD = './results_paper/sociodemograph_RF_adhd.csv'
df_soc_RF_ADHD = pd.read_csv(sociodemograph_RF_ADHD)
sociodemograph_XGB_sex = './results_paper/sociodemograph_XGB_sex.csv'
df_soc_XGB_sex = pd.read_csv(sociodemograph_XGB_sex)
sociodemograph_XGB_ADHD = './results_paper/sociodemograph_XGB_adhd.csv'
df_soc_XGB_ADHD = pd.read_csv(sociodemograph_XGB_ADHD)

# --- Sociodemographic + connectivity data ---
sociodemograph_conn_SVC_sex = './results_paper/sociodemograph_conn_SVC_sex.csv'
df_soc_conn_SVC_sex = pd.read_csv(sociodemograph_conn_SVC_sex)
sociodemograph_conn_SVC_ADHD = './results_paper/sociodemograph_conn_SVC_adhd.csv'
df_soc_conn_SVC_ADHD = pd.read_csv(sociodemograph_conn_SVC_ADHD)
sociodemograph_conn_RF_sex = './results_paper/sociodemograph_conn_RF_sex.csv'
df_soc_conn_RF_sex = pd.read_csv(sociodemograph_conn_RF_sex)
sociodemograph_conn_RF_ADHD = './results_paper/sociodemograph_conn_RF_adhd.csv'
df_soc_conn_RF_ADHD = pd.read_csv(sociodemograph_conn_RF_ADHD)
sociodemograph_conn_XGB_sex = './results_paper/sociodemograph_conn_XGB_sex.csv'
df_soc_conn_XGB_sex = pd.read_csv(sociodemograph_conn_XGB_sex)
sociodemograph_conn_XGB_ADHD = './results_paper/sociodemograph_conn_XGB_adhd.csv'
df_soc_conn_XGB_ADHD = pd.read_csv(sociodemograph_conn_XGB_ADHD)

# --- Single-output models, manual feature selection ---
# File-name mapping kept from the original notes:
#   file 4_3_SVC_sex  <- conn_manual_single_SVC_sex.csv
#   [...]
#   file 4_3_SVC_ADHD <- sociodemograph_manual_single_SVC_adhd.csv
conn_manual_single_SVC_sex = './results_paper/conn_manual_single_SVC_sex.csv'
df_conn_manual_single_SVC_sex = pd.read_csv(conn_manual_single_SVC_sex)
conn_manual_single_RF_sex = './results_paper/conn_manual_single_RF_sex.csv'
df_conn_manual_single_RF_sex = pd.read_csv(conn_manual_single_RF_sex)
conn_manual_single_XGB_sex = './results_paper/conn_manual_single_XGB_sex.csv'
df_conn_manual_single_XGB_sex = pd.read_csv(conn_manual_single_XGB_sex)
sociodemograph_manual_single_SVC_ADHD = './results_paper/sociodemograph_manual_single_SVC_adhd.csv'
df_soc_manual_single_SVC_ADHD = pd.read_csv(sociodemograph_manual_single_SVC_ADHD)
sociodemograph_manual_single_RF_ADHD = './results_paper/sociodemograph_manual_single_RF_adhd.csv'
df_soc_manual_single_RF_ADHD = pd.read_csv(sociodemograph_manual_single_RF_ADHD)
sociodemograph_manual_single_XGB_ADHD = './results_paper/sociodemograph_manual_single_XGB_adhd.csv'
df_soc_manual_single_XGB_ADHD = pd.read_csv(sociodemograph_manual_single_XGB_ADHD)

# --- Single-output models, manual + automatic feature selection ---
# File-name mapping kept from the original notes:
#   file 4_4_SVC_sex_mista <- conn_mixed_single_SVC_sex.csv
#   file 4_4_RF_ADHD_mista <- sociodemograph_mixed_single_RF_adhd.csv
conn_mixed_single_SVC_sex = './results_paper/conn_mixed_single_SVC_sex.csv'
df_conn_mixed_single_SVC_sex = pd.read_csv(conn_mixed_single_SVC_sex)
sociodemograph_mixed_single_RF_ADHD = './results_paper/sociodemograph_mixed_single_RF_adhd.csv'
df_soc_mixed_single_RF_ADHD = pd.read_csv(sociodemograph_mixed_single_RF_ADHD)

### Figure 2

F1 score comparison of SVC, Random Forest, and XGBoost classification algorithms. Models were implemented using a multi-output classifier, leveraging only sociodemographic data to predict sex (orange) and ADHD (blue) targets.

In [None]:
# Figure 2: sociodemographic data only, one figure per target and one panel
# per classifier. Each panel overlays the individual F1 scores (black dots)
# on top of their box plot.

# Sex
sex_panels = [
    ('SVC', df_soc_SVC_sex),
    ('RF', df_soc_RF_sex),
    ('XGB', df_soc_XGB_sex),
]
# The keyword is `sharey`; any other spelling is forwarded to the Figure
# constructor and rejected as an unexpected keyword argument.
fig, axes = plt.subplots(1, 3, figsize=(6, 6), sharey=True)
for idx, (ax, (model_name, scores)) in enumerate(zip(axes, sex_panels)):
    sns.boxplot(y='F1_sex_score', data=scores, color='orange', ax=ax)
    sns.stripplot(y='F1_sex_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    # Only the left-most panel carries the y label; the others are blanked so
    # seaborn's default (the column name) is not repeated on every panel.
    ax.set_ylabel('F1 score [adim]' if idx == 0 else '', fontsize=14)
    ax.set_xlabel(model_name, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

# The y axis is shared, so setting the limits on one panel applies to all.
axes[-1].set_ylim(0.0, 0.8)
fig.suptitle('Sex', fontsize=16)
plt.tight_layout()
plt.show()


# ADHD
adhd_panels = [
    ('SVC', df_soc_SVC_ADHD),
    ('RF', df_soc_RF_ADHD),
    ('XGB', df_soc_XGB_ADHD),
]
fig, axes = plt.subplots(1, 3, figsize=(6, 6), sharey=True)
for idx, (ax, (model_name, scores)) in enumerate(zip(axes, adhd_panels)):
    # No explicit color: the box uses seaborn's default color.
    sns.boxplot(y='F1_adhd_score', data=scores, ax=ax)
    sns.stripplot(y='F1_adhd_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    ax.set_ylabel('F1 score [adim]' if idx == 0 else '', fontsize=14)
    ax.set_xlabel(model_name, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

# The figure is rendered once, after all three panels are drawn; showing it
# earlier would display it with the XGB panel still empty.
axes[-1].set_ylim(0.7, 1.0)
fig.suptitle('ADHD', fontsize=16)
plt.tight_layout()
plt.show()

: 

### Figure 3

F1 score comparison for multi-output models, evaluating the impact of adding the connectome dataset. Sex prediction (orange) is implemented with SVC; ADHD prediction (blue) uses Random Forest.

In [None]:
# Figure 3: effect of adding the connectome data. One figure per target,
# left panel = sociodemographic data only, right panel = sociodemographic +
# connectome data. Each panel overlays the individual F1 scores (black dots)
# on top of their box plot.

# Sex with SVC
sex_panels = [
    ('without connectome', df_soc_SVC_sex),
    ('with connectome', df_soc_conn_SVC_sex),
]
# The keyword is `sharey`; any other spelling is forwarded to the Figure
# constructor and rejected as an unexpected keyword argument.
fig, axes = plt.subplots(1, 2, figsize=(6, 6), sharey=True)
# Each dataset goes to its own panel (the connectome results belong on the
# right-hand axes, not on top of the left-hand ones).
for idx, (ax, (panel_label, scores)) in enumerate(zip(axes, sex_panels)):
    sns.boxplot(y='F1_sex_score', data=scores, color='orange', ax=ax)
    sns.stripplot(y='F1_sex_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    # Only the left-most panel carries the y label; the other is blanked so
    # seaborn's default (the column name) is not shown.
    ax.set_ylabel('F1 score [adim]' if idx == 0 else '', fontsize=14)
    ax.set_xlabel(panel_label, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

# The y axis is shared, so setting the limits on one panel applies to both.
axes[-1].set_ylim(0.0, 0.8)
fig.suptitle('Sex with SVC', fontsize=16)
plt.tight_layout()
plt.show()


# ADHD with RF
# NOTE(review): the connectome table is assumed to expose the same
# 'F1_adhd_score' column as the sociodemographic-only table - confirm
# against the CSV header.
adhd_panels = [
    ('without connectome', df_soc_RF_ADHD),
    ('with connectome', df_soc_conn_RF_ADHD),
]
# A new figure must exist before anything is drawn, otherwise the first
# panel would land on the axes of the previous (already shown) figure.
fig, axes = plt.subplots(1, 2, figsize=(6, 6), sharey=True)
for idx, (ax, (panel_label, scores)) in enumerate(zip(axes, adhd_panels)):
    # No explicit color: the box uses seaborn's default color.
    sns.boxplot(y='F1_adhd_score', data=scores, ax=ax)
    sns.stripplot(y='F1_adhd_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    ax.set_ylabel('F1 score [adim]' if idx == 0 else '', fontsize=14)
    ax.set_xlabel(panel_label, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

axes[-1].set_ylim(0.7, 1.0)
fig.suptitle('ADHD with RF', fontsize=16)
plt.tight_layout()
plt.show()

### Figure 4

F1 score comparison of SVC, Random Forest, and XGBoost algorithms for single-output models. Sex prediction (orange) utilizes only connectome data with manual feature selection, while ADHD prediction (blue) uses only sociodemographic data with manual feature selection.

In [None]:
# Figure 4: single-output models with manual feature selection, one figure
# per target and one panel per classifier. Each panel overlays the individual
# F1 scores (black dots) on top of their box plot.
# NOTE(review): this cell reads 'F1_ADHD_score' (upper-case) while the
# Figure 2/3 cells read 'F1_adhd_score' - confirm both match the CSV headers.

# Sex single-output manual only
sex_panels = [
    ('SVC', df_conn_manual_single_SVC_sex),
    ('RF', df_conn_manual_single_RF_sex),
    ('XGB', df_conn_manual_single_XGB_sex),
]
# The keyword is `sharey`; any other spelling is forwarded to the Figure
# constructor and rejected as an unexpected keyword argument.
fig, axes = plt.subplots(1, 3, figsize=(6, 6), sharey=True)
for idx, (ax, (model_name, scores)) in enumerate(zip(axes, sex_panels)):
    leftmost = idx == 0
    sns.boxplot(y='F1_sex_score', data=scores, color='orange', ax=ax)
    sns.stripplot(y='F1_sex_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    # Only the left-most panel shows the y label and y tick labels.
    ax.set_ylabel('F1 score [adim]' if leftmost else '', fontsize=14)
    ax.set_xlabel(model_name, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    if leftmost:
        ax.tick_params(axis='y', labelsize=12)
    else:
        ax.tick_params(axis='y', labelleft=False)

# The y axis is shared, so setting the limits on one panel applies to all.
axes[-1].set_ylim(0.0, 0.8)
fig.suptitle('Sex', fontsize=16)
plt.tight_layout()
plt.show()


# ADHD single-output manual only
adhd_panels = [
    ('SVC', df_soc_manual_single_SVC_ADHD),
    ('RF', df_soc_manual_single_RF_ADHD),
    ('XGB', df_soc_manual_single_XGB_ADHD),
]
fig, axes = plt.subplots(1, 3, figsize=(6, 6), sharey=True)
for idx, (ax, (model_name, scores)) in enumerate(zip(axes, adhd_panels)):
    leftmost = idx == 0
    # No explicit color: the box uses seaborn's default color.
    sns.boxplot(y='F1_ADHD_score', data=scores, ax=ax)
    sns.stripplot(y='F1_ADHD_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    ax.set_ylabel('F1 score [adim]' if leftmost else '', fontsize=14)
    ax.set_xlabel(model_name, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    if leftmost:
        ax.tick_params(axis='y', labelsize=12)
    else:
        ax.tick_params(axis='y', labelleft=False)

axes[-1].set_ylim(0.7, 1.0)
fig.suptitle('ADHD', fontsize=16)
plt.tight_layout()
plt.show()

### Figure 5

F1 score comparison for single-output models, evaluating the impact of manual versus manual + automatic feature selection. Sex prediction (orange) with SVC uses only connectome data; ADHD prediction (blue) with Random Forest uses only sociodemographic data.

In [None]:
# Figure 5: single-output models, manual vs manual+automatic feature
# selection. One figure per target, one panel per selection strategy. Each
# panel overlays the individual F1 scores (black dots) on top of their box plot.
# NOTE(review): this cell reads 'F1_ADHD_score' (upper-case) while the
# Figure 2/3 cells read 'F1_adhd_score' - confirm both match the CSV headers.

# Sex with SVC with manual vs manual+automatic feature selection
sex_panels = [
    ('Only manual', df_conn_manual_single_SVC_sex),
    ('Manual+automatic', df_conn_mixed_single_SVC_sex),
]
# The keyword is `sharey`; any other spelling is forwarded to the Figure
# constructor and rejected as an unexpected keyword argument.
fig, axes = plt.subplots(1, 2, figsize=(6, 6), sharey=True)
for idx, (ax, (panel_label, scores)) in enumerate(zip(axes, sex_panels)):
    leftmost = idx == 0
    sns.boxplot(y='F1_sex_score', data=scores, color='orange', ax=ax)
    sns.stripplot(y='F1_sex_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    # Only the left-most panel shows the y label and y tick labels.
    ax.set_ylabel('F1 score [adim]' if leftmost else '', fontsize=14)
    ax.set_xlabel(panel_label, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    if leftmost:
        ax.tick_params(axis='y', labelsize=12)
    else:
        ax.tick_params(axis='y', labelleft=False)

# The y axis is shared, so setting the limits on one panel applies to both.
axes[-1].set_ylim(0.0, 0.8)
fig.suptitle('Sex', fontsize=16)
plt.tight_layout()
plt.show()


# ADHD with RF with manual vs manual+automatic feature selection
adhd_panels = [
    ('Only manual', df_soc_manual_single_RF_ADHD),
    ('Manual+automatic', df_soc_mixed_single_RF_ADHD),
]
fig, axes = plt.subplots(1, 2, figsize=(6, 6), sharey=True)
for idx, (ax, (panel_label, scores)) in enumerate(zip(axes, adhd_panels)):
    leftmost = idx == 0
    # No explicit color: the box uses seaborn's default color.
    sns.boxplot(y='F1_ADHD_score', data=scores, ax=ax)
    sns.stripplot(y='F1_ADHD_score', data=scores, color='black', jitter=True, alpha=0.7, size=8, ax=ax)
    ax.set_ylabel('F1 score [adim]' if leftmost else '', fontsize=14)
    ax.set_xlabel(panel_label, fontsize=14)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.tick_params(axis='x', labelsize=12)
    if leftmost:
        ax.tick_params(axis='y', labelsize=12)
    else:
        ax.tick_params(axis='y', labelleft=False)

axes[-1].set_ylim(0.7, 1.0)
fig.suptitle('ADHD', fontsize=16)
plt.tight_layout()
plt.show()