In [7]:
# Execute the shared utility notebook inside this kernel.
# The helpers called further down but not defined in the visible cells
# (subject_to_anal, extract_features, get_source_data, get_anat_features,
# calc_fraction) presumably come from here -- TODO confirm.
%run /data/NNDSP/anal/analysis_notebooks/util.ipynb



In [59]:
# Execute the statistical-tests notebook inside this kernel; mult_iterations, called
# in the "Run Iterations" section, is presumably defined there -- TODO confirm.
# NOTE(review): this cell's execution count (59) is far out of sequence with its
# position in the notebook; re-verify with Restart Kernel -> Run All.
%run /data/NNDSP/anal/analysis_notebooks/stat_tests.ipynb

In [3]:
from pathlib import Path
import pandas as pd
import numpy as np
import subprocess
from datetime import date
import re

In [4]:
# Version label for this analysis
analysis_version = "2018_07_12"

# Project root; project_dir is the pathlib.Path used to derive every other path
project_dir_absolute = Path('/data/NNDSP')
project_dir = Path(project_dir_absolute)

# BIDS derivative directories
bids_fs = project_dir / 'derivatives/fs5.3_subj'   # freesurfer bids
bids_bar = project_dir / 'derivatives/bar_subj'    # baracus bids

# Brain age file (CSV)
brain_age_file = project_dir / 'anal/analysis_notebooks/phenotype_files/NNDSP_famid.csv'

# Show the absolute location of the brain age file
print(brain_age_file.absolute())

/data/NNDSP/anal/analysis_notebooks/phenotype_files/NNDSP_famid.csv


In [5]:
import os
from glob import glob

# List every "sub-*" entry in the freesurfer directory (sorted by full path)
# and keep only the last path component of each.
fs_subject_paths = sorted(glob(os.path.join(bids_fs, "sub-*")))
subj_dirs = list(map(os.path.basename, fs_subject_paths))

## NNDSP Data

### Complex

In [8]:
# Pick the subjects to analyze from the freesurfer subject list, then extract
# their features; both helpers receive the baracus directory and are not defined
# in the visible cells.
subjects_to_analyze = subject_to_anal(
    subj_dirs,
    bids_bar,
)
features = extract_features(
    subjects_to_analyze,
    bids_bar,
)

In [10]:
# Open the file with ages and drop the family-id, sex and MRN columns in one step.
dropped_columns = ['nuclear_fam_id', 'Sex', 'MRN']
df_target = pd.read_csv(brain_age_file, sep=',').drop(columns=dropped_columns)

In [11]:
# Extract the 'thickness' info from the features (together with df_target),
# ordered by MASKID.
df_data_ct = (
    get_source_data('thickness', features, df_target)
    .sort_values(by='MASKID')
)

In [12]:
# Extract the 'aseg' info from the features (together with df_target),
# ordered by MASKID.
df_data_sv = (
    get_source_data('aseg', features, df_target)
    .sort_values(by='MASKID')
)

In [13]:
# Extract the 'area' info from the features (together with df_target),
# ordered by MASKID.
df_data_ca = (
    get_source_data('area', features, df_target)
    .sort_values(by='MASKID')
)

### Simple

In [20]:
# Build the "simple" NNDSP table: MASKID, the df_target columns, intracranial
# volume and the white-matter / grey-matter / CSF fractions, ordered by MASKID.

# Anatomical measures for the entries of subjects_to_analyze
# (presumably one value per subject, in the same order -- TODO confirm in util.ipynb).
white_matter, grey_matter, csf, intra_cran_vol = get_anat_features(subjects_to_analyze, bids_bar)

# calculate white matter fraction
white_matter = calc_fraction(white_matter, intra_cran_vol)
# calculate grey matter fraction
grey_matter = calc_fraction(grey_matter, intra_cran_vol)
# calculate csf fraction
csf = calc_fraction(csf, intra_cran_vol)

# MASKID is the integer that follows the 4-character prefix of each subject name
subject_ids = pd.DataFrame({'MASKID': [int(x[4:]) for x in subjects_to_analyze]})

# Attach the measures while the rows still correspond one-to-one with
# subjects_to_analyze. Previously they were assigned positionally AFTER the inner
# merge with df_target; any subject missing from df_target changed the row count,
# giving a length error or values landing on the wrong subject.
anat_features = (
    subject_ids
    .assign(Intra_Cran_Vol = intra_cran_vol)
    .assign(WM_Frac = white_matter)
    .assign(GM_Frac = grey_matter)
    .assign(CSF_Frac = csf)
)

# Two key-based merges keep the original column order
# (MASKID, df_target columns, anatomical measures).
df_atf = (
    subject_ids
    .merge(df_target, on = 'MASKID')
    .merge(anat_features, on = 'MASKID')
    .sort_values(by='MASKID')
)

## HCP Data

In [14]:
# HCP directory
hcp_dir = Path('/data/HCP/HCP_900/s3/hcp')

# HCP derivative directories inside the project tree
hcp_bar_dir = project_dir / 'derivatives/bar_hcp_subj/'   # output dir of prepared freesurfer files
hcp_fs_dir = project_dir / 'derivatives/fs_hcp_subj/'     # HCP freesurfer data

# Show the absolute location of the HCP freesurfer directory
print(hcp_fs_dir.absolute())

/data/NNDSP/derivatives/fs_hcp_subj


In [15]:
import os
from glob import glob

# List every "sub-*" entry in hcp_bar_dir (sorted by full path) and keep only
# the last path component of each.
hcp_bar_paths = sorted(glob(os.path.join(hcp_bar_dir, "sub-*")))
hcp_subj_dirs = list(map(os.path.basename, hcp_bar_paths))

In [16]:
import matplotlib.pyplot as plt
# Read the HCP ages file and keep only the subject id and age columns,
# re-wrapped in a fresh DataFrame.
hcp_ages_csv = project_dir.joinpath('nino/HCP_ages.csv')
hcp_age_columns = ['Subject', 'Age_in_Yrs']
df_hcp_age = pd.DataFrame(pd.read_csv(hcp_ages_csv)[hcp_age_columns])

### Complex

In [17]:
# Pick the HCP subjects to analyze from the "sub-*" names found in hcp_bar_dir.
hcp_subjects_to_analyze = subject_to_anal(
    hcp_subj_dirs,
    hcp_bar_dir,
)

In [18]:
# Extract the features of the selected HCP subjects from hcp_bar_dir.
hcp_features = extract_features(
    hcp_subjects_to_analyze,
    hcp_bar_dir,
)

In [21]:
# 'thickness' source data for HCP, from the HCP features and the HCP age table,
# with 'Subject' passed as the `on` key.
df_data_hcp_ct = get_source_data(
    'thickness',
    hcp_features,
    df_hcp_age,
    on='Subject',
)

In [22]:
# 'aseg' source data for HCP, from the HCP features and the HCP age table,
# with 'Subject' passed as the `on` key.
df_data_hcp_sv = get_source_data(
    'aseg',
    hcp_features,
    df_hcp_age,
    on='Subject',
)

In [23]:
# 'area' source data for HCP, from the HCP features and the HCP age table,
# with 'Subject' passed as the `on` key.
df_data_hcp_ca = get_source_data(
    'area',
    hcp_features,
    df_hcp_age,
    on='Subject',
)

### Simple

In [24]:
# Build the "simple" HCP table: Subject, the df_hcp_age columns, intracranial
# volume and the white-matter / grey-matter / CSF fractions.

# Anatomical measures for the entries of hcp_subjects_to_analyze
# (presumably one value per subject, in the same order -- TODO confirm in util.ipynb).
white_matter_hcp, grey_matter_hcp, csf_hcp, intra_cran_vol_hcp = get_anat_features(hcp_subjects_to_analyze, hcp_bar_dir)

# calculate white matter fraction
white_matter_hcp = calc_fraction(white_matter_hcp, intra_cran_vol_hcp)

# calculate grey matter fraction
grey_matter_hcp = calc_fraction(grey_matter_hcp, intra_cran_vol_hcp)

# calculate csf fraction
csf_hcp = calc_fraction(csf_hcp, intra_cran_vol_hcp)

# Subject is the integer that follows the 4-character prefix of each subject name
hcp_subject_ids = pd.DataFrame({'Subject': [int(x[4:]) for x in hcp_subjects_to_analyze]})

# Attach the measures while the rows still correspond one-to-one with
# hcp_subjects_to_analyze. Previously they were assigned positionally AFTER the
# inner merge with df_hcp_age; any subject missing from the age table changed the
# row count, giving a length error or values landing on the wrong subject.
hcp_anat_features = (
    hcp_subject_ids
    .assign(Intra_Cran_Vol = intra_cran_vol_hcp)
    .assign(WM_Frac = white_matter_hcp)
    .assign(GM_Frac = grey_matter_hcp)
    .assign(CSF_Frac = csf_hcp)
)

# get age and subject number, then join the measures by key; this keeps the
# original column order (Subject, age columns, anatomical measures).
df_hcp_atf = (
    hcp_subject_ids
    .merge(df_hcp_age, on = 'Subject')
    .merge(hcp_anat_features, on = 'Subject')
)

## NKI Data

In [25]:
# NKI directory
nki_dir = Path('/data/NNDSP/anal/NKI')

# NKI behavioral data file (TSV)
nki_bh_file = project_dir / 'anal/analysis_notebooks/phenotype_files/participants.tsv'

# NKI derivative directories inside the project tree
nki_bar_dir = project_dir / 'derivatives/bar_nki_subj/'   # NKI baracus
nki_fs_dir = project_dir / 'derivatives/fs_nki_subj/'     # NKI freesurfer data

In [26]:
# Load the tab-separated NKI participants table.
# Reads from nki_bh_file (set in the NKI configuration cell to this same
# participants.tsv path) instead of spelling the path out a second time, so the
# file location is defined in exactly one place.
df_nki_ages = pd.read_csv(nki_bh_file, sep='\t')

### Simple

In [27]:
import os
from glob import glob

# List every "sub-*" entry in nki_bar_dir (sorted by full path) and keep only
# the last path component of each.
nki_bar_paths = sorted(glob(os.path.join(nki_bar_dir, "sub-*")))
nki_subj_dirs = [os.path.basename(path) for path in nki_bar_paths]

nki_subjects_to_analyze = subject_to_anal(nki_subj_dirs, nki_bar_dir)

# One row per participant_id: characters 4..12 of the subject name form the id,
# only the first name seen for each id is kept, and the rows are then joined
# with the NKI participants table.
df_nki_atf = (
    pd.DataFrame([])
    .assign(participant_name = list(nki_subjects_to_analyze))
    .assign(participant_id = lambda frame: [name[4:13] for name in frame.participant_name.values])
    .drop_duplicates(subset='participant_id', keep='first')
    .merge(df_nki_ages, on = 'participant_id')
)

# Anatomical measures are requested for the names that survived the merge,
# so they line up with the rows of df_nki_atf.
nki_names = df_nki_atf.participant_name.values
white_matter_nki, grey_matter_nki, csf_nki, intra_cran_vol_nki = get_anat_features(nki_names, nki_bar_dir)

# white matter, grey matter and csf fractions
white_matter_nki = calc_fraction(white_matter_nki, intra_cran_vol_nki)
grey_matter_nki = calc_fraction(grey_matter_nki, intra_cran_vol_nki)
csf_nki = calc_fraction(csf_nki, intra_cran_vol_nki)

df_nki_atf = (
    df_nki_atf
    .assign(Intra_Cran_Vol = intra_cran_vol_nki)
    .assign(WM_Frac = white_matter_nki)
    .assign(GM_Frac = grey_matter_nki)
    .assign(CSF_Frac = csf_nki)
)

### Complex

In [28]:
# Extract the features for the participant names kept in df_nki_atf.
nki_features = extract_features(
    df_nki_atf.participant_name.values,
    nki_bar_dir,
)

In [29]:
# 'thickness' source data for NKI, from the NKI features and the participants
# table, with 'participant_id' as the `on` key and is_string=True.
df_data_nki_ct = get_source_data(
    'thickness',
    nki_features,
    df_nki_ages,
    on='participant_id',
    is_string=True,
)

In [30]:
# 'aseg' source data for NKI, from the NKI features and the participants
# table, with 'participant_id' as the `on` key and is_string=True.
df_data_nki_sv = get_source_data(
    'aseg',
    nki_features,
    df_nki_ages,
    on='participant_id',
    is_string=True,
)

In [31]:
# 'area' source data for NKI, from the NKI features and the participants
# table, with 'participant_id' as the `on` key and is_string=True.
df_data_nki_ca = get_source_data(
    'area',
    nki_features,
    df_nki_ages,
    on='participant_id',
    is_string=True,
)

## Run Iterations

In [None]:
# First positional argument of mult_iterations (presumably the number of
# iterations to run -- TODO confirm in stat_tests.ipynb).
n_iterations = 15000

# Input tables grouped per dataset, in the positional order mult_iterations receives them.
nndsp_complex = (df_data_ct, df_data_sv, df_data_ca)
hcp_complex = (df_data_hcp_ct, df_data_hcp_sv, df_data_hcp_ca)
nki_complex = (df_data_nki_ct, df_data_nki_sv, df_data_nki_ca)
simple_tables = (df_atf, df_hcp_atf, df_nki_atf)

huge_it = mult_iterations(
    n_iterations,
    *(nndsp_complex + hcp_complex + nki_complex + simple_tables)
)

Fitting stacking model
best max_depth: 4
0
Fitting stacking model


In [68]:
# Show only the iterations in which all four listed p-values (the kpval / wpval
# columns for nki and ts) exceed the threshold.
# NOTE(review): 0.0166 looks like 0.05 / 3 (a Bonferroni-style cut-off) -- confirm.
pval_threshold = 0.0166
pval_columns = ['kpval_nki', 'wpval_nki', 'wpval_ts', 'kpval_ts']

# One combined row mask replaces the chained huge_it[...][...][...][...] selection.
# Each chained step indexed an already-filtered frame with a mask computed on the
# full frame, which makes pandas reindex the mask and emit a
# "Boolean Series key will be reindexed to match DataFrame index" UserWarning.
huge_it[(huge_it[pval_columns] > pval_threshold).all(axis=1)]

  """Entry point for launching an IPython kernel.


Unnamed: 0,kpval_hcp,kpval_nki,kpval_ts,kstat_hcp,kstat_nki,kstat_ts,wpval_hcp,wpval_nki,wpval_ts,wstat_hcp,wstat_nki,wstat_ts
5665,3.1531220000000002e-105,0.043269,0.020891,0.516201,0.065315,0.143519,3.2672790000000003e-119,0.07052,0.358155,20898.0,183531.0,10873.0
8253,2.1354629999999997e-85,0.02504,0.036926,0.464804,0.06982,0.134259,8.509364999999999e-104,0.129629,0.042331,33119.0,185771.0,9851.0


In [69]:
# Write the iteration results to CSV with the default to_csv options.
mult_iterations_csv = '/data/NNDSP/anal/analysis_notebooks/other_files/mult_iterations.csv'
huge_it.to_csv(mult_iterations_csv)