### This notebook is designed to explore some interesting features available through ABCD_ML, specifically loading Data_Files and using custom loaders.

This example will require some extra optional ABCD_ML libraries, including nibabel and nilearn!

In [1]:
from ABCD_ML import *
import nibabel as nib
import numpy as np
import pandas as pd
import os

## Let's start by saving some fake surface time-series data, and also some fake surface-only data (one value per vertex, no time dimension).

In [2]:
# Fixed seed and a private generator: the fake data is identical on every
# Restart & Run All, and numpy's global random state is left untouched.
SEED = 0
rng = np.random.RandomState(SEED)

N_SUBJECTS = 10
N_TIMEPOINTS = 100
N_VERTICES = 10242  # vertices per saved array (a later cell describes this as fake fsaverage5 data)


def save_fake_hemispheres(out_dr, data):
    """Save every entry of `data` twice, as a left and a right hemisphere file.

    Parameters
    ----------
    out_dr : str
        Directory to write into; created if it does not exist.
    data : numpy.ndarray
        Array whose first axis indexes subjects. Entry `i` is written to
        `<out_dr>/<i>_lh.npy` and `<out_dr>/<i>_rh.npy` (np.save appends
        the `.npy` extension). Both hemispheres receive the same values,
        which is fine for fake data.
    """
    os.makedirs(out_dr, exist_ok=True)

    for subject, subject_data in enumerate(data):
        for hemi in ('_lh', '_rh'):
            np.save(os.path.join(out_dr, str(subject) + hemi), subject_data)


# Fake surface time-series: (subjects, timepoints, vertices)
X = rng.random_sample(size=(N_SUBJECTS, N_TIMEPOINTS, N_VERTICES))
save_fake_hemispheres('fake_time_data', X)

# Fake surface-only data: (subjects, vertices)
X = rng.random_sample(size=(N_SUBJECTS, N_VERTICES))
save_fake_hemispheres('fake_surf_data', X)

### In this experiment we will load both the time-series and the surface-only data

In [3]:
ML = ABCD_ML(log_dr=None, verbose=False)

timeseries_dr = 'fake_time_data/'
surf_dr = 'fake_surf_data/'

subjects = [str(i) for i in range(10)]


def hemi_paths(data_dr, hemi):
    """Return one file path per subject, in the same order as `subjects`.

    Paths are built from the subject id rather than read back with
    os.listdir: listdir returns entries in arbitrary order, so filtering its
    output could pair a subject id with another subject's file, and any stray
    file in the directory would change the list length.
    """
    return [data_dr + subject + '_' + hemi + '.npy' for subject in subjects]


lh_timeseries = hemi_paths(timeseries_dr, 'lh')
rh_timeseries = hemi_paths(timeseries_dr, 'rh')

lh_surf = hemi_paths(surf_dr, 'lh')
rh_surf = hemi_paths(surf_dr, 'rh')

# Fail early, with a readable message, if the data-generation cell was not run.
missing = [path
           for path in lh_timeseries + rh_timeseries + lh_surf + rh_surf
           if not os.path.exists(path)]
if missing:
    raise FileNotFoundError('Missing fake data files, e.g. ' + missing[0])

# One row per subject; every data column holds a file path, not the data itself.
df = pd.DataFrame()

df['lh_timeseries'] = lh_timeseries
df['rh_timeseries'] = rh_timeseries

df['lh_surf'] = lh_surf
df['rh_surf'] = rh_surf

df['src_subject_id'] = subjects
# Random binary target for the fake classification problem.
df['target'] = np.random.randint(2, size=len(subjects))

# np.load is the function used to read each file path.
# 'target' is passed as drop_keys -- presumably so the target column is not
# treated as a data-file column; confirm against the ABCD_ML docs.
ML.Load_Data_Files(df = df,
                   load_func = np.load,
                   drop_keys = ['target'])

# Load the binary ('b') target from the same frame.
ML.Load_Targets(df = df, col_name='target', data_type='b')

# test_size=0 -- presumably keeps all subjects for training / CV with no
# held-out test set; confirm against the ABCD_ML docs.
ML.Train_Test_Split(test_size=0)

### This assumes you have some saved parcellations in the relevant space. Since we saved fake fsaverage5 surface data, we will load in the Desikan parcellations.

In [4]:
from ABCD_ML.extensions import SurfLabels, Connectivity

# Directory holding the Desikan .annot files. The default is the original
# author's location; set the PARCEL_DIR environment variable (a convention
# local to this notebook) to run the notebook on another machine.
base = os.environ.get('PARCEL_DIR',
                      '/home/sage/work/Parcel_Search/Existing_Parcels/')
desikan_lh = os.path.join(base, 'lh.aparc.annot')
desikan_rh = os.path.join(base, 'rh.aparc.annot')

# Loaders for the time-series data: vectorize=False here, and the output is
# later piped into `connectivity`, which does the vectorizing itself.
t_surf_rois_lh = SurfLabels(labels = desikan_lh,
                            vectorize = False)
t_surf_rois_rh = SurfLabels(labels = desikan_rh,
                            vectorize = False)

connectivity = Connectivity(vectorize=True)

# Loaders for the surface-only data, using SurfLabels' default settings.
surf_rois_lh = SurfLabels(labels = desikan_lh)
surf_rois_rh = SurfLabels(labels = desikan_rh)

### Try just loading the left hemisphere surface data

In [9]:
# Binary problem, scoped to the 'lh_surf' data-file column only.
problem_spec = Problem_Spec(problem_type = 'binary',
                            scope = 'lh_surf')

# The loader scope must name the same loaded column. The previous value,
# 'lh##_surf', matched neither a column name nor the scope used elsewhere
# in this notebook.
loaders = Loader(surf_rois_lh, scope='lh_surf')

model_pipeline = Model_Pipeline(loaders=loaders)
results = ML.Evaluate(model_pipeline, problem_spec)

# Print the steps of the pipeline that was built, to see how the loader was wrapped.
for step in ML.Model_Pipeline.Model.steps:
    print(step)

HBox(children=(FloatProgress(value=0.0, description='Repeats', max=2.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Folds', max=3.0, style=ProgressStyle(description_width='i…



('user passed0', Loader_Wrapper(background_label=0,
               file_mapping={0: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f15335dcd90>,
                             1: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f15335dcdd0>,
                             2: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f15335dced0>,
                             3: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f15335dc650>,
                             4: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533...
               labels='/home/sage/work/Parcel_Search/Existing_Parcels/lh.aparc.annot',
               mask=None, strategy=<function mean at 0x7f15800bc710>,
               vectorize=True, wrapper_inds=[0], wrapper_n_jobs=1,
               wrapper_transformer=SurfLabels(background_label=0,
                                              labels='/home/sage/work/Parcel_Search/Existing_Parcels/lh.aparc.annot',
                                              mask=None,
      

### Load everything

In [10]:
# Build a fresh spec instead of mutating the one created in an earlier cell,
# so this cell gives the same result whether or not that cell was run.
problem_spec = Problem_Spec(problem_type = 'binary',
                            scope = 'all')

# One loader per data-file column. The surface-only columns use the SurfLabels
# loaders directly; the time-series columns pipe SurfLabels into Connectivity.
loaders = [Loader(surf_rois_lh, scope='lh_surf'),
           Loader(surf_rois_rh, scope='rh_surf'),
           Loader(Pipe([t_surf_rois_lh, connectivity]), scope='lh_timeseries'),
           Loader(Pipe([t_surf_rois_rh, connectivity]), scope='rh_timeseries')]

model_pipeline = Model_Pipeline(loaders=loaders)
results = ML.Evaluate(model_pipeline, problem_spec)

# Print the steps of the pipeline that was built, to see how each loader was wrapped.
for step in ML.Model_Pipeline.Model.steps:
    print(step)

HBox(children=(FloatProgress(value=0.0, description='Repeats', max=2.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Folds', max=3.0, style=ProgressStyle(description_width='i…



('user passed0', Loader_Wrapper(background_label=0,
               file_mapping={0: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533590a50>,
                             1: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533590e90>,
                             2: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533590790>,
                             3: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533590a10>,
                             4: <ABCD_ML.helpers.Data_File.Data_File object at 0x7f1533...
               labels='/home/sage/work/Parcel_Search/Existing_Parcels/lh.aparc.annot',
               mask=None, strategy=<function mean at 0x7f15800bc710>,
               vectorize=True, wrapper_inds=[2], wrapper_n_jobs=1,
               wrapper_transformer=SurfLabels(background_label=0,
                                              labels='/home/sage/work/Parcel_Search/Existing_Parcels/lh.aparc.annot',
                                              mask=None,
      