# Prepare FreeSurfer ROIs Dataset

The idea here is to work with freesurfer extracted ROI data in desikan + destr + subcort + wmparc as provided in downloaded:

`derivatives/fs_stats/data-*.tsv
                      data-cortical_type-aparc.a2009s_measure-*.tsv
                      data-cortical_type-aparc_measure-*.tsv
                      data-subcortical_type-aseg_measure-*.tsv
                      data-subcortical_type-wmparc_measure-*.tsv`
                      
The purpose of this notebook in particular is for preparing 

In [6]:
from os.path import dirname, abspath
import pandas as pd
import os
import BPt as bp

In [15]:
# Useful directories
main_dr = dirname(dirname(abspath(os.getcwd())))
saves_dr = os.path.join(dirname(os.getcwd()), 'datasets')
data_dr = os.path.join(main_dr, 'data')
deriv_dr = os.path.join(data_dr, 'ds003097', 'derivatives')
fs_stats_dr = os.path.join(deriv_dr, 'fs_stats')

'/home/sage/methods_series/ds003097/datasets'

In [8]:
# These are all avaliable files, but we likely don't want all, let's apply further filtering
files = [file for file in os.listdir(fs_stats_dr) if 'sub-' not in file]
files

['data-cortical_type-aparc.a2009s_measure-meancurv_hemi-lh.tsv',
 'data-cortical_type-aparc_measure-area_hemi-lh.tsv',
 'data-cortical_type-aparc_measure-volume_hemi-lh.tsv',
 'data-cortical_type-aparc_measure-area_hemi-rh.tsv',
 'data-subcortical_type-wmparc_measure-mean_hemi-both.tsv',
 'data-cortical_type-aparc_measure-thickness_hemi-lh.tsv',
 'data-subcortical_type-aseg_measure-mean_hemi-both.tsv',
 'data-cortical_type-aparc.a2009s_measure-volume_hemi-rh.tsv',
 'data-cortical_type-aparc.a2009s_measure-volume_hemi-lh.tsv',
 'data-cortical_type-aparc_measure-thickness_hemi-rh.tsv',
 'data-cortical_type-aparc_measure-meancurv_hemi-rh.tsv',
 'data-subcortical_type-wmparc_measure-volume_hemi-both.tsv',
 'data-cortical_type-aparc_measure-volume_hemi-rh.tsv',
 'data-cortical_type-aparc.a2009s_measure-meancurv_hemi-rh.tsv',
 'data-cortical_type-aparc.a2009s_measure-thickness_hemi-lh.tsv',
 'data-cortical_type-aparc_measure-meancurv_hemi-lh.tsv',
 'data-cortical_type-aparc.a2009s_measure-th

In [9]:
# Filter to just destr altas and subcortical
files = [file for file in files if 'curv' not in file and (('subcortical' in file) or ('a2009s' in file))]
files

['data-subcortical_type-wmparc_measure-mean_hemi-both.tsv',
 'data-subcortical_type-aseg_measure-mean_hemi-both.tsv',
 'data-cortical_type-aparc.a2009s_measure-volume_hemi-rh.tsv',
 'data-cortical_type-aparc.a2009s_measure-volume_hemi-lh.tsv',
 'data-subcortical_type-wmparc_measure-volume_hemi-both.tsv',
 'data-cortical_type-aparc.a2009s_measure-thickness_hemi-lh.tsv',
 'data-cortical_type-aparc.a2009s_measure-thickness_hemi-rh.tsv',
 'data-cortical_type-aparc.a2009s_measure-area_hemi-rh.tsv',
 'data-subcortical_type-aseg_measure-volume_hemi-both.tsv',
 'data-cortical_type-aparc.a2009s_measure-area_hemi-lh.tsv']

In [10]:
files = [os.path.join(fs_stats_dr, file) for file in files]

In [11]:
def load(loc):
    
    # Load as dataframe
    data = pd.read_csv(loc, sep='\t')
    name = list(data)[0]
    
    # Set to correct index
    data = data.set_index(list(data)[0])
    
    # Change index name
    data.index.name = 'participant_id'
    
    # Add name to columns
    new_col_names = {col: name + '_' + col for col in list(data)}
    data = data.rename(new_col_names, axis=1)
    
    return data

In [12]:
# Concat
rois = pd.concat([load(file) for file in files], axis=1)

# Drop duplicates
rois = rois.loc[:,~rois.columns.duplicated()]

In [16]:
# Load the base template dataframe with targets loaded
data = bp.read_pickle(os.path.join(saves_dr, 'template.dataset'))

In [17]:
# Concat and cast to dataset
data = bp.Dataset(pd.concat([data, rois], axis=1), targets=list(data))

In [19]:
# Save
data.to_pickle(os.path.join(saves_dr, 'fs_rois.dataset'))