In [212]:
import pandas as pd
import os 
import numpy as np

In [213]:

# Thickness column stems, one per region name; parse_thickness prepends the
# hemisphere prefix (e.g. 'lh_') to each entry to form the real column name.
parcels = [
    region + '_thickness'
    for region in (
        'bankssts',
        'caudalanteriorcingulate',
        'caudalmiddlefrontal',
        'cuneus',
        'entorhinal',
        'fusiform',
        'inferiorparietal',
        'inferiortemporal',
        'isthmuscingulate',
        'lateraloccipital',
        'lateralorbitofrontal',
        'lingual',
        'medialorbitofrontal',
        'middletemporal',
        'parahippocampal',
        'paracentral',
        'parsopercularis',
        'parsorbitalis',
        'parstriangularis',
        'pericalcarine',
        'postcentral',
        'posteriorcingulate',
        'precentral',
        'precuneus',
        'rostralanteriorcingulate',
        'rostralmiddlefrontal',
        'superiorfrontal',
        'superiorparietal',
        'superiortemporal',
        'supramarginal',
        'frontalpole',
        'temporalpole',
        'transversetemporal',
        'insula',
    )
]

def parse_thickness(filename, idcolumn, hemi, parcels):
    """Read a tab-separated thickness table and return per-subject deltas.

    Each value in ``idcolumn`` is split on "_": field 1 becomes ``site``,
    field 2 becomes ``STUDYID`` and field 3 (up to its first ".") becomes
    ``timepoint``. Rows are ordered by subject and timepoint, then every
    ``<hemi>_<parcel>`` column is differenced against the preceding row of
    the same subject.

    Parameters
    ----------
    filename : path or file-like object accepted by ``pd.read_csv``.
    idcolumn : name of the column holding the underscore-separated identifier.
    hemi : prefix joined to each parcel name with "_" (e.g. 'lh').
    parcels : iterable of column-name stems to difference.

    Returns
    -------
    DataFrame restricted to rows whose timepoint is '02', containing
    ``STUDYID``, ``site`` and every column whose name contains "delta".
    """
    table = pd.read_csv(filename, sep='\t')

    # Split each identifier once, then pick the individual fields out of it.
    id_fields = table[idcolumn].map(lambda name: name.split("_"))
    table['STUDYID'] = id_fields.map(lambda fields: fields[2])
    table['timepoint'] = id_fields.map(lambda fields: fields[3].split(".")[0])
    table['site'] = id_fields.map(lambda fields: fields[1])

    # Ordering by timepoint within subject makes diff() yield "later - earlier".
    table = table.sort_values(by=["STUDYID", "timepoint"])

    thickness_cols = ["_".join((hemi, parcel)) for parcel in parcels]
    deltas = table.groupby("STUDYID")[thickness_cols].diff()
    table = table.assign(
        **{col + "_delta": deltas[col] for col in thickness_cols}
    )

    # Only the timepoint-02 rows carry the 02 - 01 difference of interest.
    followup = table[table['timepoint'] == '02']
    keep = ['STUDYID', 'site'] + [
        col for col in followup.columns if "delta" in col
    ]
    return followup.loc[:, keep]

In [214]:
# Input locations: everything lives under <parent of the working directory>/data.
base = os.path.dirname(os.getcwd())

lh_file = f"{base}/data/lh_aparc_stats.txt"
rh_file = f"{base}/data/rh_aparc_stats.txt"
demo_file = f"{base}/data/STOPPD_final_clinical.csv"


In [215]:
# Build the thickness-delta table for each hemisphere from its stats file.
lh = parse_thickness(filename=lh_file, idcolumn="lh.aparc.thickness",
                     hemi='lh', parcels=parcels)
rh = parse_thickness(filename=rh_file, idcolumn="rh.aparc.thickness",
                     hemi='rh', parcels=parcels)

In [216]:
# merge
# Load the clinical table first: the visible cells define `demo_file` but never
# create `demo`, so on a fresh kernel this cell failed with NameError.
demo = pd.read_csv(demo_file)

# Join the two hemispheres on subject ID. Both inputs carry a 'site' column,
# so pandas suffixes them as site_x (lh) and site_y (rh) in the result.
df = pd.merge(lh, rh, on='STUDYID')

# STUDYID was parsed out of a string identifier; cast both sides to int64 so
# the join keys share a dtype before merging with the clinical data.
df = df.astype({'STUDYID': 'int64'})
demo = demo.astype({'STUDYID': 'int64'})
df = pd.merge(demo, df, on='STUDYID')


In [217]:
# Save the merged table alongside the inputs (the index is written as the first column).
df.to_csv(f"{base}/data/STOPPD_merged.csv")