In [1]:
import pandas as pd
from glob import glob
from os import path
import numpy as np

### demo data from demo_survey

In [2]:
# Demographics gathered by the demo survey; the file's 'index' column becomes the row index.
old_demo_df = pd.read_csv(
    filepath_or_buffer='aim1_demographics.csv',
    index_col='index',
)

In [3]:
# Display the survey demographics exactly as loaded (no transformation applied yet).
old_demo_df

Unnamed: 0_level_0,age,sex
index,Unnamed: 1_level_1,Unnamed: 2_level_1
s130,29,Female
s192,19,Female
s251,19,Female
s358,26,Female
s373,32,Female
...,...,...
s646,22,Male
s647,24,Female
s648,30,Female
s649,22,Female


### demo data from redcap

In [4]:
# Demographics exported from redcap; the file's 'record_id' column becomes the row index.
redcap_demo_df = pd.read_csv(
    filepath_or_buffer='aim1_redcap_demographics_report.csv',
    index_col='record_id',
)

In [5]:
# Display the redcap demographics exactly as loaded (no transformation applied yet).
redcap_demo_df

Unnamed: 0_level_0,age,sex,ethnicity,race
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,27.0,0.0,1.0,4.0
2,19.0,1.0,1.0,4.0
3,20.0,0.0,1.0,1.0
4,21.0,0.0,1.0,1.0
5,22.0,0.0,1.0,1.0
...,...,...,...,...
755,30.0,0.0,1.0,4.0
756,20.0,0.0,1.0,4.0
757,19.0,0.0,0.0,4.0
758,30.0,,,


### framewise displacement data - reveals some missing subjects from the old_demo_df

In [6]:
# Framewise-displacement summary statistics, stored as a tab-separated file.
fd_df = pd.read_csv('aim1_fd_stats.tsv', sep='\t')

In [7]:
# Name the two unnamed leading columns, reduce each subject label to the part after
# its last '-', and index the frame by that label.
fd_df = (
    fd_df
    .rename(columns={"Unnamed: 0": "index", "Unnamed: 1": "task"})
    .assign(index=lambda frame: frame['index'].map(lambda label: label.split('-')[-1]))
    .set_index('index')
)

In [8]:
# Display the FD table after re-indexing by subject label (one row per subject/task pair).
fd_df

Unnamed: 0_level_0,task,fd_mean,fd_num,fd_perc
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
s061,CCTHot,0.137224,135,15.715949
s061,WATT3,0.151611,137,17.430025
s061,stopSignal,0.125783,44,8.396947
s061,twoByTwo,0.124104,73,7.087379
s061,DPX,0.108978,58,5.301645
...,...,...,...,...
s650,rest,0.253770,332,47.025496
s650,stroop,0.231252,51,15.044248
s650,surveyMedley,0.189059,209,34.775374
s650,CCTHot,0.200641,432,40.563380


#### get all the subjects from the fd_df

In [9]:
# Distinct subject labels present in the FD table, with the surrounding 's'
# characters stripped so that only the id digits remain (still strings).
fd_subject_labels = fd_df.index.unique()
simple_ids = fd_subject_labels.map(lambda label: label.strip('s'))
simple_ids

Index(['061', '130', '144', '172', '192', '234', '251', '358', '373', '445',
       ...
       '641', '642', '643', '644', '645', '646', '647', '648', '649', '650'],
      dtype='object', name='index', length=108)

In [10]:
# redcap_demo_df is indexed by record_id, which is displayed as integers, whereas
# simple_ids holds digit strings such as '061'. Convert explicitly so the membership
# test compares ints with ints instead of depending on isin() coercing the strings.
fmri_record_ids = [int(subj_id) for subj_id in simple_ids]
# .copy() yields an independent frame, so later edits never touch redcap_demo_df.
redcap_fmri_df = redcap_demo_df[redcap_demo_df.index.isin(fmri_record_ids)].copy()

#### get subjects that completed the fmri demo survey

In [11]:
# Distinct subject labels present in the survey demographics, with the surrounding
# 's' characters stripped so that only the id digits remain (still strings).
survey_subject_labels = old_demo_df.index.unique()
limited_ids = survey_subject_labels.map(lambda label: label.strip('s'))


In [12]:
# limited_ids holds digit strings. Convert them to ints before the membership test so
# the comparison does not depend on isin() coercing strings to numbers.
# NOTE(review): assumes redcap_fmri_df is still indexed by the integer record_id here.
limited_record_ids = [int(subj_id) for subj_id in limited_ids]
# .copy() yields an independent frame, so later edits never touch redcap_fmri_df.
redcap_fmri_limited_df = redcap_fmri_df[redcap_fmri_df.index.isin(limited_record_ids)].copy()



change dataframe's indexing to match the others

In [13]:
# Map each redcap record id to its survey label by key (e.g. 130 -> 's130') instead of
# pasting old_demo_df.index on positionally, which silently mislabels rows whenever
# the two frames are not in the same order.
survey_label_by_id = {int(label.lstrip('s')): label for label in old_demo_df.index.unique()}
# rename() leaves labels it cannot map untouched, so re-running this cell is harmless.
redcap_fmri_limited_df = (
    redcap_fmri_limited_df
    .rename(index=survey_label_by_id)
    .rename_axis('index')
)
# Keep the loud failure the positional insert gave on a row-count mismatch.
assert len(redcap_fmri_limited_df) == len(survey_label_by_id), \
    'redcap rows and survey subjects do not match one-to-one'

## get the differences!

In [14]:
# Survey age minus redcap age, aligned on the subject index.
survey_age = old_demo_df.age.astype(float)
age_diffs = survey_age - redcap_fmri_limited_df.age



In [15]:
# Lift the row limit so that the following Series/DataFrame displays are not truncated.
pd.options.display.max_rows = None

### Age

In [16]:
# Subjects whose two age reports disagree (NaN differences are also kept, since NaN != 0).
age_diffs.loc[age_diffs.ne(0)]

index
s130    2.0
s192    1.0
s251    1.0
s358    1.0
s373    1.0
s596    1.0
s613   -1.0
s650   -4.0
Name: age, dtype: float64

### Sex

In [17]:
# Recode the survey's sex labels in place as numbers: Female -> 0, Male -> 1.
for sex_label, sex_code in (('Female', 0), ('Male', 1)):
    old_demo_df.loc[old_demo_df.sex == sex_label, 'sex'] = sex_code

In [18]:
# Survey sex code minus redcap sex code, aligned on the subject index.
sex_diffs = old_demo_df['sex'].sub(redcap_fmri_limited_df['sex'])

In [19]:
# Subjects whose two sex codes disagree (NaN differences are also kept, since NaN != 0).
sex_diffs.loc[sex_diffs.ne(0)]

index
s624   NaN
Name: sex, dtype: float64

### investigate s624

In [20]:
# Survey record for subject s624.
old_demo_df.loc['s624']

age    21
sex     1
Name: s624, dtype: int64

In [21]:
# redcap record for subject s624.
redcap_fmri_limited_df.loc['s624']

age          21.0
sex           NaN
ethnicity     1.0
race          4.0
Name: s624, dtype: float64

# Build 2ndlevel design matrix

In [22]:
# Map each redcap record id to its FD subject label by key (e.g. 61 -> 's061') instead
# of pasting fd_df's unique labels on positionally, which silently mislabels rows
# whenever the FD file is not ordered the same way as the redcap rows.
fd_label_by_id = {int(label.lstrip('s')): label for label in fd_df.index.unique()}
# rename() leaves labels it cannot map untouched, so re-running this cell is harmless.
redcap_fmri_df = (
    redcap_fmri_df
    .rename(index=fd_label_by_id)
    .rename_axis('index')
)
# Keep the loud failure the positional insert gave on a row-count mismatch.
assert len(redcap_fmri_df) == len(fd_label_by_id), \
    'redcap rows and FD subjects do not match one-to-one'

In [23]:
# Reshape to one row per subject and one '<task>_meanFD' column per task; values are
# fd_mean, combined with pivot_table's default aggregation.
fd_long = fd_df.reset_index()
fd_pivot = fd_long.pivot_table(values='fd_mean', index=['index'], columns='task')
fd_pivot.columns = ['{}_meanFD'.format(task) for task in fd_pivot.columns]

In [24]:
# Combine demographics with the per-task mean FD columns, aligned on the subject index.
# `axis` is passed by keyword because pandas >= 2.0 rejects it as a positional argument.
out_df = pd.concat([redcap_fmri_df, fd_pivot], axis=1)
# redcap has no sex recorded for s624, so take the value from the survey data.
# NOTE(review): relies on old_demo_df.sex having already been recoded to 0/1.
out_df.loc['s624', 'sex'] = old_demo_df.loc['s624', 'sex']

# ethnicity and race are left out of the confounds matrix
out_df = out_df.drop(['ethnicity', 'race'], axis=1)

In [25]:
# Persist the second-level confounds matrix (subject index is written as the first column).
out_df.to_csv(path_or_buf='aim1_2ndlevel_confounds_matrix.csv')

In [28]:
# Spot-check two subjects in the final matrix.
spot_check_subjects = ['s130', 's061']
out_df.loc[spot_check_subjects]

Unnamed: 0_level_0,age,sex,ANT_meanFD,CCTHot_meanFD,DPX_meanFD,WATT3_meanFD,discountFix_meanFD,motorSelectiveStop_meanFD,rest_meanFD,stopSignal_meanFD,stroop_meanFD,surveyMedley_meanFD,twoByTwo_meanFD
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
s130,27.0,0.0,0.041484,0.03107,0.044276,0.045655,0.036948,0.03694,0.039885,0.03104,0.038328,0.030956,0.038415
s061,41.0,1.0,,0.137224,0.108978,0.151611,0.134908,,0.11946,0.125783,0.104488,0.138526,0.124104
