-
Notifications
You must be signed in to change notification settings - Fork 1
/
setup_fs_dataset.py
37 lines (27 loc) · 1.12 KB
/
setup_fs_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from setup_dataset import load_base
import pandas as pd
def main():
    """Build the FreeSurfer ROI dataset aligned to the base dataset and save it.

    Loads the outlier-filtered base dataset, restricts it to the subjects
    present in ``../data/dataset.pkl``, appends FreeSurfer ROI features from
    the Destrieux (``-DESTR``) and Desikan (``-DESIKAN``) parcellations, and
    pickles the combined result to ``../data/fs_dataset.pkl``.
    """
    # Load dataset with targets and family_id
    data = load_base(filter_outliers=True)

    # Get base_subjects from main dataset, as we want these to match
    base_data = pd.read_pickle('../data/dataset.pkl')
    base_subjects = base_data.index
    data = data.loc[base_subjects]

    def add_fs_data(loc, suffix):
        """Append ROI feature columns from the CSV at `loc`, suffixed with `suffix`."""
        # Load ROIs
        fs_data = pd.read_csv(loc)

        # Normalize subject IDs to the 'NDAR_...' form used by data's index,
        # then index by subject ID so the column assignments below align by
        # subject. BUG FIX: the original dropped src_subject_id without making
        # it the index, so assignment aligned fs_data's RangeIndex against
        # data's subject-ID index, producing misaligned / all-NaN columns.
        fs_data['src_subject_id'] = [
            s.replace('NDAR', 'NDAR_') for s in fs_data['src_subject_id']
        ]
        fs_data = fs_data.set_index('src_subject_id')

        # Reduce to just keep features
        keep = ('_thickavg', '_surfarea', '_meancurv')
        to_keep = [col for col in fs_data.columns if any(k in col for k in keep)]
        fs_data = fs_data[to_keep]

        # Add to data with suffix; rows align on subject ID
        for col in fs_data:
            data[col + suffix] = fs_data[col]

    # Add both parcellations
    add_fs_data('../data/aparc.a2009s_rois.csv', '-DESTR')
    add_fs_data('../data/aparc_rois.csv', '-DESIKAN')

    # Save with pickle to data
    data.to_pickle('../data/fs_dataset.pkl')
# Entry-point guard: run only when executed as a script, not on import.
if __name__ == '__main__':
    main()