/
io.py
261 lines (228 loc) · 9.55 KB
/
io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
"""
Input/Output operations.
"""
import re
import json
import os.path as op
import numpy as np
import pandas as pd
from .dataset import Dataset
def convert_neurosynth_to_dict(text_file, annotations_file=None):
"""
Convert Neurosynth database files to a dictionary.
Parameters
----------
text_file : :obj:`str`
Text file with Neurosynth's coordinates. Normally named "database.txt".
annotations_file : :obj:`str` or None, optional
Optional file with Neurosynth's annotations. Normally named
"features.txt". Default is None.
Returns
-------
dict_ : :obj:`dict`
NiMARE-organized dictionary containing experiment information from text
files.
"""
dset_df = pd.read_csv(text_file, sep='\t')
if annotations_file is not None:
label_df = pd.read_csv(annotations_file, sep='\t', index_col='pmid')
label_df.index = label_df.index.astype(str)
else:
label_df = None
dset_df['id'] = dset_df['id'].astype(str)
ids = dset_df['id'].unique()
dict_ = {}
for sid in ids:
study_df = dset_df.loc[dset_df['id'] == sid]
study_dict = {}
study_dict['metadata'] = {}
study_dict['metadata']['authors'] = study_df['authors'].tolist()[0]
study_dict['metadata']['journal'] = study_df['journal'].tolist()[0]
study_dict['metadata']['year'] = study_df['year'].tolist()[0]
study_dict['metadata']['title'] = study_df['title'].tolist()[0]
study_dict['contrasts'] = {}
study_dict['contrasts']['1'] = {}
study_dict['contrasts']['1']['coords'] = {}
study_dict['contrasts']['1']['coords']['space'] = study_df['space'].tolist()[0]
study_dict['contrasts']['1']['coords']['x'] = study_df['x'].tolist()
study_dict['contrasts']['1']['coords']['y'] = study_df['y'].tolist()
study_dict['contrasts']['1']['coords']['z'] = study_df['z'].tolist()
if label_df is not None:
study_dict['contrasts']['1']['labels'] = label_df.loc[sid].to_dict()
dict_[sid] = study_dict
return dict_
def convert_neurosynth_to_json(text_file, out_file, annotations_file=None):
"""
Convert Neurosynth dataset text file to a NiMARE json file.
Parameters
----------
text_file : :obj:`str`
Text file with Neurosynth's coordinates. Normally named "database.txt".
out_file : :obj:`str`
Output NiMARE-format json file.
annotations_file : :obj:`str` or None, optional
Optional file with Neurosynth's annotations. Normally named
"features.txt". Default is None.
"""
dict_ = convert_neurosynth_to_dict(text_file, annotations_file)
with open(out_file, 'w') as fo:
json.dump(dict_, fo, indent=4, sort_keys=True)
def convert_neurosynth_to_dataset(text_file, annotations_file=None,
target='mni152_2mm'):
"""
Convert Neurosynth database files into dictionary and create NiMARE Dataset
with dictionary.
Parameters
----------
text_file : :obj:`str`
Text file with Neurosynth's coordinates. Normally named "database.txt".
target : {'mni152_2mm', 'ale_2mm'}, optional
Target template space for coordinates. Default is 'mni152_2mm'.
annotations_file : :obj:`str` or None, optional
Optional file with Neurosynth's annotations. Normally named
"features.txt". Default is None.
Returns
-------
:obj:`nimare.dataset.Dataset`
Dataset object containing experiment information from text_file.
"""
dict_ = convert_neurosynth_to_dict(text_file, annotations_file)
return Dataset(dict_, target=target)
def convert_sleuth_to_dict(text_file):
"""
Convert Sleuth text file to a dictionary.
Parameters
----------
text_file : :obj:`str`
Path to Sleuth-format text file.
Returns
-------
dict_ : :obj:`dict`
NiMARE-organized dictionary containing experiment information from text
file.
"""
filename = op.basename(text_file)
study_name, _ = op.splitext(filename)
with open(text_file, 'r') as file_object:
data = file_object.read()
data = [line.rstrip() for line in re.split('\n\r|\r\n|\n|\r', data)]
data = [line for line in data if line]
# First line indicates space. The rest are studies, ns, and coords
space = data[0].replace(' ', '').replace('//Reference=', '')
SPACE_OPTS = ['MNI', 'TAL', 'Talairach']
if space not in SPACE_OPTS:
raise Exception('Space {0} unknown. Options supported: '
'{0}.'.format(space, ', '.format(SPACE_OPTS)))
# Split into experiments
data = data[1:]
metadata_idx = [i for i, line in enumerate(data) if line.startswith('//')]
exp_idx = np.split(metadata_idx,
np.where(np.diff(metadata_idx) != 1)[0] + 1)
start_idx = [tup[0] for tup in exp_idx]
end_idx = start_idx[1:] + [len(data) + 1]
split_idx = zip(start_idx, end_idx)
dict_ = {}
for i_exp, exp_idx in enumerate(split_idx):
exp_data = data[exp_idx[0]:exp_idx[1]]
if exp_data:
header_idx = [i for i in range(len(exp_data)) if exp_data[i].startswith('//')]
study_info_idx = header_idx[:-1]
n_idx = header_idx[-1]
study_info = [exp_data[i].replace('//', '').strip() for i in study_info_idx]
study_info = ' '.join(study_info)
study_name = study_info.split(':')[0]
contrast_name = ':'.join(study_info.split(':')[1:]).strip()
sample_size = int(exp_data[n_idx].replace(
' ', '').replace('//Subjects=', ''))
xyz = exp_data[n_idx + 1:] # Coords are everything after study info and n
xyz = [row.split('\t') for row in xyz]
correct_shape = np.all([len(coord) == 3 for coord in xyz])
if not correct_shape:
all_shapes = np.unique([len(coord) for coord in xyz]).astype(
str) # pylint: disable=no-member
raise Exception('Coordinates for study "{0}" are not all '
'correct length. Lengths detected: '
'{1}.'.format(study_info,
', '.join(all_shapes)))
try:
xyz = np.array(xyz, dtype=float)
except:
# Prettify xyz
strs = [[str(e) for e in row] for row in xyz]
lens = [max(map(len, col)) for col in zip(*strs)]
fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
table = '\n'.join([fmt.format(*row) for row in strs])
raise Exception('Conversion to numpy array failed for study '
'"{0}". Coords:\n{1}'.format(study_info,
table))
x, y, z = list(xyz[:, 0]), list(xyz[:, 1]), list(xyz[:, 2])
if study_name not in dict_.keys():
dict_[study_name] = {'contrasts': {}}
dict_[study_name]['contrasts'][contrast_name] = {
'coords': {},
'sample_sizes': [],
}
dict_[study_name]['contrasts'][contrast_name]['coords'][
'space'] = space
dict_[study_name]['contrasts'][contrast_name]['coords']['x'] = x
dict_[study_name]['contrasts'][contrast_name]['coords']['y'] = y
dict_[study_name]['contrasts'][contrast_name]['coords']['z'] = z
dict_[study_name]['contrasts'][contrast_name][
'sample_sizes'] = [sample_size]
return dict_
def convert_sleuth_to_json(text_file, out_file):
"""
Convert Sleuth output text file into json.
Parameters
----------
text_file : :obj:`str`
Path to Sleuth-format text file.
out_file : :obj:`str`
Path to output json file.
"""
if isinstance(text_file, str):
text_files = [text_file]
elif isinstance(text_file, list):
text_files = text_file
else:
raise ValueError('Unsupported type for parameter "text_file": '
'{0}'.format(type(text_file)))
dict_ = {}
for text_file in text_files:
temp_dict = convert_sleuth_to_dict(text_file)
for sid in temp_dict.keys():
if sid in dict_.keys():
dict_[sid]['contrasts'] = {**dict_[sid]['contrasts'],
**temp_dict[sid]['contrasts']}
else:
dict_[sid] = temp_dict[sid]
with open(out_file, 'w') as fo:
json.dump(dict_, fo, indent=4, sort_keys=True)
def convert_sleuth_to_dataset(text_file, target='ale_2mm'):
"""
Convert Sleuth output text file into dictionary and create NiMARE Dataset
with dictionary.
Parameters
----------
text_file : :obj:`str`
Path to Sleuth-format text file.
target : {'ale_2mm', 'mni152_2mm'}, optional
Target template space for coordinates. Default is 'ale_2mm'
(ALE-specific brainmask in MNI152 2mm space).
Returns
-------
:obj:`nimare.dataset.Dataset`
Dataset object containing experiment information from text_file.
"""
if isinstance(text_file, str):
text_files = [text_file]
elif isinstance(text_file, list):
text_files = text_file
else:
raise ValueError('Unsupported type for parameter "text_file": '
'{0}'.format(type(text_file)))
dict_ = {}
for text_file in text_files:
temp_dict = convert_sleuth_to_dict(text_file)
dict_ = {**dict_, **temp_dict}
return Dataset(dict_, target=target)