-
Notifications
You must be signed in to change notification settings - Fork 56
/
generate.py
331 lines (282 loc) · 12.2 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
"""Utilities for generating data for testing."""
from itertools import zip_longest
import numpy as np
import sparse
from nimare.dataset import Dataset
from nimare.io import convert_neurovault_to_dataset
from nimare.meta.utils import compute_ale_ma, get_ale_kernel
from nimare.transforms import ImageTransformer
from nimare.utils import get_template, mm2vox, vox2mm
# defaults for creating a neurovault dataset
NEUROVAULT_IDS = (8836, 8838, 8893, 8895, 8892, 8891, 8962, 8894, 8956, 8854, 9000)
CONTRAST_OF_INTEREST = {"animal": "as-Animal"}
def create_coordinate_dataset(
foci=1,
foci_percentage="100%",
fwhm=10,
sample_size=30,
n_studies=30,
n_noise_foci=0,
seed=None,
space="MNI",
):
"""Generate coordinate based dataset for meta analysis.
.. versionadded:: 0.0.4
Parameters
----------
foci : :obj:`int` or :obj:`list`
The number of foci to be generated per study or the
x,y,z coordinates of the ground truth foci. (Default=1)
foci_percentage : :obj:`float`
Percentage of studies where the foci appear. (Default="100%")
fwhm : :obj:`float`
Full width at half maximum (fwhm) to define the probability
spread of the foci. (Default=10)
sample_size : :obj:`int` or :obj:`list`
Either mean number of participants in each study
or a list specifying the sample size for each
study. If a list of two numbers and n_studies is
not two, then the first number will represent a lower
bound and the second number will represent an upper bound
of a uniform sample. (Default=30)
n_studies : :obj:`int`
Number of studies to generate. (Default=30)
n_noise_foci : :obj:`int`
Number of foci considered to be noise in each study. (Default=0)
seed : :obj:`int` or None
Random state to reproducibly initialize random numbers.
If seed is None, then the random state will try to be initialized
with data from /dev/urandom (or the Windows analogue) if available
or will initialize from the clock otherwise. (Default=None)
space : :obj:`str`
The template space the coordinates are reported in. (Default='MNI')
Returns
-------
ground_truth_foci : :obj:`list`
generated foci in xyz (mm) coordinates
dataset : :class:`~nimare.dataset.Dataset`
"""
# set random state
rng = np.random.RandomState(seed=seed)
# check foci argument
if not isinstance(foci, int) and not _array_like(foci):
raise ValueError("foci must be a positive integer or array like")
# check foci_percentage argument
if (
(not isinstance(foci_percentage, (float, str)))
or (isinstance(foci_percentage, str) and foci_percentage[-1] != "%")
or (isinstance(foci_percentage, float) and not (0.0 <= foci_percentage <= 1.0))
):
raise ValueError(
"foci_percentage must be a string (example '96%') or a float between 0 and 1"
)
# check sample_size argument
if _array_like(sample_size) and len(sample_size) != n_studies and len(sample_size) != 2:
raise ValueError("sample_size must be the same length as n_studies or list of 2 items")
elif not _array_like(sample_size) and not isinstance(sample_size, int):
raise ValueError("sample_size must be array like or integer")
# check space argument
if space != "MNI":
raise NotImplementedError("Only coordinates for the MNI atlas has been defined")
# process foci_percentage argument
if isinstance(foci_percentage, str) and foci_percentage[-1] == "%":
foci_percentage = float(foci_percentage[:-1]) / 100
# process sample_size argument
if isinstance(sample_size, int):
sample_size = [sample_size] * n_studies
elif _array_like(sample_size) and len(sample_size) == 2 and n_studies != 2:
sample_size_lower_limit = sample_size[0]
sample_size_upper_limit = sample_size[1]
sample_size = rng.randint(sample_size_lower_limit, sample_size_upper_limit, size=n_studies)
ground_truth_foci, foci_dict = _create_foci(
foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng, space
)
source_dict = _create_source(foci_dict, sample_size, space)
dataset = Dataset(source_dict)
return ground_truth_foci, dataset
def create_neurovault_dataset(
collection_ids=NEUROVAULT_IDS,
contrasts=CONTRAST_OF_INTEREST,
img_dir=None,
map_type_conversion=None,
**dset_kwargs,
):
"""Download images from NeuroVault and use them to create a dataset.
.. versionadded:: 0.0.8
This function will also attempt to generate Z images for any contrasts
for which this is possible.
Parameters
----------
collection_ids : :obj:`list` of :obj:`int` or :obj:`dict`, optional
A list of collections on neurovault specified by their id.
The collection ids can accessed through the neurovault API
(i.e., https://neurovault.org/api/collections) or
their main website (i.e., https://neurovault.org/collections).
For example, in this URL https://neurovault.org/collections/8836/,
`8836` is the collection id.
collection_ids can also be a dictionary whose keys are the informative
study name and the values are collection ids to give the collections
human readable names in the dataset.
contrasts : :obj:`dict`, optional
Dictionary whose keys represent the name of the contrast in
the dataset and whose values represent a regular expression that would
match the names represented in NeuroVault.
For example, under the ``Name`` column in this URL
https://neurovault.org/collections/8836/,
a valid contrast could be "as-Animal", which will be called "animal" in the created
dataset if the contrasts argument is ``{'animal': "as-Animal"}``.
img_dir : :obj:`str` or None, optional
Base path to save all the downloaded images, by default the images
will be saved to a temporary directory with the prefix "neurovault"
map_type_conversion : :obj:`dict` or None, optional
Dictionary whose keys are what you expect the `map_type` name to
be in neurovault and the values are the name of the respective
statistic map in a nimare dataset. Default = None.
**dset_kwargs : keyword arguments passed to Dataset
Keyword arguments to pass in when creating the Dataset object.
see :obj:`~nimare.dataset.Dataset` for details.
Returns
-------
:obj:`~nimare.dataset.Dataset`
Dataset object containing experiment information from neurovault.
"""
dataset = convert_neurovault_to_dataset(
collection_ids, contrasts, img_dir, map_type_conversion, **dset_kwargs
)
transformer = ImageTransformer(target="z")
dataset = transformer.transform(dataset)
return dataset
def _create_source(foci, sample_sizes, space="MNI"):
"""Create dictionary according to nimads(ish) specification.
.. versionadded:: 0.0.4
Parameters
----------
foci : :obj:`dict`
A dictionary of foci in xyz (mm) coordinates whose keys represent
different studies.
sample_sizes : :obj:`list`
The sample size for each study
space : :obj:`str`
The template space the coordinates are reported in. (Default='MNI')
Returns
-------
source : :obj:`dict`
study information in nimads format
"""
source = {}
for sample_size, (study, study_foci) in zip(sample_sizes, foci.items()):
source[f"study-{study}"] = {
"contrasts": {
"1": {
"coords": {
"space": space,
"x": [c[0] for c in study_foci],
"y": [c[1] for c in study_foci],
"z": [c[2] for c in study_foci],
},
"metadata": {"sample_sizes": [sample_size]},
}
}
}
return source
def _create_foci(foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng, space):
"""Generate study specific foci.
.. versionadded:: 0.0.4
Parameters
----------
foci : :obj:`int` or :obj:`list`
The number of foci to be generated per study or the
x,y,z coordinates of the ground truth foci.
foci_percentage : :obj:`float`
Percentage of studies where the foci appear.
fwhm : :obj:`float`
Full width at half maximum (fwhm) to define the probability
spread of the foci.
n_studies : :obj:`int`
Number of n_studies to generate.
n_noise_foci : :obj:`int`
Number of foci considered to be noise in each study.
rng : :class:`numpy.random.RandomState`
Random state to reproducibly initialize random numbers.
space : :obj:`str`
The template space the coordinates are reported in.
Returns
-------
ground_truth_foci : :obj:`list`
List of 3-item tuples containing x, y, z coordinates
of the ground truth foci or an empty list if
there are no ground_truth_foci.
foci_dict : :obj:`dict`
Dictionary with keys representing the study, and
whose values represent the study specific foci.
"""
# convert foci_percentage to float between 0 and 1
if isinstance(foci_percentage, str) and foci_percentage[-1] == "%":
foci_percentage = float(foci_percentage[:-1]) / 100
if space == "MNI":
template_img = get_template(space="mni152_2mm", mask="brain")
# use a template to find all "valid" coordinates
template_data = template_img.get_fdata()
possible_ijks = np.argwhere(template_data)
# number of "convergent" foci each study should report
if isinstance(foci, int):
foci_idxs = np.unique(rng.choice(range(possible_ijks.shape[0]), foci, replace=True))
# if there are no foci_idxs, give a dummy coordinate (0, 0, 0)
ground_truth_foci_ijks = possible_ijks[foci_idxs] if foci_idxs.size else np.array([[]])
elif isinstance(foci, list):
ground_truth_foci_ijks = np.array([mm2vox(coord, template_img.affine) for coord in foci])
# create a probability map for each peak
kernel = get_ale_kernel(template_img, fwhm)[1]
foci_prob_maps = {
tuple(peak): compute_ale_ma(template_img, np.atleast_2d(peak), kernel=kernel).reshape(
template_data.shape
)
for peak in ground_truth_foci_ijks
if peak.size
}
# get study specific instances of each foci
signal_studies = int(round(foci_percentage * n_studies))
signal_ijks = {
peak: sparse.argwhere(prob_map)[
rng.choice(
sparse.argwhere(prob_map).shape[0],
size=signal_studies,
replace=True,
p=(prob_map[prob_map.nonzero()] / sum(prob_map[prob_map.nonzero()])).todense(),
)
]
for peak, prob_map in foci_prob_maps.items()
}
# reshape foci coordinates to be study specific
paired_signal_ijks = (
np.transpose(np.array(list(signal_ijks.values())), axes=(1, 0, 2))
if signal_ijks
else (None,)
)
foci_dict = {}
for study_signal_ijks, study in zip_longest(paired_signal_ijks, range(n_studies)):
if study_signal_ijks is None:
study_signal_ijks = np.array([[]])
n_noise_foci = max(1, n_noise_foci)
if n_noise_foci > 0:
noise_ijks = possible_ijks[
rng.choice(possible_ijks.shape[0], n_noise_foci, replace=True)
]
# add the noise foci ijks to the existing signal ijks
foci_ijks = (
np.unique(np.vstack([study_signal_ijks, noise_ijks]), axis=0)
if np.any(study_signal_ijks)
else noise_ijks
)
else:
foci_ijks = study_signal_ijks
# transform ijk voxel coordinates to xyz mm coordinates
foci_xyzs = [vox2mm(ijk, template_img.affine) for ijk in foci_ijks]
foci_dict[study] = foci_xyzs
ground_truth_foci_xyz = [
tuple(vox2mm(ijk, template_img.affine)) for ijk in ground_truth_foci_ijks if np.any(ijk)
]
return ground_truth_foci_xyz, foci_dict
def _array_like(obj):
"""Test if obj is array-like."""
return isinstance(obj, (list, tuple, np.ndarray))