/
kernel.py
511 lines (429 loc) · 19.8 KB
/
kernel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
"""Kernel transformers for CBMA algorithms.
Methods for estimating thresholded cluster maps from neuroimaging contrasts
(Contrasts) from sets of foci and optional additional information (e.g., sample
size and test statistic values).
"""
from __future__ import division
import logging
import os
import warnings
from hashlib import md5
import nibabel as nib
import numpy as np
import pandas as pd
from nilearn import image
from .. import references
from ..base import Transformer
from ..due import due
from ..utils import (
_add_metadata_to_dataframe,
_safe_transform,
mm2vox,
use_memmap,
vox2mm,
)
from .utils import compute_ale_ma, compute_kda_ma, compute_p2m_ma, get_ale_kernel
LGR = logging.getLogger(__name__)
class KernelTransformer(Transformer):
    """Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`.

    Coordinate-based meta-analyses leverage coordinates reported in
    neuroimaging papers to simulate the thresholded statistical maps from the
    original analyses. This generally involves convolving each coordinate with
    a kernel (typically a Gaussian or binary sphere) that may be weighted based
    on some additional measure, such as statistic value or sample size.

    Notes
    -----
    All extra (non-ijk) parameters for a given kernel should be overrideable as
    parameters to __init__, so we can access them with get_params() and also
    apply them to datasets with missing data.
    """

    def _infer_names(self, **kwargs):
        """Determine filename pattern and image type for files created with this transformer.

        The parameters used to construct the filenames come from the transformer's
        parameters (attributes saved in ``__init__()``).

        Parameters
        ----------
        **kwargs
            Additional key/value pairs to incorporate into the image name.
            A common example is the hash for the target template's affine.

        Attributes
        ----------
        filename_pattern : str
            Filename pattern for images that will be saved by the transformer.
        image_type : str
            Name of the corresponding column in the Dataset.images DataFrame.
        """
        params = self.get_params()
        params = dict(**params, **kwargs)
        # Determine names for kernel-specific files.
        # Parameters are sorted alphabetically so the name is deterministic
        # regardless of insertion order.
        keys = sorted(params.keys())
        param_str = "_".join(f"{k}-{str(params[k])}" for k in keys)
        # "[[id]]" is a placeholder for the study ID; it is written with double
        # brackets so the f-string does not consume it, then converted to a
        # standard str.format field ("{id}") afterward.
        self.filename_pattern = (
            f"study-[[id]]_{param_str}_{self.__class__.__name__}.nii.gz".replace(
                "[[", "{"
            ).replace("]]", "}")
        )
        self.image_type = f"{param_str}_{self.__class__.__name__}"

    @use_memmap(LGR)
    def transform(self, dataset, masker=None, return_type="image"):
        """Generate modeled activation images for each Contrast in dataset.

        Parameters
        ----------
        dataset : :obj:`~nimare.dataset.Dataset` or :obj:`pandas.DataFrame`
            Dataset for which to make images. Can be a DataFrame if necessary.
        masker : img_like or None, optional
            Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
            If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
            Default is None.
        return_type : {'array', 'image', 'dataset'}, optional
            Whether to return a numpy array ('array'), a list of niimgs ('image'),
            or a Dataset with MA images saved as files ('dataset').
            Default is 'image'.

        Returns
        -------
        imgs : (C x V) :class:`numpy.ndarray` or :obj:`list` of :class:`nibabel.Nifti1Image` \
                or :class:`~nimare.dataset.Dataset`
            If return_type is 'array', a 2D numpy array (C x V), where C is
            contrast and V is voxel.
            If return_type is 'image', a list of modeled activation images
            (one for each of the Contrasts in the input dataset).
            If return_type is 'dataset', a new Dataset object with modeled
            activation images saved to files and referenced in the
            Dataset.images attribute.

        Attributes
        ----------
        filename_pattern : str
            Filename pattern for MA maps that will be saved by the transformer.
        image_type : str
            Name of the corresponding column in the Dataset.images DataFrame.
        """
        # Validate return_type up front so we fail fast before any heavy work.
        if return_type not in ("array", "image", "dataset"):
            raise ValueError('Argument "return_type" must be "image", "array", or "dataset".')

        if isinstance(dataset, pd.DataFrame):
            # DataFrame input: the caller must supply the masker, and file-backed
            # outputs are impossible (there is no Dataset to attach images to).
            assert (
                masker is not None
            ), "Argument 'masker' must be provided if dataset is a DataFrame."
            mask = masker.mask_img
            coordinates = dataset.copy()
            assert (
                return_type != "dataset"
            ), "Input dataset must be a Dataset if return_type='dataset'."

            # Calculate IJK. Must assume that the masker is in same space,
            # but has different affine, from original IJK.
            coordinates[["i", "j", "k"]] = mm2vox(coordinates[["x", "y", "z"]], mask.affine)
        else:
            # Dataset input: fall back to the Dataset's own masker when none is given.
            masker = dataset.masker if not masker else masker
            mask = masker.mask_img
            coordinates = dataset.coordinates.copy()

            # Determine MA map filenames. Must happen after parameters are set.
            # The affine is hashed into the name so cached maps are invalidated
            # when the target template changes.
            self._infer_names(affine=md5(mask.affine).hexdigest())

            # Check for existing MA maps
            # Use coordinates to get IDs instead of Dataset.ids bc of possible
            # mismatch between full Dataset and contrasts with coordinates.
            if self.image_type in dataset.images.columns:
                files = dataset.get_images(ids=coordinates["id"].unique(), imtype=self.image_type)
                # Only short-circuit when *every* study already has a map on disk.
                if all(f is not None for f in files):
                    LGR.debug("Files already exist. Using them.")
                    if return_type == "array":
                        masked_data = _safe_transform(files, masker)
                        return masked_data
                    elif return_type == "image":
                        return [nib.load(f) for f in files]
                    elif return_type == "dataset":
                        return dataset.copy()

            # Calculate IJK
            if not np.array_equal(mask.affine, dataset.masker.mask_img.affine):
                LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")
            coordinates[["i", "j", "k"]] = mm2vox(coordinates[["x", "y", "z"]], mask.affine)

            # Add any metadata the Transformer might need to the coordinates DataFrame
            # This approach is probably inferior to one which uses a _required_inputs attribute
            # (like the MetaEstimators), but it should work just fine as long as individual
            # requirements are written in here.
            if (
                hasattr(self, "sample_size")
                and (self.sample_size is None)
                and ("sample_size" not in coordinates.columns)
            ):
                coordinates = _add_metadata_to_dataframe(
                    dataset,
                    coordinates,
                    metadata_field="sample_sizes",
                    target_column="sample_size",
                    filter_func=np.mean,
                )

        # Generate the MA maps if they weren't already available as images
        if return_type == "array":
            # Boolean mask used to flatten 3D maps into voxel vectors.
            mask_data = mask.get_fdata().astype(bool)
        elif return_type == "image":
            # Match the kernel's output dtype (e.g., int for KDA's `value`)
            # so multiplying by the mask does not upcast the data.
            dtype = type(self.value) if hasattr(self, "value") else float
            mask_data = mask.get_fdata().astype(dtype)
        elif return_type == "dataset":
            # File-backed output requires a valid, existing output directory.
            if dataset.basepath is None:
                raise ValueError(
                    "Dataset output path is not set. Set the path with Dataset.update_path()."
                )
            elif not os.path.isdir(dataset.basepath):
                raise ValueError(
                    "Output directory does not exist. Set the path to an existing folder with "
                    "Dataset.update_path()."
                )
            # Copy so the caller's Dataset is not mutated.
            dataset = dataset.copy()

        transformed_maps = self._transform(mask, coordinates)

        # This will be a numpy.ndarray or numpy.memmap if the kernel is an (M)KDAKernel or
        # memory_limit is set, respectively.
        if not isinstance(transformed_maps[0], (list, tuple)):
            if return_type == "array":
                # 4D (N, X, Y, Z) -> 2D (N, V) via the boolean mask.
                ma_arr = transformed_maps[0][:, mask_data]
                # If this array is a memmap, then the file needs to be closed
                if isinstance(transformed_maps[0], np.memmap):
                    LGR.debug(f"Closing memmap at {transformed_maps[0].filename}")
                    transformed_maps[0]._mmap.close()
                return ma_arr
            else:
                # Transform into an length-N list of length-2 tuples,
                # composed of a 3D array/memmap and a string with the ID.
                transformed_maps = list(zip(*transformed_maps))

        imgs = []
        for (kernel_data, id_) in transformed_maps:
            if isinstance(kernel_data, np.memmap):
                # Convert data to a numpy array if it's a memmap
                kernel_data = np.array(kernel_data)

            if return_type == "array":
                # NOTE: This will never be a memmap because memory_limit[!None]+return_type[array]
                # is dealt with above.
                img = kernel_data[mask_data]
                imgs.append(img)
            elif return_type == "image":
                # Zero out-of-mask voxels before wrapping in a NIfTI image.
                kernel_data *= mask_data
                img = nib.Nifti1Image(kernel_data, mask.affine)
                imgs.append(img)
            elif return_type == "dataset":
                img = nib.Nifti1Image(kernel_data, mask.affine)
                out_file = os.path.join(dataset.basepath, self.filename_pattern.format(id=id_))
                img.to_filename(out_file)
                # Register the new file in the Dataset's images table.
                dataset.images.loc[dataset.images["id"] == id_, self.image_type] = out_file

        # If this array is a memmap, then the file needs to be closed
        if isinstance(transformed_maps[0][0], np.memmap):
            LGR.debug(f"Closing memmap at {transformed_maps[0][0].filename}")
            transformed_maps[0][0]._mmap.close()

        # Free potentially-large intermediates before building the return value.
        del kernel_data, transformed_maps

        if return_type == "array":
            return np.vstack(imgs)
        elif return_type == "image":
            return imgs
        elif return_type == "dataset":
            # Replace NaNs with Nones
            dataset.images[self.image_type] = dataset.images[self.image_type].where(
                dataset.images[self.image_type].notnull(), None
            )
            # Infer relative path
            # NOTE(review): presumably this self-assignment triggers the
            # Dataset.images property setter, which re-infers relative paths —
            # confirm; as a plain attribute assignment it would be a no-op.
            dataset.images = dataset.images
            return dataset

    def _transform(self, mask, coordinates):
        """Apply the kernel's unique transformer.

        Parameters
        ----------
        mask : niimg-like
            Mask image. Should contain binary-like integer data.
        coordinates : pandas.DataFrame
            DataFrame containing IDs and coordinates.
            The DataFrame must have the following columns: "id", "i", "j", "k".
            Additionally, individual kernels may require other columns
            (e.g., "sample_size" for ALE).

        Returns
        -------
        transformed_maps : N-length list of (3D array, str) tuples or (4D array, 1D array) tuple \
                or (4D memmap, 1D array) tuple
            Transformed data, containing one element for each study.

            - Case 1: A kernel that is not an (M)KDAKernel, with no memory limit.
              Each list entry is composed of a 3D array (the MA map) and the study's ID.
            - Case 2: (M)KDAKernel, with no memory limit.
              There is a length-2 tuple with a 4D numpy array of the shape (N, X, Y, Z),
              containing all of the MA maps, and a numpy array of shape (N,) with the study IDs.
            - Case 3: Any kernel, with memory_limit set, so memmaps will be used.
              There is a length-2 tuple with a 4D memmap array of the shape (N, X, Y, Z),
              containing all of the MA maps, and a numpy array of shape (N,) with the study IDs.
        """
        # Abstract method: subclasses must override.
        pass
@due.dcite(
    references.ALE2,
    description=(
        "Modifies ALE algorithm to eliminate within-experiment "
        "effects and generate MA maps based on subject group "
        "instead of experiment."
    ),
)
class ALEKernel(KernelTransformer):
    """Generate ALE modeled activation images from coordinates and sample size.

    By default (if neither ``fwhm`` nor ``sample_size`` is provided), the FWHM of the kernel
    will be determined on a study-wise basis based on the sample sizes available in the input,
    via the method described in [1]_.

    .. versionchanged:: 0.0.8

        * [ENH] Add low-memory option for kernel transformers.

    Parameters
    ----------
    fwhm : :obj:`float`, optional
        Full-width half-max for Gaussian kernel, if you want to have a
        constant kernel across Contrasts. Mutually exclusive with
        ``sample_size``.
    sample_size : :obj:`int`, optional
        Sample size, used to derive FWHM for Gaussian kernel based on
        formulae from Eickhoff et al. (2012). This sample size overwrites
        the Contrast-specific sample sizes in the dataset, in order to hold
        kernel constant across Contrasts. Mutually exclusive with ``fwhm``.
    memory_limit : :obj:`str` or None, optional
        Memory limit to apply to data. If None, no memory management will be applied.
        Otherwise, the memory limit will be used to (1) assign memory-mapped files and
        (2) restrict memory during array creation to the limit.
        Default is None.

    References
    ----------
    .. [1] Eickhoff, Simon B., et al. "Activation likelihood estimation
       meta-analysis revisited." Neuroimage 59.3 (2012): 2349-2361.
    """

    def __init__(self, fwhm=None, sample_size=None, memory_limit=None):
        # The two kernel-size controls are mutually exclusive by design.
        if fwhm is not None and sample_size is not None:
            raise ValueError('Only one of "fwhm" and "sample_size" may be provided.')
        self.fwhm = fwhm
        self.sample_size = sample_size
        self.memory_limit = memory_limit

    def _transform(self, mask, coordinates):
        """Compute one ALE MA map per experiment.

        See :meth:`KernelTransformer._transform` for the parameter/return contract.
        """
        # Cache kernels by FWHM or sample size so each distinct value is only
        # computed once across experiments.
        kernels = {}
        exp_ids = coordinates["id"].unique()

        if self.memory_limit:
            # Use a memmapped 4D array (N, X, Y, Z) to keep peak memory low.
            transformed_shape = (len(exp_ids),) + mask.shape
            transformed = np.memmap(
                self.memmap_filenames[0],
                dtype=float,
                mode="w+",
                shape=transformed_shape,
            )
        else:
            # Use a list of (3D map, ID) tuples.
            transformed = []

        for i_exp, id_ in enumerate(exp_ids):
            data = coordinates.loc[coordinates["id"] == id_]
            ijk = np.vstack((data.i.values, data.j.values, data.k.values)).T.astype(int)

            # Resolve the sample size for this experiment: a transformer-level
            # override wins; otherwise (when no fixed FWHM is set) use the
            # experiment's own sample size from the coordinates DataFrame.
            if self.sample_size is not None:
                sample_size = self.sample_size
            elif self.fwhm is None:
                # Extract from input
                sample_size = data.sample_size.astype(float).values[0]

            if self.fwhm is not None:
                assert np.isfinite(self.fwhm), "FWHM must be finite number"
                # `in kernels` instead of `in kernels.keys()`: same semantics,
                # idiomatic dict membership test.
                if self.fwhm not in kernels:
                    _, kern = get_ale_kernel(mask, fwhm=self.fwhm)
                    kernels[self.fwhm] = kern
                else:
                    kern = kernels[self.fwhm]
            else:
                assert np.isfinite(sample_size), "Sample size must be finite number"
                if sample_size not in kernels:
                    _, kern = get_ale_kernel(mask, sample_size=sample_size)
                    kernels[sample_size] = kern
                else:
                    kern = kernels[sample_size]

            kernel_data = compute_ale_ma(mask.shape, ijk, kern)

            if self.memory_limit:
                transformed[i_exp, :, :, :] = kernel_data

                # Write changes to disk
                transformed.flush()
            else:
                transformed.append((kernel_data, id_))

        if self.memory_limit:
            return transformed, exp_ids
        else:
            return transformed
class KDAKernel(KernelTransformer):
    """Generate KDA modeled activation images from coordinates.

    Each focus is convolved with a sphere of radius ``r`` and constant
    ``value``; overlapping spheres within a study are summed.

    .. versionchanged:: 0.0.8

        * [ENH] Add low-memory option for kernel transformers.

    Parameters
    ----------
    r : :obj:`int`, optional
        Sphere radius, in mm.
    value : :obj:`int`, optional
        Value for sphere.
    memory_limit : :obj:`str` or None, optional
        Memory limit to apply to data. If None, no memory management will be applied.
        Otherwise, the memory limit will be used to (1) assign memory-mapped files and
        (2) restrict memory during array creation to the limit.
        Default is None.
    """

    # Overlapping spheres are summed for KDA (MKDA overrides this to False).
    _sum_overlap = True

    def __init__(self, r=10, value=1, memory_limit=None):
        self.r = float(r)
        self.value = value
        self.memory_limit = memory_limit

    def _transform(self, mask, coordinates):
        """Delegate sphere convolution to :func:`compute_kda_ma`.

        Returns a (4D array-or-memmap, 1D ID array) tuple covering all studies.
        """
        study_ids = coordinates["id"].values
        peaks_ijk = coordinates[["i", "j", "k"]].values
        maps = compute_kda_ma(
            mask.shape,
            mask.header.get_zooms(),
            peaks_ijk,
            self.r,
            self.value,
            study_ids,
            sum_overlap=self._sum_overlap,
            memory_limit=self.memory_limit,
            memmap_filename=self.memmap_filenames[0],
        )
        return maps, np.unique(study_ids)
class MKDAKernel(KDAKernel):
    """Generate MKDA modeled activation images from coordinates.

    Identical to :class:`KDAKernel` except that overlapping spheres within a
    study are binarized rather than summed (see ``_sum_overlap`` below).

    .. versionchanged:: 0.0.8

        * [ENH] Add low-memory option for kernel transformers.

    Parameters
    ----------
    r : :obj:`int`, optional
        Sphere radius, in mm.
    value : :obj:`int`, optional
        Value for sphere.
    memory_limit : :obj:`str` or None, optional
        Memory limit to apply to data. If None, no memory management will be applied.
        Otherwise, the memory limit will be used to (1) assign memory-mapped files and
        (2) restrict memory during array creation to the limit.
        Default is None.
    """

    # MKDA does not sum overlapping spheres; this flag is consumed by
    # KDAKernel._transform (passed through to compute_kda_ma).
    _sum_overlap = False
class Peaks2MapsKernel(KernelTransformer):
    """Generate peaks2maps modeled activation images from coordinates.

    .. deprecated:: 0.0.11

        `Peaks2MapsKernel` will be removed in NiMARE 0.0.13.

    Parameters
    ----------
    model_dir : :obj:`str`, optional
        Path to model directory. Default is "auto".

    Warnings
    --------
    Peaks2MapsKernel is not intended for serious research.
    We strongly recommend against using it for any meaningful analyses.
    """

    def __init__(self, model_dir="auto"):
        # stacklevel=2 attributes the DeprecationWarning to the caller's code,
        # not to this __init__.
        warnings.warn(
            "Peaks2MapsKernel is deprecated, and will be removed in NiMARE version 0.0.13.",
            DeprecationWarning,
            stacklevel=2,
        )

        # Use private attribute to hide value from get_params.
        # get_params will find model_dir=None, which is *very important* when a path is provided.
        self._model_dir = model_dir

    def _transform(self, mask, coordinates):
        """Generate peaks2maps MA maps, resampled to the mask's space.

        Returns an N-length list of (3D array, study ID) tuples.
        """
        # Collect each study's peaks as world-space (mm) coordinates.
        coordinates_list = []
        ids = []
        for id_, data in coordinates.groupby("id"):
            ijk = np.vstack((data.i.values, data.j.values, data.k.values)).T.astype(int)
            xyz = vox2mm(ijk, mask.affine)
            coordinates_list.append(xyz)
            ids.append(id_)

        imgs = compute_p2m_ma(coordinates_list, skip_out_of_bounds=True, model_dir=self._model_dir)

        # Resample model outputs into the mask's grid before pairing with IDs.
        # (The previously-present `transformed = []` initializer was dead code:
        # the variable was unconditionally rebound below.)
        resampled_imgs = []
        for img in imgs:
            resampled_imgs.append(image.resample_to_img(img, mask).get_fdata())
        transformed = list(zip(resampled_imgs, ids))
        return transformed