/
struct.py
604 lines (495 loc) · 22.8 KB
/
struct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
"""
Downloading NeuroImaging datasets: structural datasets
"""
import warnings
import os
import numpy as np
from scipy import ndimage
from sklearn.utils import Bunch
from .utils import (_get_dataset_dir, _fetch_files,
_get_dataset_descr, _uncompress_file)
from .._utils import check_niimg, niimg
from .._utils.exceptions import VisibleDeprecationWarning
from ..image import new_img_like, get_data
# Absolute path of the directory that contains this module; the bundled
# data files below are located relative to it.
_package_directory = os.path.dirname(os.path.abspath(__file__))
# Useful for the very simple examples
# Path of the "avg152T1_brain.nii.gz" image stored in the "data" folder next
# to this module (this is the file loaded by load_mni152_template).
MNI152_FILE_PATH = os.path.join(_package_directory, "data",
"avg152T1_brain.nii.gz")
# Path of the "fsaverage5" folder stored in the same "data" folder.
FSAVERAGE5_PATH = os.path.join(_package_directory, "data", "fsaverage5")
def fetch_icbm152_2009(data_dir=None, url=None, resume=True, verbose=1):
    """Download and load the ICBM152 template (dated 2009).

    Parameters
    ----------
    data_dir: string, optional
        Path of the data directory. Used to force data storage in a
        non-standard location. Default: None (meaning: default)

    url: string, optional
        Download URL of the dataset. Overrides the default URL.

    resume: bool, optional
        Passed unchanged to the file fetcher. Default: True.

    verbose: int, optional
        Verbosity level, passed to the dataset directory lookup and to the
        file fetcher. Default: 1.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, interest keys are:
        "t1", "t2", "t2_relax", "pd": anatomical images obtained with the
        given modality (resp. T1, T2, T2 relaxometry and proton
        density weighted). Values are file paths.
        "gm", "wm", "csf": segmented images, giving resp. gray matter,
        white matter and cerebrospinal fluid. Values are file paths.
        "eye_mask", "face_mask", "mask": use these images to mask out
        parts of mri images. Values are file paths.
        "description": the dataset description.

    References
    ----------
    VS Fonov, AC Evans, K Botteron, CR Almli, RC McKinstry, DL Collins
    and BDCG, "Unbiased average age-appropriate atlases for pediatric
    studies", NeuroImage, Volume 54, Issue 1, January 2011

    VS Fonov, AC Evans, RC McKinstry, CR Almli and DL Collins,
    "Unbiased nonlinear average age-appropriate brain templates from birth
    to adulthood", NeuroImage, Volume 47, Supplement 1, July 2009, Page S102
    Organization for Human Brain Mapping 2009 Annual Meeting.

    DL Collins, AP Zijdenbos, WFC Baare and AC Evans,
    "ANIMAL+INSECT: Improved Cortical Structure Segmentation",
    IPMI Lecture Notes in Computer Science, 1999, Volume 1613/1999, 210-223

    Notes
    -----
    For more information about this dataset's structure:
    http://www.bic.mni.mcgill.ca/ServicesAtlases/ICBM152NLin2009

    The original download URL is
    http://www.bic.mni.mcgill.ca/~vfonov/icbm/2009/mni_icbm152_nlin_sym_09a_nifti.zip
    """
    if url is None:
        # The URL can be retrieved from the nilearn account on OSF (Open
        # Science Framework), https://osf.io/4r3jt/quickfiles/
        # Clicking on the "share" button gives the root of the URL.
        url = "https://osf.io/7pj92/download"

    dataset_name = 'icbm152_2009'
    archive_folder = "mni_icbm152_nlin_sym_09a"
    fetch_options = {'uncompress': True}

    # One row per returned entry: (Bunch key, file name inside the archive).
    # The fetched paths come back in this same order.
    key_and_file = (
        ("csf", "mni_icbm152_csf_tal_nlin_sym_09a.nii.gz"),
        ("gm", "mni_icbm152_gm_tal_nlin_sym_09a.nii.gz"),
        ("wm", "mni_icbm152_wm_tal_nlin_sym_09a.nii.gz"),
        ("pd", "mni_icbm152_pd_tal_nlin_sym_09a.nii.gz"),
        ("t1", "mni_icbm152_t1_tal_nlin_sym_09a.nii.gz"),
        ("t2", "mni_icbm152_t2_tal_nlin_sym_09a.nii.gz"),
        ("t2_relax", "mni_icbm152_t2_relx_tal_nlin_sym_09a.nii.gz"),
        ("eye_mask", "mni_icbm152_t1_tal_nlin_sym_09a_eye_mask.nii.gz"),
        ("face_mask", "mni_icbm152_t1_tal_nlin_sym_09a_face_mask.nii.gz"),
        ("mask", "mni_icbm152_t1_tal_nlin_sym_09a_mask.nii.gz"),
    )
    targets = [(os.path.join(archive_folder, file_name), url, fetch_options)
               for _, file_name in key_and_file]

    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    local_paths = _fetch_files(data_dir, targets, resume=resume,
                               verbose=verbose)

    bunch_fields = {'description': _get_dataset_descr(dataset_name)}
    for (key, _), path in zip(key_and_file, local_paths):
        bunch_fields[key] = path
    return Bunch(**bunch_fields)
def load_mni152_template():
    """Load skullstripped 2mm version of the MNI152 originally distributed
    with FSL.

    The image is read from the path stored in MNI152_FILE_PATH.

    Returns
    -------
    mni152_template: nibabel object corresponding to the template

    References
    ----------
    VS Fonov, AC Evans, K Botteron, CR Almli, RC McKinstry, DL Collins and
    BDCG, Unbiased average age-appropriate atlases for pediatric studies,
    NeuroImage, Volume 54, Issue 1, January 2011, ISSN 1053-8119, DOI:
    10.1016/j.neuroimage.2010.07.033

    VS Fonov, AC Evans, RC McKinstry, CR Almli and DL Collins, Unbiased
    nonlinear average age-appropriate brain templates from birth to adulthood,
    NeuroImage, Volume 47, Supplement 1, July 2009, Page S102 Organization for
    Human Brain Mapping 2009 Annual Meeting, DOI: 10.1016/S1053-8119(09)70884-5
    """
    template_path = MNI152_FILE_PATH
    return check_niimg(template_path)
def load_mni152_brain_mask():
    """Load brain mask from MNI152 T1 template.

    The mask marks every voxel of the template whose value is strictly
    positive.

    .. versionadded:: 0.2.5

    Returns
    -------
    mask_img: Nifti-like mask image corresponding to grey and white matter.

    References
    ----------
    Refer to load_mni152_template function for more information about the
    MNI152 T1 template

    See Also
    --------
    nilearn.datasets.load_mni152_template : for details about version of the
        MNI152 T1 template and related.
    """
    template_img = load_mni152_template()
    # Boolean map of strictly positive voxels, stored as integers.
    positive_voxels = get_data(template_img) > 0
    return new_img_like(template_img, positive_voxels.astype(int))
def fetch_icbm152_brain_gm_mask(data_dir=None, threshold=0.2, resume=True,
                                verbose=1):
    """Download the ICBM152 template first, then build a mask from its 'gm'
    image.

    .. versionadded:: 0.2.5

    Parameters
    ----------
    data_dir: str, optional
        Path of the data directory. Used to force storage in a specified
        location. Defaults to None.

    threshold: float, optional
        Cut-off applied to the grey matter image: voxels whose value is
        strictly greater than this threshold are included in the mask.
        Defaults to 0.2.

    resume: bool, optional
        If True, try resuming partially downloaded data. Defaults to True.

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    gm_mask_img: Nifti image
        Corresponding to brain grey matter from ICBM152 template.

    Notes
    -----
    This function relies on the ICBM152 templates: the grey matter template
    is thresholded (at .2 by default) and the result is post-processed with
    a binary closing operation (2 iterations) to obtain a more compact mask
    image.

    Note: It is advised to check the mask image with your own data processing.

    See Also
    --------
    nilearn.datasets.fetch_icbm152_2009: for details regarding the ICBM152
        template.

    nilearn.datasets.load_mni152_template: for details about version of MNI152
        template and related.
    """
    # Fetch the ICBM152 templates and pick the grey matter image
    templates = fetch_icbm152_2009(data_dir=data_dir, resume=resume,
                                   verbose=verbose)
    gm_img = check_niimg(templates['gm'])
    gm_values = niimg._safe_get_data(gm_img)

    # Keep voxels above the threshold, then close small holes
    above_threshold = gm_values > threshold
    closed_mask = ndimage.binary_closing(above_threshold, iterations=2)
    return new_img_like(gm_img, closed_mask)
def fetch_oasis_vbm(n_subjects=None, dartel_version=True, data_dir=None,
                    url=None, resume=True, verbose=1):
    """Download and load Oasis "cross-sectional MRI" dataset (416 subjects).

    Parameters
    ----------
    n_subjects: int, optional
        The number of subjects to load. If None is given, all the
        subjects are used. Values above the per-version limit (403 for the
        DARTEL version, 415 otherwise) trigger a warning and are reduced to
        that limit.

    dartel_version: boolean,
        Whether or not to use data normalized with DARTEL instead of standard
        SPM8 normalization.

    data_dir: string, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    url: string, optional
        Override download URL. Used for test only (or if you setup a mirror of
        the data).

    resume: bool, optional
        If true, try resuming download if possible

    verbose: int, optional
        verbosity level (0 means no message).

    Returns
    -------
    data: Bunch
        Dictionary-like object, the interest attributes are :

        - 'gray_matter_maps': string list
          Paths to nifti gray matter density probability maps
        - 'white_matter_maps' string list
          Paths to nifti white matter density probability maps
        - 'ext_vars': np.recarray
          Data from the .csv file with information about selected subjects
        - 'data_usage_agreement': string
          Path to the .txt file containing the data usage agreement.
        - 'description': the dataset description.

        Both map lists hold one entry per selected subject identifier. This
        can be fewer than `n_subjects` when fewer identifiers remain after
        the missing and outlier subjects are excluded.

    Raises
    ------
    ValueError
        If `n_subjects` is lower than 1.

    References
    ----------
    [1] http://www.oasis-brains.org/

    [2] Open Access Series of Imaging Studies (OASIS): Cross-sectional MRI
        Data in Young, Middle Aged, Nondemented, and Demented Older Adults.
        Marcus, D. S and al., 2007, Journal of Cognitive Neuroscience.

    Notes
    -----
    In the DARTEL version, original Oasis data [1] have been preprocessed
    with the following steps:

      1. Dimension swapping (technically required for subsequent steps)
      2. Brain Extraction
      3. Segmentation with SPM8
      4. Normalization using DARTEL algorithm
      5. Modulation
      6. Replacement of NaN values with 0 in gray/white matter density maps.
      7. Resampling to reduce shape and make it correspond to the shape of
         the non-DARTEL data (fetched with dartel_version=False).
      8. Replacement of values < 1e-4 with zeros to reduce the file size.

    In the non-DARTEL version, the following steps have been performed instead:

      1. Dimension swapping (technically required for subsequent steps)
      2. Brain Extraction
      3. Segmentation and normalization to a template with SPM8
      4. Modulation
      5. Replacement of NaN values with 0 in gray/white matter density maps.

    An archive containing the gray and white matter density probability maps
    for the available subjects is provided. Gross outliers are removed and
    filtered by this data fetcher (the exclusion lists in the code hold 12
    subject ids for DARTEL and 1 for non-DARTEL).

    Externals variates (age, gender, estimated intracranial volume,
    years of education, socioeconomic status, dementia score) are provided
    in a CSV file that is a copy of the original Oasis CSV file. The current
    downloader loads the CSV file and keeps only the lines corresponding to
    the subjects that are actually demanded.

    The Open Access Structural Imaging Series (OASIS) is a project
    dedicated to making brain imaging data openly available to the public.
    Using data available through the OASIS project requires agreeing with
    the Data Usage Agreement that can be found at
    http://www.oasis-brains.org/app/template/UsageAgreement.vm
    """
    # check number of subjects
    if dartel_version:
        max_subjects, flavour = 403, 'DARTEL'
    else:
        max_subjects, flavour = 415, 'non-DARTEL'
    if n_subjects is None:
        n_subjects = max_subjects
    if n_subjects > max_subjects:
        warnings.warn('Only %d subjects are available in the '
                      '%s-normalized version of the dataset. '
                      'All of them will be used instead of the wanted %d'
                      % (max_subjects, flavour, n_subjects))
        n_subjects = max_subjects
    if n_subjects < 1:
        raise ValueError("Incorrect number of subjects (%d)" % n_subjects)

    # pick the archive corresponding to preprocessings type
    if url is None:
        if dartel_version:
            url_images = ('https://www.nitrc.org/frs/download.php/'
                          '6364/archive_dartel.tgz?i_agree=1&download_now=1')
        else:
            url_images = ('https://www.nitrc.org/frs/download.php/'
                          '6359/archive.tgz?i_agree=1&download_now=1')
        # covariates and license are in separate files on NITRC
        url_csv = ('https://www.nitrc.org/frs/download.php/'
                   '6348/oasis_cross-sectional.csv?i_agree=1&download_now=1')
        url_dua = ('https://www.nitrc.org/frs/download.php/'
                   '6349/data_usage_agreement.txt?i_agree=1&download_now=1')
    else:  # local URL used in tests
        url_csv = url + "/oasis_cross-sectional.csv"
        url_dua = url + "/data_usage_agreement.txt"
        if dartel_version:
            url_images = url + "/archive_dartel.tgz"
        else:
            url_images = url + "/archive.tgz"

    opts = {'uncompress': True}

    # missing subjects create shifts in subjects ids
    missing_subjects = [8, 24, 36, 48, 89, 93, 100, 118, 128, 149, 154,
                        171, 172, 175, 187, 194, 196, 215, 219, 225, 242,
                        245, 248, 251, 252, 257, 276, 297, 306, 320, 324,
                        334, 347, 360, 364, 391, 393, 412, 414, 427, 436]
    if dartel_version:
        # DARTEL produces outliers that are hidden by nilearn API
        removed_outliers = [27, 57, 66, 83, 122, 157, 222, 269, 282, 287,
                            309, 428]
        file_prefix = "mwrc"
    else:
        # only one gross outlier produced, hidden by nilearn API
        removed_outliers = [390]
        file_prefix = "mwc"
    excluded_subjects = set(missing_subjects) | set(removed_outliers)

    # restrict to user-specified number of subjects
    # NOTE(review): for the non-DARTEL version, range(1, 457) minus the 41
    # missing ids and the single outlier leaves 414 ids, one fewer than the
    # 415 limit enforced above -- confirm whether id 457 should be included.
    subject_ids = [s for s in range(1, 457)
                   if s not in excluded_subjects][:n_subjects]

    def _tissue_files(tissue_index):
        # One (relative path, url, options) entry per selected subject;
        # tissue_index is 1 for gray matter maps and 2 for white matter maps.
        return [
            (os.path.join(
                "OAS1_%04d_MR1" % s,
                "%s%dOAS1_%04d_MR1_mpr_anon_fslswapdim_bet.nii.gz"
                % (file_prefix, tissue_index, s)),
             url_images, opts)
            for s in subject_ids]

    file_names_gm = _tissue_files(1)
    file_names_wm = _tissue_files(2)
    file_names_extvars = [("oasis_cross-sectional.csv", url_csv, {})]
    file_names_dua = [("data_usage_agreement.txt", url_dua, {})]
    file_names = (file_names_gm + file_names_wm +
                  file_names_extvars + file_names_dua)

    dataset_name = 'oasis1'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    files = _fetch_files(data_dir, file_names, resume=resume,
                         verbose=verbose)

    # Build Bunch
    # Split on the number of files actually requested, not on n_subjects:
    # when fewer subject ids than n_subjects are available, slicing by
    # n_subjects would leak white matter / CSV / agreement paths into the
    # wrong lists.
    n_maps = len(subject_ids)
    gm_maps = files[:n_maps]
    wm_maps = files[n_maps:(2 * n_maps)]
    ext_vars_file = files[-2]
    data_usage_agreement = files[-1]

    # Keep CSV information only for selected subjects
    csv_data = np.recfromcsv(ext_vars_file)
    # Comparisons to recfromcsv data must be bytes.
    actual_subjects_ids = [
        ("OAS1" + os.path.basename(x).split("OAS1")[1][:9]).encode()
        for x in gm_maps]
    subject_mask = np.asarray([subject_id in actual_subjects_ids
                               for subject_id in csv_data['id']])
    csv_data = csv_data[subject_mask]

    fdescr = _get_dataset_descr(dataset_name)

    return Bunch(
        gray_matter_maps=gm_maps,
        white_matter_maps=wm_maps,
        ext_vars=csv_data,
        data_usage_agreement=data_usage_agreement,
        description=fdescr)
def fetch_surf_fsaverage(mesh='fsaverage5', data_dir=None):
    """Download a Freesurfer fsaverage surface.

    Parameters
    ----------
    mesh: str, optional (default='fsaverage5')
        Which mesh to fetch.
        'fsaverage5': the low-resolution fsaverage5 mesh (10242 nodes)
        'fsaverage5_sphere': the low-resolution fsaverage5 spheres
        (10242 nodes)
        'fsaverage': the high-resolution fsaverage mesh (163842 nodes)
        (high-resolution fsaverage will result in
        more computation time and memory usage)

    data_dir: str, optional (default=None)
        Path of the data directory. Used to force data storage in a specified
        location.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
         - 'pial_left': Gifti file, left hemisphere pial surface mesh
         - 'pial_right': Gifti file, right hemisphere pial surface mesh
         - 'infl_left': Gifti file, left hemisphere inflated pial surface mesh
         - 'infl_right': Gifti file, right hemisphere inflated pial
                         surface mesh
         - 'sulc_left': Gifti file, left hemisphere sulcal depth data
         - 'sulc_right': Gifti file, right hemisphere sulcal depth data

    Raises
    ------
    ValueError
        If `mesh` is not one of the supported mesh names.

    References
    ----------
    Fischl et al, (1999). High-resolution intersubject averaging and a
    coordinate system for the cortical surface. Hum Brain Mapp 8, 272-284.
    """
    # Each supported mesh name maps to the helper that provides it.
    fetchers = {'fsaverage5': _fetch_surf_fsaverage5,
                'fsaverage5_sphere': _fetch_surf_fsaverage5_sphere,
                'fsaverage': _fetch_surf_fsaverage}
    if mesh in fetchers:
        fetcher = fetchers[mesh]
        return fetcher(data_dir=data_dir)
    supported = list(fetchers)
    raise ValueError(
        "'mesh' should be one of {}; {!r} was provided".format(
            supported, mesh))
def _fetch_surf_fsaverage(data_dir=None):
    """Helper function to ship fsaverage (highest resolution) surfaces
    and sulcal information with Nilearn.

    The source of the data is downloaded from nitrc. The archive is only
    fetched and uncompressed when the 'fsaverage' sub-folder of the dataset
    directory does not exist yet.
    """
    dataset_dir = _get_dataset_dir('fsaverage', data_dir=data_dir)
    surfaces_dir = os.path.join(dataset_dir, 'fsaverage')
    url = 'https://www.nitrc.org/frs/download.php/10846/fsaverage.tar.gz'

    if not os.path.isdir(surfaces_dir):
        archive_name = 'fsaverage.tar.gz'
        _fetch_files(dataset_dir, [(archive_name, url, {})])
        _uncompress_file(os.path.join(dataset_dir, archive_name))

    result = {}
    # These entries use the key itself as the file stem.
    for name in ['pial_right', 'sulc_right', 'sulc_left', 'pial_left']:
        result[name] = os.path.join(surfaces_dir, '{}.gii'.format(name))
    # The inflated surfaces are stored under a different file stem.
    result['infl_left'] = os.path.join(surfaces_dir, 'inflated_left.gii')
    result['infl_right'] = os.path.join(surfaces_dir, 'inflated_right.gii')
    result['description'] = str(_get_dataset_descr('fsaverage'))
    return Bunch(**result)
def fetch_surf_fsaverage5(data_dir=None, url=None, resume=True, verbose=1):
    """Deprecated since version 0.4.3

    Use fetch_surf_fsaverage instead. This function emits a
    VisibleDeprecationWarning and delegates to
    fetch_surf_fsaverage(mesh='fsaverage5').

    Parameters
    ----------
    data_dir: str, optional (default=None)
        Path of the data directory. Used to force data storage in a specified
        location.

    url, resume, verbose:
        Accepted but not used by this function.

    Returns
    -------
    data: sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :
         - 'pial_left': Gifti file, left hemisphere pial surface mesh
         - 'pial_right': Gifti file, right hemisphere pial surface mesh
         - 'infl_left': Gifti file, left hemisphere inflated pial surface mesh
         - 'infl_right': Gifti file, right hemisphere inflated pial
                         surface mesh
         - 'sulc_left': Gifti file, left hemisphere sulcal depth data
         - 'sulc_right': Gifti file, right hemisphere sulcal depth data

    References
    ----------
    Fischl et al, (1999). High-resolution intersubject averaging and a
    coordinate system for the cortical surface. Hum Brain Mapp 8, 272-284.
    """
    deprecation_message = ("fetch_surf_fsaverage5 has been deprecated and "
                           "will be removed in a future release. "
                           "Use fetch_surf_fsaverage(mesh='fsaverage5')")
    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(deprecation_message, VisibleDeprecationWarning,
                  stacklevel=2)
    return fetch_surf_fsaverage(mesh='fsaverage5', data_dir=data_dir)
def _fetch_surf_fsaverage5(data_dir=None, url=None, resume=True, verbose=1):
    """Helper function to ship fsaverage5 surfaces and sulcal information
    with Nilearn.

    The source of the data is coming from nitrc based on this PR #1016.
    Manually downloaded gzipped and shipped with this function.

    Shipping is done with Nilearn based on issue #1705.

    Nothing is downloaded: the returned paths point into FSAVERAGE5_PATH.
    The `data_dir`, `url`, `resume` and `verbose` parameters are accepted
    but not used.

    Returns
    -------
    data: Bunch
        Paths of the shipped files under the keys 'pial_left', 'pial_right',
        'infl_left', 'infl_right', 'sulc_left' and 'sulc_right', plus the
        dataset description under 'description'.
    """
    dataset_name = 'fsaverage5'

    # Dataset description
    fdescr = _get_dataset_descr(dataset_name)

    def _shipped_path(kind, hemi):
        # Format the file name only, then join it to the package data
        # folder: %-formatting the full path would break if the install
        # directory itself contained a '%' character.
        return os.path.join(FSAVERAGE5_PATH,
                            '%s.%s.gii.gz' % (kind, hemi))

    return Bunch(pial_left=_shipped_path('pial', 'left'),
                 pial_right=_shipped_path('pial', 'right'),
                 infl_left=_shipped_path('pial_inflated', 'left'),
                 infl_right=_shipped_path('pial_inflated', 'right'),
                 sulc_left=_shipped_path('sulc', 'left'),
                 sulc_right=_shipped_path('sulc', 'right'),
                 description=fdescr)
def _fetch_surf_fsaverage5_sphere(data_dir=None):
    """Helper function to ship fsaverage5 spherical meshes.

    These meshes can be used for visualization purposes, but also to run
    cortical surface-based searchlight decoding.

    The source of the data is downloaded from OSF. The files are stored in
    a 'fsaverage5_sphere' dataset directory located inside the 'fsaverage'
    dataset directory.
    """
    parent_dir = _get_dataset_dir('fsaverage', data_dir=data_dir)
    sphere_dir = _get_dataset_dir('fsaverage5_sphere', data_dir=parent_dir)
    url = 'https://osf.io/b79fy/download'
    fetch_options = {'uncompress': True}

    # (Bunch key, file name) pairs; the file name is the key plus '.gii'.
    named_files = [(name, '{}.gii'.format(name))
                   for name in ['sphere_right', 'sphere_left']]
    _fetch_files(sphere_dir,
                 [(file_name, url, fetch_options)
                  for _, file_name in named_files])

    result = {}
    for name, file_name in named_files:
        result[name] = os.path.join(sphere_dir, file_name)
    result['description'] = str(_get_dataset_descr('fsaverage5_sphere'))
    return Bunch(**result)