"""
The searchlight is a widely used approach for the study of the
fine-grained patterns of information in fMRI analysis, in which
multivariate statistical relationships are iteratively tested in the
neighborhood of each location of a domain.
"""
# Authors : Vincent Michel (vm.michel@gmail.com)
# Alexandre Gramfort (alexandre.gramfort@inria.fr)
# Philippe Gervais (philippe.gervais@inria.fr)
#
# License: simplified BSD
import time
import sys
import warnings
import numpy as np
from joblib import Parallel, delayed, cpu_count
from sklearn import svm
from sklearn.base import BaseEstimator
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import cross_val_score

from .. import masking
from ..image.resampling import coord_transform
from ..input_data.nifti_spheres_masker import _apply_mask_and_get_affinity
from .._utils import check_niimg_4d, fill_doc

ESTIMATOR_CATALOG = dict(svc=svm.LinearSVC, svr=svm.SVR)


@fill_doc
def search_light(X, y, estimator, A, groups=None, scoring=None,
cv=None, n_jobs=-1, verbose=0):
"""Function for computing a search_light
Parameters
----------
X : array-like of shape at least 2D
data to fit.
y : array-like
target variable to predict.
estimator : estimator object implementing 'fit'
object to use to fit the data
A : scipy sparse matrix.
        adjacency matrix. Defines for each feature the neighboring features
following a given structure of the data.
groups : array-like, optional
        Group label for each sample for cross validation. Default None.

        .. note::
            This will have no effect for scikit-learn < 0.18

scoring : string or callable, optional
The scoring strategy to use. See the scikit-learn documentation
for possible values.
If callable, it takes as arguments the fitted estimator, the
test data (X_test) and the test target (y_test) if y is
not None.
cv : cross-validation generator, optional
A cross-validation generator. If None, a 3-fold cross
validation is used or 3-fold stratified cross-validation
when y is supplied.
%(n_jobs_all)s
%(verbose0)s

    Returns
-------
scores : array-like of shape (number of rows in A)
search_light scores
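
    Examples
    --------
    A minimal, illustrative sketch (not part of the original docstring):
    it scores five one-voxel "neighborhoods" on synthetic data, using a
    diagonal adjacency matrix so that each feature is its own only
    neighbor::

        import numpy as np
        from scipy import sparse
        from sklearn.svm import LinearSVC

        rng = np.random.RandomState(0)
        X = rng.randn(20, 5)              # 20 samples, 5 voxels
        y = np.array([0, 1] * 10)         # binary target
        A = sparse.lil_matrix(np.eye(5))  # each voxel is its own neighborhood
        scores = search_light(X, y, LinearSVC(), A, cv=3, n_jobs=1)
        # scores.shape == (5,)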
"""
group_iter = GroupIterator(A.shape[0], n_jobs)
with warnings.catch_warnings(): # might not converge
warnings.simplefilter('ignore', ConvergenceWarning)
scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_group_iter_search_light)(
A.rows[list_i],
estimator, X, y, groups, scoring, cv,
thread_id + 1, A.shape[0], verbose)
for thread_id, list_i in enumerate(group_iter))
return np.concatenate(scores)


@fill_doc
class GroupIterator(object):
"""Group iterator
Provides group of features for search_light loop
that may be used with Parallel.
Parameters
----------
n_features : int
Total number of features
%(n_jobs)s
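
    Examples
    --------
    Illustrative sketch (not part of the original docstring): six feature
    indices split across two jobs yield two contiguous index arrays::

        list(GroupIterator(6, n_jobs=2))
        # [array([0, 1, 2]), array([3, 4, 5])]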
"""

    def __init__(self, n_features, n_jobs=1):
self.n_features = n_features
if n_jobs == -1:
n_jobs = cpu_count()
self.n_jobs = n_jobs

    def __iter__(self):
split = np.array_split(np.arange(self.n_features), self.n_jobs)
for list_i in split:
yield list_i


def _group_iter_search_light(list_rows, estimator, X, y, groups,
scoring, cv, thread_id, total, verbose=0):
"""Function for grouped iterations of search_light
Parameters
-----------
list_rows : array of arrays of int
adjacency rows. For a voxel with index i in X, list_rows[i] is the list
of neighboring voxels indices (in X).
estimator : estimator object implementing 'fit'
object to use to fit the data
X : array-like of shape at least 2D
data to fit.
y : array-like
target variable to predict.
groups : array-like, optional
group label for each sample for cross validation.
scoring : string or callable, optional
Scoring strategy to use. See the scikit-learn documentation.
If callable, takes as arguments the fitted estimator, the
test data (X_test) and the test target (y_test) if y is
not None.
cv : cross-validation generator, optional
A cross-validation generator. If None, a 3-fold cross validation is
used or 3-fold stratified cross-validation when y is supplied.
thread_id : int
process id, used for display.
total : int
Total number of voxels, used for display
verbose : int, optional
The verbosity level. Default is 0

    Returns
-------
par_scores : numpy.ndarray
score for each voxel. dtype: float64.
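
    Examples
    --------
    Illustrative sketch (not part of the original docstring): scoring two
    one-voxel neighborhoods on synthetic data with the default
    cross-validation::

        import numpy as np
        from sklearn.svm import LinearSVC

        rng = np.random.RandomState(0)
        X = rng.randn(12, 2)
        y = np.array([0, 1] * 6)
        par_scores = _group_iter_search_light(
            [[0], [1]], LinearSVC(), X, y, groups=None, scoring=None,
            cv=None, thread_id=1, total=2)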
"""
par_scores = np.zeros(len(list_rows))
t0 = time.time()
for i, row in enumerate(list_rows):
kwargs = dict()
kwargs['scoring'] = scoring
kwargs['groups'] = groups
par_scores[i] = np.mean(cross_val_score(estimator, X[:, row],
y, cv=cv, n_jobs=1,
**kwargs))
if verbose > 0:
            # Print progress at least once every 10 iterations;
            # higher verbosity prints more often.
step = 11 - min(verbose, 10)
if (i % step == 0):
                # With a single job, overwrite the same progress line
                # (carriage return); otherwise write one line per update.
if total == len(list_rows):
crlf = "\r"
else:
crlf = "\n"
percent = float(i) / len(list_rows)
percent = round(percent * 100, 2)
dt = time.time() - t0
# We use a max to avoid a division by zero
remaining = (100. - percent) / max(0.01, percent) * dt
sys.stderr.write(
"Job #%d, processed %d/%d voxels "
"(%0.2f%%, %i seconds remaining)%s"
% (thread_id, i, len(list_rows), percent, remaining, crlf))
return par_scores


##############################################################################
# Class for search_light #####################################################
##############################################################################
@fill_doc
class SearchLight(BaseEstimator):
"""Implement search_light analysis using an arbitrary type of classifier.

    Parameters
    ----------
mask_img : Niimg-like object
See http://nilearn.github.io/manipulating_images/input_output.html
boolean image giving location of voxels containing usable signals.
process_mask_img : Niimg-like object, optional
See http://nilearn.github.io/manipulating_images/input_output.html
boolean image giving voxels on which searchlight should be
computed.
radius : float, optional
radius of the searchlight ball, in millimeters. Defaults to 2.
estimator : 'svr', 'svc', or an estimator object implementing 'fit'
The object to use to fit the data
%(n_jobs)s
scoring : string or callable, optional
The scoring strategy to use. See the scikit-learn documentation
If callable, takes as arguments the fitted estimator, the
test data (X_test) and the test target (y_test) if y is
not None.
cv : cross-validation generator, optional
A cross-validation generator. If None, a 3-fold cross
validation is used or 3-fold stratified cross-validation
when y is supplied.
%(verbose0)s

    Notes
    -----
The searchlight [Kriegeskorte 06] is a widely used approach for the
study of the fine-grained patterns of information in fMRI analysis.
Its principle is relatively simple: a small group of neighboring
features is extracted from the data, and the prediction function is
instantiated on these features only. The resulting prediction
accuracy is thus associated with all the features within the group,
    or only with the feature at the center. This yields a map of local
    fine-grained information that can be used for assessing hypotheses
on the local spatial layout of the neural code under investigation.

    Nikolaus Kriegeskorte, Rainer Goebel & Peter Bandettini.
Information-based functional brain mapping.
Proceedings of the National Academy of Sciences
of the United States of America,
vol. 103, no. 10, pages 3863-3868, March 2006
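
    Examples
    --------
    A minimal, illustrative sketch (not from the original documentation).
    It assumes a 4D functional image ``func_img``, a binary brain mask
    ``mask_img``, and a label vector ``y`` with one entry per volume are
    already available::

        searchlight = SearchLight(mask_img, radius=5.6, estimator='svc',
                                  n_jobs=1, cv=3)
        searchlight.fit(func_img, y)
        scores_3d = searchlight.scores_  # 3D array of cross-validated scores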
"""

    def __init__(self, mask_img, process_mask_img=None, radius=2.,
estimator='svc',
n_jobs=1, scoring=None, cv=None,
verbose=0):
self.mask_img = mask_img
self.process_mask_img = process_mask_img
self.radius = radius
self.estimator = estimator
self.n_jobs = n_jobs
self.scoring = scoring
self.cv = cv
self.verbose = verbose

    def fit(self, imgs, y, groups=None):
        """Fit the searchlight.

        Parameters
        ----------
imgs : Niimg-like object
See http://nilearn.github.io/manipulating_images/input_output.html
4D image.
        y : 1D array-like
            Target variable to predict. Must have exactly as many elements as
            3D images in imgs.

        groups : array-like, optional
            Group label for each sample for cross validation. Must have
            exactly as many elements as 3D images in imgs. Default None.

            .. note::
                This will have no effect for scikit-learn < 0.18
"""
# check if image is 4D
imgs = check_niimg_4d(imgs)
# Get the seeds
process_mask_img = self.process_mask_img
if self.process_mask_img is None:
process_mask_img = self.mask_img
# Compute world coordinates of the seeds
process_mask, process_mask_affine = masking._load_mask_img(
process_mask_img)
process_mask_coords = np.where(process_mask != 0)
process_mask_coords = coord_transform(
process_mask_coords[0], process_mask_coords[1],
process_mask_coords[2], process_mask_affine)
process_mask_coords = np.asarray(process_mask_coords).T
X, A = _apply_mask_and_get_affinity(
process_mask_coords, imgs, self.radius, True,
mask_img=self.mask_img)
estimator = self.estimator
if isinstance(estimator, str):
estimator = ESTIMATOR_CATALOG[estimator]()
scores = search_light(X, y, estimator, A, groups,
self.scoring, self.cv, self.n_jobs,
self.verbose)
scores_3D = np.zeros(process_mask.shape)
scores_3D[process_mask] = scores
self.scores_ = scores_3D
return self