-
Notifications
You must be signed in to change notification settings - Fork 21
/
confounds.py
555 lines (462 loc) · 19.4 KB
/
confounds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""Confound matrix selection based on Ciric et al. 2007."""
import json
import os
import warnings
import numpy as np
import pandas as pd
from nilearn.interfaces.fmriprep.load_confounds import _load_single_confounds_file
from nipype import logging
from scipy.signal import butter, filtfilt, iirnotch
from xcp_d.utils.doc import fill_doc
LOGGER = logging.getLogger("nipype.utils")
@fill_doc
def load_motion(
    confounds_df,
    TR,
    motion_filter_type=None,
    band_stop_min=None,
    band_stop_max=None,
    motion_filter_order=4,
):
    """Load the six basic motion regressors (three rotations, three translations).

    Parameters
    ----------
    confounds_df : pandas.DataFrame
        The confounds DataFrame from which to extract the six basic motion regressors.
    %(TR)s
    %(motion_filter_type)s
        If "lp" or "notch", that filtering will be done in this function.
        Otherwise, no filtering will be applied.
    %(band_stop_min)s
    %(band_stop_max)s
    %(motion_filter_order)s

    Returns
    -------
    motion_confounds_df : pandas.DataFrame
        The six motion regressors (rotations first, then translations),
        expanded with their first derivatives and the squares of both.

    References
    ----------
    .. footbibliography::
    """
    if motion_filter_type is not None and motion_filter_type not in ("lp", "notch"):
        raise ValueError(f"Motion filter type '{motion_filter_type}' not supported.")

    # Pull the six realignment parameters out of the full confounds table.
    motion_labels = ["rot_x", "rot_y", "rot_z", "trans_x", "trans_y", "trans_z"]
    motion_confounds_df = confounds_df[motion_labels]

    # Optionally band-limit the motion traces before the expansion.
    if motion_filter_type is not None:
        filtered_motion = motion_regression_filter(
            data=motion_confounds_df.to_numpy(),
            TR=TR,
            motion_filter_type=motion_filter_type,
            band_stop_min=band_stop_min,
            band_stop_max=band_stop_max,
            motion_filter_order=motion_filter_order,
        )
        motion_confounds_df = pd.DataFrame(
            data=filtered_motion,
            columns=motion_confounds_df.columns,
        )

    # Volterra expansion: first derivatives of the 6 params, then squares of
    # all 12 columns. Silence pandas' SettingWithCopyWarning while we add them.
    with pd.option_context("mode.chained_assignment", None):
        for label in motion_confounds_df.columns.tolist():
            motion_confounds_df[f"{label}_derivative1"] = motion_confounds_df[label].diff()

        for label in motion_confounds_df.columns.tolist():
            motion_confounds_df[f"{label}_power2"] = motion_confounds_df[label] ** 2

    return motion_confounds_df
@fill_doc
def get_custom_confounds(custom_confounds_folder, fmriprep_confounds_file):
    """Identify a custom confounds file.

    The custom confounds file is expected to live in ``custom_confounds_folder``
    under the same filename as the fMRIPrep confounds file.

    Parameters
    ----------
    %(custom_confounds_folder)s
    %(fmriprep_confounds_file)s
        We expect the custom confounds file to have the same name.

    Returns
    -------
    %(custom_confounds_file)s

    Raises
    ------
    FileNotFoundError
        If the folder, or the expected file within it, does not exist.
    """
    # NOTE: ``os`` is imported at module level; the previous function-local
    # ``import os`` was redundant and has been removed.
    if custom_confounds_folder is None:
        return None

    if not os.path.isdir(custom_confounds_folder):
        raise FileNotFoundError(
            f"Custom confounds location does not exist: {custom_confounds_folder}"
        )

    # Reuse the fMRIPrep confounds filename to locate the matching custom file.
    custom_confounds_filename = os.path.basename(fmriprep_confounds_file)
    custom_confounds_file = os.path.abspath(
        os.path.join(
            custom_confounds_folder,
            custom_confounds_filename,
        )
    )

    if not os.path.isfile(custom_confounds_file):
        raise FileNotFoundError(f"Custom confounds file not found: {custom_confounds_file}")

    return custom_confounds_file
def _get_acompcor_confounds(confounds_file):
confounds_df = pd.read_table(confounds_file)
csf_compcor_columns = [c for c in confounds_df.columns if c.startswith("c_comp_cor")]
wm_compcor_columns = [c for c in confounds_df.columns if c.startswith("w_comp_cor")]
if not csf_compcor_columns:
raise ValueError(f"No c_comp_cor columns in {confounds_file}")
if not wm_compcor_columns:
raise ValueError(f"No w_comp_cor columns in {confounds_file}")
csf_compcor_columns = csf_compcor_columns[: min((5, len(csf_compcor_columns)))]
wm_compcor_columns = wm_compcor_columns[: min((5, len(wm_compcor_columns)))]
selected_columns = csf_compcor_columns + wm_compcor_columns
return confounds_df[selected_columns]
@fill_doc
def load_confound_matrix(
    params,
    img_file,
    confounds_file,
    confounds_json_file,
    custom_confounds=None,
):
    """Load a subset of the confounds associated with a given file.

    Parameters
    ----------
    %(params)s
    img_file : :obj:`str`
        The path to the bold file. Used to load the AROMA mixing matrix, if necessary.
    confounds_file : :obj:`str`
        The fMRIPrep confounds file. Used to load most confounds.
    confounds_json_file : :obj:`str`
        The JSON file associated with the fMRIPrep confounds file.
    custom_confounds : :obj:`str` or None, optional
        Custom confounds TSV if there is one. Default is None.

    Returns
    -------
    confounds_df : :obj:`pandas.DataFrame` or None
        The loaded and selected confounds.
        If "AROMA" is requested, then this DataFrame will include signal components as well.
        These will be named something like "signal_[XX]".
        If ``params`` is "none", ``confounds_df`` will be None.
    confounds_metadata : :obj:`dict`
        Metadata for the columns in the confounds file.
        Only entries whose keys match a selected confound column are retained.
    """
    # Map each supported parameter-set name to the keyword arguments passed to
    # nilearn's confound-selection machinery (_load_single_confounds_file).
    PARAM_KWARGS = {
        # Get rot and trans values, as well as derivatives and square
        "24P": {
            "strategy": ["motion"],
            "motion": "full",
        },
        # Get rot and trans values, as well as derivatives and square, WM, CSF,
        "27P": {
            "strategy": ["motion", "global_signal", "wm_csf"],
            "motion": "full",
            "global_signal": "basic",
            "wm_csf": "basic",
        },
        # Get rot and trans values, as well as derivatives, WM, CSF,
        # global signal, and square. Add the square and derivative of the WM, CSF
        # and global signal as well.
        "36P": {
            "strategy": ["motion", "global_signal", "wm_csf"],
            "motion": "full",
            "global_signal": "full",
            "wm_csf": "full",
        },
        # Get the rot and trans values, their derivative,
        # as well as acompcor and cosine
        "acompcor": {
            "strategy": ["motion", "high_pass", "compcor"],
            "motion": "derivatives",
            "compcor": "anat_separated",
            "n_compcor": 5,
        },
        # Get the rot and trans values, as well as their derivative,
        # acompcor and cosine values as well as global signal
        "acompcor_gsr": {
            "strategy": ["motion", "high_pass", "compcor", "global_signal"],
            "motion": "derivatives",
            "compcor": "anat_separated",
            "global_signal": "basic",
            "n_compcor": 5,
        },
        # Get WM and CSF
        # AROMA confounds are loaded separately
        "aroma": {
            "strategy": ["wm_csf"],
            "wm_csf": "basic",
        },
        # Get WM, CSF, and global signal
        # AROMA confounds are loaded separately
        "aroma_gsr": {
            "strategy": ["wm_csf", "global_signal"],
            "wm_csf": "basic",
            "global_signal": "basic",
        },
        # Get global signal only
        "gsr_only": {
            "strategy": ["global_signal"],
            "global_signal": "basic",
        },
    }
    # No denoising requested: no confounds, empty metadata.
    if params == "none":
        return None, {}
    if params in PARAM_KWARGS:
        kwargs = PARAM_KWARGS[params]
        # NOTE(review): _load_single_confounds_file appears to return a tuple
        # whose element 1 is the selected confounds DataFrame — confirm against
        # the pinned nilearn version, as this is a private API.
        confounds_df = _load_single_confounds_file(
            confounds_file=confounds_file,
            demean=False,
            confounds_json_file=confounds_json_file,
            **kwargs,
        )[1]
    elif params == "custom":
        # For custom confounds with no other confounds
        confounds_df = pd.read_table(custom_confounds, sep="\t")
    else:
        raise ValueError(f"Unrecognized parameter string '{params}'")
    # A workaround for the compcor bug in load_confounds with fMRIPrep v22+:
    # if the selection returned no comp_cor columns, extract them by hand.
    # (True for both "acompcor" and "acompcor_gsr", via substring match.)
    if "acompcor" in params and all("comp_cor" not in col for col in confounds_df.columns):
        LOGGER.warning("No aCompCor confounds detected with load_confounds. Extracting manually.")
        confounds_df = pd.concat((_get_acompcor_confounds(confounds_file), confounds_df), axis=1)
    # Append AROMA components; non-noise columns arrive prefixed "signal__".
    if "aroma" in params:
        ica_mixing_matrix = _get_mixing_matrix(img_file)
        aroma_noise_comps_idx = _get_aroma_noise_comps(img_file)
        labeled_ica_mixing_matrix = _label_mixing_matrix(ica_mixing_matrix, aroma_noise_comps_idx)
        confounds_df = pd.concat([confounds_df, labeled_ica_mixing_matrix], axis=1)
    if params != "custom" and custom_confounds is not None:
        # For both custom and fMRIPrep confounds
        custom_confounds_df = pd.read_table(custom_confounds, sep="\t")
        confounds_df = pd.concat([custom_confounds_df, confounds_df], axis=1)
    # Keep only the metadata entries for columns that survived selection.
    with open(confounds_json_file, "r") as fo:
        full_confounds_metadata = json.load(fo)
    confounds_metadata = {
        k: v for k, v in full_confounds_metadata.items() if k in confounds_df.columns
    }
    return confounds_df, confounds_metadata
def _get_mixing_matrix(img_file):
"""Find AROMA (i.e., MELODIC) mixing matrix file for a given BOLD file."""
suffix = "_space-" + img_file.split("space-")[1]
mixing_candidates = [
img_file.replace(suffix, "_desc-MELODIC_mixing.tsv"),
]
mixing_file = [cr for cr in mixing_candidates if os.path.isfile(cr)]
if not mixing_file:
raise FileNotFoundError(f"Could not find mixing matrix for {img_file}")
return mixing_file[0]
def _get_aroma_noise_comps(img_file):
"""Find AROMA noise components file for a given BOLD file."""
suffix = "_space-" + img_file.split("space-")[1]
index_candidates = [
img_file.replace(suffix, "_AROMAnoiseICs.csv"),
]
index_file = [cr for cr in index_candidates if os.path.isfile(cr)]
if not index_file:
raise FileNotFoundError(f"Could not find AROMAnoiseICs file for {img_file}")
return index_file[0]
def _label_mixing_matrix(mixing_file, noise_index_file):
"""Prepend 'signal__' to any non-noise components in AROMA mixing matrix."""
mixing_matrix = np.loadtxt(mixing_file, delimiter="\t")
noise_index = np.loadtxt(noise_index_file, delimiter=",", dtype=int)
# shift noise index to start with zero
noise_index -= 1
all_index = np.arange(mixing_matrix.shape[1], dtype=int)
signal_index = np.setdiff1d(all_index, noise_index)
noise_components = mixing_matrix[:, noise_index]
signal_components = mixing_matrix[:, signal_index]
# basing naming convention on fMRIPrep confounds column names
noise_component_names = [f"aroma_motion_{i:03g}" for i in noise_index]
signal_component_names = [f"signal__aroma_signal_{i:03g}" for i in signal_index]
noise_component_df = pd.DataFrame(noise_components, columns=noise_component_names)
signal_component_df = pd.DataFrame(signal_components, columns=signal_component_names)
component_df = pd.concat((noise_component_df, signal_component_df), axis=1)
return component_df
@fill_doc
def motion_regression_filter(
    data,
    TR,
    motion_filter_type,
    band_stop_min,
    band_stop_max,
    motion_filter_order=4,
):
    """Filter translation and rotation motion parameters.

    Parameters
    ----------
    data : (T, R) numpy.ndarray
        Data to filter. T = time, R = motion regressors
        The filter will be applied independently to each variable, across time.
    %(TR)s
    %(motion_filter_type)s
        If not "notch" or "lp", an exception will be raised.
    %(band_stop_min)s
    %(band_stop_max)s
    %(motion_filter_order)s

    Returns
    -------
    data : (T, R) numpy.ndarray
        Filtered data. Same shape as the original data.

    Notes
    -----
    Low-pass filtering (``motion_filter_type = "lp"``) is performed with a Butterworth filter,
    as in :footcite:t:`gratton2020removal`.
    The order of the Butterworth filter is determined by ``motion_filter_order``,
    although the original paper used a first-order filter.
    The original paper also used zero-padding with a padding size of 100.
    We use constant-padding, with the default padding size determined by
    :func:`scipy.signal.filtfilt`.

    Band-stop filtering (``motion_filter_type = "notch"``) is performed with a notch filter,
    as in :footcite:t:`fair2020correction`.
    This filter uses the mean of the stopband frequencies as the target frequency,
    and the range between the two frequencies as the bandwidth.
    The filter is applied with constant-padding, using the default padding size determined by
    :func:`scipy.signal.filtfilt`.

    References
    ----------
    .. footbibliography::
    """
    if motion_filter_type not in ("lp", "notch"):
        raise ValueError(f"Motion filter type '{motion_filter_type}' not supported.")

    lowpass_hz = band_stop_min / 60  # change BPM to Hertz
    sampling_frequency = 1 / TR

    if motion_filter_type == "lp":  # low-pass filter
        # filtfilt runs the filter forwards and backwards, doubling the
        # effective order, so the requested order is halved here.
        # Cast to int: scipy.signal.butter requires an integer order, and the
        # explicit floor also tolerates odd requested orders (matching the
        # handling in the notch branch below).
        b, a = butter(
            int(np.floor(motion_filter_order / 2)),
            lowpass_hz,
            btype="lowpass",
            output="ba",
            fs=sampling_frequency,
        )
        filtered_data = filtfilt(b, a, data, axis=0, padtype="constant", padlen=data.shape[0] - 1)
    else:  # notch filter
        highpass_hz = band_stop_max / 60
        stopband_hz = np.array([lowpass_hz, highpass_hz])

        # Convert stopband to a single notch frequency.
        freq_to_remove = np.mean(stopband_hz)
        bandwidth = np.abs(np.diff(stopband_hz))

        # Create filter coefficients.
        b, a = iirnotch(freq_to_remove, freq_to_remove / bandwidth, fs=sampling_frequency)

        # Each filtfilt call applies the notch twice (forward + backward), so
        # apply it floor(order / 2) times to approximate the requested order.
        n_filter_applications = int(np.floor(motion_filter_order / 2))

        filtered_data = data.copy()
        for _ in range(n_filter_applications):
            filtered_data = filtfilt(
                b,
                a,
                filtered_data,
                axis=0,
                padtype="constant",
                padlen=data.shape[0] - 1,
            )

    return filtered_data
def _modify_motion_filter(motion_filter_type, band_stop_min, band_stop_max, TR):
"""Modify the motion filter parameters based on the TR.
Parameters
----------
motion_filter_type : str
The type of motion filter to apply.
band_stop_min : float
The minimum frequency to stop in the filter, in breaths-per-minute.
band_stop_max : float
The maximum frequency to stop in the filter, in breaths-per-minute.
TR : float
The repetition time of the data.
Returns
-------
band_stop_min_adjusted : float
The adjusted low-pass filter frequency, in breaths-per-minute.
band_stop_max_adjusted : float
The adjusted high-pass filter frequency, in breaths-per-minute.
is_modified : bool
Whether the filter parameters were modified.
"""
sampling_frequency = 1 / TR
nyquist_frequency = sampling_frequency / 2
nyquist_bpm = nyquist_frequency * 60
is_modified = False
if motion_filter_type == "lp": # low-pass filter
# Remove any frequencies above band_stop_min.
assert band_stop_min is not None
assert band_stop_min > 0
if band_stop_max:
warnings.warn("The parameter 'band_stop_max' will be ignored.")
lowpass_hz = band_stop_min / 60 # change BPM to right time unit
# Adjust frequency in case Nyquist is below cutoff.
# This won't have an effect if the data have a fast enough sampling rate.
lowpass_hz_adjusted = np.abs(
lowpass_hz
- (np.floor((lowpass_hz + nyquist_frequency) / sampling_frequency))
* sampling_frequency
)
band_stop_min_adjusted = lowpass_hz_adjusted * 60 # change Hertz back to BPM
if band_stop_min_adjusted != band_stop_min:
warnings.warn(
f"Low-pass filter frequency is above Nyquist frequency ({nyquist_bpm} BPM), "
f"so it has been changed ({band_stop_min} --> {band_stop_min_adjusted} BPM)."
)
is_modified = True
band_stop_max_adjusted = None
elif motion_filter_type == "notch": # notch filter
# Retain any frequencies *outside* the band_stop_min-band_stop_max range.
assert band_stop_max is not None
assert band_stop_min is not None
assert band_stop_max > 0
assert band_stop_min > 0
assert band_stop_min < band_stop_max, f"{band_stop_min} >= {band_stop_max}"
stopband = np.array([band_stop_min, band_stop_max])
stopband_hz = stopband / 60 # change BPM to Hertz
# Adjust frequencies in case Nyquist is within/below band.
# This won't have an effect if the data have a fast enough sampling rate.
stopband_hz_adjusted = np.abs(
stopband_hz
- (np.floor((stopband_hz + nyquist_frequency) / sampling_frequency))
* sampling_frequency
)
stopband_adjusted = stopband_hz_adjusted * 60 # change Hertz back to BPM
if not np.array_equal(stopband_adjusted, stopband):
warnings.warn(
f"One or both filter frequencies are above Nyquist frequency ({nyquist_bpm} BPM), "
"so they have been changed "
f"({stopband[0]} --> {stopband_adjusted[0]}, "
f"{stopband[1]} --> {stopband_adjusted[1]} BPM)."
)
is_modified = True
band_stop_min_adjusted, band_stop_max_adjusted = stopband_adjusted
else:
band_stop_min_adjusted, band_stop_max_adjusted, is_modified = None, None, False
return band_stop_min_adjusted, band_stop_max_adjusted, is_modified
def _infer_dummy_scans(dummy_scans, confounds_file=None):
"""Attempt to determine the number of dummy scans from the confounds file.
This function expects non-steady-state volumes flagged by the preprocessing pipeline
to be indicated in columns starting with "non_steady_state_outlier" in the confounds file.
Parameters
----------
dummy_scans : "auto" or int
The number of dummy scans.
If an integer, this will be returned without modification.
If "auto", the confounds file will be loaded and the number of non-steady-state
volumes estimated by the preprocessing workflow will be determined.
confounds_file : None or str
Path to the confounds TSV file from the preprocessing pipeline.
Only used if dummy_scans is "auto".
Default is None.
Returns
-------
dummy_scans : int
Estimated number of dummy scans.
"""
if dummy_scans == "auto":
confounds_df = pd.read_table(confounds_file)
nss_cols = [c for c in confounds_df.columns if c.startswith("non_steady_state_outlier")]
if nss_cols:
initial_volumes_df = confounds_df[nss_cols]
dummy_scans = np.any(initial_volumes_df.to_numpy(), axis=1)
dummy_scans = np.where(dummy_scans)[0]
# reasonably assumes all NSS volumes are contiguous
dummy_scans = int(dummy_scans[-1] + 1)
LOGGER.info(f"Found {dummy_scans} dummy scans in {os.path.basename(confounds_file)}")
else:
LOGGER.warning(f"No non-steady-state outliers found in {confounds_file}")
dummy_scans = 0
return dummy_scans