From 0b33453fb299a1675dc3c48393154f8c000788d5 Mon Sep 17 00:00:00 2001 From: Remi Gau Date: Wed, 12 Apr 2023 11:21:27 +0200 Subject: [PATCH] [FIX] properly infer slice_time_ref from BIDS derivatives (#3605) * checks for tr and slice time * improve getting t_r and slice_time_ref * refactor * refactor * refactor * [DATALAD] Recorded changes * refactor * refactor * refactor * fix * add more tests * fix * use pytest fixture * add indent for readability * update changelog * Apply suggestions from code review Co-authored-by: Taylor Salo * fix tests * add verbose level to reduce number of warnings * rm type annotations * extend coverage * Apply suggestions from code review Co-authored-by: Yasmin <63292494+ymzayek@users.noreply.github.com> Co-authored-by: bthirion * [DATALAD] Recorded changes * rm type hints * rm warnings * refactor get_bids_file * make xfail strict * fix typo * infer metadata all the time * swith back to original default * use new default in most tests to reduce number of warnings * Apply suggestions from code review * update deprecation version number * update doc * add test for depreaction warning * lint * lint * add test for waning * lint * update doc * update doc * fix flake8 * add doc strings * Update nilearn/_utils/tests/test_data_gen.py Co-authored-by: Yasmin <63292494+ymzayek@users.noreply.github.com> * typo --------- Co-authored-by: Taylor Salo Co-authored-by: Yasmin <63292494+ymzayek@users.noreply.github.com> Co-authored-by: bthirion --- .flake8 | 1 + doc/changes/latest.rst | 2 + nilearn/_utils/data_gen.py | 49 ++++- nilearn/_utils/tests/test_data_gen.py | 36 ++- nilearn/glm/first_level/first_level.py | 218 +++++++++++++------ nilearn/glm/tests/test_first_level.py | 289 +++++++++++++++++++++++-- nilearn/interfaces/bids/_utils.py | 4 +- nilearn/interfaces/bids/query.py | 168 ++++++++++++-- nilearn/interfaces/tests/test_bids.py | 196 +++++++++++++++++ 9 files changed, 859 insertions(+), 104 deletions(-) diff --git a/.flake8 b/.flake8 index 71eec37228..521dd6accf 100644 --- a/.flake8 +++ b/.flake8 @@ -7,6 +7,7 @@ exclude = build, dist, nilearn/externals/tempita + nilearn_cache --select = D,E,F,W,C90 docstring-convention = numpy max-line-length = 79 diff --git a/doc/changes/latest.rst b/doc/changes/latest.rst index 9c3046db12..9beb9412d9 100644 --- a/doc/changes/latest.rst +++ b/doc/changes/latest.rst @@ -17,6 +17,8 @@ NEW Fixes ----- +- Improve how :func:`~.glm.first_level.first_level_from_bids` handles fetching slice timing metadata and add additional input validation. In release ``0.12`` the default `slice_time_ref` will be `None` instead of `0` (:gh:`3605` by `Rémi Gau`_). + - Fixes several bugs in :func:`~glm.first_level.first_level_from_bids`. Refactors :func:`~glm.first_level.first_level_from_bids` and ``nilearn._utils.data_gen.create_fake_bids_dataset``. (:gh:`3525` by `Rémi Gau`_). - Change calculation of TR in :func:`~.glm.first_level.compute_regressor` to be more precise (:gh:`3362` by `Anne-Sophie Kieslinger`_) diff --git a/nilearn/_utils/data_gen.py b/nilearn/_utils/data_gen.py index 6dcc2c7cf7..7d9001347d 100644 --- a/nilearn/_utils/data_gen.py +++ b/nilearn/_utils/data_gen.py @@ -1,15 +1,14 @@ """ Data generation utilities """ - from __future__ import annotations import itertools + import json import string from pathlib import Path -from typing import Any import numpy as np import pandas as pd @@ -751,6 +750,52 @@ def basic_confounds(length, random_state=0): return confounds +def _add_metadata_to_bids_dataset(bids_path, + metadata, + json_file=None): + """Add JSON file with specific metadata to BIDS dataset. + + Note no "BIDS validation" are performed on the metadata, + or on the file path. + + Parameters + ---------- + bids_path : :obj:`str` or :obj:`pathlib.Path` + Path to the BIDS dataset where the file is to be added. + + metadata : :obj:`dict` + Dictionary with metadata to be added to the JSON file. + + json_file : :obj:`str` or :obj:`pathlib.Path`, default=None + Path to the json file relative to the root of the BIDS dataset. + If no json_file is specified, a default path is used + that is meant to work well with the defaults of + `create_fake_bids_dataset`: + this is meant to facilitate modifying datasets used during tests. + + Returns + ------- + pathlib.Path + Full path to the json file created. + """ + if json_file is None: + json_file = ( + Path(bids_path) / + 'derivatives' / + 'sub-01' / + 'ses-01' / + 'func' / + 'sub-01_ses-01_task-main_run-01_space-MNI_desc-preproc_bold.json' + ) + else: + json_file = Path(bids_path) / json_file + + with open(json_file, 'w') as f: + json.dump(metadata, f) + + return json_file + + def generate_random_img( shape, affine=np.eye(4), diff --git a/nilearn/_utils/tests/test_data_gen.py b/nilearn/_utils/tests/test_data_gen.py index f2bae214ad..d9adcf9e70 100644 --- a/nilearn/_utils/tests/test_data_gen.py +++ b/nilearn/_utils/tests/test_data_gen.py @@ -1,8 +1,7 @@ """Tests for the data generation utilities.""" - from __future__ import annotations -from pathlib import Path +import json import numpy as np import pytest @@ -15,6 +14,39 @@ ) from nilearn.image import get_data +from nilearn._utils.data_gen import _add_metadata_to_bids_dataset + + +def test_add_metadata_to_bids_derivatives_default_path(tmp_path): + """Check the filename created is the default value \ + of _add_metadata_to_bids_dataset.""" + target_dir = tmp_path / 'derivatives' / 'sub-01' / 'ses-01' / 'func' + target_dir.mkdir(parents=True) + json_file = _add_metadata_to_bids_dataset(bids_path=tmp_path, + metadata={"foo": "bar"}) + assert json_file.exists() + assert (json_file.name == + 'sub-01_ses-01_task-main_run-01_space-MNI_desc-preproc_bold.json') + with open(json_file, 'r') as f: + metadata = json.load(f) + assert metadata == {"foo": "bar"} + + +def test_add_metadata_to_bids_derivatives_with_json_path(tmp_path): + # bare bone smoke test + target_dir = tmp_path / 'derivatives' / 'sub-02' + target_dir.mkdir(parents=True) + json_file = 'derivatives/sub-02/sub-02_task-main_bold.json' + json_file = _add_metadata_to_bids_dataset(bids_path=tmp_path, + metadata={"foo": "bar"}, + json_file=json_file) + assert json_file.exists() + assert (json_file.name == + 'sub-02_task-main_bold.json') + with open(json_file, 'r') as f: + metadata = json.load(f) + assert metadata == {"foo": "bar"} + def _bids_path_template( task, diff --git a/nilearn/glm/first_level/first_level.py b/nilearn/glm/first_level/first_level.py index b6671e5816..0f0aa5a0f8 100644 --- a/nilearn/glm/first_level/first_level.py +++ b/nilearn/glm/first_level/first_level.py @@ -7,7 +7,6 @@ from __future__ import annotations import glob -import json import os import sys import time @@ -42,6 +41,10 @@ from nilearn.image import get_data from nilearn.interfaces.bids import get_bids_files, parse_bids_filename from nilearn.interfaces.bids._utils import _bids_entities, _check_bids_label +from nilearn.interfaces.bids.query import ( + _infer_repetition_time_from_dataset, + _infer_slice_timing_start_time_from_dataset, +) from sklearn.base import clone from sklearn.cluster import KMeans @@ -248,7 +251,7 @@ class FirstLevelModel(BaseGLM): slice_time_ref : float, optional This parameter indicates the time of the reference slice used in the slice timing preprocessing step of the experimental runs. It is - expressed as a percentage of the t_r (time repetition), so it can have + expressed as a fraction of the t_r (time repetition), so it can have values between 0. and 1. Default=0. %(hrf_model)s Default='glover'. @@ -365,7 +368,11 @@ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover', signal_scaling=0, noise_model='ar1', verbose=0, n_jobs=1, minimize_memory=True, subject_label=None, random_state=None): # design matrix parameters + if t_r is not None: + _check_repetition_time(t_r) self.t_r = t_r + if slice_time_ref is not None: + _check_slice_time_ref(slice_time_ref) self.slice_time_ref = slice_time_ref self.hrf_model = hrf_model self.drift_model = drift_model @@ -805,13 +812,33 @@ def _get_voxelwise_model_attribute(self, attribute, return output +def _check_repetition_time(t_r): + """Check that the repetition time is a positive number.""" + if not isinstance(t_r, (float, int)): + raise TypeError("'t_r' must be a float or an integer. " + f"Got {type(t_r)} instead.") + if t_r <= 0: + raise ValueError("'t_r' must be positive. " + f"Got {t_r} instead.") + + +def _check_slice_time_ref(slice_time_ref): + """Check that slice_time_ref is a number between 0 and 1.""" + if not isinstance(slice_time_ref, (float, int)): + raise TypeError("'slice_time_ref' must be a float or an integer. " + f"Got {type(slice_time_ref)} instead.") + if slice_time_ref < 0 or slice_time_ref > 1: + raise ValueError("'slice_time_ref' must be between 0 and 1. " + f"Got {slice_time_ref} instead.") + + def first_level_from_bids(dataset_path, task_label, space_label=None, sub_labels=None, img_filters=None, t_r=None, - slice_time_ref=0., + slice_time_ref=0.0, hrf_model='glover', drift_model='cosine', high_pass=.01, @@ -833,9 +860,11 @@ def first_level_from_bids(dataset_path, derivatives_folder='derivatives'): """Create FirstLevelModel objects and fit arguments from a BIDS dataset. - If t_r is not specified this function will attempt to load it from a - bold.json file alongside slice_time_ref. - Otherwise t_r and slice_time_ref are taken as given. + If t_r is `None` this function will attempt to load it from a bold.json. + If `slice_time_ref` is `None` this function will attempt + to infer it from a bold.json. + Otherwise t_r and slice_time_ref are taken as given, + but a warning may be raised if they are not consistent with the bold.json. Parameters ---------- @@ -864,6 +893,17 @@ def first_level_from_bids(dataset_path, Filter examples would be ('desc', 'preproc'), ('dir', 'pa') and ('run', '10'). + slice_time_ref : :obj:`float` between 0.0 and 1.0, optional + This parameter indicates the time of the reference slice used in the + slice timing preprocessing step of the experimental runs. It is + expressed as a fraction of the t_r (time repetition), so it can have + values between 0. and 1. Default=0.0 + + .. deprecated:: 0.10.1 + + The default=0 for ``slice_time_ref`` will be deprecated. + The default value will change to 'None' in 0.12. + derivatives_folder : :obj:`str`, Defaults="derivatives". derivatives and app folder path containing preprocessed files. Like "derivatives/FMRIPREP". @@ -890,6 +930,12 @@ def first_level_from_bids(dataset_path, Items for the FirstLevelModel fit function of their respective model. """ + if slice_time_ref == 0: + warn( + 'Starting in version 0.12, slice_time_ref will default to None.', + DeprecationWarning, + ) + sub_labels = sub_labels or [] img_filters = img_filters or [] @@ -902,61 +948,92 @@ def first_level_from_bids(dataset_path, derivatives_path = Path(dataset_path) / derivatives_folder - # Get acq specs for models. RepetitionTime and SliceTimingReference. - # Throw warning if no bold.json is found - if t_r is not None: - warn(f'RepetitionTime given in as {t_r}') - warn(f'slice_time_ref is {slice_time_ref} percent ' - 'of the repetition time') - else: + # Get metadata for models. + # + # We do it once and assume all subjects and runs + # have the same value. + + # Repetition time + # + # Try to find a t_r value in the bids datasets + # If the parameter information is not found in the derivatives folder, + # a search is done in the raw data folder. + filters = _make_bids_files_filter( + task_label=task_label, + space_label=space_label, + supported_filters=[*_bids_entities()["raw"], + *_bids_entities()["derivatives"]], + extra_filter=img_filters, + verbose=verbose + ) + inferred_t_r = _infer_repetition_time_from_dataset( + bids_path=derivatives_path, + filters=filters, + verbose=verbose) + if inferred_t_r is None: filters = _make_bids_files_filter( task_label=task_label, - supported_filters=[*_bids_entities()["raw"], - *_bids_entities()["derivatives"]], - extra_filter=img_filters + supported_filters=[*_bids_entities()["raw"]], + extra_filter=img_filters, + verbose=verbose ) - img_specs = get_bids_files(derivatives_path, - modality_folder='func', - file_tag='bold', - file_type='json', - filters=filters) - # If we don't find the parameter information in the derivatives folder - # we try to search in the raw data folder - if not img_specs: - filters = _make_bids_files_filter( - task_label=task_label, - supported_filters=_bids_entities()["raw"], - extra_filter=img_filters - ) - img_specs = get_bids_files(dataset_path, - modality_folder='func', - file_tag='bold', - file_type='json', - filters=filters) - if not img_specs: - warn('No bold.json found in the derivatives or dataset folder.' - ' t_r can not be inferred ' - ' and will need to be set manually in the list of models' - ' otherwise their fit will throw an exception.') - else: - specs = json.load(open(img_specs[0])) - if 'RepetitionTime' in specs: - t_r = float(specs['RepetitionTime']) - else: - warn(f'RepetitionTime not found in file {img_specs[0]}.' - ' t_r can not be inferred ', - ' and will need to be set manually in the list of models', - ' otherwise their fit will throw an exception.') - if 'SliceTimingRef' in specs: - slice_time_ref = float(specs['SliceTimingRef']) - else: - warn('SliceTimingRef not found in file %s. It will be assumed' - ' that the slice timing reference is 0.0 percent of the ' - 'repetition time. If it is not the case it will need to ' - 'be set manually in the generated list of models' % - img_specs[0]) - - sub_labels = _list_valid_subjects(derivatives_path, sub_labels) + inferred_t_r = _infer_repetition_time_from_dataset( + bids_path=dataset_path, + filters=filters, + verbose=verbose) + + if t_r is None and inferred_t_r is not None: + t_r = inferred_t_r + if t_r is not None and t_r != inferred_t_r: + warn(f"'t_r' provided ({t_r}) is different " + f"from the value found in the BIDS dataset ({inferred_t_r}).\n" + "Note this may lead to the wrong model specification.") + if t_r is not None: + _check_repetition_time(t_r) + else: + warn("'t_r' not provided and cannot be inferred from BIDS metadata. " + "It will need to be set manually in the list of models, " + "otherwise their fit will throw an exception.") + + # Slice time correction reference time + # + # Try to infer a slice_time_ref value in the bids derivatives dataset. + # + # If no value can be inferred, the default value of 0.0 is used. + filters = _make_bids_files_filter( + task_label=task_label, + space_label=space_label, + supported_filters=[*_bids_entities()["raw"], + *_bids_entities()["derivatives"]], + extra_filter=img_filters, + verbose=verbose + ) + StartTime = _infer_slice_timing_start_time_from_dataset( + bids_path=derivatives_path, + filters=filters, + verbose=verbose) + if StartTime is not None and t_r is not None: + assert (StartTime < t_r) + inferred_slice_time_ref = StartTime / t_r + else: + warn("'slice_time_ref' not provided " + "and cannot be inferred from metadata." + "It will be assumed that the slice timing reference " + "is 0.0 percent of the repetition time. " + "If it is not the case it will need to " + "be set manually in the generated list of models.") + inferred_slice_time_ref = 0.0 + + if slice_time_ref is None and inferred_slice_time_ref is not None: + slice_time_ref = inferred_slice_time_ref + if (slice_time_ref is not None + and slice_time_ref != inferred_slice_time_ref): + warn(f"'slice_time_ref' provided ({slice_time_ref}) is different " + f"from the value found in the BIDS dataset " + f"({inferred_slice_time_ref}).\n" + "Note this may lead to the wrong model specification.") + if slice_time_ref is not None: + _check_slice_time_ref(slice_time_ref) # Build fit_kwargs dictionaries to pass to their respective models fit # Events and confounds files must match number of imgs (runs) @@ -965,6 +1042,7 @@ def first_level_from_bids(dataset_path, models_events = [] models_confounds = [] + sub_labels = _list_valid_subjects(derivatives_path, sub_labels) for sub_label_ in sub_labels: # Create model @@ -1128,6 +1206,7 @@ def _get_processed_imgs( supported_filters=_bids_entities()["raw"] + _bids_entities()["derivatives"], extra_filter=img_filters, + verbose=verbose, ) imgs = get_bids_files( main_path=derivatives_path, @@ -1189,6 +1268,7 @@ def _get_events_files( task_label=task_label, supported_filters=_bids_entities()["raw"], extra_filter=img_filters, + verbose=verbose, ) events = get_bids_files( dataset_path, @@ -1210,6 +1290,7 @@ def _get_events_files( task_label=task_label, dataset_path=dataset_path, events_filters=events_filters, + verbose=verbose, ) return events @@ -1265,6 +1346,7 @@ def _get_confounds( task_label=task_label, supported_filters=supported_filters, extra_filter=img_filters, + verbose=verbose, ) confounds = get_bids_files( derivatives_path, @@ -1409,6 +1491,7 @@ def _make_bids_files_filter( space_label=None, supported_filters=None, extra_filter=None, + verbose=0 ) : """Return a filter to specific files from a BIDS dataset. @@ -1425,7 +1508,11 @@ def _make_bids_files_filter( List of authorized BIDS entities extra_filter : :obj:`list` of :obj:`tuple` (str, str) or None, optional - _description_ + Filters are of the form (field, label). + Only one filter per field allowed. + + verbose : :obj:`integer` + Indicate the level of verbosity. Returns ------- @@ -1442,11 +1529,12 @@ def _make_bids_files_filter( if extra_filter and supported_filters: for filter_ in extra_filter: if filter_[0] not in supported_filters: - warn( - f"The filter {filter_} will be skipped. " - f"'{filter_[0]}' is not among the supported filters. " - f"Allowed filters include: {supported_filters}" - ) + if verbose: + warn( + f"The filter {filter_} will be skipped. " + f"'{filter_[0]}' is not among the supported filters. " + f"Allowed filters include: {supported_filters}" + ) continue filters.append(filter_) @@ -1539,6 +1627,7 @@ def _check_bids_events_list( task_label, dataset_path, events_filters, + verbose ): """Check input BIDS events. @@ -1602,6 +1691,7 @@ def _check_bids_events_list( space_label=None, supported_filters=supported_filters, extra_filter=extra_filter, + verbose=verbose ) this_event = get_bids_files( dataset_path, diff --git a/nilearn/glm/tests/test_first_level.py b/nilearn/glm/tests/test_first_level.py index e84934a6eb..c92191e03f 100644 --- a/nilearn/glm/tests/test_first_level.py +++ b/nilearn/glm/tests/test_first_level.py @@ -1,5 +1,6 @@ import os import unittest.mock +import warnings from pathlib import Path import numpy as np @@ -8,6 +9,7 @@ from nibabel import Nifti1Image, load from nibabel.tmpdirs import InTemporaryDirectory from nilearn._utils.data_gen import ( + _add_metadata_to_bids_dataset, basic_paradigm, create_fake_bids_dataset, generate_fake_fmri_data_and_design, @@ -573,6 +575,202 @@ def test_first_level_glm_computation_with_memory_caching(): del mask, func_img, FUNCFILE, model +def test_first_level_from_bids_set_repetition_time_warnings(tmp_path): + """Raise a warning when there is no bold.json file in the derivatives + and no TR value is passed as argument. + + create_fake_bids_dataset does not add JSON files in derivatives, + so the TR value will be inferred from the raw. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=10, + n_ses=1, + tasks=['main'], + n_runs=[1]) + t_r = None + warning_msg = "No bold.json .* BIDS" + with pytest.warns(UserWarning, match=warning_msg): + models, *_ = first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + t_r=t_r, + slice_time_ref=None, + verbose=1 + ) + + # If no t_r is provided it is inferred from the raw dataset + # create_fake_bids_dataset generates a dataset + # with bold data with TR=1.5 secs + expected_t_r = 1.5 + assert models[0].t_r == expected_t_r + + +@pytest.mark.parametrize('t_r, error_type, error_msg', + [('not a number', TypeError, "must be a float"), + (-1, ValueError, "positive")]) +def test_first_level_from_bids_set_repetition_time_errors(tmp_path, + t_r, + error_type, + error_msg): + """Throw errors for impossible values of TR.""" + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + with pytest.raises(error_type, match=error_msg): + first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + slice_time_ref=None, + t_r=t_r + ) + + +def test_first_level_from_bids_set_slice_timing_ref_warnings(tmp_path): + """Check that a warning is raised when slice_time_ref is not provided \ + and cannot be inferred from the dataset. + + In this case the model should be created with a slice_time_ref of 0.0. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=10, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + slice_time_ref = None + warning_msg = "not provided and cannot be inferred" + with pytest.warns(UserWarning, match=warning_msg): + models, *_ = first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + slice_time_ref=slice_time_ref + ) + + expected_slice_time_ref = 0.0 + assert models[0].slice_time_ref == expected_slice_time_ref + + +@pytest.mark.parametrize('slice_time_ref, error_type, error_msg', + [('not a number', TypeError, "must be a float"), + (2, ValueError, "between 0 and 1")]) +def test_first_level_from_bids_set_slice_timing_ref_errors( + tmp_path, + slice_time_ref, + error_type, + error_msg): + """Throw errors for impossible values of slice_time_ref.""" + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + with pytest.raises(error_type, match=error_msg): + first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + slice_time_ref=slice_time_ref) + + +def test_first_level_from_bids_get_metadata_from_derivatives(tmp_path): + """No warning should be thrown given derivatives have metadata. + + The model created should use the values found in the derivatives. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=10, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + RepetitionTime = 6.0 + StartTime = 2.0 + _add_metadata_to_bids_dataset( + bids_path=tmp_path / bids_path, + metadata={"RepetitionTime": RepetitionTime, + "StartTime": StartTime}) + with warnings.catch_warnings(): + warnings.simplefilter("error") + models, *_ = first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + slice_time_ref=None,) + assert models[0].t_r == RepetitionTime + assert models[0].slice_time_ref == StartTime / RepetitionTime + + +def test_first_level_from_bids_get_RepetitionTime_from_derivatives(tmp_path): + """Only RepetitionTime is provided in derivatives. + + Warning about missing StarTime time in derivatives. + slice_time_ref cannot be inferred: defaults to 0. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=10, + n_ses=1, + tasks=['main'], + n_runs=[1]) + RepetitionTime = 6.0 + _add_metadata_to_bids_dataset( + bids_path=tmp_path / bids_path, + metadata={"RepetitionTime": RepetitionTime}) + + with pytest.warns(UserWarning, + match="StartTime' not found in file"): + models, *_ = first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + slice_time_ref=None, + img_filters=[('desc', 'preproc')]) + assert models[0].t_r == 6.0 + assert models[0].slice_time_ref == 0. + + +def test_first_level_from_bids_get_StartTime_from_derivatives(tmp_path): + """Only StartTime is provided in derivatives. + + Warning about missing repetition time in derivatives, + but RepetitionTime is still read from raw dataset. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=10, + n_ses=1, + tasks=['main'], + n_runs=[1]) + StartTime = 1.0 + _add_metadata_to_bids_dataset( + bids_path=tmp_path / bids_path, + metadata={"StartTime": StartTime}) + + with pytest.warns(UserWarning, + match="RepetitionTime' not found in file"): + models, *_ = first_level_from_bids( + dataset_path=str(tmp_path / bids_path), + task_label='main', + space_label='MNI', + img_filters=[('desc', 'preproc')], + slice_time_ref=None) + + # create_fake_bids_dataset generates a dataset + # with bold data with TR=1.5 secs + assert models[0].t_r == 1.5 + assert models[0].slice_time_ref == StartTime / 1.5 + + def test_first_level_contrast_computation(): with InTemporaryDirectory(): shapes = ((7, 8, 9, 10),) @@ -909,7 +1107,8 @@ def test_first_level_from_bids(tmp_path, dataset_path=bids_path, task_label=tasks[task_index], space_label=space_label, - img_filters=[("desc", "preproc")] + img_filters=[("desc", "preproc")], + slice_time_ref=None, ) assert len(models) == n_sub @@ -940,7 +1139,8 @@ def test_first_level_from_bids_select_one_run_per_session(bids_dataset): task_label='main', space_label='MNI', img_filters=[('run', '01'), - ('desc', 'preproc')] + ('desc', 'preproc')], + slice_time_ref=None, ) assert len(models) == n_sub @@ -960,7 +1160,8 @@ def test_first_level_from_bids_select_all_runs_of_one_session(bids_dataset): task_label='main', space_label='MNI', img_filters=[('ses', '01'), - ('desc', 'preproc')] + ('desc', 'preproc')], + slice_time_ref=None, ) assert len(models) == n_sub @@ -982,6 +1183,7 @@ def test_first_level_from_bids_smoke_test_for_verbose_argument( space_label="MNI", img_filters=[("desc", "preproc")], verbose=verbose, + slice_time_ref=None, ) @@ -1012,6 +1214,7 @@ def test_first_level_from_bids_several_labels_per_entity(tmp_path, entity): task_label="main", space_label="MNI", img_filters=[("desc", "preproc"), (entity, "A")], + slice_time_ref=None, ) assert len(models) == n_sub assert len(models) == len(m_imgs) @@ -1037,8 +1240,10 @@ def test_first_level_from_bids_with_subject_labels(bids_dataset): task_label='main', sub_labels=["foo", "01"], space_label='MNI', - img_filters=[('desc', 'preproc')] + img_filters=[('desc', 'preproc')], + slice_time_ref=None, ) + assert models[0].subject_label == '01' @@ -1053,7 +1258,8 @@ def test_first_level_from_bids_no_duplicate_sub_labels(bids_dataset): task_label='main', sub_labels=["01", "01"], space_label='MNI', - img_filters=[('desc', 'preproc')] + img_filters=[('desc', 'preproc')], + slice_time_ref=None, ) assert len(models) == 1 @@ -1063,16 +1269,19 @@ def test_first_level_from_bids_validation_input_dataset_path(): with pytest.raises(TypeError, match='must be a string or pathlike'): first_level_from_bids(dataset_path=2, task_label="main", - space_label="MNI") + space_label="MNI", + slice_time_ref=None,) with pytest.raises(ValueError, match="'dataset_path' does not exist"): first_level_from_bids(dataset_path="lolo", task_label="main", - space_label="MNI") + space_label="MNI", + slice_time_ref=None,) with pytest.raises(TypeError, match="derivatives_.* must be a string"): first_level_from_bids(dataset_path=Path(), task_label="main", space_label="MNI", - derivatives_folder=1) + derivatives_folder=1, + slice_time_ref=None,) @pytest.mark.parametrize("task_label, error_type", @@ -1102,7 +1311,8 @@ def test_first_level_from_bids_validation_sub_labels(bids_dataset, first_level_from_bids( dataset_path=bids_dataset, task_label="main", - sub_labels=sub_labels + sub_labels=sub_labels, + slice_time_ref=None, ) @@ -1118,7 +1328,8 @@ def test_first_level_from_bids_validation_space_label(bids_dataset, first_level_from_bids( dataset_path=bids_dataset, task_label="main", - space_label=space_label + space_label=space_label, + slice_time_ref=None, ) @@ -1138,7 +1349,8 @@ def test_first_level_from_bids_validation_img_filter(bids_dataset, first_level_from_bids( dataset_path=bids_dataset, task_label="main", - img_filters=img_filters + img_filters=img_filters, + slice_time_ref=None, ) @@ -1151,7 +1363,8 @@ def test_first_level_from_bids_too_many_bold_files(bids_dataset): with pytest.raises(ValueError, match="Too many images found"): first_level_from_bids( - dataset_path=bids_dataset, task_label="main", space_label="T1w" + dataset_path=bids_dataset, task_label="main", space_label="T1w", + slice_time_ref=None, ) @@ -1164,7 +1377,27 @@ def test_first_level_from_bids_with_missing_events(tmp_path_factory): with pytest.raises(ValueError, match="No events.tsv files found"): first_level_from_bids( - dataset_path=bids_dataset, task_label="main", space_label="MNI" + dataset_path=bids_dataset, task_label="main", space_label="MNI", + slice_time_ref=None, + ) + + +def test_first_level_from_bids_no_tr(tmp_path_factory): + """Throw warning when t_r information cannot be inferred from the data \ + and t_r=None is passed.""" + bids_dataset = _new_bids_dataset(tmp_path_factory.mktemp("no_events")) + json_files = get_bids_files(main_path=bids_dataset, + file_tag="bold", + file_type="json") + for f in json_files: + os.remove(f) + + with pytest.warns( + UserWarning, + match="t_r.* will need to be set manually in the list of models"): + first_level_from_bids( + dataset_path=bids_dataset, task_label="main", space_label="MNI", + slice_time_ref=None, t_r=None ) @@ -1178,7 +1411,8 @@ def test_first_level_from_bids_no_bold_file(tmp_path_factory): with pytest.raises(ValueError, match="No BOLD files found "): first_level_from_bids( - dataset_path=bids_dataset, task_label="main", space_label="MNI" + dataset_path=bids_dataset, task_label="main", space_label="MNI", + slice_time_ref=None, ) @@ -1194,7 +1428,8 @@ def test_first_level_from_bids_with_one_events_missing(tmp_path_factory): ValueError, match="Same number of event files " ): first_level_from_bids( - dataset_path=bids_dataset, task_label="main", space_label="MNI" + dataset_path=bids_dataset, task_label="main", space_label="MNI", + slice_time_ref=None, ) @@ -1214,7 +1449,8 @@ def test_first_level_from_bids_one_confound_missing(tmp_path_factory): with pytest.raises(ValueError, match="Same number of confound"): first_level_from_bids( - dataset_path=bids_dataset, task_label="main", space_label="MNI" + dataset_path=bids_dataset, task_label="main", space_label="MNI", + slice_time_ref=None, ) @@ -1235,6 +1471,7 @@ def test_first_level_from_bids_all_confounds_missing(tmp_path_factory): space_label="MNI", img_filters=[("desc", "preproc")], verbose=0, + slice_time_ref=None, ) assert len(models) == len(m_imgs) @@ -1256,7 +1493,8 @@ def test_first_level_from_bids_no_derivatives(tmp_path): ) with pytest.raises(ValueError, match="derivatives folder not found"): first_level_from_bids( - dataset_path=bids_path, task_label="main", space_label="MNI" + dataset_path=bids_path, task_label="main", space_label="MNI", + slice_time_ref=None, ) @@ -1275,7 +1513,8 @@ def test_first_level_from_bids_no_session(tmp_path): with pytest.raises(ValueError, match="Too many images found"): first_level_from_bids( - dataset_path=bids_path, task_label="main", space_label="T1w" + dataset_path=bids_path, task_label="main", space_label="T1w", + slice_time_ref=None, ) @@ -1298,5 +1537,17 @@ def test_first_level_from_bids_mismatch_run_index(tmp_path_factory): dataset_path=bids_dataset, task_label="main", space_label="MNI", - img_filters=[("desc", "preproc")] + img_filters=[("desc", "preproc")], + slice_time_ref=None, + ) + + +def test_first_level_from_bids_deprecated_slice_time_default(bids_dataset): + with pytest.deprecated_call(match="slice_time_ref will default to None."): + first_level_from_bids( + dataset_path=bids_dataset, + task_label="main", + space_label="MNI", + img_filters=[("desc", "preproc")], + slice_time_ref=0, ) diff --git a/nilearn/interfaces/bids/_utils.py b/nilearn/interfaces/bids/_utils.py index 139842bb49..0e041012f5 100644 --- a/nilearn/interfaces/bids/_utils.py +++ b/nilearn/interfaces/bids/_utils.py @@ -154,7 +154,7 @@ def _generate_dataset_description(out_file, model_level): json.dump(dataset_description, f_obj, indent=4, sort_keys=True) -def _bids_entities() -> dict[str, list[str]]: +def _bids_entities(): """Return a dictionary of BIDS entities. Entities are listed in the order they should appear in a filename. @@ -190,7 +190,7 @@ def _bids_entities() -> dict[str, list[str]]: } -def _check_bids_label(label) -> None: +def _check_bids_label(label): """Validate a BIDS label. https://bids-specification.readthedocs.io/en/stable/glossary.html#label-formats # noqa diff --git a/nilearn/interfaces/bids/query.py b/nilearn/interfaces/bids/query.py index f6143d0768..cd0b6e1efc 100644 --- a/nilearn/interfaces/bids/query.py +++ b/nilearn/interfaces/bids/query.py @@ -1,6 +1,146 @@ """Functions for working with BIDS datasets.""" + +from __future__ import annotations + import glob +import json import os +from pathlib import Path +from warnings import warn + + +def _get_metadata_from_bids(field, + json_files, + bids_path=None,): + """Get a metadata field from a BIDS json sidecar files. + + This assumes that all the json files in the list have the same value + for that field, + hence the metadata is read only from the first json file in the list. + + Parameters + ---------- + field : :obj:`str` + Name of the field to be read. For example 'RepetitionTime'. + + json_files : :obj:`list` of :obj:`str` + List of path to json files, for example returned by get_bids_files. + + bids_path : :obj:`str` or :obj:`pathlib.Path`, optional + Fullpath to the BIDS dataset. + + Returns + ------- + float or None + value of the field or None if the field is not found. + """ + if json_files: + assert (isinstance(json_files, list) + and isinstance(json_files[0], (Path, str))) + with open(json_files[0], 'r') as f: + specs = json.load(f) + value = specs.get(field) + if value is not None: + return value + else: + warn(f"'{field}' not found in file {json_files[0]}.") + else: + msg_suffix = f" in {bids_path}" if bids_path else "" + warn(f'No bold.json found in BIDS folder{msg_suffix}.') + + return None + + +def _infer_slice_timing_start_time_from_dataset( + bids_path, + filters, + verbose=0): + """Return the StartTime metadata field from a BIDS derivatives dataset. + + This corresponds to the reference time (in seconds) used for the slice + timing correction. + + See https://github.com/bids-standard/bids-specification/issues/836 + + Parameters + ---------- + bids_path : :obj:`str` or :obj:`pathlib.Path` + Fullpath to the derivatives folder of the BIDS dataset. + + filters : :obj:`list` of :obj:`tuple` (:obj:`str`, :obj:`str`), optional + Filters are of the form (field, label). Only one filter per field + allowed. A file that does not match a filter will be discarded. + Filter examples would be ('ses', '01'), ('dir', 'ap') and + ('task', 'localizer'). + + verbose : :obj:`int`, optional + Indicate the level of verbosity. By default, nothing is printed. + If 0 prints nothing. If 1 prints warnings. + + Returns + ------- + float or None + Value of the field or None if the field is not found. + + """ + img_specs = get_bids_files(bids_path, + modality_folder='func', + file_tag='bold', + file_type='json', + filters=filters) + if not img_specs: + if verbose: + msg_suffix = f" in {bids_path}" + warn(f'No bold.json found in BIDS folder{msg_suffix}.') + return None + + return _get_metadata_from_bids(field="StartTime", + json_files=img_specs, + bids_path=bids_path,) + + +def _infer_repetition_time_from_dataset( + bids_path, + filters, + verbose=0): + """Return the RepetitionTime metadata field from a BIDS dataset. + + Parameters + ---------- + bids_path : :obj:`str` or :obj:`pathlib.Path` + Fullpath to the raw folder of the BIDS dataset. + + filters : :obj:`list` of :obj:`tuple` (:obj:`str`, :obj:`str`), optional + Filters are of the form (field, label). Only one filter per field + allowed. A file that does not match a filter will be discarded. + Filter examples would be ('ses', '01'), ('dir', 'ap') and + ('task', 'localizer'). + + verbose : :obj:`int`, optional + Indicate the level of verbosity. By default, nothing is printed. + If 0 prints nothing. If 1 prints warnings. + + Returns + ------- + float or None + Value of the field or None if the field is not found. + + """ + img_specs = get_bids_files(main_path=bids_path, + modality_folder='func', + file_tag='bold', + file_type='json', + filters=filters) + + if not img_specs: + if verbose: + msg_suffix = f" in {bids_path}" + warn(f'No bold.json found in BIDS folder{msg_suffix}.') + return None + + return _get_metadata_from_bids(field="RepetitionTime", + json_files=img_specs, + bids_path=bids_path,) def get_bids_files( @@ -81,22 +221,20 @@ def get_bids_files( """ filters = filters or [] if sub_folder: + + ses_level = "" files = os.path.join(main_path, 'sub-*', 'ses-*') - if glob.glob(files): - files = os.path.join( - main_path, - f'sub-{sub_label}', - 'ses-*', - modality_folder, - f'sub-{sub_label}*_{file_tag}.{file_type}', - ) - else: - files = os.path.join( - main_path, - f'sub-{sub_label}', - modality_folder, - f'sub-{sub_label}*_{file_tag}.{file_type}', - ) + session_folder_exists = glob.glob(files) + if session_folder_exists: + ses_level = 'ses-*' + + files = os.path.join( + main_path, + f'sub-{sub_label}', + ses_level, + modality_folder, + f'sub-{sub_label}*_{file_tag}.{file_type}', + ) else: files = os.path.join(main_path, f'*{file_tag}.{file_type}') diff --git a/nilearn/interfaces/tests/test_bids.py b/nilearn/interfaces/tests/test_bids.py index e8cf084b82..0bce20386e 100644 --- a/nilearn/interfaces/tests/test_bids.py +++ b/nilearn/interfaces/tests/test_bids.py @@ -1,4 +1,5 @@ """Tests for the nilearn.interfaces.bids submodule.""" +import json import os import numpy as np @@ -6,6 +7,7 @@ import pytest from nibabel.tmpdirs import InTemporaryDirectory from nilearn._utils.data_gen import ( + _add_metadata_to_bids_dataset, create_fake_bids_dataset, generate_fake_fmri_data_and_design, ) @@ -16,9 +18,203 @@ parse_bids_filename, save_glm_to_bids, ) +from nilearn.interfaces.bids.query import ( + _get_metadata_from_bids, + _infer_repetition_time_from_dataset, + _infer_slice_timing_start_time_from_dataset, +) from nilearn.maskers import NiftiMasker +def test_get_metadata_from_bids(tmp_path): + """Ensure that metadata is correctly extracted from BIDS JSON files. + + Throw a warning when the field is not found. + Throw a warning when there is no JSON file. + """ + json_file = tmp_path / "sub-01_task-main_bold.json" + json_files = [json_file] + + with open(json_file, "w") as f: + json.dump({"RepetitionTime": 2.0}, f) + value = _get_metadata_from_bids(field="RepetitionTime", + json_files=json_files) + assert value == 2.0 + + with open(json_file, "w") as f: + json.dump({"foo": 2.0}, f) + with pytest.warns(UserWarning, match="'RepetitionTime' not found"): + value = _get_metadata_from_bids(field="RepetitionTime", + json_files=json_files) + + json_files = [] + with pytest.warns(UserWarning, match="No .*json found in BIDS"): + value = _get_metadata_from_bids(field="RepetitionTime", + json_files=json_files) + assert value is None + + +def test_infer_repetition_time_from_dataset(tmp_path): + """Test inferring repetition time from the BIDS dataset. + + When using create_fake_bids_dataset the value is 1.5 secs by default + in the raw dataset. + When using _add_metadata_to_bids_dataset the value is 2.0 secs. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + t_r = _infer_repetition_time_from_dataset( + bids_path=tmp_path / bids_path, + filters=[('task', 'main')]) + + expected_t_r = 1.5 + assert t_r == expected_t_r + + expected_t_r = 2.0 + _add_metadata_to_bids_dataset( + bids_path=tmp_path / bids_path, + metadata={"RepetitionTime": expected_t_r}) + + t_r = _infer_repetition_time_from_dataset( + bids_path=tmp_path / bids_path / 'derivatives', + filters=[('task', 'main'), ('run', '01')]) + + assert t_r == expected_t_r + + +def test_infer_slice_timing_start_time_from_dataset(tmp_path): + """Test inferring slice timing start time from the BIDS dataset. + + create_fake_bids_dataset does not add slice timing information + by default so the value returned will be None. + + If the metadata is added to the BIDS dataset, + then this value should be returned. + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + StartTime = _infer_slice_timing_start_time_from_dataset( + bids_path=tmp_path / bids_path / "derivatives", + filters=[('task', 'main')]) + + expected_StartTime = None + assert StartTime is expected_StartTime + + expected_StartTime = 1.0 + _add_metadata_to_bids_dataset( + bids_path=tmp_path / bids_path, + metadata={"StartTime": expected_StartTime}) + + StartTime = _infer_slice_timing_start_time_from_dataset( + bids_path=tmp_path / bids_path / "derivatives", + filters=[('task', 'main')]) + + assert StartTime == expected_StartTime + + +def _rm_all_json_files_from_bids_dataset(bids_path): + """Remove all json and make sure that get_bids_files does not find any.""" + [x.unlink() for x in bids_path.glob("**/*.json")] + selection = get_bids_files(bids_path, file_type='json', sub_folder=True) + assert selection == [] + selection = get_bids_files(bids_path, file_type='json', sub_folder=False) + assert selection == [] + + +def test_get_bids_files_inheritance_principle_root_folder(tmp_path): + """Check if json files are found if in root folder of a dataset. + + see https://bids-specification.readthedocs.io/en/latest/common-principles.html#the-inheritance-principle # noqa: E501 + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + _rm_all_json_files_from_bids_dataset(bids_path) + + # add json file to root of dataset + json_file = 'task-main_bold.json' + json_file = _add_metadata_to_bids_dataset( + bids_path=bids_path, + metadata={"RepetitionTime": 1.5}, + json_file=json_file + ) + assert json_file.exists() + + # make sure that get_bids_files finds the json file + # but only when looking in root of dataset + selection = get_bids_files(bids_path, + file_tag="bold", + file_type='json', + filters=[('task', 'main')], + sub_folder=True) + assert selection == [] + selection = get_bids_files(bids_path, + file_tag="bold", + file_type='json', + filters=[('task', 'main')], + sub_folder=False) + assert selection != [] + assert selection[0] == str(json_file) + + +@pytest.mark.xfail( + reason=("get_bids_files does not find json files" + " that are directly in the subject folder of a dataset."), + strict=True) +@pytest.mark.parametrize( + "json_file", + ['sub-01/sub-01_task-main_bold.json', + 'sub-01/ses-01/sub-01_ses-01_task-main_bold.json'] +) +def test_get_bids_files_inheritance_principle_sub_folder(tmp_path, json_file): + """Check if json files are found if in subject or session folder. + + see https://bids-specification.readthedocs.io/en/latest/common-principles.html#the-inheritance-principle # noqa: E501 + """ + bids_path = create_fake_bids_dataset(base_dir=tmp_path, + n_sub=1, + n_ses=1, + tasks=['main'], + n_runs=[1]) + + _rm_all_json_files_from_bids_dataset(bids_path) + + new_json_file = _add_metadata_to_bids_dataset( + bids_path=bids_path, + metadata={"RepetitionTime": 1.5}, + json_file=json_file + ) + print(new_json_file) + assert new_json_file.exists() + + # make sure that get_bids_files finds the json file + # but only when NOT looking in root of dataset + selection = get_bids_files(bids_path, + file_tag="bold", + file_type='json', + filters=[('task', 'main')], + sub_folder=False) + assert selection == [] + selection = get_bids_files(bids_path, + file_tag="bold", + file_type='json', + filters=[('task', 'main')], + sub_folder=True) + assert selection != [] + assert selection[0] == str(new_json_file) + + def test_get_bids_files(): with InTemporaryDirectory(): bids_path = create_fake_bids_dataset(n_sub=10, n_ses=2,