nilearn · Remi-Gau · Apr 12, 2023 · Mar 18, 2023 · Mar 18, 2023 · Mar 18, 2023
diff --git a/doc/changes/latest.rst b/doc/changes/latest.rst
@@ -12,9 +12,10 @@ NEW
 
 Fixes
 -----
-
 - Change calculation of TR in :func:`~.glm.first_level.compute_regressor` to be more precise (:gh:`3362` by `Anne-Sophie Kieslinger`_)
 
+- Improve how :func:`~.glm.first_level.first_level_from_bids` handles fetching slice timing metadata and add additional input validation (:gh:`3605` by `Rémi Gau`_).
+
 - :func:`~nilearn.interfaces.fmriprep.load_confounds` can support searching preprocessed data in native space. (:gh:`3531` by `Hao-Ting Wang`_)
 
 - Add correct "zscore_sample" strategy to ``signal._standardize`` which will replace the default "zscore" strategy in release 0.13  (:gh:`3474` by `Yasmin Mzayek`_).

diff --git a/nilearn/_utils/data_gen.py b/nilearn/_utils/data_gen.py
@@ -1,10 +1,14 @@
 """
 Data generation utilities
 """
+from __future__ import annotations
+
 import json
 import os
 import string
 
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
 import scipy.linalg
@@ -744,6 +748,46 @@
     return confounds
 
 
+def add_metadata_to_bids_derivatives(
+    bids_path: str | Path,
+    metadata: dict,
+    json_file: str | Path | None = None,
+) -> Path:
+    """Write metadata to a JSON file inside a BIDS dataset.
+
+    Parameters
+    ----------
+    bids_path : :obj:`str` or :obj:`pathlib.Path`
+        Root folder of the BIDS dataset.
+
+    metadata : :obj:`dict`
+        Content to dump as JSON into the file.
+
+    json_file : :obj:`str`, :obj:`pathlib.Path` or None, default=None
+        Path of the JSON file, relative to ``bids_path``.
+        If None, the file written is
+        ``sub-01_ses-01_task-main_run-01_bold.json``
+        in ``derivatives/sub-01/ses-01/func``.
+
+    Returns
+    -------
+    pathlib.Path
+        Path of the JSON file that was written.
+    """
+    if json_file is None:
+        json_file = Path(bids_path).joinpath(
+            'derivatives', 'sub-01', 'ses-01', 'func',
+            'sub-01_ses-01_task-main_run-01_bold.json')
+    else:
+        json_file = Path(bids_path) / json_file
+
+    # The parent folder is not created here: it must already exist.
+    with open(json_file, 'w') as f:
+        json.dump(metadata, f)
+
+    return json_file
+
+
 def create_fake_bids_dataset(base_dir='',
                              n_sub=10,
                              n_ses=2,

diff --git a/nilearn/_utils/tests/test_data_gen.py b/nilearn/_utils/tests/test_data_gen.py
@@ -1,3 +1,7 @@
+"""Test for data generation utilities."""
+
+import json
+
 import numpy as np
 import pytest
 from nilearn._utils.data_gen import (
@@ -8,6 +12,22 @@
 )
 from nilearn.image import get_data
 
+from nilearn._utils.data_gen import add_metadata_to_bids_derivatives
+
+
+def test_add_metadata_to_bids_derivatives(tmp_path):
+    """Smoke test: the default JSON file is written with the given content."""
+    expected_metadata = {"foo": "bar"}
+    func_dir = tmp_path.joinpath('derivatives', 'sub-01', 'ses-01', 'func')
+    func_dir.mkdir(parents=True)
+
+    sidecar = add_metadata_to_bids_derivatives(bids_path=tmp_path,
+                                               metadata=expected_metadata)
+
+    assert sidecar.exists()
+    assert sidecar.name == 'sub-01_ses-01_task-main_run-01_bold.json'
+    assert json.loads(sidecar.read_text()) == expected_metadata
+
 
 @pytest.mark.parametrize("window", ["boxcar", "hamming"])
 def test_generate_regions_ts_no_overlap(window):

diff --git a/nilearn/glm/first_level/first_level.py b/nilearn/glm/first_level/first_level.py
@@ -6,7 +6,6 @@
 
 """
 import glob
-import json
 import os
 import sys
 import time
@@ -19,7 +18,11 @@
 from sklearn.base import clone
 from sklearn.cluster import KMeans
 
-from nilearn.interfaces.bids import get_bids_files, parse_bids_filename
+from nilearn.interfaces.bids import (get_bids_files,
+                                     parse_bids_filename)
+from nilearn.interfaces.bids.query import \
+    (_infer_slice_timing_start_time_from_dataset,
+     _infer_repetition_time_from_dataset)
 from nilearn._utils import fill_doc
 from nilearn._utils.glm import (_check_events_file_uses_tab_separators,
                                 _check_run_tables, _check_run_sample_masks)
@@ -355,7 +358,12 @@
                  signal_scaling=0, noise_model='ar1', verbose=0, n_jobs=1,
                  minimize_memory=True, subject_label=None, random_state=None):
         # design matrix parameters
+        # Reject invalid values at construction time; None is left unchecked.
+        if t_r is not None:
+            _check_repetition_time(t_r)
         self.t_r = t_r
+        if slice_time_ref is not None:
+            _check_slice_time_ref(slice_time_ref)
         self.slice_time_ref = slice_time_ref
         self.hrf_model = hrf_model
         self.drift_model = drift_model
@@ -793,9 +800,60 @@
         return output
 
 
+def _check_repetition_time(t_r):
+    """Validate a repetition time value.
+
+    Parameters
+    ----------
+    t_r : :obj:`float` or :obj:`int`
+        Repetition time to check.
+
+    Raises
+    ------
+    TypeError
+        If ``t_r`` is not an instance of ``float`` or ``int``.
+
+    ValueError
+        If ``t_r`` is lower than or equal to 0.
+    """
+    if not isinstance(t_r, (float, int)):
+        type_msg = ("'t_r' must be a float or an integer. "
+                    f"Got {type(t_r)} instead.")
+        raise TypeError(type_msg)
+    if t_r <= 0:
+        raise ValueError(f"'t_r' must be positive. Got {t_r} instead.")
+
+
+def _check_slice_time_ref(slice_time_ref):
+    """Validate a slice timing reference value.
+
+    Parameters
+    ----------
+    slice_time_ref : :obj:`float` or :obj:`int`
+        Slice timing reference to check.
+
+    Raises
+    ------
+    TypeError
+        If ``slice_time_ref`` is not an instance of ``float`` or ``int``.
+
+    ValueError
+        If ``slice_time_ref`` is lower than 0 or greater than 1.
+    """
+    if not isinstance(slice_time_ref, (float, int)):
+        type_msg = ("'slice_time_ref' must be a float or an integer. "
+                    f"Got {type(slice_time_ref)} instead.")
+        raise TypeError(type_msg)
+    out_of_range = slice_time_ref < 0 or slice_time_ref > 1
+    if out_of_range:
+        range_msg = ("'slice_time_ref' must be between 0 and 1. "
+                     f"Got {slice_time_ref} instead.")
+        raise ValueError(range_msg)
+
+
 def first_level_from_bids(dataset_path, task_label, space_label=None,
                           sub_labels=None,
-                          img_filters=None, t_r=None, slice_time_ref=0.,
+                          img_filters=None, t_r=None, slice_time_ref=None,
                           hrf_model='glover', drift_model='cosine',
                           high_pass=.01, drift_order=1, fir_delays=[0],
                           min_onset=-24, mask_img=None,
@@ -901,49 +926,60 @@
     if not os.path.exists(derivatives_path):
         raise ValueError('derivatives folder does not exist in given dataset')
 
-    # Get acq specs for models. RepetitionTime and SliceTimingReference.
+    # Get acq specs for models.
+    # RepetitionTime and StartTime for slice timing.
     # Throw warning if no bold.json is found
+    filters = [('task', task_label)]
+    for img_filter in img_filters:
+        if img_filter[0] in ['acq', 'rec', 'run']:
+            filters.append(img_filter)
+
     if t_r is not None:
-        warn('RepetitionTime given in model_init as %d' % t_r)
-        warn('slice_time_ref is %d percent of the repetition '
-             'time' % slice_time_ref)
+        _check_repetition_time(t_r)
+        warn(f"'RepetitionTime' given in model_init as {t_r}")
     else:
-        filters = [('task', task_label)]
-        for img_filter in img_filters:
-            if img_filter[0] in ['acq', 'rec', 'run']:
-                filters.append(img_filter)
-
-        img_specs = get_bids_files(derivatives_path, modality_folder='func',
-                                   file_tag='bold', file_type='json',
-                                   filters=filters)
-        # If we don't find the parameter information in the derivatives folder
-        # we try to search in the raw data folder
-        if not img_specs:
-            img_specs = get_bids_files(dataset_path, modality_folder='func',
-                                       file_tag='bold', file_type='json',
-                                       filters=filters)
-        if not img_specs:
-            warn('No bold.json found in derivatives folder or '
-                 'in dataset folder. t_r can not be inferred and will need to'
-                 ' be set manually in the list of models, otherwise their fit'
-                 ' will throw an exception')
+        t_r = _infer_repetition_time_from_dataset(
+            bids_path=derivatives_path,
+            filters=filters)
+        # If the parameter information is not found in the derivatives folder,
+        # a search is done in the raw data folder.
+        if t_r is None:
+            t_r = _infer_repetition_time_from_dataset(
+                bids_path=dataset_path,
+                filters=filters)
+    if t_r is not None:
+        _check_repetition_time(t_r)
+    else:
+        warn("'t_r' not provided and cannot be inferred from metadata. "
+             "It will need to be set manually in the list of models, "
+             "otherwise their fit will throw an exception.")
+
+    if slice_time_ref is not None:
+        _check_slice_time_ref(slice_time_ref)
+        warn(f"'slice_time_ref' given in model_init as {slice_time_ref}")
+        warn(f"'slice_time_ref' is {slice_time_ref * 100:g} percent "
+             "of the repetition time")
+    else:
+        start_time = _infer_slice_timing_start_time_from_dataset(
+            bids_path=derivatives_path,
+            filters=filters)
+        if start_time is not None and t_r is not None:
+            # Raise explicitly: an ``assert`` is skipped when Python runs
+            # with the -O flag.
+            if start_time >= t_r:
+                raise ValueError(
+                    f"'StartTime' ({start_time}) must be smaller "
+                    f"than the repetition time ({t_r}).")
+            slice_time_ref = start_time / t_r
         else:
-            specs = json.load(open(img_specs[0], 'r'))
-            if 'RepetitionTime' in specs:
-                t_r = float(specs['RepetitionTime'])
-            else:
-                warn('RepetitionTime not found in file %s. t_r can not be '
-                     'inferred and will need to be set manually in the '
-                     'list of models. Otherwise their fit will throw an '
-                     ' exception' % img_specs[0])
-            if 'SliceTimingRef' in specs:
-                slice_time_ref = float(specs['SliceTimingRef'])
-            else:
-                warn('SliceTimingRef not found in file %s. It will be assumed'
-                     ' that the slice timing reference is 0.0 percent of the '
-                     'repetition time. If it is not the case it will need to '
-                     'be set manually in the generated list of models' %
-                     img_specs[0])
+            warn("'slice_time_ref' not provided "
+                 "and cannot be inferred from metadata. "
+                 "It will be assumed that the slice timing reference "
+                 "is 0.0 percent of the repetition time. "
+                 "If it is not the case it will need to "
+                 "be set manually in the generated list of models.")
+            slice_time_ref = 0.0
+    _check_slice_time_ref(slice_time_ref)
 
     # Infer subjects in dataset
     if not sub_labels: