
[ENH] Handle GLM designs with null or nan durations / onsets #3943

Merged 12 commits on Sep 6, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -61,6 +61,7 @@ doc/themes/nilearn/static/jquery.js
examples/**/*.pdf
examples/**/results/
examples/results/
results/
examples/*.nii
examples/*.nii.gz
examples/*.png
1 change: 1 addition & 0 deletions doc/changes/latest.rst
@@ -54,6 +54,7 @@ Enhancements
Changes
-------

- :func:`~glm.first_level.experimental_paradigm.check_events` will now throw a warning if some events have a 0 second duration and will throw an error if an event has ``NaN`` onset or duration (:gh:`3943` by `Rémi Gau`_).
@Remi-Gau (Collaborator, Author) commented on Sep 6, 2023:

:func:`~glm.first_level.experimental_paradigm.check_events`

and

:func:`~glm.first_level.check_events`

give me the same doc build error.

Am I missing something obvious?

Member commented:

`check_events` is not exposed in the docs, so there is nothing to link to. I'm not sure whether it is supposed to be part of the public API. If so, it would need to be added to doc/modules/glm.rst before you can use that role.

Member commented:

I wouldn't put it in the public API.

Member commented:

Then we just refer to it with backticks, without the :func: role.

Collaborator Author @Remi-Gau commented:

Wait... Just to make sure I understand.

What is the rationale for saying it is not in the public API while `check_design_matrix` is, when both of them are found in this init file?

https://github.com/nilearn/nilearn/blob/main/nilearn/glm/first_level/__init__.py#L24

Member commented:

As the codebase stands, I wouldn't take what is in the init file as the public API that is supposed to be exposed to users: we know there is heterogeneity in how "private" functions are written, and a private module can also have its own public API, which would be fine to include in the init. I agree, though, that given the refactoring we are doing, `check_events` should either live in a file named with a leading underscore or, since it is only used in design_matrix.py, be moved there and renamed with a leading underscore. Following this convention, it would not be a problem for it to be imported in a module's init file, since the import statement itself, carrying the leading underscore, would make clear that it is private. This is my understanding of PEP 8, though we may also want to make a decision on that aspect if it's confusing.

Collaborator Author @Remi-Gau commented:

OK thanks for the clarification.

OK, so for the moment it really feels like the public API is whatever the doc says is in the public API. Hopefully things will get clearer once we clean things up.

For now I will use the backticks without the func role.
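For reference, the two notations under discussion render like this in the changelog's reStructuredText (a sketch, not the final wording of the entry):

```rst
.. A cross-reference role; this only builds if check_events is listed
   in doc/modules/glm.rst:

- :func:`~glm.first_level.check_events` now warns on 0-second durations.

.. A plain literal; no link target needed, so it always builds:

- ``check_events`` now warns on 0-second durations.
```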

- Removed old files and test code from deprecated datasets COBRE and NYU resting state (:gh:`3743` by `Michelle Wang`_).
- :bdg-secondary:`Maint` PEP8 and isort compliance extended to the whole nilearn codebase. (:gh:`3538`, :gh:`3566`, :gh:`3548`, :gh:`3556`, :gh:`3601`, :gh:`3609`, :gh:`3646`, :gh:`3650`, :gh:`3647`, :gh:`3640`, :gh:`3615`, :gh:`3614`, :gh:`3648`, :gh:`3651` by `Rémi Gau`_).
- :bdg-secondary:`Maint` Finish applying black formatting to most of the codebase. (:gh:`3836`, :gh:`3833`, :gh:`3827`, :gh:`3810`, :gh:`3803`, :gh:`3802`, :gh:`3795`, :gh:`3790`, :gh:`3783`, :gh:`3777` by `Rémi Gau`_).
5 changes: 3 additions & 2 deletions doc/glm/first_level_model.rst
@@ -45,8 +45,9 @@ Refer to the examples below for usage under the different scenarios:
* Advanced beta series models: :ref:`sphx_glr_auto_examples_07_advanced_plot_beta_series.py`

To ascertain that the sequence of events provided to the first level model is accurate, Nilearn provides an
event visualization function called :func:`nilearn.plotting.plot_event()`. Sample usage for this is available
in :ref:`sphx_glr_auto_examples_02_decoding_plot_haxby_glm_decoding.py`.
event visualization function called :func:`nilearn.plotting.plot_event()`.
Sample usage for this is available in
:ref:`sphx_glr_auto_examples_04_glm_first_level_plot_write_events_file.py`.

Once the events are defined, the design matrix is created using the
:func:`nilearn.glm.first_level.make_first_level_design_matrix` function::
35 changes: 20 additions & 15 deletions examples/04_glm_first_level/plot_write_events_file.py
@@ -19,11 +19,10 @@

print(__doc__)

#########################################################################
# %%
Collaborator Author @Remi-Gau commented:

Implements @ymzayek's suggestion: #3937

# Define the onset times in seconds. These are typically extracted from
# the stimulation software used, but we will use hardcoded values in this
# example.
import numpy as np

# fmt: off
onsets = [
@@ -37,9 +36,8 @@
264.0, 266.7, 269.7, 275.4, 278.4, 284.4, 288.0, 291.0, 293.4, 296.7,
]
# fmt: on
onsets = np.array(onsets)

#########################################################################
# %%
# Associated trial types: these are numbered between 0 and 9, hence
# corresponding to 10 different conditions.

@@ -51,9 +49,8 @@
7, 1, 0, 0, 4, 1, 9, 8, 4, 9, 9
]
# fmt: on
trial_type_idx = np.array(trial_type_idx)

#########################################################################
# %%
# We may want to map these indices to explicit condition names.
# For that, we define a list of 10 strings.
condition_ids = [
@@ -71,13 +68,21 @@

trial_types = [condition_ids[i] for i in trial_type_idx]

#########################################################################
# %%
# We must also define a duration (required by :term:`BIDS` conventions).
# In this case, all trials lasted one second.

durations = np.ones_like(onsets)

#########################################################################
# In this case, all trials lasted one second,
# except for button responses,
# which are modeled as events with instantaneous duration (0 seconds).

null_duration_trials = [2, 3, 4, 5]
durations = []
for i in trial_type_idx:
if i in null_duration_trials:
durations.append(0)
else:
durations.append(1)
(bthirion marked this conversation as resolved.)
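As a side note, the loop in the diff above could be written in vectorized NumPy form. A sketch, reusing the example's `trial_type_idx` (the values here are an abbreviated, hypothetical subset of the example's full list):

```python
import numpy as np

# Abbreviated, hypothetical subset of the example's trial_type_idx.
trial_type_idx = np.array([7, 1, 0, 0, 4, 1, 9, 8, 4, 9, 9])
null_duration_trials = [2, 3, 4, 5]

# 0 s for the instantaneous conditions, 1 s otherwise.
durations = np.where(np.isin(trial_type_idx, null_duration_trials), 0.0, 1.0)
```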

# %%
# Form a pandas DataFrame from this information.
import pandas as pd

@@ -89,11 +94,11 @@
}
)

#########################################################################
# %%
# Take a look at the new DataFrame
events

#########################################################################
# %%
# Export them to a tsv file.
from pathlib import Path

@@ -104,7 +109,7 @@
events.to_csv(tsvfile, sep="\t", index=False)
print(f"The event information has been saved to {tsvfile}")

#########################################################################
# %%
# Optionally, the events can be visualized using the
# :func:`~nilearn.plotting.plot_event` function.
import matplotlib.pyplot as plt
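Condensed, the example's events-table construction after this diff looks roughly like the sketch below (the onsets, condition names, and indices are shortened, hypothetical stand-ins for the example's full lists):

```python
import pandas as pd

# Hypothetical, abbreviated stand-ins for the example's full lists.
onsets = [0.0, 2.4, 8.7, 11.4, 15.0]
trial_type_idx = [7, 1, 2, 0, 4]
condition_ids = [f"c{i}" for i in range(10)]

# Conditions modeled as instantaneous (0 s) events, as in the diff.
null_duration_trials = [2, 3, 4, 5]

events = pd.DataFrame({
    "trial_type": [condition_ids[i] for i in trial_type_idx],
    "onset": onsets,
    "duration": [0.0 if i in null_duration_trials else 1.0
                 for i in trial_type_idx],
})
# events.to_csv("events.tsv", sep="\t", index=False)  # BIDS-style export
```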
152 changes: 113 additions & 39 deletions nilearn/glm/first_level/experimental_paradigm.py
@@ -17,13 +17,15 @@
import pandas as pd
from pandas.api.types import is_numeric_dtype

VALID_FIELDS = {"onset", "duration", "trial_type", "modulation"}


def check_events(events):
"""Test that the events data describes a valid experimental paradigm.

It is valid if the events data has an 'onset' key.
It is valid if the events data has ``'onset'`` and ``'duration'`` keys
with numeric non NaN values.

This function also handles duplicate events
by summing their modulation if they have one.

Parameters
----------
Expand All @@ -47,83 +49,155 @@ def check_events(events):
Per-event modulation;
defaults to ones(n_events) when no modulation is provided.

Raises
------
TypeError
If the events data is not a pandas DataFrame.

ValueError
If the events data:

- has no ``'onset'`` or ``'duration'`` column,
- has non-numeric values
  in the ``'onset'`` or ``'duration'`` columns,
- has NaN values in the ``'onset'`` or ``'duration'`` columns.

Warns
-----
UserWarning
If the events data:

- has no ``'trial_type'`` column,
- has any event with a duration equal to 0,
- contains columns other than ``'onset'``, ``'duration'``,
``'trial_type'`` or ``'modulation'``,
- contains duplicated events, meaning events with the same:

- ``'trial_type'``
- ``'onset'``
- ``'duration'``

"""
# Check that events is a Pandas DataFrame
if not isinstance(events, pd.DataFrame):
raise TypeError(
"Events should be a Pandas DataFrame. "
f"A {type(events)} was provided instead."
)

events = _check_columns(events)

events_copy = events.copy()

events_copy = _handle_missing_trial_types(events_copy)

_check_null_duration(events_copy)

_check_unexpected_columns(events_copy)

events_copy = _handle_modulation(events_copy)

cleaned_events = _handle_duplicate_events(events_copy)

trial_type = cleaned_events["trial_type"].values
onset = cleaned_events["onset"].values
duration = cleaned_events["duration"].values
modulation = cleaned_events["modulation"].values
return trial_type, onset, duration, modulation


def _check_columns(events):
# Column checks
for col_name in ["onset", "duration"]:
if col_name not in events.columns:
raise ValueError(
f"The provided events data has no {col_name} column."
)
if events[col_name].isnull().any():
raise ValueError(
f"The following column must not contain nan values: {col_name}"
)
# Make sure we have a numeric type for duration
if not is_numeric_dtype(events[col_name]):
try:
events = events.astype({col_name: float})
except ValueError as e:
raise ValueError(
f"Could not cast {col_name} to float in events data."
) from e
return events


def _handle_missing_trial_types(events):
if "trial_type" not in events.columns:
warnings.warn(
"'trial_type' column not found in the given events data."
)
events["trial_type"] = "dummy"
return events

# Make a copy of the dataframe
events_copy = events.copy()

# Handle missing trial types
if "trial_type" not in events_copy.columns:
def _check_null_duration(events):
conditions_with_null_duration = events["trial_type"][
events["duration"] == 0
].unique()
if len(conditions_with_null_duration) > 0:
warnings.warn(
"'trial_type' column not found in the given events data."
"The following conditions contain events with null duration:\n"
f"{', '.join(conditions_with_null_duration)}."
)
events_copy["trial_type"] = "dummy"

# Handle modulation
if "modulation" in events_copy.columns:

def _handle_modulation(events):
if "modulation" in events.columns:
print(
"A 'modulation' column was found in "
"the given events data and is used."
)
else:
events_copy["modulation"] = 1
events["modulation"] = 1
return events


VALID_FIELDS = {"onset", "duration", "trial_type", "modulation"}


def _check_unexpected_columns(events):
# Warn for each unexpected column that will
# not be used afterwards
unexpected_columns = set(events_copy.columns).difference(VALID_FIELDS)
for unexpected_column in unexpected_columns:
unexpected_columns = list(set(events.columns).difference(VALID_FIELDS))
if unexpected_columns:
warnings.warn(
f"Unexpected column '{unexpected_column}' in events data. "
"It will be ignored."
"The following unexpected columns "
"in events data will be ignored: "
f"{', '.join(unexpected_columns)}"
)

# Make sure we have a numeric type for duration
if not is_numeric_dtype(events_copy["duration"]):
try:
events_copy = events_copy.astype({"duration": float})
except ValueError:
raise ValueError(
"Could not cast duration to float in events data."
)

# Handle duplicate events
# Two events are duplicates if they have the same:
# - trial type
# - onset
COLUMN_DEFINING_EVENT_IDENTITY = ["trial_type", "onset", "duration"]
# Two events are duplicates if they have the same:
# - trial type
# - onset
# - duration
COLUMN_DEFINING_EVENT_IDENTITY = ["trial_type", "onset", "duration"]

# Duplicate handling strategy
# Sum the modulation values of duplicate events
STRATEGY = {"modulation": "sum"}

# Duplicate handling strategy
# Sum the modulation values of duplicate events
STRATEGY = {"modulation": "sum"}

def _handle_duplicate_events(events):
cleaned_events = (
events_copy.groupby(COLUMN_DEFINING_EVENT_IDENTITY, sort=False)
events.groupby(COLUMN_DEFINING_EVENT_IDENTITY, sort=False)
.agg(STRATEGY)
.reset_index()
)

# If there are duplicates, give a warning
if len(cleaned_events) != len(events_copy):
if len(cleaned_events) != len(events):
warnings.warn(
"Duplicated events were detected. "
"Amplitudes of these events will be summed. "
"You might want to verify your inputs."
)

trial_type = cleaned_events["trial_type"].values
onset = cleaned_events["onset"].values
duration = cleaned_events["duration"].values
modulation = cleaned_events["modulation"].values
return trial_type, onset, duration, modulation
return cleaned_events
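Not the nilearn implementation itself, but the duplicate-collapsing step at the end of the diff can be reproduced as a standalone sketch with plain pandas (the function name here is hypothetical):

```python
import warnings

import pandas as pd


def sum_duplicate_events(events):
    """Collapse events sharing trial_type/onset/duration, summing modulation.

    Standalone sketch of the duplicate handling in the diff above;
    not the nilearn implementation.
    """
    identity = ["trial_type", "onset", "duration"]
    cleaned = (
        events.groupby(identity, sort=False)
        .agg({"modulation": "sum"})
        .reset_index()
    )
    if len(cleaned) != len(events):
        warnings.warn(
            "Duplicated events were detected. "
            "Amplitudes of these events will be summed."
        )
    return cleaned


events = pd.DataFrame({
    "trial_type": ["a", "a", "b"],
    "onset": [0.0, 0.0, 3.0],
    "duration": [1.0, 1.0, 1.0],
    "modulation": [1.0, 1.0, 1.0],
})
cleaned = sum_duplicate_events(events)  # 2 rows; "a" modulations summed
```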