# Generating BIDS *events* files from *PsychoPy*'s artifacts

In [None]:
%matplotlib inline

from matplotlib import pyplot as plt
# Default to wide, short figures (width 20, height 2.5) for every plot in this notebook
plt.rcParams["figure.figsize"] = (20, 2.5)

In [None]:
from pathlib import Path
from pickle import load
from json import dumps
import numpy as np
import pandas as pd
import h5py
from psychopy.tools.filetools import fromFile

In [None]:
# Folder holding the PsychoPy recordings; session folders are resolved relative to it below
DATA_PATH = Path("/data/datasets/hcph-pilot-sourcedata/recordings/psychopy")
# Second dataset location (presumably the BIDS root the events files are meant for -- TODO confirm)
BIDS_PATH = Path("/data/datasets/hcph")

In [None]:
# Folder of the session whose files are inspected in the following cells
session_path = DATA_PATH / "session-2023-11-03"

In [None]:
# Open the HDF5 file stored with the bht run of this session.
# NOTE(review): the handle `f` is never closed in the cells shown here, and later
# cells keep indexing into `data`, so it has to stay open while they run.
f = h5py.File(session_path / "bht_2023-11-03_20h27.56.353_0_session_29.hdf5")
data = f["data_collection"]
# List the entries under "data_collection", then those under its "events" entry
print(list(data.keys()))
list(data["events"].keys())

In [None]:
# Read the full content of the "session_meta_data" entry
data["session_meta_data"][:]

In [None]:
# Check the shape of the experiment "MessageEvent" entry
data["events"]["experiment"]["MessageEvent"].shape

In [None]:
import os

# Set before the jupylet import in the next cell.
# NOTE(review): presumably selects the EGL backend so an OpenGL context can be
# created without a display -- confirm this is still required.
os.environ['PYOPENGL_PLATFORM'] = 'egl'

In [None]:
from jupylet.app import App
from jupylet.label import Label
# Instantiate a small jupylet App.
# NOTE(review): `app` is not used again in the cells shown; presumably it is only
# needed so that loading the .psydat file below finds a graphics context -- confirm.
app = App(width=320, height=64)

# Load the PsychoPy .psydat file saved for the bht run
psydata = fromFile(session_path / "bht_2023-11-03_20h27.56.353_0_session_29.psydat")

In [None]:
# Read the whole PsychoPy log of the qct run as a single string
logtext = (session_path / "qct_2023-11-03_19h58.22.540_0_session_29.log").read_text()

In [None]:
# Dump the complete log to the cell output (the output grows with the log's length)
print(logtext)

In [None]:
from write_event_file import write_event_file_from_log

In [None]:
import re
from write_event_file import TRIAL_TYPE

def _first_timestamp(pattern: str, logtext: str) -> "float | None":
    """Return the first timestamp captured by group 1 of ``pattern``, or None if it never matches."""
    match = re.search(pattern, logtext)
    return float(match.group(1)) if match else None


def _approximate_trigger(trial_type: str, onset: float, logtext: str) -> float:
    """Approximate the trigger time with the closest logged event when no trigger was recorded."""
    if trial_type == "movie":
        # We coded the resting-state task such that the movie starts at the trigger.
        return onset

    # NOTE(review): the original list was ["blank", "cog", "mot", "cog"]; the repeated
    # "cog" may have been meant as another qct trial type -- confirm before extending.
    if trial_type in ("blank", "cog", "mot"):
        # The closest event for qct is "EXP 	eyetracker.clearEvents()".
        # The dot and parentheses are escaped so that the pattern has a single group.
        pattern = r"(\d+\.\d+)\s+EXP\s+eyetracker\.clearEvents\(\)"
    else:
        # The closest event for bht is "EXP  text_2: autoDraw = False"
        pattern = r"(\d+\.\d+)\s+EXP\s+text_2: autoDraw = False"

    approximation = _first_timestamp(pattern, logtext)
    if approximation is None:
        raise ValueError(
            "No trigger keypress found in the log and no fallback event to approximate it."
        )
    return approximation


def write_event_file_from_log(logfile: Path, write: bool = False) -> pd.DataFrame:
    """
    Create a BIDS events table from the psychopy log.

    Every ``<component>: autoDraw = True`` entry whose component is a key of
    ``TRIAL_TYPE`` is paired with the next ``autoDraw = False`` entry of the same
    component to obtain one event (onset and duration). Onsets are expressed
    relative to the first trigger (``DATA  Keypress: s``) found in the log.

    Parameters
    ----------
    logfile : :obj:`os.pathlike`
        The path to the log output from psychopy.
    write : :obj:`bool`
        When ``True``, the table is also saved next to the log, replacing ``.log``
        with ``_from_log_events.tsv`` in the file name. Nothing is written by default.

    Returns
    -------
    event_dataframe : :obj:`pandas.DataFrame`
        One row per event with columns ``onset``, ``duration``, ``trial_type`` and
        ``value``. ``onset`` and ``duration`` are strings with one decimal;
        ``value`` holds the hand (``"right"``/``"left"``) for ``mot`` events, the
        fixation position ``"[x, y]"`` for ``cog`` events, and is an empty string
        otherwise (or when that information cannot be found in the log).

    Raises
    ------
    ValueError
        If the log contains no trigger and none of the fallback events.

    """
    logfile = Path(logfile)
    logtext = logfile.read_text()

    # Find the timestamp of the first trigger aka the beginning of fMRI recording.
    # It stays None if no trigger was logged; it is then approximated at the first event.
    trigger_timestamp = _first_timestamp(r"([\d.]+)\s+DATA\s+Keypress:\s+s", logtext)

    # Match lines reporting an autoDraw change of any component corresponding to a task
    autodraw_pattern = r"([\d.]+)\s+EXP\s+({}):\s+autoDraw\s*=\s*(\w+)".format(
        "|".join(TRIAL_TYPE.keys())
    )
    autodraw_events = re.findall(autodraw_pattern, logtext)

    fix_pos_pattern = r"([\d.]+)\s+EXP\s+New trial \(rep=\d+, index=\d+\): OrderedDict\(\[\(\'xpos\', (-?\d+\.\d+)\), \(\'ypos\', (-?\d+\.\d+)\)\]\)"

    # Timestamp (as written in the log) of the pending "autoDraw = True" per component
    start_timestamp = {}
    records = []

    for timestamp, keyword, status in autodraw_events:
        if status == "True":
            # Store the start timestamp for "autoDraw=True"
            start_timestamp[keyword] = timestamp
            continue

        if status != "False" or keyword not in start_timestamp:
            continue

        # An "autoDraw=False" with a pending "autoDraw=True" closes the event.
        # Popping the start avoids double counting.
        onset_text = start_timestamp.pop(keyword)
        onset = float(onset_text)
        duration = float(timestamp) - onset

        # Match keyword with associated sub-task
        trial_type = TRIAL_TYPE[keyword]

        # Regex matching exactly the onset timestamp as it is written in the log
        # (the lookbehind prevents matching the tail of a longer number).
        stamp = r"(?<![\d.])" + re.escape(onset_text)

        # For the fingertapping and the eye movement sub-tasks, we have to encode which hand or
        # the position of the fixation point to fully characterize the sub-task instance.
        value = ""
        if trial_type == "mot":
            # Which hand is instructed to fingertap is encoded in the psychopy log one line above
            # the onset 'ft_hand : autoDraw = True' and has the same timestamp as the onset.
            hand_match = re.search(
                stamp + r"\s+EXP\s+ft_hand:\s*text\s*=\s*\'(RIGHT|LEFT)\'", logtext
            )
            if hand_match:
                value = hand_match.group(1).lower()

        elif trial_type == "cog":
            # The position of the point is reported in the psychopy log a few lines above
            # the onset event of the cognitive instance.
            onset_match = re.search(
                stamp
                + r"\s+EXP\s+{}:\s+autoDraw\s*=\s*True".format(re.escape(keyword)),
                logtext,
            )
            if onset_match:
                # Keep the onset line and the six lines before it (fewer at the top of the file)
                previous_lines = logtext[: onset_match.end()].split("\n")[-7:]
                matches = re.findall(fix_pos_pattern, "\n".join(previous_lines))
                if matches:
                    # If the pattern is found several times, the last appearance is the one
                    # corresponding to the event
                    _, xpos, ypos = matches[-1]
                    value = f"[{xpos}, {ypos}]"

        # If no trigger was recorded in the psychopy log, we need to approximate its timestamp
        # with the closest log event (done once, at the first completed event).
        if trigger_timestamp is None:
            trigger_timestamp = _approximate_trigger(trial_type, onset, logtext)

        # Subtract the timestamp of the first trigger from the onset of the task to get the
        # event onset in the fMRI recording time, and keep only 1 decimal of precision.
        records.append(
            {
                "onset": "{:.1f}".format(round(onset - trigger_timestamp, 1)),
                "duration": "{:.1f}".format(round(duration, 1)),
                "trial_type": trial_type,
                "value": value,
            }
        )

    # Build the table in one go (also yields the right columns when there are no events)
    event_dataframe = pd.DataFrame(
        records, columns=["onset", "duration", "trial_type", "value"]
    )

    if write:
        # The from_log might break the BIDS compatibility, but for now I don't know how else
        # to distinguish between events.tsv generated from the channels versus the psychopy log
        output_file = logfile.with_name(
            logfile.name.replace(".log", "_from_log_events.tsv")
        )
        # Missing values are written as "n/a" in BIDS tabular files
        event_dataframe.replace({"value": {"": "n/a"}}).to_csv(
            output_file, sep="\t", index=False, na_rep="n/a"
        )

    return event_dataframe

In [None]:
# Run the conversion on the qct log of this session and display what it returns
write_event_file_from_log(session_path / "qct_2023-11-03_19h58.22.540_0_session_29.log")

In [124]:
from __future__ import annotations
import pandas as pd
import numpy as np
import re
from pathlib import Path

# Maps the component names found in "<name>: autoDraw = ..." log entries (keys)
# to the labels written in the trial_type column of the events table (values).
# Several components may share one label (e.g., three of them map to "out").
TRIAL_TYPE = {
    "eye_movement_fixation": "cog",
    "ft_hand": "mot",
    "fixation": "blank",
    "grating": "vis",
    "movie": "movie",
    "bh_body": "red",
    "bh_end": "lightred",
    "end_trial_msg": "end-message",
    "polygon_4": "in",
    "polygon1": "out",
    "polygon_6": "in-last",
    "polygon_8": "out-last",
    "bh_body_2": "hold",
    "bh_end_2": "hold-warning",
    "bh_end_3": "refractory",
    "polygon_5": "out",  # old
    "polygon_7": "out",  # old
}


def psychopy2pandas(log_path: str | Path) -> pd.DataFrame:
    """
    Parse a PsychoPy log file into a *Pandas* DataFrame.

    The log is read as a three-column, tab-separated table (``onset``, ``level``,
    ``desc``) and the following columns are derived from ``desc``:
    ``trial_type`` and ``start_end`` (component name and ``autoDraw`` status),
    ``hand`` (``"R"``/``"L"``, motor block of qct), and ``x``/``y`` (fixation
    coordinates, cognitive block of qct).

    Parameters
    ----------
    log_path : :obj:`os.pathlike`
        The path to the PsychoPy log file.

    Returns
    -------
    df : :obj:`pandas.DataFrame`
        A DataFrame containing event information, with onsets referred to the
        first ``DATA`` entry of the log and exact duplicate rows removed.

    """
    events = pd.read_csv(
        log_path,
        sep="\t",
        names=["onset", "level", "desc"],
        dtype={"onset": float},
    )

    # The first DATA entry is taken as the trigger: make it the time origin
    is_data_row = events["level"].str.contains("DATA")
    first_data_onset = events[is_data_row]["onset"].to_numpy()[0]
    events["onset"] = events["onset"] - first_data_onset

    # Component name and autoDraw status of the entries describing a known component
    autodraw_regex = r"({}):\s+autoDraw\s*=\s*(\w+)".format("|".join(TRIAL_TYPE.keys()))
    events[["trial_type", "start_end"]] = events["desc"].str.extract(autodraw_regex)

    # Hand announced for the motor block of qct; those rows are tagged as "ft_hand"
    hand_regex = r"ft_hand:\s*text\s*=\s*\'(RIGHT|LEFT)\'"
    events["hand"] = events["desc"].str.extract(hand_regex)
    events.loc[events["hand"].notna(), "trial_type"] = "ft_hand"
    # Shorten the hand labels to a single letter
    events = events.replace({"hand": {"RIGHT": "R", "LEFT": "L"}})

    # Fixation coordinates of the cognitive block of qct; those rows are tagged
    # as "eye_movement_fixation"
    position_regex = r"New trial \(rep=\d+, index=\d+\): OrderedDict\(\[\(\'xpos\', (-?\d+\.\d+)\), \(\'ypos\', (-?\d+\.\d+)\)\]\)"
    coordinates = events["desc"].str.extract(position_regex)
    events[["x", "y"]] = coordinates.astype(float)
    events.loc[events["x"].notna(), "trial_type"] = "eye_movement_fixation"

    # Rows that are identical in every column are kept only once
    return events.drop_duplicates()


def pandas2bids(input_df: pd.DataFrame, events_per_block: int = 13) -> pd.DataFrame:
    """
    Convert a Pandas DataFrame with event information to a BIDS-compatible DataFrame.

    This function takes an input DataFrame with event information, performs various data
    transformations to create a BIDS-compatible DataFrame, and returns the resulting DataFrame.

    Parameters
    ----------
    input_df : :obj:`pandas.DataFrame`
        The input DataFrame containing event information. It must provide the columns
        ``onset``, ``trial_type``, ``start_end``, ``hand``, ``x`` and ``y``.
    events_per_block : :obj:`int`
        Number of consecutive events that make up one block after the mock block of bht
        (only used when an ``end-message`` event is present).

    Returns
    -------
    df : :obj:`pandas.DataFrame`
        A BIDS-compatible DataFrame with columns 'onset', 'duration', 'trial_type', and 'value'.

    Notes
    -----
    - Rows without 'trial_type' are dropped from the input DataFrame.
    - New columns 'duration' and 'value' are added to the resulting DataFrame.
    - Durations are calculated based on 'start_end' information and assigned to the rows
      holding ``autoDraw = True``.
    - Values (coordinates or hand) are retrieved from the rows of the same trial type that
      carry them and assigned to the ``autoDraw = True`` rows.
    - Event names are normalized based on the module-level ``TRIAL_TYPE`` mapping.
    - Events up to the first 'end-message' are relabeled and given the value 'mock';
      the following events are numbered by block.

    Examples
    --------
    >>> input_df = pd.DataFrame(...)
    >>> output_df = pandas2bids(input_df)

    """

    # Drop rows without trial type
    df = input_df[input_df.trial_type.notna()]
    # Prepare new columns (duration and value)
    df = df.reindex(columns=["onset", "duration", "trial_type", "value", "start_end", "hand", "x", "y"])
    # Cast to string so that text labels can be stored in the column
    df["value"] = df["value"].astype(str)

    for et in set(df.trial_type.values):
        # Create a subdataframe with only this trial type. An explicit copy makes the
        # assignments below act on an independent frame (no chained-assignment warning).
        subdf = df[df.trial_type == et].copy()

        if len(subdf) < 2:  # No need to try if not a block
            continue

        # Calculate durations
        onsets = subdf.start_end.notna() & subdf.start_end.str.contains("True")
        offsets = subdf.start_end.notna() & subdf.start_end.str.contains("False")

        if len(subdf[onsets].onset.values) == len(subdf[offsets].onset.values):
            durations = subdf[offsets].onset.values - subdf[onsets].onset.values
        else:
            # NOTE(review): presumably handles logs where every "autoDraw = False" entry
            # appears twice, by keeping every other one -- confirm. Any other mismatch
            # makes the subtraction fail.
            durations = subdf[offsets].onset.values[::2] - subdf[onsets].onset.values

        # And assign the duration to the first event row (the one containing autoDraw = True)
        subdf.loc[onsets, "duration"] = durations

        # Retrieve values for cognitive and motor blocks from the rows that carry them
        if et == "eye_movement_fixation":
            shifted = subdf.loc[subdf.start_end.isna() & subdf.x.notna(), ["x", "y"]].values
            subdf.loc[onsets, "value"] = [f"({v[0]}, {v[1]})" for v in shifted]
        elif et == "ft_hand":
            shifted = subdf.loc[subdf.start_end.isna(), "hand"].values
            subdf.loc[onsets, "value"] = shifted

        # Move back to general dataframe
        df[df.trial_type == et] = subdf

    # Drop rows from which data was copied to the principal event row.
    df = df.drop(df[df.start_end.notna() & df.start_end.str.contains("False")].index)
    df = df.drop(df[df.start_end.isna() & df.x.notna()].index)
    df = df.drop(df[df.start_end.isna() & df.hand.notna()].index)

    # Normalize event names
    df = df.replace({"trial_type": TRIAL_TYPE})

    # Replace mock events in bht
    if "end-message" in set(df.trial_type.values):
        end_index = df[df.trial_type == "end-message"].index[0]
        df.loc[:end_index] = df.loc[:end_index].replace(
            {
                "trial_type": {"in": "green", "out": "yellow", "in-last": "light-green", "out-last": "gold"},
            },
        )
        df.loc[:end_index, "value"] = "mock"

        # After the mock come the "true" blocks: every `events_per_block` consecutive
        # events are labelled block1, block2, ... (no upper bound on the number of blocks).
        len_remaining = len(df.loc[end_index + 1:, "value"])
        df.loc[end_index + 1:, "value"] = [
            f"block{position // events_per_block + 1}" for position in range(len_remaining)
        ]

    return df[["onset", "duration", "trial_type", "value"]]

In [125]:
# NOTE(review): these three definitions repeat the ones from the configuration
# cells near the top of the notebook.
DATA_PATH = Path("/data/datasets/hcph-pilot-sourcedata/recordings/psychopy")
BIDS_PATH = Path("/data/datasets/hcph")
session_path = DATA_PATH / "session-2023-11-03"

# Create a BIDS dataframe with rows defining a trial_type
output_df = psychopy2pandas(session_path / "bht_2023-11-03_20h27.56.353_0_session_29.log")
bids_df = pandas2bids(output_df)
# Display the rows from position 40 onwards
bids_df[40:]

Unnamed: 0,onset,duration,trial_type,value
277,158.2357,2.3044,out,block3
282,160.5401,2.7026,in,block3
286,163.2427,2.3043,out,block3
290,165.547,2.7025,in,block3
294,168.2495,2.288,out,block3
298,170.5375,2.7025,in,block3
302,173.24,2.3051,out,block3
306,175.5451,2.7018,in-last,block3
310,178.2469,2.3047,out-last,block3
313,180.5516,13.0149,hold,block3


In [126]:
# Create a BIDS dataframe with rows defining a trial_type
# NOTE(review): `output_df` and `bids_df` are reassigned here, replacing the bht
# results computed in the previous cell.
output_df = psychopy2pandas(session_path / "qct_2023-11-03_19h58.22.540_0_session_29.log")
bids_df = pandas2bids(output_df)
# Display the rows from position 40 onwards (the four columns pandas2bids returns)
bids_df[["onset", "duration", "trial_type", "value"]][40:]

Unnamed: 0,onset,duration,trial_type,value
557,83.4206,0.5655,cog,"(0.5, 0.6)"
581,83.9861,5.0088,mot,R
592,88.9949,5.0773,mot,L
616,94.0722,3.0776,blank,
630,97.1498,0.5127,cog,"(-0.7, 0.0)"
635,97.6625,0.4973,cog,"(0.5, 0.6)"
640,98.1598,0.5142,cog,"(0.7, 0.0)"
646,98.674,0.4971,cog,"(-0.5, -0.6)"
651,99.1711,0.4975,cog,"(0.5, -0.6)"
656,99.6686,0.5714,cog,"(-0.5, 0.6)"
