In [1]:
import pandas as pd
import glob
import re
import os

Dot Probe Faces

In [2]:

def process_message_file(msg_file):
    """
    Parse a message log and extract the event intervals it describes.

    Each usable line has the form ``<timestamp> <message>``: the line is split
    once on whitespace, the first token must parse as a float, and the rest is
    matched case-insensitively. Blank lines, lines without a message, and lines
    whose first token is not a finite number are skipped.

    Parameters
    ----------
    msg_file : str or path-like
        Path of the message file to read.

    Returns
    -------
    dict
        - 'experiment': tuple (exp_start, exp_end); either entry is None when
          the corresponding boundary message was not found.
        - 'fixation': list of (start, stop) tuples.
        - 'faces': list of (start, stop, emotion) tuples, where emotion is
          'L', 'R', or 'N' ('N' when no side, or an unrecognized side, was
          reported for that stimulus).
        - 'pause': list of (start, stop) tuples.
    """
    import math  # local import: only needed for the finite-timestamp guard

    # Recognized emotion_side values (messages are lower-cased before matching).
    side_codes = {"left": "L", "l": "L", "right": "R", "r": "R"}

    records = []
    with open(msg_file, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split each line only once: timestamp and the entire message.
            parts = line.split(maxsplit=1)
            if len(parts) < 2:
                continue
            try:
                timestamp = float(parts[0])
            except ValueError:
                continue
            # NaN/inf timestamps cannot be ordered meaningfully; skip them.
            if not math.isfinite(timestamp):
                continue
            records.append((timestamp, parts[1]))

    # list.sort is stable, so messages sharing a timestamp keep their file
    # order. The state machine below depends on that order (e.g. a stimulus
    # start followed by its emotion_side message at the same timestamp).
    records.sort(key=lambda rec: rec[0])

    events = {
        'experiment': None,
        'fixation': [],
        'faces': [],
        'pause': []
    }

    # Temporary variables for event interval tracking.
    fixation_start = None
    faces_start = None
    faces_emotion = None
    pause_start = None
    exp_start = None
    exp_end = None

    for ts, raw_message in records:
        msg = raw_message.strip().lower()

        # Experiment boundaries: loose matching so slight variations are captured.
        if "dot probe faces" in msg and "start" in msg:
            exp_start = ts
        elif "dot probe faces" in msg and "end" in msg:
            exp_end = ts

        # Fixation intervals: a stop is only recorded if a start is pending.
        if "fixation_cross_start" in msg:
            fixation_start = ts
        elif "fixation_cross_stop" in msg and fixation_start is not None:
            events['fixation'].append((fixation_start, ts))
            fixation_start = None

        # Faces stimuli intervals.
        if "faces_stimuli_start" in msg:
            faces_start = ts
            faces_emotion = None  # Reset emotion for the new stimulus.
        if "emotion_side:" in msg:
            m = re.search(r"emotion_side:\s*(\w+)", msg)
            if m:
                # Anything that is not an explicit left/right is "N", rather
                # than being silently counted as a right-side trial.
                faces_emotion = side_codes.get(m.group(1), "N")
        if "faces_stimuli_stop" in msg and faces_start is not None:
            # Default to "N" if no emotion was captured.
            if faces_emotion is None:
                faces_emotion = "N"
            events['faces'].append((faces_start, ts, faces_emotion))
            faces_start = None
            faces_emotion = None

        # Pause intervals.
        if "pause_start" in msg:
            pause_start = ts
        elif "pause_stop" in msg and pause_start is not None:
            events['pause'].append((pause_start, ts))
            pause_start = None

    events['experiment'] = (exp_start, exp_end)
    return events



In [3]:
def assign_flags(gaze_df, events):
    """
    Label each gaze sample with the event interval it falls in.

    Adds two columns to ``gaze_df`` (modified in place and also returned):
      - 'flag': -1 outside every interval, 0 for fixation, 1 for faces
        stimuli, 2 for pause.
      - 'emotion_side': the emotion code of the enclosing faces interval,
        otherwise "N".

    Interval bounds are inclusive on both ends. Intervals are applied in the
    order fixation, faces, pause, so a later category overwrites the flag of
    an earlier one where they overlap.
    """
    # Defaults for samples that are not covered by any interval.
    gaze_df['flag'] = -1
    gaze_df['emotion_side'] = "N"

    sample_times = gaze_df['Time']

    def in_window(lo, hi):
        # Boolean mask of samples with lo <= Time <= hi.
        return sample_times.between(lo, hi)

    # Fixation -> flag 0.
    for lo, hi in events['fixation']:
        gaze_df.loc[in_window(lo, hi), 'flag'] = 0

    # Faces stimuli -> flag 1, plus the emotion code ("L", "R", or "N").
    for lo, hi, side in events['faces']:
        hit = in_window(lo, hi)
        gaze_df.loc[hit, 'flag'] = 1
        gaze_df.loc[hit, 'emotion_side'] = side

    # Pause -> flag 2.
    for lo, hi in events['pause']:
        gaze_df.loc[in_window(lo, hi), 'flag'] = 2

    return gaze_df

In [4]:
def process_participant(gaze_file, msg_file):
    """
    Build the labelled gaze table for one participant.

    Steps:
      1. Take the participant id (e.g. P006) from the gaze file name.
      2. Load the whitespace-separated, header-less gaze file and name its
         columns Time, calX, calY, rawX, rawY.
      3. Extract event intervals from the message file.
      4. Keep only samples inside the experiment window (inclusive bounds).
      5. Add 'flag' and 'emotion_side' via assign_flags.

    Returns the processed DataFrame (with a 'participants' column), or None
    when the file name carries no participant id or the experiment boundaries
    are missing from the message file.
    """
    id_match = re.search(r"(P\d+)", os.path.basename(gaze_file))
    if id_match is None:
        return None
    pid = id_match.group(1)

    # Load gaze samples and tag them with the participant id.
    gaze = pd.read_csv(gaze_file, sep=r"\s+", header=None, engine='python')
    gaze.columns = ['Time', 'calX', 'calY', 'rawX', 'rawY']
    gaze['Time'] = pd.to_numeric(gaze['Time'])
    gaze['participants'] = pid

    # Event intervals parsed from the message log.
    intervals = process_message_file(msg_file)
    window_start, window_end = intervals['experiment']
    if window_start is None or window_end is None:
        print(f"Experiment boundaries not found for {pid}")
        return None

    # Restrict to the experiment window; copy so the labelling step works on
    # an independent frame rather than a slice of the full recording.
    inside_experiment = gaze['Time'].between(window_start, window_end)
    windowed = gaze[inside_experiment].copy()

    return assign_flags(windowed, intervals)



In [None]:
# Collect gaze and message files from the tiny_data/gaze_data_v6 directory
# (paths are relative to the notebook's working directory).
# glob returns matches in arbitrary order, so sort them to keep runs
# deterministic (e.g. which file wins if a participant has several matches).
gaze_files = sorted(glob.glob("../../tiny_data/gaze_data_v6/gaze_directions_calibrated_*.txt"))
msg_files = sorted(glob.glob("../../tiny_data/gaze_data_v6/gaze_messages_*.txt"))

In [None]:


# Map each participant id to its corresponding message file.
# If several message files share an id, the last one in msg_files wins.
msg_dict = {}
for mf in msg_files:
    match = re.search(r"(P\d+)", os.path.basename(mf))
    if match:
        participant_id = match.group(1)
        msg_dict[participant_id] = mf

# Process each participant's data and combine into a single DataFrame.
df_list = []
for gf in gaze_files:
    match = re.search(r"(P\d+)", os.path.basename(gf))
    if not match:
        continue
    participant_id = match.group(1)
    if participant_id in msg_dict:
        processed = process_participant(gf, msg_dict[participant_id])
        # process_participant returns None when the participant cannot be
        # processed (e.g. missing experiment boundaries); skip those.
        if processed is not None:
            df_list.append(processed)
    else:
        print(f"No message file found for participant {participant_id}")

if df_list:
    df_final = pd.concat(df_list, ignore_index=True)
    # Sort by participant and Time so the combined table has a stable order.
    df_final = df_final.sort_values(['participants', 'Time']).reset_index(drop=True)
    # Save the final DataFrame to a CSV file. to_csv does not create missing
    # parent directories, so make sure the output folder exists first.
    output_path = "../../processed_data/processed_data_tiny_task_faces_all.csv"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_final.to_csv(output_path, index=False)
    print("Combined Processed Data:")
    print(df_final.head(20))
else:
    print("No data processed.")

Combined Processed Data:
            Time     calX     calY    rawX   rawY participants  flag  \
0   1.740664e+09  -548.70  -767.50  206.84  12.29         P004     0   
1   1.740664e+09 -1085.37 -1398.90  207.14  12.06         P004     0   
2   1.740664e+09  -288.91  -934.85  206.98  12.49         P004     0   
3   1.740664e+09  -573.69  -950.21  206.97  12.28         P004     0   
4   1.740664e+09  -670.67 -1212.10  207.10  12.25         P004     0   
5   1.740664e+09  -688.50 -1326.02  207.15  12.24         P004     0   
6   1.740664e+09  -890.46 -1385.55  207.15  12.15         P004     0   
7   1.740664e+09  -629.07 -1094.53  207.05  12.26         P004     0   
8   1.740664e+09  -791.20 -1400.31  207.17  12.20         P004     0   
9   1.740664e+09  -508.46  -671.82  206.76  12.31         P004     0   
10  1.740664e+09  -683.66  -824.34  206.88  12.20         P004     0   
11  1.740664e+09  -519.91  -605.48  206.70  12.30         P004     0   
12  1.740664e+09  -856.65 -1142.97  207