In [None]:
import pandas as pd
import numpy as np
import os

### Remove Leading Spaces from Raw Eye-Tracking Data

In [None]:
# Folder holding the raw eye-tracking CSV exports. Every CSV in it is rewritten
# in place with the leading whitespace removed from its column names.
directory_path = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Raw_EyeTrackingData"

csv_names = [name for name in os.listdir(directory_path) if name.endswith('.csv')]

for filename in csv_names:
    file_path = os.path.join(directory_path, filename)
    df = pd.read_csv(file_path)

    # Only the header names are touched; cell values are written back as parsed.
    df.columns = [column.lstrip() for column in df.columns]

    df.to_csv(file_path, index=False)
    print(f"Cleaned data saved to {file_path}")

  exec(code_obj, self.user_global_ns, self.user_ns)


Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Raw_EyeTrackingData\EyeTrackingData_Participant10_2023_12_07_19_13_57.csv
Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Raw_EyeTrackingData\EyeTrackingData_Participant15_2023_12_11_14_02_52.csv
Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Raw_EyeTrackingData\EyeTrackingData_Participant16_2023_12_11_16_15_30.csv
Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Raw_EyeTrackingData\EyeTrackingData_Participant20_2023_12_11_18_20_19.csv
Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Raw_EyeTrackingData\EyeTrackingData_Participant21_2023_12_12_11_20_59.csv
Cleaned data saved to C:\Users\Mobile Workstation 3\OneDrive - Qu

### Calculate Validity BitMask

In [None]:
def gaze_origin_valid(x, y, z):
    """Return True when none of the three gaze-origin coordinates is NaN.

    Args:
        x, y, z: Scalar gaze-origin components for one sample.

    Returns:
        bool: False as soon as one coordinate is NaN, True otherwise.
    """
    return not any(np.isnan(coordinate) for coordinate in (x, y, z))

def gaze_direction_valid(x, y, z):
    """Return a truthy value when the gaze-direction vector has a positive, non-NaN length.

    Args:
        x, y, z: Scalar gaze-direction components for one sample.

    Returns:
        A falsy value when the Euclidean norm is zero or NaN (NaN never
        compares greater than 0), otherwise True.
    """
    length = np.sqrt(sum(component ** 2 for component in (x, y, z)))

    has_positive_length = length > 0
    if not has_positive_length:
        return has_positive_length
    return not np.isnan(length)

def pupil_diameter_valid(diameter):
    """Return a truthy value when the pupil diameter is strictly positive and not NaN.

    Args:
        diameter: Scalar pupil diameter for one sample.

    Returns:
        A falsy value for zero, negative or NaN diameters, otherwise True.
    """
    is_positive = diameter > 0
    if not is_positive:
        return is_positive
    return not np.isnan(diameter)

def eye_openness_valid(openness):
    """Return a truthy value when the eye-openness value lies in the closed range [0, 1].

    Args:
        openness: Scalar eye-openness value for one sample.

    Returns:
        A falsy value when the value is below 0, above 1, or NaN (NaN fails
        both comparisons), otherwise a truthy value.
    """
    return openness >= 0 and openness <= 1

# Map validity checks to bitmask positions (excluding pupil position)
validity_map = {
    'SINGLE_EYE_DATA_GAZE_ORIGIN_VALIDITY': 0,
    'SINGLE_EYE_DATA_GAZE_DIRECTION_VALIDITY': 1,
    'SINGLE_EYE_DATA_PUPIL_DIAMETER_VALIDITY': 2,
    'SINGLE_EYE_DATA_EYE_OPENNESS_VALIDITY': 3,
}

# Bitmask with every flag of validity_map set (bits 0-3 -> 15).
FULLY_VALID_BITMASK = sum(1 << bit for bit in validity_map.values())


def _eye_validity_bitmask(frame, eye):
    """Compute the validity bitmask of one eye for every row of ``frame``.

    Column-wise equivalent of applying gaze_origin_valid, gaze_direction_valid,
    pupil_diameter_valid and eye_openness_valid to each row and OR-ing the
    passing checks into the bit positions given by ``validity_map``.

    Args:
        frame: DataFrame containing the ``{eye}GazeOrigin[XYZ]``,
            ``{eye}Gaze[XYZ]``, ``{eye}PupilDiameter`` and ``{eye}EyeOpenness``
            columns.
        eye: Column prefix, ``'Left'`` or ``'Right'``.

    Returns:
        numpy.ndarray of int64, one bitmask per row.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    origin = frame[[f'{eye}GazeOrigin{axis}' for axis in 'XYZ']].to_numpy(dtype=float)
    direction = frame[[f'{eye}Gaze{axis}' for axis in 'XYZ']].to_numpy(dtype=float)
    pupil = frame[f'{eye}PupilDiameter'].to_numpy(dtype=float)
    openness = frame[f'{eye}EyeOpenness'].to_numpy(dtype=float)

    # NaN compares False against every bound, so NaN samples fail the
    # direction, pupil and openness checks without an explicit isnan test.
    checks = {
        'SINGLE_EYE_DATA_GAZE_ORIGIN_VALIDITY': ~np.isnan(origin).any(axis=1),
        'SINGLE_EYE_DATA_GAZE_DIRECTION_VALIDITY': np.sqrt((direction ** 2).sum(axis=1)) > 0,
        'SINGLE_EYE_DATA_PUPIL_DIAMETER_VALIDITY': pupil > 0,
        'SINGLE_EYE_DATA_EYE_OPENNESS_VALIDITY': (openness >= 0) & (openness <= 1),
    }

    bitmask = np.zeros(len(frame), dtype=np.int64)
    for flag, is_valid in checks.items():
        bitmask |= is_valid.astype(np.int64) << validity_map[flag]
    return bitmask


raw_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Raw_EyeTrackingData"
cleaned_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Cleaned_EyeTrackingData"
filtered_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Filtered_EyeTrackingData"

os.makedirs(cleaned_folder, exist_ok=True)
os.makedirs(filtered_folder, exist_ok=True)

for file_name in os.listdir(raw_folder):
    if not file_name.endswith(".csv"):
        continue

    file_path = os.path.join(raw_folder, file_name)
    df = pd.read_csv(file_path)

    # Whole-column computation instead of a Python loop over every sample.
    df['LeftEye_BitMask'] = _eye_validity_bitmask(df, 'Left')
    df['RightEye_BitMask'] = _eye_validity_bitmask(df, 'Right')

    # splitext removes only the final extension, so a name that contains
    # ".csv" somewhere in the middle is not truncated.
    base_name = os.path.splitext(file_name)[0]

    # "Cleaned" output: every row, annotated with both bitmasks.
    cleaned_file_path = os.path.join(cleaned_folder, f"{base_name}_cleaned.csv")
    df.to_csv(cleaned_file_path, index=False)
    print(f"Cleaned file saved to: {cleaned_file_path}")

    # "Filtered" output: only rows where every check passed for both eyes.
    both_eyes_valid = (
        (df['LeftEye_BitMask'] == FULLY_VALID_BITMASK)
        & (df['RightEye_BitMask'] == FULLY_VALID_BITMASK)
    )
    data_filtered = df[both_eyes_valid]
    filtered_file_path = os.path.join(filtered_folder, f"{base_name}_filtered.csv")
    data_filtered.to_csv(filtered_file_path, index=False)
    print(f"Filtered file saved to: {filtered_file_path}")

Cleaned file saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant10_2023_12_07_19_13_57_cleaned.csv
Filtered file saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Filtered_EyeTrackingData\EyeTrackingData_Participant10_2023_12_07_19_13_57_filtered.csv
Cleaned file saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant15_2023_12_11_14_02_52_cleaned.csv
Filtered file saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Filtered_EyeTrackingData\EyeTrackingData_Participant15_2023_12_11_14_02_52_filtered.csv
Cleaned file saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant16_2023_12_11_16_15_30_cleane

### Calculate Gaze Direction (Ex) and Median Rolling Window of 10 (Filtered_Ex)

In [None]:
cleaned_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Cleaned_EyeTrackingData"
filtered_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Filtered_EyeTrackingData"

# Columns fed to arctan2 are forced to float when the CSV is parsed.
dtype_spec = {
    'LeftGazeX': float,
    'LeftGazeZ': float,
    'RightGazeX': float,
    'RightGazeZ': float,
    'FixationPointX': float,
    'FixationPointZ': float,
}

# Number of samples in the centred rolling-median window.
median_window = 10

# Each CSV in both folders is updated in place with the new columns.
for folder in [cleaned_folder, filtered_folder]:
    for file_name in os.listdir(folder):
        if not file_name.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file_name)
        print(f"Processing file: {file_name}")

        df = pd.read_csv(file_path, dtype=dtype_spec)

        # Compute LeftEx, RightEx, and CombinedEx: the arctan2 angle of the
        # (X, Z) components, converted from radians to degrees.
        df['LeftEx'] = np.arctan2(df['LeftGazeX'], df['LeftGazeZ']) / np.pi * 180
        df['RightEx'] = np.arctan2(df['RightGazeX'], df['RightGazeZ']) / np.pi * 180
        df['CombinedEx'] = np.arctan2(df['FixationPointX'], df['FixationPointZ']) / np.pi * 180

        # Apply the rolling median filter. Windows that are incomplete (file
        # edges) or contain NaN produce NaN, which is then filled from the next
        # valid value and, failing that, from the previous one.
        # .bfill()/.ffill() replace the deprecated fillna(method=...).
        for angle_col in ('LeftEx', 'RightEx', 'CombinedEx'):
            rolling_median = df[angle_col].rolling(window=median_window, center=True).median()
            df[f'Filtered_{angle_col}'] = rolling_median.bfill().ffill()

        df.to_csv(file_path, index=False)
        print(f"Updated file with Ex values saved to: {file_path}")

Processing file: EyeTrackingData_Participant10_2023_12_07_19_13_57_cleaned.csv
Updated file with Ex values saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant10_2023_12_07_19_13_57_cleaned.csv
Processing file: EyeTrackingData_Participant15_2023_12_11_14_02_52_cleaned.csv
Updated file with Ex values saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant15_2023_12_11_14_02_52_cleaned.csv
Processing file: EyeTrackingData_Participant16_2023_12_11_16_15_30_cleaned.csv
Updated file with Ex values saved to: C:\Users\Mobile Workstation 3\OneDrive - Queen's University\Coding Scripts\EyeTrackingData\Cleaned_EyeTrackingData\EyeTrackingData_Participant16_2023_12_11_16_15_30_cleaned.csv
Processing file: EyeTrackingData_Participant20_2023_12_11_18_20_19_cleaned.csv
Updated file with Ex values saved to: C

### Calculate Time_Diff and Initial_Velocity (using Ex)

In [None]:
cleaned_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Cleaned_EyeTrackingData"
filtered_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Filtered_EyeTrackingData"

dtype_spec = {
    'LeftGazeX': float,
    'LeftGazeZ': float,
    'RightGazeX': float,
    'RightGazeZ': float,
    'FixationPointX': float,
    'FixationPointZ': float,
    'Timestamp': str  # Ensure Timestamp is read as string for conversion
}

# Each CSV in both folders is updated in place. The LeftEx / RightEx /
# CombinedEx columns must already be present in the files.
for folder in [cleaned_folder, filtered_folder]:
    for file_name in os.listdir(folder):
        if not file_name.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file_name)
        print(f"Processing file: {file_name}")

        df = pd.read_csv(file_path, dtype=dtype_spec)

        # Convert 'Timestamp' column to datetime; unparseable values become NaT
        # and those rows are dropped.
        df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
        df.dropna(subset=['Timestamp'], inplace=True)

        # Calculate time difference in seconds between consecutive rows.
        time_diff = df['Timestamp'].diff().dt.total_seconds()
        time_diff = time_diff.replace(0, 1e-6)  # Prevent division by zero
        # The first row has no predecessor, so its NaN is filled from the next
        # row (then from the previous one as a fallback). Assigning the result
        # back avoids a chained in-place fillna on a column selection, which
        # does not update df under pandas Copy-on-Write.
        df['Time_Diff'] = time_diff.bfill().ffill()

        # Compute velocity for raw gaze direction data
        velocity_cols = []
        for angle_col in ('LeftEx', 'RightEx', 'CombinedEx'):
            velocity_col = f'Initial_Velocity_{angle_col}'
            df[velocity_col] = df[angle_col].diff() / df['Time_Diff']
            velocity_cols.append(velocity_col)

        # Apply median filter to smooth velocity data
        # NOTE(review): smooth_series is not defined in the notebook cells
        # visible here - confirm it is defined before this cell runs on a
        # fresh kernel.
        for col in velocity_cols:
            df[col] = smooth_series(df[col])

        df.to_csv(file_path, index=False)
        print(f"Intermediate file with initial velocities saved to: {file_path}")

### Calculate Filtered_Velocity (using Filtered_Ex) and Normalize [-1 to 1]

In [None]:
cleaned_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Cleaned_EyeTrackingData"
filtered_folder = "C:\\Users\\Mobile Workstation 3\\OneDrive - Queen's University\\Coding Scripts\\EyeTrackingData\\Filtered_EyeTrackingData"

# Filtered angle columns and the velocity column derived from each of them.
angle_cols = ['Filtered_LeftEx', 'Filtered_RightEx', 'Filtered_CombinedEx']
velocity_cols = ['Filtered_Velocity_LeftEx', 'Filtered_Velocity_RightEx', 'Filtered_Velocity_CombinedEx']

# Each CSV in both folders is updated in place. The Filtered_*Ex and Time_Diff
# columns must already be present in the files.
for folder in [cleaned_folder, filtered_folder]:
    for file_name in os.listdir(folder):
        if not file_name.endswith(".csv"):
            continue

        file_path = os.path.join(folder, file_name)
        print(f"Processing file: {file_name}")

        df = pd.read_csv(file_path)

        # Identify where filtered data changes. The masks are kept as local
        # Series rather than temporary columns, so nothing has to be dropped
        # before the file is written.
        changed = {
            angle_col: df[angle_col].ne(df[angle_col].shift())
            for angle_col in angle_cols
        }

        # Compute velocity for filtered gaze direction data
        for angle_col, velocity_col in zip(angle_cols, velocity_cols):
            df[velocity_col] = df[angle_col].diff() / df['Time_Diff']

        # Apply median filter to smooth filtered velocity data
        # NOTE(review): smooth_series is not defined in the notebook cells
        # visible here - confirm it is defined before this cell runs on a
        # fresh kernel.
        for col in velocity_cols:
            df[col] = smooth_series(df[col])

        # Blank the velocity on rows where the filtered angle did not change...
        for angle_col, velocity_col in zip(angle_cols, velocity_cols):
            df.loc[~changed[angle_col], velocity_col] = np.nan

        # ...then carry the last remaining value forward over those rows.
        # Assigning the result back avoids a chained in-place fillna on a
        # column selection, which does not update df under pandas Copy-on-Write.
        for velocity_col in velocity_cols:
            df[velocity_col] = df[velocity_col].ffill()

        # Normalize velocity to range [-1, 1] with per-file min-max scaling
        for col in velocity_cols:
            min_val, max_val = df[col].min(), df[col].max()
            if max_val - min_val > 1e-6:
                df[f'Normalized_{col}'] = 2 * (df[col] - min_val) / (max_val - min_val) - 1
            else:
                df[f'Normalized_{col}'] = 0  # Avoid division by zero

        df.to_csv(file_path, index=False)
        print(f"Updated file with filtered velocities saved to: {file_path}")

### Calculate Saccade Features (Number_Saccades and Saccade_Duration)

### Calculate Saccades Z-Score

### Calculate Fixation Features (Number_Fixations and Fixation_Duration)

### Calculate Fixations Z-Score

### Calculate Pupil Dilation Z-Score