In [None]:
import pandas as pd
import numpy as np
from scipy.ndimage import uniform_filter1d

# Where the raw gaze export is read from and where the processed table is written
input_file_path = '/Users/laurabenitosandoval/Downloads/gaze_data.csv'  # Input file path
output_file_path = '/Users/laurabenitosandoval/Downloads/processed_okn_velocity_data.csv'  # New output file

# Load every gaze sample (all participants) into one DataFrame
data = pd.read_csv(input_file_path)

# Re-express each device timestamp as time elapsed since the earliest sample
# in the whole file, scaled by 1e6.
# NOTE(review): the 1e6 divisor presumes 'device_time_stamp' is in microseconds — confirm.
data['time_seconds'] = data['device_time_stamp'].sub(data['device_time_stamp'].min()).div(1e6)

# Accumulator that will hold the processed samples of all participants
all_participants_data = pd.DataFrame()

def _process_participant(participant_data, participant):
    """Derive the smoothed slow-phase velocity and perception label for one participant.

    Parameters
    ----------
    participant_data : pandas.DataFrame
        Rows of ``data`` belonging to a single participant. Must contain the
        columns 'left_gaze_point_on_display_area_y', 'time_seconds' and 'key'.
        The frame is copied; the caller's data is not modified.
    participant : object
        Participant identifier, used only in diagnostic messages.

    Returns
    -------
    pandas.DataFrame or None
        The retained samples with the derived columns added, or ``None`` when
        the participant has too little usable data (a message is printed).
    """
    gaze_column = 'left_gaze_point_on_display_area_y'

    # Work on a private copy and discard samples without a gaze coordinate
    participant_data = participant_data.copy()
    participant_data.dropna(subset=[gaze_column], inplace=True)

    # np.gradient needs at least two samples; bail out instead of raising
    if len(participant_data) < 2:
        print(f"Participant {participant}: Fewer than two valid gaze samples; skipping.")
        return None

    # Raw velocity: derivative of the gaze coordinate with respect to time
    participant_data['velocity_raw'] = np.gradient(
        participant_data[gaze_column].to_numpy(),
        participant_data['time_seconds'].to_numpy(),
    )
    participant_data.dropna(subset=['velocity_raw'], inplace=True)

    # Flag samples with negative velocity whose timestamp coincides with an 'up' key sample.
    # NOTE(review): if the display's y axis grows downward, a negative velocity is an
    # upward movement on screen — confirm the coordinate convention.
    up_key_times = participant_data.loc[participant_data['key'] == 'up', 'time_seconds']
    down_during_up = (
        (participant_data['velocity_raw'] < 0)
        & participant_data['time_seconds'].isin(up_key_times)
    )
    participant_data['movement_down_during_up'] = down_during_up.astype(int)

    # Sample-to-sample speed. Series.diff() leaves the first sample as NaN rather
    # than inventing a step from (0, 0), which previously produced a spurious
    # (possibly infinite) first velocity.
    participant_data['local_displacement'] = participant_data[gaze_column].diff().abs()
    participant_data['local_time'] = participant_data['time_seconds'].diff()
    participant_data['local_velocity'] = (
        participant_data['local_displacement'] / participant_data['local_time']
    )

    # Per-participant threshold: median local speed of the flagged samples.
    # Non-finite speeds (first sample, repeated timestamps) are excluded so they
    # cannot turn the median into NaN.
    flagged_speeds = participant_data.loc[down_during_up, 'local_velocity']
    flagged_speeds = flagged_speeds[np.isfinite(flagged_speeds)]
    if flagged_speeds.empty:
        print(f"Participant {participant}: No sufficient downward movements to calculate a velocity threshold.")
        return None
    velocity_threshold = np.median(flagged_speeds)

    # Keep only samples whose absolute raw velocity does not exceed the threshold
    is_slow_phase = participant_data['velocity_raw'].abs() <= velocity_threshold
    participant_data['slow_phase_velocity'] = participant_data['velocity_raw'].where(is_slow_phase)
    participant_data.dropna(subset=['slow_phase_velocity'], inplace=True)
    if participant_data.empty:
        print(f"Participant {participant}: No samples at or below the velocity threshold; skipping.")
        return None

    # Moving-average window of about one second: the number of retained samples
    # per second of the retained time span, never smaller than one sample.
    duration = participant_data['time_seconds'].max() - participant_data['time_seconds'].min()
    window_size = max(1, int(len(participant_data) / duration)) if duration > 0 else 1
    participant_data['smoothed_slow_phase_velocity'] = uniform_filter1d(
        participant_data['slow_phase_velocity'].to_numpy(), size=window_size, mode='nearest'
    )

    # Perception labels for SVM training; keys other than these map to NaN
    participant_data['perception'] = participant_data['key'].map({
        'up': 'integrated',  # Integrated perception
        'left': 'segregated',  # Segregated perception
        'right': 'segregated'  # Segregated perception
    })

    return participant_data


# Process each participant separately and collect the results
processed_frames = []
for participant in data['Participant'].unique():
    processed = _process_participant(data[data['Participant'] == participant], participant)
    if processed is not None:
        processed_frames.append(processed)

# Concatenate once after the loop instead of growing a DataFrame per iteration
if processed_frames:
    all_participants_data = pd.concat(processed_frames)
else:
    # No participant produced usable data: keep the expected columns so that
    # later column selections still work on the (empty) result.
    all_participants_data = pd.DataFrame(columns=list(data.columns) + [
        'velocity_raw',
        'movement_down_during_up',
        'local_displacement',
        'local_time',
        'local_velocity',
        'slow_phase_velocity',
        'smoothed_slow_phase_velocity',
        'perception',
    ])

# Discard samples that did not receive a perception label
svm_data = all_participants_data.dropna(subset=['perception'])

# Keep only the columns that go into the output CSV file
output_columns = [
    'Participant',
    'time_seconds',
    'perception',
    'smoothed_slow_phase_velocity',
]
svm_ready_data = svm_data.loc[:, output_columns]

# Write the result without the DataFrame index and report where it went
svm_ready_data.to_csv(output_file_path, index=False)
print(f"Processed data saved to a new file: {output_file_path}")