In [25]:
import numpy as np
import pandas as pd
import re
import os
import glob
import warnings

from analysis_functions import *

# Explicit third-party imports stay after the star import so they win any name clash.
import plotly.express as px
import plotly.graph_objects as go
import plotly.colors as colors

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
# File-name patterns: "<YYYY-MM-DD-hh-mm>_trials.csv" and "<YYYY-MM-DD-hh-mm>_eyetracking.csv"
trial_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_trials\.csv')
track_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_eyetracking\.csv')


# List everything in the data folder.
# glob returns paths in arbitrary, platform-dependent order; sorting makes the
# Session ids assigned from list position reproducible, and the timestamp
# prefix makes the sorted order chronological.
files = sorted(glob.glob('data/free-looking/*'))

# fullmatch (not match) so names with trailing text, e.g. "..._trials.csv.bak",
# are not picked up by accident.
trial_files = [f for f in files if trial_pattern.fullmatch(os.path.basename(f))]
track_files = [f for f in files if track_pattern.fullmatch(os.path.basename(f))]

# Function to read and concatenate CSV files with error handling
def read_and_concatenate(files, sep=','):
    """Read every CSV in ``files`` and stack them into a single DataFrame.

    Each file's rows get a ``Session`` column holding that file's 0-based
    position in ``files``. Malformed lines are skipped by pandas
    (``on_bad_lines='skip'``). A file that still raises ``ParserError`` is
    reported on stdout and left out, so its position never appears in
    ``Session``.

    Parameters
    ----------
    files : iterable of path-like
        CSV files to load, in the order that defines the session ids.
    sep : str, default ','
        Field delimiter passed to ``pandas.read_csv``.

    Returns
    -------
    (pandas.DataFrame, int)
        The concatenated rows with a fresh 0..n-1 index, and the number of
        files that were passed in (files that failed to parse are counted too).
        If nothing could be loaded, the frame is empty with only a ``Session``
        column and a warning is emitted.
    """
    files = list(files)  # allow any iterable and make len() available
    dfs = []
    for ii, file in enumerate(files):
        try:
            df = pd.read_csv(file, on_bad_lines='skip', sep=sep)
        except pd.errors.ParserError as e:
            print(f"Error parsing {file}: {e}")
            continue
        df['Session'] = ii
        dfs.append(df)

    if not dfs:
        # pd.concat([]) raises ValueError and, for an empty input list, the loop
        # variable would never have been bound; report the situation instead.
        warnings.warn("read_and_concatenate: no files could be loaded", stacklevel=2)
        return pd.DataFrame(columns=['Session']), len(files)

    return pd.concat(dfs, ignore_index=True), len(files)

# Load and concatenate the trial files (comma-separated)
trial_data, trial_session_count = read_and_concatenate(trial_files)

# Load and concatenate the eye-tracking files (semicolon-separated)
track_data, track_session_count = read_and_concatenate(track_files, sep=';')

# Session ids are list positions, so trial and tracking files must pair up
# one-to-one; a count mismatch means the Session columns no longer line up.
if trial_session_count != track_session_count:
    warnings.warn(
        f"{trial_session_count} trial file(s) but {track_session_count} "
        "eye-tracking file(s): Session ids may not refer to the same recording"
    )

# Same value the original cell left in `session_count` (result of the second call)
session_count = track_session_count


In [7]:
# Inspect the concatenated trial table, including the Session column added on load
trial_data

Unnamed: 0,Block,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session
0,0,0,2.085249,True,0.50,left,LeftArrow,0,0,0,0,0
1,0,1,1.400391,True,0.49,down,DownArrow,0,0,0,0,0
2,0,2,1.199780,True,0.49,down,DownArrow,0,0,0,0,0
3,0,3,1.289139,True,0.48,down,DownArrow,0,0,0,0,0
4,0,4,0.988407,True,0.48,right,RightArrow,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
255,4,255,14.964970,True,0.31,right,RightArrow,0,0,0,0,0
256,4,256,18.372680,False,0.32,up,DownArrow,0,0,0,0,0
257,4,257,1.755371,True,0.32,left,LeftArrow,0,0,0,0,0
258,4,258,0.866699,True,0.31,up,UpArrow,0,0,0,0,0


In [37]:
# Peek at the first five eye-tracking rows to check that the ';'-separated files parsed into columns
track_data.head()

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
0,12128,1000283133106276500,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.005, 0.030, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.002, 0.030, 1.000)","(0.031, 0.000, 0.000)",0.34,4.364,12.833,2.0,0.905047,0.0,0
1,12129,1000283133111136100,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.004, 0.029, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.003, 0.029, 1.000)","(0.031, 0.000, 0.000)",0.34,4.36,12.833,2.0,0.911767,0.0,0
2,12130,1000283133116108000,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.003, 0.029, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.004, 0.029, 1.000)","(0.031, 0.000, 0.000)",0.34,4.357,12.833,2.0,0.914656,0.0,0
3,12131,1000283133121211500,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.002, 0.028, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.005, 0.028, 1.000)","(0.031, 0.000, 0.000)",0.339,4.354,12.833,2.0,0.911802,0.0,0
4,12132,1000283133126183400,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.001, 0.028, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.006, 0.028, 1.000)","(0.031, 0.000, 0.000)",0.339,4.351,12.833,2.0,0.915294,0.0,0


In [38]:
# Drop tracking rows that carry no TrialNumber (per the original note, these
# sit at the end of a recording file). Rows are removed only on the basis of
# this one column; NaNs in other columns are kept.
track_data = track_data.dropna(subset=['TrialNumber'])

In [39]:
# Inspect the tracking table after the rows without a TrialNumber were dropped
track_data

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
0,12128,1000283133106276500,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.005, 0.030, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.002, 0.030, 1.000)","(0.031, 0.000, 0.000)",0.340,4.364,12.833,2.000000,0.905047,0.0,0
1,12129,1000283133111136100,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.004, 0.029, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.003, 0.029, 1.000)","(0.031, 0.000, 0.000)",0.340,4.360,12.833,2.000000,0.911767,0.0,0
2,12130,1000283133116108000,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.003, 0.029, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.004, 0.029, 1.000)","(0.031, 0.000, 0.000)",0.340,4.357,12.833,2.000000,0.914656,0.0,0
3,12131,1000283133121211500,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.002, 0.028, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.005, 0.028, 1.000)","(0.031, 0.000, 0.000)",0.339,4.354,12.833,2.000000,0.911802,0.0,0
4,12132,1000283133126183400,63857793258749,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.001, 0.028, 1.000)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.006, 0.028, 1.000)","(0.031, 0.000, 0.000)",0.339,4.351,12.833,2.000000,0.915294,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226278,238407,1000284264885012000,63857794390046,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.065, 0.072, 0.995)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(0.022, 0.072, 0.997)","(0.031, 0.000, 0.000)",0.335,4.306,12.833,0.541093,0.000000,260.0,0
226279,238408,1000284264890011900,63857794390046,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.068, 0.043, 0.997)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(0.022, 0.043, 0.999)","(0.031, 0.000, 0.000)",0.342,4.384,12.833,0.494411,0.000000,260.0,0
226280,238409,1000284264895025700,63857794390057,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.106, -0.006, 0.994)","(0.000, 0.000, 0.000)",62.1,VALID,...,INVALID,,,,,,2.000000,0.000000,260.0,0
226281,238410,1000284264900025600,63857794390057,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.099, -0.022, 0.995)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(0.024, -0.046, 0.999)","(0.031, 0.000, 0.000)",0.315,4.040,12.833,0.382186,0.000000,260.0,0


In [23]:
def create_histogram(dataframe, nbins):
    """Show a Plotly bar chart of the mean ``Response`` per ``Coherence`` bin.

    ``Coherence`` is cut into ``nbins`` bins with ``pandas.cut``. Each bar is
    the mean of ``Response`` (cast to float) in that bin and is annotated with
    the number of rows in the bin. Bins without rows are kept, so they show up
    as gaps with a count of 0.

    The input frame is not modified.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``'Response'`` and ``'Coherence'``.
    nbins : int or sequence
        Forwarded to ``pandas.cut`` as ``bins``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Build the helper columns on a new frame so the caller's data keeps its
    # original dtypes and columns.
    binned = dataframe.assign(
        Response=dataframe['Response'].astype(float),
        Coherence_bin=pd.cut(dataframe['Coherence'], bins=nbins),
    )

    # observed=False keeps empty bins and pins the behaviour regardless of the
    # pandas default for categorical groupers.
    grouped_df = (
        binned.groupby('Coherence_bin', observed=False)
        .agg(average_response=('Response', 'mean'),
             count=('Response', 'size'))
        .reset_index()
    )

    # Interval objects are not plottable labels; use their string form
    grouped_df['Coherence_bin'] = grouped_df['Coherence_bin'].astype(str)

    fig = px.bar(grouped_df, x='Coherence_bin', y='average_response',
                 title='Histogram of Coherence vs Average Response',
                 labels={'Coherence_bin': 'Coherence', 'average_response': 'Average Response'})

    # Print the number of observations above each bar
    fig.update_traces(text=grouped_df['count'], textposition='outside')

    fig.show()

In [24]:
# Mean response per coherence bin over all loaded trials, using 12 bins
create_histogram(trial_data, nbins=12)





In [32]:
def visualize_logistic_regression(dataframe):
    """Fit a logistic regression of ``Response`` on ``Coherence`` and plot it.

    ``Coherence`` is standardized, the rows are split 80/20 with
    ``random_state=0``, and a default ``LogisticRegression`` is fitted on the
    80% part. The figure shows every row as a marker (``Response`` as 0.0/1.0)
    together with the fitted probability of class 1 evaluated on 100 evenly
    spaced coherence values between the observed minimum and maximum.

    The input frame is not modified.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``'Response'`` and ``'Coherence'``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Float view of the labels, kept local so the caller's column keeps its dtype
    response = dataframe['Response'].astype(float)

    # Prepare the data
    X = dataframe[['Coherence']].values
    y = response.values

    # Standardize the feature (the scaler is fitted on all rows)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # NOTE(review): X_test / y_test are never used below; the split only
    # decides which 80% of the rows the model is fitted on. It is kept so the
    # fitted curve stays the same as before.
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

    # Fit the logistic regression model
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Evaluate the model on a grid in original units, scaled the same way as the training data
    X_values = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
    X_values_scaled = scaler.transform(X_values)
    y_values = model.predict_proba(X_values_scaled)[:, 1]

    # Raw observations
    scatter = go.Scatter(x=dataframe['Coherence'], y=response, mode='markers', name='Data points')

    # Fitted probability curve
    curve = go.Scatter(x=X_values.flatten(), y=y_values, mode='lines', name='Logistic Regression Curve')

    fig = go.Figure(data=[scatter, curve])

    fig.update_layout(title='Logistic Regression Visualization',
                      xaxis_title='Coherence',
                      yaxis_title='Response',
                      showlegend=True)

    fig.show()

In [33]:
# Fit and plot the response-vs-coherence logistic curve for all loaded trials
visualize_logistic_regression(trial_data)

### Trial Eye Movement

In [40]:
# Correct trials whose coherence was near threshold (below 0.33)
is_correct = trial_data['Response'] == 1.0
is_near_threshold = trial_data['Coherence'] < 0.33
trial_data_focused = trial_data.loc[is_correct & is_near_threshold]

In [49]:
# Trial numbers of the focused trials, as floats because the tracking
# table's TrialNumber column holds floats; used to filter the tracking data
trials_focused = trial_data_focused['Trial'].astype(float).tolist()
print(*trials_focused)

35.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 55.0 56.0 57.0 94.0 95.0 96.0 98.0 99.0 100.0 101.0 102.0 103.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 148.0 151.0 152.0 153.0 154.0 155.0 158.0 159.0 160.0 161.0 162.0 163.0 200.0 201.0 202.0 204.0 205.0 206.0 207.0 209.0 210.0 211.0 212.0 249.0 250.0 251.0 252.0 253.0 255.0 257.0 258.0


In [50]:
# Keep only the tracking rows that belong to a focused trial.
# Match on the (Session, trial number) pair instead of the trial number alone:
# once several recordings are loaded, a bare trial number would also select the
# same-numbered trial of every other session.
# NOTE(review): assumes trial numbering restarts in each recording file - confirm.
focused_keys = pd.MultiIndex.from_frame(
    trial_data_focused[['Session', 'Trial']].astype({'Trial': float})
)
track_keys = pd.MultiIndex.from_frame(track_data[['Session', 'TrialNumber']])
track_data_focused = track_data.loc[track_keys.isin(focused_keys)]

In [52]:
# Spot-check the first three tracking rows kept for the focused trials
track_data_focused.head(3)

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
20580,32708,1000283236041408800,63857793361202,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.002, -0.082, 0.997)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.019, -0.082, 0.996)","(0.031, 0.000, 0.000)",0.293,3.76,12.833,2.0,0.801096,35.0,0
20581,32709,1000283236046424400,63857793361202,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.002, -0.082, 0.997)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.019, -0.082, 0.996)","(0.031, 0.000, 0.000)",0.293,3.76,12.833,2.0,0.825322,35.0,0
20582,32710,1000283236051469400,63857793361202,"(0.000, 0.000, 0.000)","(0.000, 0.000, 0.000, 1.000)",VALID,"(0.002, -0.082, 0.997)","(0.000, 0.000, 0.000)",62.1,VALID,...,VALID,"(-0.019, -0.082, 0.996)","(0.031, 0.000, 0.000)",0.293,3.76,12.833,2.0,0.936128,35.0,0


In [62]:
# CombinedGazeForward is stored as the text "(x, y, z)"; pull out the first two
# components as floats and keep them next to the trial number and frame.
gaze_xy = (
    track_data_focused['CombinedGazeForward']
    .str.extract(r'\(([^,]+), ([^,]+), [^)]+\)')
    .astype(float)
)
gaze_data = pd.DataFrame({
    'x': gaze_xy[0],
    'y': gaze_xy[1],
    'TrialNumber': track_data_focused['TrialNumber'],
    'Frame': track_data_focused['Frame'],
})
gaze_data

Unnamed: 0,x,y,TrialNumber,Frame
20580,0.002,-0.082,35.0,32708
20581,0.002,-0.082,35.0,32709
20582,0.002,-0.082,35.0,32710
20583,0.002,-0.082,35.0,32711
20584,0.002,-0.082,35.0,32712
...,...,...,...,...
224487,0.065,-0.036,258.0,236616
224488,0.065,-0.036,258.0,236617
224489,0.065,-0.036,258.0,236618
224490,0.065,-0.036,258.0,236619


In [129]:
def visualize_trial_gaze(trial_number, df, color_code="Frame"):
    """Scatter-plot the gaze samples of one trial.

    Rows of ``df`` whose ``TrialNumber`` equals ``float(trial_number)`` are
    copied, their ``Frame`` values are shifted so the smallest becomes 0, and
    the ``x``/``y`` columns are plotted with Plotly Express, coloured by the
    column named in ``color_code``. Both axes are fixed to [-0.5, 0.5] with a
    1:1 aspect ratio on an 800x800 figure. The input frame is not modified.

    Parameters
    ----------
    trial_number : number
        Trial to show; converted to float before comparing.
    df : pandas.DataFrame
        Needs the columns ``'x'``, ``'y'``, ``'TrialNumber'`` and ``'Frame'``.
    color_code : str, default "Frame"
        Column used for the marker colour.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    trial_number = float(trial_number)

    trial_rows = df.loc[df['TrialNumber'] == trial_number].copy()
    first_frame = trial_rows['Frame'].min()
    trial_rows['Frame'] = trial_rows['Frame'] - first_frame  # frames count from 0 within the trial

    plot_title = f"2D Scatter Plot of Gaze Coordinates<br><b>Trial {int(trial_number)}</b>"
    fig = px.scatter(trial_rows, x='x', y='y', color=color_code, title=plot_title)

    # Both axes share the same fixed window around (0, 0) and the same zero-line styling
    axis_style = dict(range=[-0.5, 0.5], zeroline=True, zerolinewidth=2, zerolinecolor='LightPink')
    fig.update_layout(
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        xaxis_title='X Coordinate',
        yaxis_title='Y Coordinate',
        yaxis_scaleanchor='x',
        width=800,
        height=800,
    )

    fig.show()

In [133]:
# Gaze trace of one focused trial (the 20th from the end of the list), coloured by frame
visualize_trial_gaze(trials_focused[-20], gaze_data, color_code="Frame")

In [125]:
def visualize_trial_gaze_distance(trial_number, df, color_code="Frame"):
    """Scatter-plot one trial's gaze samples and report the path length.

    Rows of ``df`` whose ``TrialNumber`` equals ``float(trial_number)`` are
    copied and ``Frame`` is shifted to start at 0 (as in
    ``visualize_trial_gaze``). Three columns are added to that copy: ``dx`` and
    ``dy`` (difference to the previous row) and ``Distance`` (Euclidean length
    of that step). The first row has no predecessor, so its ``Distance`` is NaN
    and does not count towards the total. The total is shown in the title and
    returned. The input frame is not modified.

    Parameters
    ----------
    trial_number : number
        Trial to show; converted to float before comparing.
    df : pandas.DataFrame
        Needs the columns ``'x'``, ``'y'``, ``'TrialNumber'`` and ``'Frame'``.
    color_code : str, default "Frame"
        Column used for the marker colour; may also be one of the derived
        columns ``'dx'``, ``'dy'`` or ``'Distance'``.

    Returns
    -------
    float
        Sum of the step lengths, in the units of ``x``/``y``.
    """
    trial_number = float(trial_number)
    df = df.loc[df['TrialNumber'] == trial_number].copy()
    df['Frame'] = df['Frame'] - df['Frame'].min()  # Start counting frames from 0

    # Step between consecutive samples (rows are taken in their existing order)
    df['dx'] = df['x'].diff()
    df['dy'] = df['y'].diff()

    # Euclidean length of each step
    df['Distance'] = np.sqrt(df['dx']**2 + df['dy']**2)

    # Total path length; the NaN of the first row is skipped by sum()
    total_distance = float(df['Distance'].sum())

    # Pass the column name (not the Series) so the call matches visualize_trial_gaze
    fig = px.scatter(df, x='x', y='y', color=color_code, title=f"2D Scatter Plot of Gaze Coordinates<br><b>Trial {int(trial_number)}</b><br>Total distance: {total_distance:.2f}")

    # Center the plot at (0, 0)
    fig.update_layout(
        xaxis=dict(range=[-0.5, 0.5], zeroline=True, zerolinewidth=2, zerolinecolor='LightPink'),
        yaxis=dict(range=[-0.5, 0.5], zeroline=True, zerolinewidth=2, zerolinecolor='LightPink'),
        xaxis_title='X Coordinate',
        yaxis_title='Y Coordinate',
        yaxis_scaleanchor='x',
        width=800,
        height=800
    )

    fig.show()

    return total_distance

In [132]:
# Same trial as above, coloured by the per-step distance that the function derives
visualize_trial_gaze_distance(trials_focused[-20], gaze_data, color_code='Distance')

In [113]:
# `total_dist` was not defined anywhere in this notebook (it only existed as
# leftover kernel state), so compute it here to keep Restart & Run All working.
# NOTE(review): presumably this is the gaze path length of the trial plotted
# above - confirm the intended trial.
gaze_df = gaze_data.loc[gaze_data['TrialNumber'] == trials_focused[-20]]
total_dist = np.sqrt(gaze_df['x'].diff()**2 + gaze_df['y'].diff()**2).sum()
total_dist

2.24672967187216

In [117]:
# `gaze_df` was not defined anywhere in this notebook (it only existed as
# leftover kernel state), so rebuild it here to keep Restart & Run All working.
# NOTE(review): presumably the gaze samples of the trial plotted above -
# confirm the intended trial.
gaze_df = gaze_data.loc[gaze_data['TrialNumber'] == trials_focused[-20]]
len(gaze_df)

1134