In [225]:
import numpy as np
import pandas as pd
import re
import os
import glob

from analysis_functions import *

### Load Data

In [226]:
# Define the pattern for the files
trial_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_trials\.csv')
track_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_eyetracking\.csv')


# Use glob to get all files in the folder
files = glob.glob('data/*')

# Filter files using the regular expression
trial_files = [f for f in files if trial_pattern.match(os.path.basename(f))]
track_files = [f for f in files if track_pattern.match(os.path.basename(f))]

# Function to read and concatenate CSV files with error handling
def read_and_concatenate(files, sep=','):
    dfs = []
    for ii, file in enumerate(files):
        try:
            df = pd.read_csv(file, on_bad_lines='skip', sep=sep)
            df['Session'] = ii
            dfs.append(df)
        except pd.errors.ParserError as e:
            print(f"Error parsing {file}: {e}")
    return pd.concat(dfs, ignore_index=True), ii+1  # Return dataframe and session count

# Load and concatenate the trial files
trial_data, session_count = read_and_concatenate(trial_files)

# Load and concatenate the track files
track_data, session_count = read_and_concatenate(track_files, sep=';')


In [227]:
trial_data

Unnamed: 0,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session
0,0,1.013901,True,0.33,right,RightArrow,3.765573,55.774110,2.117973,3.113475,0
1,1,1.311054,False,0.33,right,UpArrow,0.819280,64.946530,0.346936,0.742197,0
2,2,2.244801,False,0.33,down,UpArrow,0.436710,258.719200,-0.085428,-0.428273,0
3,3,2.554199,True,0.33,left,LeftArrow,2.301572,318.498100,1.723725,-1.525125,0
4,4,2.344635,True,0.33,down,DownArrow,3.333058,231.792200,-2.061548,-2.619026,0
...,...,...,...,...,...,...,...,...,...,...,...
1789,294,1.244812,True,0.33,up,UpArrow,2.687645,319.021400,2.029051,-1.762496,5
1790,295,3.889099,False,0.33,left,DownArrow,3.378117,181.848600,-3.376359,-0.108973,5
1791,296,0.787659,False,0.33,right,LeftArrow,3.746909,251.156900,-1.210168,-3.546100,5
1792,297,5.342041,True,0.33,left,LeftArrow,2.210651,9.844179,2.178102,0.377953,5


In [228]:
track_data[:1]

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
0,1150905,1001917444715863346,63854598109944,"(0.591, 0.918, -0.290)","(0.090, -0.589, 0.055, 0.801)",INVALID,,,,INVALID,...,INVALID,,,,,,,,,0


### Visualize Trials

In [229]:
fig = px.scatter_polar(
    trial_data,
    r = trial_data['Radius'],
    theta = trial_data['Theta'],
    color = trial_data['Response'],
    color_continuous_scale = 'Viridis',
    opacity = .25
)

fig.show()





In [230]:
# Check percentage of correct trials
true_percentage = (trial_data['Response'].mean()) * 100
true_percentage

69.06354515050167

In [231]:
# Create a histogram of 'Radius' values
fig = px.histogram(trial_data, x='Radius', nbins=100, title='Histogram of Radius Values')

# Show the plot
fig.show()

In [232]:
# Descriptive statistics
print(track_data.describe())

              Frame   CaptureTime       LogTime  InterPupillaryDistanceInMM  \
count  1.274105e+06  1.274105e+06  1.274105e+06                1.268994e+06   
mean   6.306420e+06  1.002464e+18  6.385514e+13                6.195607e+01   
std    1.284428e+07  3.411612e+14  3.411716e+08                1.445971e-01   
min    1.208900e+04  1.001917e+18  6.385460e+13                6.172500e+01   
25%    1.193900e+05  1.002074e+18  6.385475e+13                6.190000e+01   
50%    2.643750e+05  1.002694e+18  6.385537e+13                6.190000e+01   
75%    1.341057e+06  1.002700e+18  6.385538e+13                6.210000e+01   
max    3.513211e+07  1.002857e+18  6.385554e+13                6.217500e+01   

       LeftPupilIrisDiameterRatio  LeftPupilDiameterInMM  \
count                1.268234e+06           1.268234e+06   
mean                 3.081662e-01           3.857857e+00   
std                  2.645366e-02           3.293227e-01   
min                  0.000000e+00           0.00

In [233]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter out trials that took longer than 10 seconds
df = df[df['Reaction_Time'] <= 10]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Reaction_Time']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Reaction_Time', y='Response', title='Linear Regression: Correct/Error Trials vs Reaction Time (Up to 10 Seconds)')
fig.add_trace(go.Scatter(x=df['Reaction_Time'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

In [234]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter out trials that took longer than 5 seconds
df = df[df['Reaction_Time'] <= 1]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Reaction_Time']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Reaction_Time', y='Response', title='Linear Regression: Correct/Error Trials vs Reaction Time (Up to 1 Seconds)')
fig.add_trace(go.Scatter(x=df['Reaction_Time'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Performance Over Trials

In [235]:
# For eyetracking data, remove rows where column 'TrialNumber' has NaN
track_data = track_data.dropna(subset=['TrialNumber'])

# Convert column 'TrialNumber' from floats to integers
track_data['TrialNumber'] = track_data['TrialNumber'].astype(int)

# If removing a row based on a particular observation is needed, use this format:
track_data = track_data.loc[(track_data['LeftEyeStatus'] != 'INVALID') & (track_data['RightEyeStatus'] != 'INVALID')]

In [236]:
track_data[:1]

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
5,1150910,1001917444740867800,63854598109944,"(0.591, 0.918, -0.290)","(0.090, -0.589, 0.055, 0.801)",VALID,"(-0.004, -0.049, 0.999)","(0.000, 0.000, 0.000)",62.175,VALID,...,VALID,"(-0.041, -0.046, 0.998)","(0.031, 0.000, 0.000)",0.273,3.601,13.209,2.0,0.0,0,0


In [237]:
# Calculate the mean of 'value' for each 'trial_number' in the larger DataFrame
mean_pupil_diameter = track_data.groupby(['TrialNumber', 'Session'])[['LeftPupilDiameterInMM', 'RightPupilDiameterInMM']].mean().reset_index()
mean_pupil_diameter['Pupil_Diameter'] = mean_pupil_diameter[['LeftPupilDiameterInMM', 'RightPupilDiameterInMM']].mean(axis=1)
mean_pupil_diameter[:1]

Unnamed: 0,TrialNumber,Session,LeftPupilDiameterInMM,RightPupilDiameterInMM,Pupil_Diameter
0,0,0,4.444397,4.282814,4.363605


In [238]:
# Add mean pupil diameter to trial data 
trial_data['Pupil_Diameter'] = mean_pupil_diameter['Pupil_Diameter'][:-session_count]

In [239]:
trial_data

Unnamed: 0,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session,Pupil_Diameter
0,0,1.013901,True,0.33,right,RightArrow,3.765573,55.774110,2.117973,3.113475,0,4.363605
1,1,1.311054,False,0.33,right,UpArrow,0.819280,64.946530,0.346936,0.742197,0,4.311253
2,2,2.244801,False,0.33,down,UpArrow,0.436710,258.719200,-0.085428,-0.428273,0,4.585174
3,3,2.554199,True,0.33,left,LeftArrow,2.301572,318.498100,1.723725,-1.525125,0,4.389640
4,4,2.344635,True,0.33,down,DownArrow,3.333058,231.792200,-2.061548,-2.619026,0,4.440438
...,...,...,...,...,...,...,...,...,...,...,...,...
1789,294,1.244812,True,0.33,up,UpArrow,2.687645,319.021400,2.029051,-1.762496,5,3.708330
1790,295,3.889099,False,0.33,left,DownArrow,3.378117,181.848600,-3.376359,-0.108973,5,3.733448
1791,296,0.787659,False,0.33,right,LeftArrow,3.746909,251.156900,-1.210168,-3.546100,5,3.374799
1792,297,5.342041,True,0.33,left,LeftArrow,2.210651,9.844179,2.178102,0.377953,5,3.939194


In [240]:
# Predict correct trials by reaction time
df = trial_data

# Filter outlier trials
# df = df[df['Pupil_Diameter'] >= 4]
# df = df[df['Pupil_Diameter'] <= 5]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Pupil_Diameter']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Pupil_Diameter', y='Response', title='Linear Regression: Correct/Error Trials vs Pupil Diameter')
fig.add_trace(go.Scatter(x=df['Pupil_Diameter'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

In [241]:
# Predict correct trials by reaction time
df = trial_data
predictor = 'Trial'
dependent = 'Response'
binary_response = True

# # Filter outliers
# if filter != None:
#     df = df[df[predictor] <= filter]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
if binary_response:
    df[dependent] = df[dependent].astype(int)

# Define the predictor and target variables
X = df[[predictor]]
y = df[dependent]

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x=predictor, y=dependent, title='Linear Regression: Correct/Error Trials vs Trial Number')
fig.add_trace(go.Scatter(x=df[predictor], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Error Trials

In [242]:
# Filter the DataFrame to include only False 'Response' trials
df_errors = trial_data[trial_data['Response'] == 0]

# Calculate the statistics based on 'Direction' and 'Key_Pressed'
key_pressed_stats = df_errors.groupby(['Direction', 'Key_Pressed']).size().unstack(fill_value=0)

key_pressed_stats

Key_Pressed,DownArrow,LeftArrow,RightArrow,UpArrow
Direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
down,0,47,65,112
left,26,0,82,20
right,21,34,0,39
up,30,39,40,0


In [243]:
# Create a pivot table for Key_Pressed and Direction
pivot_table = trial_data.pivot_table(index='Direction', columns='Key_Pressed', values='Response', aggfunc='count', fill_value=0)

# Convert the pivot table to a format suitable for Plotly
pivot_table = pivot_table.reset_index()
pivot_table_melted = pivot_table.melt(id_vars='Direction', var_name='Key_Pressed', value_name='Count')

# Create the heatmap using Plotly
fig = go.Figure(data=go.Heatmap(
    z=pivot_table_melted['Count'],
    x=pivot_table_melted['Key_Pressed'],
    y=pivot_table_melted['Direction'],
    colorscale='Viridis',
    text=pivot_table_melted['Count'],
    texttemplate="%{text}",
    textfont={"size":14}
))

# Update layout to ensure square cells and add titles
fig.update_layout(
    title='Heatmap of Key Pressed by Direction',
    xaxis_title='Key Pressed',
    yaxis_title='Direction',
    xaxis=dict(scaleanchor='y', scaleratio=1),  # Ensures square cells
    yaxis=dict(scaleanchor='x', scaleratio=1)   # Ensures square cells
)

# Show plot
fig.show()

### Eccentricity

In [249]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Radius']]
y = df['Response']

# Transform the predictor variable into polynomial features
poly = PolynomialFeatures(degree=5)  # You can change the degree to fit higher order polynomials
X_poly = poly.fit_transform(X)

# Perform polynomial regression
model = LinearRegression()
model.fit(X_poly, y)

# Predict values
df['Predicted'] = model.predict(X_poly)

# Sort the DataFrame by 'Radius' for smooth plotting
df_sorted = df.sort_values(by='Radius')

# Visualization with Plotly
fig = px.scatter(df, x='Radius', y='Response', title='Polynomial Regression: Correct/Error Trials vs Eccentricity')
fig.add_trace(go.Scatter(x=df_sorted['Radius'], y=df_sorted['Predicted'], mode='lines', name='Polynomial Fit'))


# Show plot
fig.show()

In [255]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter outlier trials
df = df[df['Reaction_Time'] <= 10]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Radius']]
y = df['Reaction_Time']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Radius', y='Reaction_Time', title='Linear Regression: Reaction Time Predicted by Eccentricity')
fig.add_trace(go.Scatter(x=df['Radius'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Gaze Data

In [247]:
# gaze_data = track_data[['Frame', 'CombinedGazeForward']]
# gaze_data = gaze_data[1:-2000]
# gaze_data[['x', 'y']] = gaze_data['CombinedGazeForward'].str.extract(r'\(([^,]+), ([^,]+), [^)]+\)').astype(float)
# gaze_data

# # Create the scatter plot with Plotly
# fig = px.scatter(gaze_data, x='x', y='y', color="Frame", title='2D Scatter Plot of Gaze Coordinates')

# # Update the layout to center the plot at (0, 0)
# fig.update_layout(
#     xaxis=dict(range=[-1, 1], zeroline=True, zerolinewidth=2, zerolinecolor='LightPink'),
#     yaxis=dict(range=[-1, 1], zeroline=True, zerolinewidth=2, zerolinecolor='LightPink'),
#     xaxis_title='X Coordinate',
#     yaxis_title='Y Coordinate',
#     yaxis_scaleanchor='x'
# )

# # Show the plot
# fig.show()