### Import Modules

In [115]:
import numpy as np
import pandas as pd
import re
import os
import glob
import warnings

from analysis_functions import *

import plotly.graph_objects as go
import plotly.colors as colors


### Load Data

In [2]:
# Define the pattern for the files
trial_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_trials\.csv')
track_pattern = re.compile(r'\d{4}-\d{2}-\d{2}-\d{2}-\d{2}_eyetracking\.csv')


# Use glob to get all files in the folder
files = glob.glob('data/*')

# Filter files using the regular expression
trial_files = [f for f in files if trial_pattern.match(os.path.basename(f))]
track_files = [f for f in files if track_pattern.match(os.path.basename(f))]

# Function to read and concatenate CSV files with error handling
def read_and_concatenate(files, sep=','):
    dfs = []
    for ii, file in enumerate(files):
        try:
            df = pd.read_csv(file, on_bad_lines='skip', sep=sep)
            df['Session'] = ii
            dfs.append(df)
        except pd.errors.ParserError as e:
            print(f"Error parsing {file}: {e}")
    return pd.concat(dfs, ignore_index=True), ii+1  # Return dataframe and session count

# Load and concatenate the trial files
trial_data, session_count = read_and_concatenate(trial_files)

# Load and concatenate the track files
track_data, session_count = read_and_concatenate(track_files, sep=';')


In [3]:
trial_data

Unnamed: 0,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session
0,0,1.013901,True,0.33,right,RightArrow,3.765573,55.77411,2.117973,3.113475,0
1,1,1.311054,False,0.33,right,UpArrow,0.819280,64.94653,0.346936,0.742197,0
2,2,2.244801,False,0.33,down,UpArrow,0.436710,258.71920,-0.085428,-0.428273,0
3,3,2.554199,True,0.33,left,LeftArrow,2.301572,318.49810,1.723725,-1.525125,0
4,4,2.344635,True,0.33,down,DownArrow,3.333058,231.79220,-2.061548,-2.619026,0
...,...,...,...,...,...,...,...,...,...,...,...
2985,294,0.689270,False,0.33,down,UpArrow,1.307471,69.47350,0.458452,1.224460,9
2986,295,0.578857,True,0.33,up,UpArrow,0.690750,128.06360,-0.425872,0.543846,9
2987,296,0.512390,True,0.33,right,RightArrow,3.068993,81.62679,0.446908,3.036279,9
2988,297,1.843933,False,0.33,right,UpArrow,3.932345,264.90500,-0.349221,-3.916807,9


In [4]:
track_data[:1]

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
0,1150905,1001917444715863346,63854598109944,"(0.591, 0.918, -0.290)","(0.090, -0.589, 0.055, 0.801)",INVALID,,,,INVALID,...,INVALID,,,,,,,,,0


### Visualize Trials

In [5]:
fig = px.scatter_polar(
    trial_data,
    r = trial_data['Radius'],
    theta = trial_data['Theta'],
    color = trial_data['Response'],
    color_continuous_scale = 'Viridis',
    opacity = .25
)

fig.show()

  sf: grouped.get_group(s if len(s) > 1 else s[0])


In [6]:
# Check percentage of correct trials
true_percentage = (trial_data['Response'].mean()) * 100
true_percentage

70.36789297658864

In [7]:
# Create a histogram of 'Radius' values
fig = px.histogram(trial_data, x='Radius', nbins=100, title='Histogram of Radius Values')

# Show the plot
fig.show()

In [25]:
# Create a histogram of 'Radius' values
fig = px.histogram(trial_data, x='Theta', nbins=100, title='Histogram of Polar Angle Values')

# Show the plot
fig.show()

In [8]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter out trials that took longer than 10 seconds
df = df[df['Reaction_Time'] <= 10]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Reaction_Time']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Reaction_Time', y='Response', title='Linear Regression: Correct/Error Trials vs Reaction Time (Up to 10 Seconds)')
fig.add_trace(go.Scatter(x=df['Reaction_Time'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

In [9]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter out trials that took longer than 5 seconds
df = df[df['Reaction_Time'] <= 1]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Reaction_Time']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Reaction_Time', y='Response', title='Linear Regression: Correct/Error Trials vs Reaction Time (Up to 1 Seconds)')
fig.add_trace(go.Scatter(x=df['Reaction_Time'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Performance Over Trials

In [10]:
# Predict correct trials by reaction time
df = trial_data
predictor = 'Trial'
dependent = 'Response'
binary_response = True

# # Filter outliers
# if filter != None:
#     df = df[df[predictor] <= filter]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
if binary_response:
    df[dependent] = df[dependent].astype(int)

# Define the predictor and target variables
X = df[[predictor]]
y = df[dependent]

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x=predictor, y=dependent, title='Linear Regression: Correct/Error Trials vs Trial Number')
fig.add_trace(go.Scatter(x=df[predictor], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Pupil Diameter (Move this to gaze notebook.)

In [11]:
# For eyetracking data, remove rows where column 'TrialNumber' has NaN
track_data = track_data.dropna(subset=['TrialNumber'])

# Convert column 'TrialNumber' from floats to integers
track_data['TrialNumber'] = track_data['TrialNumber'].astype(int)

# If removing a row based on a particular observation is needed, use this format:
track_data = track_data.loc[(track_data['LeftEyeStatus'] != 'INVALID') & (track_data['RightEyeStatus'] != 'INVALID')]

In [12]:
track_data[:1]

Unnamed: 0,Frame,CaptureTime,LogTime,HMDPosition,HMDRotation,GazeStatus,CombinedGazeForward,CombinedGazePosition,InterPupillaryDistanceInMM,LeftEyeStatus,...,RightEyeStatus,RightEyeForward,RightEyePosition,RightPupilIrisDiameterRatio,RightPupilDiameterInMM,RightIrisDiameterInMM,FocusDistance,FocusStability,TrialNumber,Session
5,1150910,1001917444740867800,63854598109944,"(0.591, 0.918, -0.290)","(0.090, -0.589, 0.055, 0.801)",VALID,"(-0.004, -0.049, 0.999)","(0.000, 0.000, 0.000)",62.175,VALID,...,VALID,"(-0.041, -0.046, 0.998)","(0.031, 0.000, 0.000)",0.273,3.601,13.209,2.0,0.0,0,0


In [13]:
# Calculate the mean of 'value' for each 'trial_number' in the larger DataFrame
mean_pupil_diameter = track_data.groupby(['TrialNumber', 'Session'])[['LeftPupilDiameterInMM', 'RightPupilDiameterInMM']].mean().reset_index()
mean_pupil_diameter['Pupil_Diameter'] = mean_pupil_diameter[['LeftPupilDiameterInMM', 'RightPupilDiameterInMM']].mean(axis=1)
mean_pupil_diameter[:1]

Unnamed: 0,TrialNumber,Session,LeftPupilDiameterInMM,RightPupilDiameterInMM,Pupil_Diameter
0,0,0,4.444397,4.282814,4.363605


In [14]:
# Add mean pupil diameter to trial data 
trial_data['Pupil_Diameter'] = mean_pupil_diameter['Pupil_Diameter'][:-session_count]

In [15]:
trial_data

Unnamed: 0,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session,Predicted,Pupil_Diameter
0,0,1.013901,1,0.33,right,RightArrow,3.765573,55.77411,2.117973,3.113475,0,0.714983,4.363605
1,1,1.311054,0,0.33,right,UpArrow,0.819280,64.94653,0.346936,0.742197,0,0.714907,4.311253
2,2,2.244801,0,0.33,down,UpArrow,0.436710,258.71920,-0.085428,-0.428273,0,0.714832,4.585174
3,3,2.554199,1,0.33,left,LeftArrow,2.301572,318.49810,1.723725,-1.525125,0,0.714756,4.389640
4,4,2.344635,1,0.33,down,DownArrow,3.333058,231.79220,-2.061548,-2.619026,0,0.714680,4.440438
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2985,294,0.689270,0,0.33,down,UpArrow,1.307471,69.47350,0.458452,1.224460,9,0.692678,3.871700
2986,295,0.578857,1,0.33,up,UpArrow,0.690750,128.06360,-0.425872,0.543846,9,0.692602,3.586814
2987,296,0.512390,1,0.33,right,RightArrow,3.068993,81.62679,0.446908,3.036279,9,0.692526,3.484209
2988,297,1.843933,0,0.33,right,UpArrow,3.932345,264.90500,-0.349221,-3.916807,9,0.692450,3.366692


In [16]:
# Predict correct trials by reaction time
df = trial_data

# Filter outlier trials
# df = df[df['Pupil_Diameter'] >= 4]
# df = df[df['Pupil_Diameter'] <= 5]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Pupil_Diameter']]
y = df['Response']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Pupil_Diameter', y='Response', title='Linear Regression: Correct/Error Trials vs Pupil Diameter')
fig.add_trace(go.Scatter(x=df['Pupil_Diameter'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Error Trials

In [17]:
# Filter the DataFrame to include only False 'Response' trials
df_errors = trial_data[trial_data['Response'] == 0]

# Calculate the statistics based on 'Direction' and 'Key_Pressed'
key_pressed_stats = df_errors.groupby(['Direction', 'Key_Pressed']).size().unstack(fill_value=0)

key_pressed_stats

Key_Pressed,DownArrow,LeftArrow,RightArrow,UpArrow
Direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
down,0,77,98,176
left,39,0,119,31
right,34,71,0,63
up,54,61,63,0


In [18]:
# Create a pivot table for Key_Pressed and Direction
pivot_table = trial_data.pivot_table(index='Direction', columns='Key_Pressed', values='Response', aggfunc='count', fill_value=0)

# Convert the pivot table to a format suitable for Plotly
pivot_table = pivot_table.reset_index()
pivot_table_melted = pivot_table.melt(id_vars='Direction', var_name='Key_Pressed', value_name='Count')

# Create the heatmap using Plotly
fig = go.Figure(data=go.Heatmap(
    z=pivot_table_melted['Count'],
    x=pivot_table_melted['Key_Pressed'],
    y=pivot_table_melted['Direction'],
    colorscale='Viridis',
    text=pivot_table_melted['Count'],
    texttemplate="%{text}",
    textfont={"size":14}
))

# Update layout to ensure square cells and add titles
fig.update_layout(
    title='Heatmap of Key Pressed by Direction',
    xaxis_title='Key Pressed',
    yaxis_title='Direction',
    xaxis=dict(scaleanchor='y', scaleratio=1),  # Ensures square cells
    yaxis=dict(scaleanchor='x', scaleratio=1)   # Ensures square cells
)

# Show plot
fig.show()

### Eccentricity

In [29]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Radius']]
y = df['Response']

# Transform the predictor variable into polynomial features
poly = PolynomialFeatures(degree=5)  # You can change the degree to fit higher order polynomials
X_poly = poly.fit_transform(X)

# Perform polynomial regression
model = LinearRegression()
model.fit(X_poly, y)

# Predict values
df['Predicted'] = model.predict(X_poly)

# Sort the DataFrame by 'Radius' for smooth plotting
df_sorted = df.sort_values(by='Radius')

# Visualization with Plotly
fig = px.scatter(df, x='Radius', y='Response', title='Polynomial Regression: Correct/Error Trials vs Eccentricity')
fig.add_trace(go.Scatter(x=df_sorted['Radius'], y=df_sorted['Predicted'], mode='lines', name='Polynomial Fit'))


# Show plot
fig.show()

In [116]:
# Predict correct trials by reaction time
df = trial_data
predictor = 'Radius'
dependent = 'Response'
binary_response = True

# # Filter outliers
# if filter != None:
#     df = df[df[predictor] <= filter]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
if binary_response:
    df[dependent] = df[dependent].astype(int)

# Define the predictor and target variables
X = df[[predictor]]
y = df[dependent]

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x=predictor, y=dependent, title='Linear Regression: Correct/Error Trials vs Eccentricity')
fig.add_trace(go.Scatter(x=df[predictor], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

In [117]:
# Predict correct trials by reaction time
df = pd.DataFrame(trial_data)

# Filter outlier trials
df = df[df['Reaction_Time'] <= 10]

# Convert 'Response' column to numeric (True -> 1, False -> 0)
df['Response'] = df['Response'].astype(int)

# Define the predictor and target variables
X = df[['Radius']]
y = df['Reaction_Time']

# Perform linear regression
model = LinearRegression()
model.fit(X, y)

# Predict values
df['Predicted'] = model.predict(X)

# Visualization with Plotly
fig = px.scatter(df, x='Radius', y='Reaction_Time', title='Linear Regression: Reaction Time Predicted by Eccentricity')
fig.add_trace(go.Scatter(x=df['Radius'], y=df['Predicted'], mode='lines', name='Regression Line'))

# Show plot
fig.show()

### Hemifields

In [118]:
fig = px.scatter_polar(
    trial_data,
    r = trial_data['Radius'],
    theta = trial_data['Theta'],
    color = trial_data['Response'],
    color_continuous_scale = 'picnic',
    opacity = .125
)

fig.show()

In [119]:
trial_data['Theta'] = trial_data['Theta'] 

In [120]:
trial_data_upper = trial_data[(trial_data['Theta'] > 270) | (trial_data['Theta'] < 90)]
trial_data_lower = trial_data[(trial_data['Theta'] > 90) | (trial_data['Theta'] < 270)]

In [121]:
# Check percentage of correct trials
trial_data_1 = trial_data[(trial_data['Theta'] >= 0) & (trial_data['Theta'] <= 90)]
trial_data_2 = trial_data[(trial_data['Theta'] >= 90) & (trial_data['Theta'] <= 180)]
trial_data_3 = trial_data[(trial_data['Theta'] >= 180) & (trial_data['Theta'] <= 270)]
trial_data_4 = trial_data[(trial_data['Theta'] >= 270) & (trial_data['Theta'] <= 360)]

true_percentage_1 = (trial_data_1['Response'].mean()) * 100
true_percentage_2 = (trial_data_2['Response'].mean()) * 100
true_percentage_3 = (trial_data_3['Response'].mean()) * 100
true_percentage_4 = (trial_data_4['Response'].mean()) * 100

print(true_percentage_1)
print(true_percentage_2)
print(true_percentage_3)
print(true_percentage_4)

71.13543091655266
71.18193891102258
68.93333333333334
70.23809523809523


In [33]:
fig = px.scatter_polar(
    trial_data_1,
    r = trial_data_1['Radius'],
    theta = trial_data_1['Theta'],
    color = trial_data_1['Response'],
    color_continuous_scale = 'picnic',
    opacity = .125
)

fig.show()

### Wedges

In [34]:
trial_data.head()

Unnamed: 0,Trial,Reaction_Time,Response,Coherence,Direction,Key_Pressed,Radius,Theta,X,Y,Session,Predicted,Pupil_Diameter
0,0,1.013901,1,0.33,right,RightArrow,3.765573,55.77411,2.117973,3.113475,0,0.689845,4.363605
1,1,1.311054,0,0.33,right,UpArrow,0.81928,64.94653,0.346936,0.742197,0,0.712327,4.311253
2,2,2.244801,0,0.33,down,UpArrow,0.43671,258.7192,-0.085428,-0.428273,0,0.715246,4.585174
3,3,2.554199,1,0.33,left,LeftArrow,2.301572,318.4981,1.723725,-1.525125,0,0.701016,4.38964
4,4,2.344635,1,0.33,down,DownArrow,3.333058,231.7922,-2.061548,-2.619026,0,0.693145,4.440438


In [122]:
def wedges(df:pd.DataFrame, count=12):
    assert 360 % count == 0, "Count must be integer factor of 360"

    increment = 360 // count
    region_beg = 0
    region_end = increment

    wedge_collection = []

    for wedge in range(count):
        wedge_df = df[(df['Theta'] >= region_beg) & (df['Theta'] <= region_end)]
        region_beg += increment
        region_end += increment
        wedge_collection.append(wedge_df)

    return wedge_collection


In [79]:
count = 36
wedge_dfs = wedges(trial_data, count=36) 

In [80]:
correct_by_wedge = []

for wedge in wedge_dfs:
    print(wedge['Response'].mean() * 100)
    correct_by_wedge.append(wedge['Response'].mean() * 100)


67.3076923076923
75.67567567567568
70.0
75.0
76.82926829268293
74.64788732394366
75.60975609756098
62.637362637362635
63.76811594202898
60.71428571428571
64.70588235294117
73.86363636363636
72.11538461538461
76.71232876712328
70.88607594936708
71.5909090909091
78.2051282051282
72.97297297297297
68.88888888888889
71.875
71.875
75.34246575342466
57.14285714285714
63.74999999999999
76.74418604651163
66.66666666666666
66.29213483146067
64.1025641025641
62.06896551724138
67.16417910447761
70.27027027027027
75.82417582417582
78.3132530120482
74.68354430379746
62.22222222222222
75.70093457943925


In [171]:
# Suppress FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

def polar_accuracy(dataframe, wedge_bin_count=12, radius_bin_count=5, var_of_interest='Response', min=0, max=1, start=0):
    # Read the CSV data
    df = dataframe

    voi = var_of_interest

    wedge_theta = 360 // wedge_bin_count
    radius_step = 5 / radius_bin_count

    # Define bin edges for theta and radius
    theta_bins = np.arange(start, 361+start, wedge_theta)
    radius_bins = np.arange(0, 5, radius_step)

    # Create bins
    df['theta_bin'] = pd.cut(df['Theta'], bins=theta_bins, labels=theta_bins[:-1], include_lowest=True)
    df['radius_bin'] = pd.cut(df['Radius'], bins=radius_bins, labels=radius_bins[:-1], include_lowest=True)

    # Calculate average accuracy for each bin
    binned_data = df.groupby(['theta_bin', 'radius_bin'])[voi].mean().reset_index()

    # Create a color scale
    colorscale = colors.sequential.Viridis

    # Create the polar plot
    fig = go.Figure()

    for i, r in enumerate(radius_bins[:-1]):
        r_data = binned_data[binned_data['radius_bin'] == r]
        
        # Ensure all theta bins are present
        all_theta = pd.DataFrame({'theta_bin': theta_bins[:-1]})
        r_data = all_theta.merge(r_data, on='theta_bin', how='left').fillna(0)
        
        fig.add_trace(go.Barpolar(
            r=[1] * len(r_data),
            theta=r_data['theta_bin'],
            marker_color=r_data[voi],
            marker_colorscale=colorscale,
            marker_showscale=i == 0,  # Only show color scale for the first trace
            marker_cmin=min,  # Set the minimum of the color scale to 0
            marker_cmax=max,  # Set the maximum of the color scale to 1
            width=wedge_theta,
            base=r,
            showlegend=False,  # Hide the radius traces from the legend
            hovertemplate='Mean Response: %{marker.color:.2f}<extra></extra>',
            customdata=np.column_stack((np.full(len(r_data), r), np.full(len(r_data), r+1)))
        ))

    fig.update_layout(
        title='Binned Accuracy Across Visual Field',
        polar=dict(
            radialaxis=dict(range=[0, 4], ticksuffix="", tickmode="array", tickvals=[0, 1, 2, 3, 4]),
            angularaxis=dict(direction="clockwise")
        ),
        coloraxis_colorbar=dict(
            title="Accuracy",
            ticksuffix="%",
            tickmode="array",
            tickvals=[0, 0.25, 0.5, 0.75, 1],
            ticktext=["0%", "25%", "50%", "75%", "100%"]
        )
    )

    fig.show()

In [179]:
polar_accuracy(trial_data, wedge_bin_count=12, radius_bin_count=5)

In [175]:
polar_accuracy(trial_data, wedge_bin_count=2, radius_bin_count=5, start=0)

In [176]:
polar_accuracy(trial_data, wedge_bin_count=36, var_of_interest='Reaction_Time', min=0, max=10)