In [37]:
import os
import math
import numpy as np
import pandas as pd
from ast import literal_eval
import pymc3
import itertools
import arviz as az
import scipy.stats as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as  mpatches
pd.options.mode.chained_assignment = None  # default='warn'

from helper_functions import pre_process_input_data, pre_process_eye_data

## Writing all successful runs of an individual participant into a single file

In [18]:
# Participant ID codes, listed in alphabetical order.
subject_ids = [
    "AE07EM1", "AG11ER1", "AM19ER1", "AN10HA1", "AR02AA",
    "AS01RH1", "EH08OI1", "EI13RE1", "EL10RM1", "EU29TT1",
    "FR04AN", "IT02IT1", "LA22KK1", "LJ28VA1", "LL13AE1",
    "MO07LN1", "OC19AS1", "OK01UE", "OL01AC1", "ON27IN1",
    "OR13UN1", "RE25EK", "RO25EA1", "TE26EI1", "UD06AD",
    "VE21IR1", "ZC12VE",
]


In [22]:
# Data is expected below <current working directory>/experimental_data/.
root_dir = os.getcwd()
data_dir = "/experimental_data/"

# Substrings used to recognise file types / run outcomes in file names.
target_string = "output"
target_string_eye_tracking = "eye_tracking"
done_string = "done"

# Walk the whole data tree and collect the bare file names (no directory part)
# of every file whose name contains the "done" marker.
successfull_runs = [
    found_name
    for _walk_dir, _sub_dirs, found_names in os.walk(root_dir + data_dir)
    for found_name in found_names
    if done_string in found_name
]

In [23]:
# Reduce every file name to [participant code, trial id]:
#   - the code is the first "_"-separated token,
#   - the trial id is the first two characters of the last token
#     (this drops the trailing ".csv" for two-character trial ids).
snippets = [
    [name_parts[0], name_parts[-1][:2]]
    for name_parts in (run_file.split("_") for run_file in successfull_runs)
]

In [24]:
# Tabulate the [code, trial] pairs; the bare name on the last line displays the frame.
run_columns = ["code", "trial"]
successfull_runs_df = pd.DataFrame(data=snippets, columns=run_columns)
successfull_runs_df

Unnamed: 0,code,trial
0,OK01UE,10
1,OK01UE,25
2,OK01UE,16
3,OK01UE,27
4,OK01UE,07
...,...,...
821,UD06AD,16
822,UD06AD,13
823,UD06AD,35
824,UD06AD,45


In [32]:
# For every participant: gather the pre-processed eye-tracking data of all runs listed
# in `successfull_runs_df`, write it to one CSV per participant, and pool the
# fixation-onset and saccade-onset rows of all participants into two further CSVs.
fixation_frames = []  # fixation-onset rows, one frame per participant
saccade_frames = []   # saccade-onset rows, one frame per participant

for id_code in np.unique(successfull_runs_df.code):

    file_name = f'experimental_eye_data_{id_code}.csv'

    # Trial ids of this participant's successful runs. A dedicated name is used so the
    # list of file names stored in `successfull_runs` is NOT overwritten; re-running
    # the earlier parsing cell therefore keeps working.
    successful_trials = set(
        successfull_runs_df.loc[successfull_runs_df['code'] == id_code, 'trial']
    )

    # Frames are collected in a list and concatenated once (concatenating inside the
    # loop copies all previously collected rows on every iteration).
    run_frames = []

    path = root_dir + data_dir + str(id_code) + "/eye_data"
    for subdir, dirs, files in os.walk(path):
        for data_file in files:
            if ".csv" not in str(data_file):
                continue

            temp = str(data_file).split("_")
            # extract features of run from file_name coding:
            # the 5th "_"-separated token holds level, drift and input noise as its
            # first three characters; the last token starts with the trial id
            level = temp[4][0]
            drift = temp[4][1]
            input_noise = temp[4][2]
            exp_trial = temp[-1][:2]

            # keep only the trials that are listed as successful
            if exp_trial not in successful_trials:
                continue

            # Read from the directory os.walk is currently visiting, so the file is
            # found regardless of nesting or of the current working directory.
            temp_data = pre_process_eye_data(
                pd.read_csv(os.path.join(subdir, data_file), index_col=False)
            )
            temp_data["ID"] = id_code
            temp_data["level"] = level
            temp_data["drift"] = drift
            temp_data["input_noise"] = input_noise

            run_frames.append(temp_data)

    if not run_frames:
        # Nothing to filter or write for this participant (an empty frame has no
        # fixationOnset / saccadeOnset columns).
        print(f"No eye data of successful runs found for {id_code} - skipped.")
        continue

    eye_data_successfull_runs = pd.concat(run_frames)

    # collect fixation data
    temp_fixations = eye_data_successfull_runs[eye_data_successfull_runs.fixationOnset == 1]
    fixation_frames.append(temp_fixations)

    # collect saccade data
    temp_saccades = eye_data_successfull_runs[eye_data_successfull_runs.saccadeOnset == 1]
    saccade_frames.append(temp_saccades)

    # writing subject data to file
    eye_data_successfull_runs.to_csv(file_name, sep=',', index=False)

# pool fixation and saccade data over all participants
fixations_complete_df = pd.concat(fixation_frames) if fixation_frames else pd.DataFrame()
saccades_complete_df = pd.concat(saccade_frames) if saccade_frames else pd.DataFrame()

# NOTE: reset_index() keeps the old per-run row index as an "index" column and to_csv()
# additionally writes the new running index; the file layout is left unchanged on purpose.
fixations_complete_df = fixations_complete_df.reset_index()
saccades_complete_df = saccades_complete_df.reset_index()

# writing fixation and saccade data to files individually
fixations_complete_df.to_csv('experimental_eye_data_fixations.csv', sep=',')
saccades_complete_df.to_csv('experimental_eye_data_saccades.csv', sep=',')

# Metrics for situational analysis

We will focus on the usual suspects, namely fixation_location_y, fixation_duration, and saccade_amplitude. We will also assess the total number of fixations (n_fixations) and the total number of saccades (n_saccades) in a given time window.

In addition, we will investigate saccade directionality more closely, meaning that we will assess saccadic landing sites. Specifically, we will test whether saccades land closer to obstacles or borders (progressive saccades) or closer to the spaceship (regressive saccades).

# Analysing crash situations

First, we need to find, for every participant (code), all runs in which a crash occurred.

In [33]:
# Data is expected below <current working directory>/experimental_data/.
root_dir = os.getcwd()
data_dir = "/experimental_data/"

# Substrings used to recognise file types / run outcomes in file names.
target_string = "output"
target_string_eye_tracking = "eye_tracking"
crash_string = "crash"

# Bare file names of every file in the data tree whose name contains "crash".
crash_runs = [
    found_name
    for _walk_dir, _sub_dirs, found_names in os.walk(root_dir + data_dir)
    for found_name in found_names
    if crash_string in found_name
]

# [participant code, trial id] per file: first "_"-separated token and the first two
# characters of the last token.
snippets = [
    [name_parts[0], name_parts[-1][:2]]
    for name_parts in (crash_file.split("_") for crash_file in crash_runs)
]

crash_columns = ["code", "trial"]
crash_runs_df = pd.DataFrame(data=snippets, columns=crash_columns)
crash_runs_df

Unnamed: 0,code,trial
0,OK01UE,17
1,OK01UE,08
2,OK01UE,00
3,OK01UE,09
4,OK01UE,46
...,...,...
589,UD06AD,50
590,UD06AD,30
591,UD06AD,47
592,UD06AD,41


In [40]:
# Load the eye-tracking data and the input data of one crash run (the first entry of
# crash_runs_df) and determine the time at which the run ended.
id_code = crash_runs_df.code.iloc[0]
run = crash_runs_df.trial.iloc[0]

root_dir = os.getcwd()
data_dir = "/experimental_data/"
eye_path = root_dir + data_dir + str(id_code) + "/eye_data"
input_path = root_dir + data_dir + str(id_code) + "/data"

eye_data = None
for subdir, dirs, files in os.walk(eye_path):
    for data_file in files:
        if ".csv" not in str(data_file):
            continue

        temp = str(data_file).split("_")
        exp_trial = temp[-1][:2]

        # Compare the parsed trial id, not a substring of the whole file name:
        # e.g. run "01" is contained in every file name of participant "OK01UE".
        if exp_trial != str(run):
            continue

        # extract features of run from file_name coding
        level = temp[4][0]
        drift = temp[4][1]
        input_noise = temp[4][2]

        # read from the directory os.walk is currently visiting
        eye_data = pre_process_eye_data(
            pd.read_csv(os.path.join(subdir, data_file), index_col=False)
        )
        eye_data["ID"] = id_code
        eye_data["level"] = level
        eye_data["drift"] = drift
        eye_data["input_noise"] = input_noise

if eye_data is None:
    # Fail loudly instead of continuing with level/drift/input_noise values left over
    # from a previously executed cell.
    raise FileNotFoundError(f"No eye data file for run {run} found in {eye_path}")

# we need the points in time that are in the input_data file
input_data_file_name = f"{id_code}_output_{level}{drift}{input_noise}_crashed_{run}.csv"
input_data = pre_process_input_data(
    pd.read_csv(os.path.join(input_path, input_data_file_name), index_col=False)
)
# NOTE(review): the second-to-last sample is taken as the end of the run - presumably
# the last row is not a regular game sample; confirm against pre_process_input_data.
end_time = input_data.time_played.iloc[-2]

end_time


8.399715423583984

In [41]:
# Display the pre-processed eye-tracking samples loaded for the selected crash run.
eye_data

Unnamed: 0,TimeTag,LeftEyeX,LeftEyeY,LeftPupilDiameter,RightEyeX,RightEyeY,RightPupilDiameter,DigitalIn,LeftBlink,RightBlink,...,Saccade,saccadeOnset,N_saccade,saccade_direction_x,saccade_direction_y,saccade_amplitude,ID,level,drift,input_noise
0,5425.2320,-56.152222,117.421021,54.269531,-51.206299,102.531860,54.347656,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
1,5425.2325,-55.812439,117.350708,54.238281,-50.960815,102.292847,54.332031,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
2,5425.2330,-55.417358,117.424805,54.210938,-50.714050,101.981201,54.324219,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
3,5425.2335,-55.218018,117.399048,54.187500,-50.769165,101.682861,54.300781,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
4,5425.2340,-55.120483,117.460449,54.171875,-50.584625,101.477783,54.292969,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20929,5435.6965,,,9999.000000,,,9999.000000,16777215.0,1.0,1.0,...,0.0,0,,,,,OK01UE,5,T,W
20930,5435.6970,,,9999.000000,,,9999.000000,16777215.0,1.0,1.0,...,0.0,0,,,,,OK01UE,5,T,W
20931,5435.6975,,,9999.000000,,,9999.000000,16777215.0,1.0,1.0,...,0.0,0,,,,,OK01UE,5,T,W
20932,5435.6980,,,9999.000000,,,9999.000000,16777215.0,1.0,1.0,...,0.0,0,,,,,OK01UE,5,T,W


In [36]:
# List the columns of the pre-processed eye-tracking frame.
# (`test_data` is not defined by any earlier cell shown here, so this cell failed on a
# fresh kernel; the frame loaded above is `eye_data`.)
eye_data.columns

Index(['TimeTag', 'LeftEyeX', 'LeftEyeY', 'LeftPupilDiameter', 'RightEyeX',
       'RightEyeY', 'RightPupilDiameter', 'DigitalIn', 'LeftBlink',
       'RightBlink', 'DigitalOut', 'LeftEyeFixationFlag',
       'RightEyeFixationFlag', 'LeftEyeSaccadeFlag', 'RightEyeSaccadeFlag',
       'MessageCode', 'LeftEyeRawX', 'LeftEyeRawY', 'RightEyeRawX',
       'RightEyeRawY', 'time_tag', 'Fixation', 'fixationOnset', 'N_fixation',
       'fixation_duration', 'converging_eye_x', 'converging_eye_y',
       'converging_eye_x_adjusted', 'converging_eye_y_adjusted',
       'exploring_fixation', 'Saccade', 'saccadeOnset', 'N_saccade',
       'saccade_direction_x', 'saccade_direction_y', 'saccade_amplitude', 'ID',
       'level', 'drift', 'input_noise'],
      dtype='object')

We need to define a window of time before the crash. Within this time window, we will run our analyses and compare the results to aggregated data of successful trials under the same set of conditions.

In visualize_sequence, I look at 5s. This window seems adequate. Maybe we will look at the first 5s before the crash and then the next 5s before that.

In [43]:
# Length of the analysis window before the end of the run (5 s, see the note above).
time_window = 5
# Never start before time 0 when the run is shorter than the window.
start_time = max(0, end_time - time_window)

# Keep the samples whose time_tag lies in [start_time, end_time] (both ends inclusive).
within_window = eye_data["time_tag"].between(start_time, end_time)
eye_data_ = eye_data.loc[within_window]

eye_data_

Unnamed: 0,TimeTag,LeftEyeX,LeftEyeY,LeftPupilDiameter,RightEyeX,RightEyeY,RightPupilDiameter,DigitalIn,LeftBlink,RightBlink,...,Saccade,saccadeOnset,N_saccade,saccade_direction_x,saccade_direction_y,saccade_amplitude,ID,level,drift,input_noise
6800,5428.6320,11.044495,107.840088,47.421875,42.886108,86.467651,47.621094,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
6801,5428.6325,10.801514,108.017334,47.429688,43.104309,86.435669,47.613281,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
6802,5428.6330,10.690735,108.355713,47.425781,42.937683,86.382690,47.613281,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
6803,5428.6335,10.738525,108.397827,47.425781,42.998657,86.317749,47.628906,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
6804,5428.6340,10.707214,108.718994,47.421875,42.877563,86.416748,47.625000,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16795,5433.6295,-9.551636,215.083740,60.371094,22.267822,202.916748,60.039062,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
16796,5433.6300,-9.656677,215.285889,60.382812,22.405945,203.299072,60.042969,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
16797,5433.6305,-9.554932,215.437378,60.375000,22.375000,203.362671,60.058594,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W
16798,5433.6310,-9.173706,215.418457,60.378906,22.466370,203.644897,60.062500,16777215.0,0.0,0.0,...,0.0,0,,,,,OK01UE,5,T,W


The simple metrics first...

In [47]:
# Number of fixations in the window = number of rows flagged as fixation onset.
# Series.sum() is vectorised and skips missing values; int() keeps the displayed result
# a plain Python integer, as the builtin sum() did.
N_fixations = int(eye_data_["fixationOnset"].sum())
N_fixations

14

In [45]:
# Number of saccades in the window = number of rows flagged as saccade onset.
# Series.sum() is vectorised and skips missing values; int() keeps the displayed result
# a plain Python integer, as the builtin sum() did.
N_saccades = int(eye_data_["saccadeOnset"].sum())
N_saccades

8

In [50]:
# Duration of every fixation that starts inside the window (one value per onset row).
is_fixation_onset = eye_data_["fixationOnset"] == 1
fixDurs = eye_data_.loc[is_fixation_onset, "fixation_duration"]
fixDurs

7308     0.2065
7824     0.1920
8284     0.2235
8786     0.1515
9196     0.1590
9626     0.1970
10374    0.8710
12186    0.4405
13122    0.5460
14290    0.1165
14583    0.0940
14824    0.5670
16032    0.1805
16461    0.2195
Name: fixation_duration, dtype: float64

In [52]:
# converging_eye_y_adjusted value at every fixation onset inside the window.
is_fixation_onset = eye_data_["fixationOnset"] == 1
fixLocs = eye_data_.loc[is_fixation_onset, "converging_eye_y_adjusted"]
fixLocs

7308     667.547852
7824     219.326843
8284     436.501221
8786     345.240295
9196     639.894501
9626     345.057495
10374    314.195801
12186    472.007324
13122    327.468506
14290    540.748108
14583    425.086060
14824    330.014465
16032    528.230835
16461    369.575684
Name: converging_eye_y_adjusted, dtype: float64

In [53]:
# Amplitude of every saccade that starts inside the window (one value per onset row).
is_saccade_onset = eye_data_["saccadeOnset"] == 1
saccAmps = eye_data_.loc[is_saccade_onset, "saccade_amplitude"]
saccAmps

7225     110.657625
7742     224.731083
8228      88.991052
9106     322.415650
9533     290.720292
12138     56.764296
14236     98.302266
15978    104.240252
Name: saccade_amplitude, dtype: float64

Now the more complicated ones...

For each saccadeOnset, I will extract the point in time at which the saccade onset happened (time_tag) and the landing site of the saccade. I will then search for the row in input_data that is nearest in terms of time_played and extract the positions of all obstacles and walls on screen, as well as the position of the spaceship. Next, I will simply compute the squared distance between the saccade landing site and the closest obstacle, wall, or spaceship (squared in order to penalize larger distances and favor smaller ones). The rationale is as follows: we hypothesize that saccades are programmed (at the first row of the saccade, i.e. its onset) to land near visual markers for more accurate perception -> the decision-making is based more on visual perception than on high-level cognitive abstraction.