In [1]:
import os
import math
import numpy as np
import pandas as pd
from ast import literal_eval
from scipy.special import kl_div
import itertools
import arviz as az
import scipy.stats as st
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as  mpatches
pd.options.mode.chained_assignment = None  # default='warn'

from helper_functions import *
from metrics import get_dist_to_spaceship_fix_rest, get_dist_to_spaceship_fix_rest_multiple, get_dist_to_obstacles_fix_explore, get_dist_to_obstacles_fix_explore_multiple, get_dist_to_obstacles_sacc, get_dist_to_obstacles_sacc_multiple

Instances within levels that require left vs. right decisions. First entry is y position within level, which can be converted to time. Sensond entry is *correct* decision in terms of less risky.

In [2]:
decision_instances = {
    1: [[130, 'Right'], [340, 'Right'], [375, 'Left']],
    2: [[130, 'Right'], [350, 'Right'], [470, 'Right'], [615, 'Left']],
    3: [[140, 'Left'], [185, 'Left'], [235, 'Left']], 
    4: [[190, 'Left'], [375, 'Right'], [420, 'Right'], [600, 'Right'], [715, 'Right'], [830, 'Left']],
    5: [[150, 'Left'], [200, 'Right'], [310, 'Left'], [335, 'Left']],
    6: [[190, 'Left'], [375, 'Left'], [475, 'Right']],
}

unsure_about = {
    4: [[375, 'Right'], [715, 'Right']]
}

Only successful runs...

In [3]:
root_dir = os.getcwd()

data_dir = "/experimental_data/"

target_string = "output"
done_string = "done"

successfull_runs = []

for subdir, dirs, files in os.walk(root_dir+data_dir):
    for file in files:
        if done_string in file:
            successfull_runs.append(file)

In [4]:
snippets = []

for file_name in successfull_runs:
    temp = file_name.split("_")
    code = temp[0]
    exp_trial = temp[-1]
    exp_trial = exp_trial[:2]  # cut off .csv
    
    snippets.append([code, exp_trial])

In [5]:
successfull_runs_df = pd.DataFrame(snippets, columns = ['code', 'trial'])
successfull_runs_df

Unnamed: 0,code,trial
0,OK01UE,10
1,OK01UE,25
2,OK01UE,16
3,OK01UE,27
4,OK01UE,07
...,...,...
854,UD06AD,16
855,UD06AD,13
856,UD06AD,35
857,UD06AD,45


In [6]:
decision_instances_data = pd.DataFrame()

for id_code in np.unique(successfull_runs_df.code):
    
    successfull_runs = list(successfull_runs_df.loc[successfull_runs_df['code'] == id_code].trial)
    
    path = root_dir + data_dir + str(id_code) + "/data"
    for subdir, dirs, files in os.walk(path):
        for data_file in files:
            if ".csv" in str(data_file):

                temp = str(data_file).split("_")
                # extract features of run from file_name coding
                level = temp[2][0]
                drift = temp[2][1]
                input_noise = temp[2][2]
                exp_trial = temp[-1]
                exp_trial = exp_trial[:2]
                # check for trial number of successfull trials

                if exp_trial in successfull_runs and level != '0':

                    #f"experimental_data/{id_code}/eye_data/{data_file}"
                    temp_data = pre_process_input_data(pd.read_csv((f"experimental_data/{id_code}/data/{data_file}"), index_col=False))
                    
                    # add identifying information to temp_data
                    temp_data["ID"] = id_code
                    temp_data["level"] = level
                    temp_data["drift"] = drift
                    temp_data["input_noise"] = input_noise
                    temp_data["decision_instance_y"] = np.nan
                    temp_data["decision_instance_safe_choice"] = np.nan
                    temp_data["N_visible_obstacles"] = np.nan
                    
                    # extract data from decision_instance
                    decision_instances_features = decision_instances[int(level)]
                    
                    for features in decision_instances_features:
                        
                        t_decision_instance = features[0]*0.05+0.7  # time point of decision instance
                        
                        # row in temp_data that's closest in terms of time_played to t_decision_instance
                        input_data_subset = temp_data.iloc[(temp_data['time_played'] - t_decision_instance).abs().argsort()[:1]]
                        
                        # insert decision_instance identifier
                        input_data_subset.decision_instance_y = features[0]
                        
                        # which option is less risky in this instance
                        input_data_subset.decision_instance_safe_choice = features[1]
                        
                        # append row to overall data
                        decision_instances_data = pd.concat([decision_instances_data, input_data_subset])                    


In [7]:
decision_instances_data = decision_instances_data.reset_index()

In [8]:
decision_instances_data.visible_obstacles

0       [[891, 42], [693, 276], [1053, 420], [1017, 65...
1       [[1293, 24], [951, 96], [1239, 114], [1185, 38...
2         [[675, 54], [621, 180], [531, 234], [855, 396]]
3                               [[1015, 456], [799, 564]]
4                                            [[953, 810]]
                              ...                        
3241    [[1241, 0], [917, 144], [1097, 342], [1241, 504]]
3242                 [[822, 396], [606, 504], [948, 774]]
3243                [[939, 606], [1011, 642], [741, 696]]
3244                                         [[812, 726]]
3245                                                   []
Name: visible_obstacles, Length: 3246, dtype: object

Of the visible obstacles which one is nearest to spaceship in terms of y coordinates. Then check whether this obstacle is left or right from spaceship.

In [9]:
# find closest obstacle on y-axis:

decision_instances_data['closest_obstacle_x_coord'] = np.nan
decision_instances_data['closest_obstacle_y_coord'] = np.nan

for rowID, row in decision_instances_data.iterrows():
    min_distance = 10000 # arbitrarily high number
    for obstacle in row.visible_obstacles:
        # considering distance on vertcial (y) axis
        if abs(obstacle[1] - row.player_pos[1]) < min_distance:
            min_distance = abs(obstacle[1] - row.player_pos[1])
            decision_instances_data['closest_obstacle_x_coord'].iloc[rowID] = obstacle[0]
            decision_instances_data['closest_obstacle_y_coord'].iloc[rowID] = obstacle[1]
                

In [10]:
# find out whether participant steered around closest obstacle left vs. right
cond = (decision_instances_data.closest_obstacle_x_coord > 954)
# if obstacle.x is smaller than spaceship.x, then right, elif greater, then left
decision_instances_data['avoidance_by_steering'] = np.where(cond, 'Left', 'Right')

In [11]:
decision_instances_data['avoidance_by_steering']

0       Right
1        Left
2       Right
3        Left
4       Right
        ...  
3241     Left
3242    Right
3243    Right
3244    Right
3245    Right
Name: avoidance_by_steering, Length: 3246, dtype: object

In [12]:
cond = (decision_instances_data.avoidance_by_steering == decision_instances_data.decision_instance_safe_choice)

decision_instances_data['less_risky_option_chosen'] = np.where(cond, True, False)

In [13]:
# inserting identfier for decision instance
decision_instances_data['decision_instance_id'] = decision_instances_data.level.astype(str) + decision_instances_data.decision_instance_y.astype(str)

In [14]:
decision_instances_data['decision_instance_id']

0       3140
1       3185
2       3235
3       2130
4       2350
        ... 
3241    4830
3242    2130
3243    2350
3244    2470
3245    2615
Name: decision_instance_id, Length: 3246, dtype: object

### Inserting additional information

In [15]:
# N_visible_obstacles
decision_instances_data['N_visible_obstacles'] = decision_instances_data.visible_obstacles.apply(lambda x: len(x))

# I AM HERE

Avoidance of obstacle in direction of drift?

In [16]:
decision_instances_data['N_visible_drift_tiles'] = decision_instances_data.visible_drift_tiles.apply(lambda row: len(row))


In [17]:
decision_instances_data['N_visible_drift_tiles']

0       1
1       2
2       0
3       0
4       0
       ..
3241    2
3242    0
3243    0
3244    0
3245    0
Name: N_visible_drift_tiles, Length: 3246, dtype: int64

In [18]:
decision_instances_data.visible_drift_tiles.iloc[0]

[[522, 54]]

In [19]:
decision_instances_data.visible_drift_tiles.iloc[0][0][0]

522

In [21]:
decision_instances_data.player_pos.iloc[0][0]

954

In [33]:
decision_instances_data['drift_side'] = decision_instances_data.loc[decision_instances_data.N_visible_drift_tiles > 0, "visible_drift_tiles"].apply(lambda row: 954 - row[0][0])

In [34]:
decision_instances_data['drift_side']

0       432.0
1       300.0
2         NaN
3         NaN
4         NaN
        ...  
3241    410.0
3242      NaN
3243      NaN
3244      NaN
3245      NaN
Name: drift_side, Length: 3246, dtype: float64

if drift_side is > 0: drift to the right;
elif drift_side is < 0: drift to the left

In [49]:
decision_instances_data_copy = decision_instances_data.copy()

decision_instances_data_copy.loc[decision_instances_data_copy['drift_side'] > 0, 'drift_direction'] = 'Right'
decision_instances_data_copy.loc[decision_instances_data_copy['drift_side'] < 0, 'drift_direction'] = 'Left'


  decision_instances_data_copy.loc[decision_instances_data_copy['drift_side'] > 0, 'drift_direction'] = 'Right'


In [50]:
decision_instances_data_copy

Unnamed: 0,index,frame,trial,attempt,time_played,time_tag,level_size_y,player_pos,collision,current_input,...,N_visible_obstacles,last_walls_tile,closest_obstacle_x_coord,closest_obstacle_y_coord,avoidance_by_steering,less_risky_option_chosen,decision_instance_id,N_visible_drift_tiles,drift_direction,drift_side
0,451,451,3,1,7.692423,6451.940005,9018,"[954, 270]",False,Left,...,5,,693.0,276.0,Right,False,3140,1,Right,432.0
1,580,580,3,1,9.952013,6451.940005,9018,"[954, 270]",False,Left,...,6,,1185.0,384.0,Left,True,3185,2,Right,300.0
2,725,725,3,1,12.452023,6451.940005,9018,"[954, 270]",False,,...,4,,531.0,234.0,Right,False,3235,0,,
3,397,397,2,1,7.196248,8239.990344,18018,"[954, 270]",False,,...,2,,1015.0,456.0,Left,False,2130,0,,
4,992,992,2,1,18.200840,8239.990344,18018,"[954, 270]",False,,...,1,,953.0,810.0,Right,True,2350,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3241,2297,2297,4,3,42.189174,22850.897579,18018,"[954, 270]",False,Left,...,4,"[1348, 4746]",1097.0,342.0,Left,True,4830,2,Right,410.0
3242,407,407,2,1,7.204197,22382.011747,18018,"[954, 270]",False,,...,3,"[1200, 16086]",822.0,396.0,Right,True,2130,0,,
3243,1026,1026,2,1,18.196864,22382.011747,18018,"[954, 270]",False,,...,3,"[1299, 12372]",939.0,606.0,Right,True,2350,0,,
3244,1366,1366,2,1,24.195278,22382.011747,18018,"[954, 270]",False,,...,1,"[1280, 10332]",812.0,726.0,Right,True,2470,0,,


now is avoidance equal to drift_direction?

In [51]:
cond = (decision_instances_data_copy.avoidance_by_steering == decision_instances_data_copy.drift_direction)

decision_instances_data_copy['coherent_avoid_drift'] = np.where(cond, True, False)

In [52]:
decision_instances_data_copy['coherent_avoid_drift']

0        True
1       False
2       False
3       False
4       False
        ...  
3241    False
3242    False
3243    False
3244    False
3245    False
Name: coherent_avoid_drift, Length: 3246, dtype: bool

### Writing to .csv

In [53]:
# saving progress
decision_instances_data_copy.to_csv('input_data/decision_instances.csv', sep=',')