In [1]:
import parse_data.prepare_data as prepare_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import globals
import data_strings
import data_extraction.get_indices as get_indices
import analysis.wall_visibility_and_choice as wall_visibility_and_choice

### Create a dataframe to feed into a GLM using Choice, Outcome, Wall Separation, and PlayerID to predict P(Choose High)

In [2]:
# Data location and per-condition session file lists, read from the project's data_strings module
data_folder = data_strings.DATA_FOLDER
json_filenames_all_social = data_strings.JSON_FILENAMES_SOCIAL
json_filenames_all_solo = data_strings.JSON_FILENAMES_SOLO

In [3]:
# Choose which set of session files is loaded below (here: the social sessions)
json_filenames = json_filenames_all_social

In [4]:
# Load and preprocess the selected sessions. trial_lists is indexed by session further down
# (trial_lists[session_id]); presumably combine=False keeps the sessions separate — confirm.
df, trial_lists = prepare_data.prepare_data(data_folder, json_filenames, combine=False)

filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\first_experiments_2409\240913\2024-09-13_11-31-00_YansuJerrySocial.json
Data is from period before 2024-09-13 00:00:00
Running dataframe through playerinfo_playerposition_conversion.
Loading complete.
Preprocessing complete.
filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\second_experiments_2409\240927\2024-09-27_14-25-20_SaraEmilySocial.json
Loading complete.
Preprocessing complete.
filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\third_experiments_2410\241017\2024-10-17_14-28-40_ShamirAbigailSocial.json
Loading complete.
Preprocessing complete.
filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\fourth_experiments_2410\241017\2024-10-17_16-41-38_ZimoElsaSocial.json
Loading complete.
Preprocessing complete.
filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\fifth_experiments_241112\2024-11-12_13-31-14_KhadijaWendySocial.json
Loading complete.
Preprocessing complete.
filepath: D:\Users\Tom\OneDrive\PhD\SWC\data\sixth_experiments_241112\2024-11-12_15-23-24_Franc

### Regressor value extraction functions

In [75]:
def extract_wall_sep(trial_list):
    """Collect the wall separation of every trial in one session.

    Parameters
    ----------
    trial_list : sequence
        Trials of a single session. Each trial is handed to
        ``get_indices.get_wall_difference``.

    Returns
    -------
    numpy.ndarray
        Float array with one wall-separation value per trial, in trial order.
    """
    n_trials = len(trial_list)

    # start from NaN so that the array is float-typed from the outset
    separations = np.full(n_trials, np.nan)
    for trial_idx, this_trial in enumerate(trial_list):
        separations[trial_idx] = get_indices.get_wall_difference(trial=this_trial)

    return separations

def extract_first_wall_seen(trial_list, player_id, current_fov=110):
    """Encode, per trial, which wall the player saw first.

    The per-trial result of
    ``wall_visibility_and_choice.get_given_wall_first_visible_session`` is
    fetched for wall index 0 (treated as the High wall) and wall index 1
    (treated as the Low wall). The Low-wall result is doubled and added to
    the High-wall result.

    Parameters
    ----------
    trial_list : sequence
        Trials of a single session.
    player_id : int
        Player whose view is evaluated.
    current_fov : number, optional
        Field-of-view value forwarded to the visibility helper. Defaults to
        110, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        High-wall values + 2 * Low-wall values, one entry per trial.
        Assuming the helper returns 0/1 flags, this reads as 1 = High seen
        first, 2 = Low seen first, 0 = neither singled out — TODO confirm
        against the helper.
    """
    high_wall_first_visible_session, low_wall_first_visible_session = (
        wall_visibility_and_choice.get_given_wall_first_visible_session(
            trial_list,
            player_id,
            wall_index=wall_index,
            current_fov=current_fov,
        )
        for wall_index in (0, 1)
    )

    # doubling the Low-wall values keeps the two walls distinguishable after summing
    return high_wall_first_visible_session + low_wall_first_visible_session * 2


def extract_player_choice(trial_list, player_id, inferred_choice):
    """Encode, per trial, which wall the player chose.

    Parameters
    ----------
    trial_list : sequence
        Trials of a single session.
    player_id : int
        Player whose choice is extracted.
    inferred_choice : bool
        Forwarded to ``wall_visibility_and_choice.get_player_wall_choice``.

    Returns
    -------
    numpy.ndarray
        2 * (wall index 0 chosen) + (wall index 1 chosen), one entry per
        trial: 2 where the High wall was chosen, 1 where the Low wall was
        chosen, NaN where no choice is available.
        NOTE(review): a value of 0 results when neither flag is set —
        confirm whether that can occur and how it should be treated.
    """
    # wall number chosen on each trial (NaN where there is no recorded choice)
    walls_chosen = wall_visibility_and_choice.get_player_wall_choice(
        trial_list, player_id, inferred_choice=inferred_choice, debug=False
    )

    # per-trial flags for "chose High" (wall index 0) and "chose Low" (wall index 1)
    chose_high = get_indices.was_given_wall_chosen(trial_list, walls_chosen, given_wall_index=0)
    chose_low = get_indices.was_given_wall_chosen(trial_list, walls_chosen, given_wall_index=1)

    # weight High by 2 so that the two walls map to distinct codes
    return chose_high * 2 + chose_low


def extract_trial_outcome(trial_list, player_id):
    """Return, per trial, whether the given player won.

    Parameters
    ----------
    trial_list : sequence
        Trials of a single session.
    player_id : int
        Player of interest.

    Returns
    -------
    numpy.ndarray
        For ``player_id == 0`` the trigger-activator values are flipped
        (0 -> 1, 1 -> 0); for any other player they are returned unchanged.
        Presumably the activator array holds the id (0 or 1) of the player
        who triggered the wall — TODO confirm.
    """
    activators = get_indices.get_trigger_activators(trial_list)

    if player_id == 0:
        # player 0 won exactly where the activator is 0, so invert the 0/1 coding
        return -1 * (activators - 1)

    return activators

### Extract 1D arrays for each player for the regressor values

#### Wall Separation

In [5]:
# Worked example on a single session (index 15 of trial_lists)
trial_list = trial_lists[15]

# Reuse the helper defined above instead of repeating its loop inline
wall_sep = extract_wall_sep(trial_list)


In [6]:
# Fraction of trials with a wall separation of 4. The denominator counts every
# non-zero entry of wall_sep (NaN entries also count as non-zero).
np.count_nonzero(wall_sep == 4)/np.count_nonzero(wall_sep)

0.18518518518518517

#### First Seen

In [7]:
# Player used for the worked-example cells below
player_id = 0

In [8]:
    
# Per-trial first-visible values for the High wall (wall index 0)
high_wall_first_visible_session = wall_visibility_and_choice.get_given_wall_first_visible_session(
    trial_list, player_id, wall_index=0, current_fov=110
)

# Same for the Low wall (wall index 1), doubled so the two walls stay distinguishable once summed
low_wall_first_visible_session = 2 * wall_visibility_and_choice.get_given_wall_first_visible_session(
    trial_list, player_id, wall_index=1, current_fov=110
)

# Combined per-trial code: High-wall values + 2 * Low-wall values
first_visible_session = high_wall_first_visible_session + low_wall_first_visible_session

wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall_becomes_visible_index for trial 1 is [nan  0.]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall ini

In [9]:
# Inspect the combined first-seen code per trial (High-wall values + 2 * Low-wall values)
first_visible_session

array([0., 2., 2., 1., 1., 1., 2., 1., 1., 1., 0., 0., 2., 0., 1., 1., 2.,
       2., 1., 0., 1., 2., 2., 1., 2., 0., 2., 2., 0., 2., 0., 1., 1., 1.,
       0., 1., 1., 1., 2., 1., 0., 1., 0., 2., 2., 2., 1., 2., 1., 1., 2.,
       1., 2., 1., 2., 1., 2., 1., 2., 0., 2., 1., 2., 2., 1., 1., 0., 0.,
       0., 1., 0., 2., 2., 2., 1., 2., 2., 2., 2., 0., 1., 0., 2., 1., 0.,
       2., 0., 2., 0., 1., 1., 2., 2., 1., 1., 2., 0., 1., 1., 0., 1., 1.,
       1., 1., 2., 1., 2., 1., 0., 2., 1., 2., 1., 2., 2., 1., 2., 0., 1.,
       0., 2., 1., 1., 1., 0., 1., 0., 0., 1., 2., 0., 2., 2., 2., 1.])

#### Choice

In [10]:
# Wall number chosen on trials the player won; np.nan on trials the player did not win
player_choice = wall_visibility_and_choice.get_player_wall_choice(
    trial_list, player_id, inferred_choice=False, debug=False
)

# High wall (wall index 0) flags, doubled: 2 where High was chosen, 0 where Low was chosen, np.nan where player lost
high_wall_chosen_session = 2 * get_indices.was_given_wall_chosen(
    trial_list, player_choice, given_wall_index=0
)

# Low wall (wall index 1) flags: 1 where Low was chosen, 0 where High was chosen, np.nan where player lost
low_wall_chosen_session = get_indices.was_given_wall_chosen(
    trial_list, player_choice, given_wall_index=1
)

# Combined code: 2 = chose High, 1 = chose Low, np.nan = player lost
chosen_wall_session_wins_only = high_wall_chosen_session + low_wall_chosen_session

In [11]:
# Inspect the per-trial choice code when only won trials carry a choice
chosen_wall_session_wins_only

array([nan,  1.,  1.,  2.,  2., nan, nan,  2., nan,  2.,  2., nan,  1.,
       nan, nan, nan,  1., nan, nan, nan,  2.,  2.,  2., nan,  1., nan,
       nan, nan, nan, nan,  1.,  2.,  2.,  2.,  2., nan, nan, nan, nan,
        2.,  2.,  2.,  2.,  1., nan, nan,  2., nan,  2.,  2.,  2., nan,
        2.,  2.,  1.,  2.,  1.,  2., nan, nan, nan, nan,  1.,  1.,  2.,
       nan, nan,  2., nan,  2.,  2.,  2.,  1., nan, nan, nan,  1., nan,
        1.,  2., nan, nan, nan,  2., nan,  1., nan, nan,  2., nan, nan,
        1.,  2.,  2., nan,  1.,  2., nan, nan,  2.,  2.,  2.,  2., nan,
       nan,  2., nan, nan,  2.,  1.,  2.,  2., nan,  2., nan, nan, nan,
        2.,  2., nan,  1.,  2., nan,  2.,  2., nan, nan,  2., nan, nan,
       nan,  1., nan, nan, nan])

#### Choice (with inferred)

In [12]:
# Wall number where the player's choice is confident; np.nan where the player lost and the choice is unconfident
player_choice = wall_visibility_and_choice.get_player_wall_choice(
    trial_list, player_id, inferred_choice=True, debug=False
)

# High wall (wall index 0) flags, doubled: 2 where High was chosen, np.nan where no choice is available
high_wall_chosen_session = 2 * get_indices.was_given_wall_chosen(
    trial_list, player_choice, given_wall_index=0
)

# Low wall (wall index 1) flags: 1 where Low was chosen, np.nan where no choice is available
low_wall_chosen_session = get_indices.was_given_wall_chosen(
    trial_list, player_choice, given_wall_index=1
)

# Combined code: 2 = chose High, 1 = chose Low, np.nan = no retrievable choice.
# NOTE(review): the displayed result also contains 0 (neither flag set) — confirm how these trials should be handled.
chosen_wall_session_wins_and_losses = high_wall_chosen_session + low_wall_chosen_session

  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


In [13]:
# Inspect the per-trial choice code when inferred choices are included
chosen_wall_session_wins_and_losses

array([nan,  1.,  1.,  2.,  2.,  2.,  1.,  2.,  0.,  2.,  2., nan,  1.,
       nan, nan,  2.,  1.,  1.,  2.,  2.,  2.,  2.,  2., nan,  1.,  0.,
        1., nan, nan, nan,  1.,  2.,  2.,  2.,  2.,  2.,  1.,  2.,  2.,
        2.,  2.,  2.,  2.,  1.,  2.,  0.,  2.,  2.,  2.,  2.,  2.,  2.,
        2.,  2.,  1.,  2.,  1.,  2., nan,  2., nan,  0.,  1.,  1.,  2.,
       nan,  1.,  2.,  1.,  2.,  2.,  2.,  1.,  2., nan,  2.,  1.,  2.,
        1.,  2., nan,  1.,  0.,  2.,  0.,  1., nan,  1.,  2.,  2.,  0.,
        1.,  2.,  2., nan,  1.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,
        1.,  2.,  2., nan,  2.,  1.,  2.,  2., nan,  2.,  0., nan, nan,
        2.,  2., nan,  1.,  2.,  0.,  2.,  2.,  2., nan,  2., nan, nan,
       nan,  1.,  0., nan,  2.])

#### Outcome

In [14]:
trigger_activators = get_indices.get_trigger_activators(trial_list)

if player_id == 0:
    # player 0 won where the activator value is 0, so flip the 0/1 coding
    this_player_won = -1 * (trigger_activators - 1)
else:
    this_player_won = trigger_activators

In [15]:
# Inspect the per-trial outcome for the selected player (the flip for player 0 yields -0. where the activator is 1)
this_player_won

array([-0.,  1.,  1.,  1.,  1., -0., -0.,  1., -0.,  1.,  1., -0.,  1.,
       -0., -0., -0.,  1., -0., -0., -0.,  1.,  1.,  1., -0.,  1., -0.,
       -0., -0., -0., -0.,  1.,  1.,  1.,  1.,  1., -0., -0., -0., -0.,
        1.,  1.,  1.,  1.,  1., -0., -0.,  1., -0.,  1.,  1.,  1., -0.,
        1.,  1.,  1.,  1.,  1.,  1., -0., -0., -0., -0.,  1.,  1.,  1.,
       -0., -0.,  1., -0.,  1.,  1.,  1.,  1., -0., -0., -0.,  1., -0.,
        1.,  1., -0., -0., -0.,  1., -0.,  1., -0., -0.,  1., -0., -0.,
        1.,  1.,  1., -0.,  1.,  1., -0., -0.,  1.,  1.,  1.,  1., -0.,
       -0.,  1., -0., -0.,  1.,  1.,  1.,  1., -0.,  1., -0., -0., -0.,
        1.,  1., -0.,  1.,  1., -0.,  1.,  1., -0., -0.,  1., -0., -0.,
       -0.,  1., -0., -0., -0.])

### Filter trials to only include those with full information for the GLM 
- Remove trials without recorded choice (np.nan in choice array) (whether I'm using inferred-choice or not)
- Remove trials without a first visible wall (np.nan in first seen array)
- Filter HighLow trials initially

The best way to do the above may be to keep an array of 'original indices', filter this array in the same way as I do my normal trial list filtering, and then I have an indices array with preserved numbering that I can use to index valid trials to add to my dataframe

In [66]:
# Indices into trial_list of the HighLow trials; used below to index the per-trial regressor arrays
high_low_trial_indices = get_indices.get_trials_trialtype(trial_list, trial_type=globals.HIGH_LOW)

In [67]:
# Within the HighLow trials, keep trials whose choice code is not NaN ...
retrievable_choice_mask = np.logical_not(
    np.isnan(chosen_wall_session_wins_and_losses[high_low_trial_indices])
)

# ... and whose first-seen code is positive (i.e. one wall was visible first)
one_wall_first_visible_mask = np.greater(first_visible_session[high_low_trial_indices], 0)

# A trial is valid only when it passes both masks
final_mask = np.logical_and(retrievable_choice_mask, one_wall_first_visible_mask)

In [68]:
# Map the combined mask back onto the original trial_list indices
filtered_indices = high_low_trial_indices[final_mask]

In [69]:
# Inspect which original trial indices survive the filtering
filtered_indices

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  12,  15,  17,  18,
        20,  21,  22,  24,  26,  31,  32,  33,  36,  37,  38,  39,  41,
        43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  56,  57,
        61,  64,  71,  72,  73,  77,  78,  82,  83,  85,  89,  90,  92,
        93,  95,  97,  98, 100, 101, 102, 103, 104, 105, 106, 109, 110,
       111, 113, 114, 121, 122, 123, 125, 131, 132, 134])

#### Filtering function

In [76]:
def filter_valid_trial_indices(trial_list, first_visible_session, chosen_wall_session):
    """Return the trial indices that have every regressor populated.

    A trial is kept when it is a HighLow trial, its choice code is not NaN,
    and its first-seen code is greater than 0.

    Parameters
    ----------
    trial_list : sequence
        Trials of a single session.
    first_visible_session : numpy.ndarray
        Per-trial first-seen code (one entry per trial in ``trial_list``).
    chosen_wall_session : numpy.ndarray
        Per-trial choice code (one entry per trial in ``trial_list``).

    Returns
    -------
    numpy.ndarray
        Integer indices into ``trial_list`` of the valid trials. The array
        always has an integer dtype, including when it is empty, so it can
        be used directly for indexing.

    Notes
    -----
    NOTE(review): only NaN choices are rejected; a choice code of 0 passes
    the mask — confirm that this is intended.
    """
    # identify indices of trial list with HighLow trials; coerce to an index dtype
    # once so that an empty result does not turn into a float array
    high_low_trial_indices = np.asarray(
        get_indices.get_trials_trialtype(trial_list, trial_type=globals.HIGH_LOW),
        dtype=np.intp,
    )

    # apply masks for one visible wall, and a retrievable choice, to the set of HighLow trials
    retrievable_choice_mask = ~np.isnan(chosen_wall_session[high_low_trial_indices])
    one_wall_first_visible_mask = first_visible_session[high_low_trial_indices] > 0

    # combine masks into one and map back onto original trial indices
    final_mask = retrievable_choice_mask & one_wall_first_visible_mask

    return high_low_trial_indices[final_mask]

### Create a dictionary to hold, for each session and player, regressor values for the session, only including trials with fully-populated regressors
- Fields for each of the regressors applied to all trials
- Fields for each of the regressors with only valid trials


In [77]:
# With solo=True only player 0 is analysed; otherwise players 0 and 1
solo = False
player_ids = [0] if solo else [0,1]


In [78]:
# Skeleton of the results: session -> player -> regressor set -> regressor name -> None.
# 'regressors' will hold values for all trials; 'regressors_filtered' only for trials
# with fully-populated regressor values.
analysis_results = {
    session_id: {
        player_id: {
            regressor_set: dict.fromkeys(('wall_sep', 'first_seen', 'choice', 'outcome'))
            for regressor_set in ('regressors', 'regressors_filtered')
        }
        for player_id in player_ids
    }
    for session_id in np.arange(len(trial_lists))
}

### Populate the dictionary with data

In [None]:
# Passed to extract_player_choice in the population loop below, which forwards it to get_player_wall_choice
inferred_choice = True

In [85]:
# Spot-check one entry (session 5, player 1).
# NOTE(review): this cell sits above the loop that fills analysis_results, so on a fresh
# top-to-bottom run it shows the initial None rather than an array — consider moving it below the loop.
analysis_results[5][1]['regressors']['wall_sep']

array([1., 2., 4., 2., 2., 2., 4., 2., 2., 2., 2., 1., 4., 1., 1., 1., 1.,
       2., 2., 1., 1., 2., 1., 2., 2., 2., 2., 1., 1., 2., 2., 1., 2., 1.,
       2., 2., 2., 4., 4., 2., 2., 2., 1., 1., 1., 4., 4., 1., 1., 2., 2.,
       1., 2., 4., 2., 2., 2., 1., 1., 2., 1., 2., 1., 2., 2., 1., 2., 2.,
       1., 4., 1., 2., 4., 1., 4., 2., 1., 2., 4., 2., 2., 1., 2., 2., 2.,
       4., 2., 1., 2., 1., 1., 1., 2., 4., 2., 2., 1., 2., 2., 2., 2., 2.,
       2., 1., 4., 2., 2., 2., 2., 4., 1., 2., 2., 4., 4., 2., 2., 1., 2.,
       2., 4., 2., 2., 2., 1., 4., 1., 2., 4., 4., 1., 4., 4., 2., 4.])

In [None]:


# Fill analysis_results in place: for every session and player, compute each regressor
# over all trials, then store the subset of trials that have every regressor populated.
for session_id, players in analysis_results.items():

    # the trial list is the same for every player of a session
    trial_list = trial_lists[session_id]

    for player_id, data in players.items():

        # get regressors for all trials in session
        player_data = data['regressors']
        player_data['wall_sep'] = extract_wall_sep(trial_list)
        player_data['first_seen'] = extract_first_wall_seen(trial_list, player_id)
        player_data['choice'] = extract_player_choice(trial_list, player_id, inferred_choice)
        player_data['outcome'] = extract_trial_outcome(trial_list, player_id)

        # filter the trials to only include those with fully-populated regressors
        filtered_indices = filter_valid_trial_indices(trial_list, player_data['first_seen'], player_data['choice'])

        # look up this player's filtered dict before branching, so the empty case
        # writes to the right player instead of a stale (or undefined) reference
        player_data_valid_trials = data['regressors_filtered']
        for regressor_name, all_trial_values in player_data.items():
            if filtered_indices.size > 0:
                player_data_valid_trials[regressor_name] = all_trial_values[filtered_indices]
            else:
                # no valid trials for this session/player: store empty arrays
                player_data_valid_trials[regressor_name] = np.array([])




wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall_becomes_visible_index for trial 1 is [nan  0.]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall ini

  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall_becomes_visible_index for trial 1 is [0. 0.]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[ True]
 [False]]
wall initial visibility is: [[False]
 [False]]
wall initial visibility is: [[ True]
 [ True]]
wall initial visibility is: [[False]
 [ True]]
wall initial visibility is: [[ True]
 [ True]]
wall initi

#### Populate a dataframe, with a row for each trial, and fields for regressors (only including trials with fully-populated regressors)