In [12]:
%load_ext autoreload
%autoreload 2

import parse_data.prepare_data as prepare_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import globals
import data_strings
import data_extraction.get_indices as get_indices
import analysis.wall_visibility_and_choice as wall_visibility_and_choice
from trajectory_analysis import trajectory_vectors
from plotting import plot_octagon
import parse_data.identify_filepaths as identify_filepaths 
from data_extraction.trial_list_filters import filter_trials_other_visible
from analysis import opponent_visibility


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Create a dataframe to feed into a GLM using D2H, D2L, First Seen, Wall Separation, and PlayerID (random effect) to predict P(Choose High)

In [13]:
data_folder = data_strings.DATA_FOLDER
json_filenames_social, json_filenames_solo = identify_filepaths.get_filenames()

In [14]:
json_filenames_social

['240913_1\\2024-09-13_11-31-00_YW13_JL13_Social.json',
 '240927_1\\2024-09-27_14-25-20_SH27_EN27_Social.json',
 '241017_1\\2024-10-17_14-28-40_SP17_AW17_Social.json',
 '241017_2\\2024-10-17_16-41-38_ZH17_EM17_Social.json',
 '241112_1\\2024-11-12_13-31-14_KA12_WM12_Social.json',
 '241112_2\\2024-11-12_15-23-24_FA12_SL12_Social.json',
 '241113_1\\2024-11-13_14-18-54_NK13_RD13_Social.json',
 '241113_2\\2024-11-13_15-28-07_YL13_HC13_Social.json',
 '241119_1\\2024-11-19_14-24-49_AV19_XG19_Social.json',
 '241119_2\\2024-11-19_15-22-56_SB19_HH19_Social.json',
 '241120_1\\2024-11-20_14-17-44_JS20_RR20_Social.json',
 '241120_2\\2024-11-20_15-16-21_ZS20_VC20_Social.json',
 '241203_1\\2024-12-03_14-31-51_PO03_NN03_Social.json',
 '241203_2\\2024-12-03_15-27-28_EX03_BC03_Social.json',
 '241210_1\\2024-12-10_14-21-17_TE10_TK10_Social.json',
 '241210_2\\2024-12-10_15-20-11_RK10_RU10_Social.json',
 '241219_1\\2024-12-19_15-28-24_JU19_SY19_Social.json',
 '241220_1\\2024-12-20_17-44-50_KS20_CS20_Social

In [15]:
# restrict data for testing
json_filenames_social = json_filenames_social[:8]
json_filenames_solo = json_filenames_solo[:32]

In [16]:
df, trial_lists_social = prepare_data.prepare_data(data_folder, json_filenames_social, combine=False)

filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240913_1\2024-09-13_11-31-00_YW13_JL13_Social.json
Data is from period before 2024-09-13 00:00:00
Running dataframe through playerinfo_playerposition_conversion.
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240927_1\2024-09-27_14-25-20_SH27_EN27_Social.json
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\241017_1\2024-10-17_14-28-40_SP17_AW17_Social.json
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\241017_2\2024-10-17_16-41-38_ZH17_EM17_Social.json
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\241112_1\2024-11-12_13-31-14_KA12_WM12_Social.json
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_jso

In [17]:
df, trial_lists_solo = prepare_data.prepare_data(data_folder, json_filenames_solo, combine=False)

filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240913_1\2024-09-13_11-23-37_YW13_FirstSolo.json
Data is from period before 2024-09-13 00:00:00
Running dataframe through playerinfo_playerposition_conversion.
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240913_1\2024-09-13_11-53-34_YW13_SecondSolo.json
Data is from period before 2024-09-13 00:00:00
Running dataframe through playerinfo_playerposition_conversion.
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240913_1\2024-09-13_11-23-50_JL13_FirstSolo.json
Data is from period before 2024-09-13 00:00:00
Running dataframe through playerinfo_playerposition_conversion.
Loading complete.
Preprocessing complete.
filepath: C:\Users\tomha\OneDrive\PhD\SWC\data\pseudonymised_json_files\240913_1\2024-09-13_11-53-56_JL13_SecondSolo.json
Data is from period before 2024-09-13 00:00:00
Running 

In [7]:
# create a list of combined pre- and post- social solo sessions, removing 5 trials from each pre
trial_lists_combined_solo = []
cut_trials = 5
for i in range(0,len(trial_lists_solo), 2): # iterate over each individual
    # get the trial lists for both solo sessions
    trial_list_first_solo = trial_lists_solo[i]
    trial_list_second_solo = trial_lists_solo[i + 1]

    # cut first cut_trials trials (learning controls/associations) from the first solo
    trial_list_first_solo = trial_list_first_solo[cut_trials:]

    # combine trial lists from the first and second solo sessions (the current and consecutive index)
    trial_list = trial_list_first_solo + trial_list_second_solo

    trial_lists_combined_solo.append(trial_list)


In [46]:
len(trial_lists_combined_solo), len(trial_lists_social)

(16, 8)

### Regressor value extraction functions (for one session)

In [18]:
def extract_wall_sep(trial_list):
    ''' Return wall separation for one session '''
    
    wall_sep = np.full(len(trial_list), np.nan)
    for i, trial in enumerate(trial_list):
        wall_sep_this_trial = get_indices.get_wall_difference(trial=trial)
        wall_sep[i] = wall_sep_this_trial

    return wall_sep


def extract_first_wall_seen(trial_list, player_id):
    ''' Return first visible walls for one player across one session.
        1 for WALL_1, 2 for WALL_2, np.nan for no visible wall (or both initially visible) '''
        
    high_wall_first_visible_session = wall_visibility_and_choice.get_given_wall_first_visible_session(trial_list,
                                                                                                        player_id,
                                                                                                        wall_index=0,
                                                                                                        current_fov=110)

    low_wall_first_visible_session = wall_visibility_and_choice.get_given_wall_first_visible_session(trial_list,
                                                                                                        player_id, 
                                                                                                        wall_index=1,
                                                                                                        current_fov=110)
    low_wall_first_visible_session = low_wall_first_visible_session*2
    
    first_visible_session = high_wall_first_visible_session + low_wall_first_visible_session

    first_visible_session[first_visible_session == 0] = np.nan

    return first_visible_session

# double check code
def extract_distances_to_walls(trial_list, player_id):
    ''' Return a trial_num, 2 sized array, where column 1
        is distance to WALL_1, and column 2 is distance to WALL_2.
        Data applies to one full session, and specified player_id'''
    
    # get octagon alcove coordinates
    alcove_coordinates = plot_octagon.return_alcove_centre_points()

    positions_session = np.full((len(trial_list), 2), np.nan)
    walls_session = np.full((len(trial_list), 2), np.nan)
    distances_session = np.full((len(trial_list), 2), np.nan)

    # get distances for each trial in the session
    for i, trial in enumerate(trial_list):
        # get WALL_1 and WALL_2 coordinates
        trial_walls = get_indices.get_walls(trial)
        high_wall_idx = trial_walls[0] - 1
        low_wall_idx = trial_walls[1] - 1
        trial_high_coordinates = alcove_coordinates[:,high_wall_idx]
        trial_low_coordinates = alcove_coordinates[:, low_wall_idx]

        # index trajectory at timepoint 0 to get player starting coordinates
        trajectory = trajectory_vectors.extract_trial_player_trajectory(trial=trial, player_id=player_id)
        trial_start_position = trajectory[:,0]

        # find distance between start position and WALL_1/WALL_2
        d2h = np.linalg.norm(trial_high_coordinates - trial_start_position) # WALL_1
        d2l = np.linalg.norm(trial_low_coordinates - trial_start_position) # WALL_2

        walls_session[i,:] = trial_walls 
        positions_session[i,:] = trial_start_position
        distances_session[i,:] = np.hstack((d2h, d2l))

    return distances_session


def extract_opponent_visibility_slice_onset(trial_list, player_id, current_fov=110):
    ''' Return opponent visibility at slice onset for one player for one session '''
    
    # slice onset angle of Other from self centre FoV
    orientation_angle_to_other_session = opponent_visibility.get_angle_of_opponent_from_player_session(player_id, trial_list)

    # boolean array of Other visible
    other_visible_session = opponent_visibility.get_other_visible_session(orientation_angle_to_other_session, current_fov)
    other_visible_session = other_visible_session.astype(int) # converted to int for categorical regressor

    return other_visible_session


def extract_player_choice(trial_list, player_id, inferred_choice=True):
    ''' Return (inferred by default) player choice for one player for one session.
        Where inferred and actual choice are both missing, values are np.nan '''

    # array of wall numbers where player won, np.nan where player did not
    player_choice = wall_visibility_and_choice.get_player_wall_choice(trial_list, player_id,
                                                                        inferred_choice=inferred_choice, debug=False)

    # 2 where player chose High, 0 where player chose Low, np.nan where player lost
    high_wall_chosen_session = get_indices.was_given_wall_chosen(trial_list, player_choice,
                                                                    given_wall_index=0)
    high_wall_chosen_session = high_wall_chosen_session*2

    # 1 where player chose Low, 0 where player chose High, np.nan where player lost
    low_wall_chosen_session  = get_indices.was_given_wall_chosen(trial_list, player_choice,
                                                                    given_wall_index=1)

    # 1 or 2 where player chose Low or High respectively, np.nan where player lost
    chosen_wall_session = high_wall_chosen_session + low_wall_chosen_session

    return chosen_wall_session


def extract_trial_outcome(trial_list, player_id):
    ''' Return whether this player won the trial for one player for one session '''
    
    trigger_activators = get_indices.get_trigger_activators(trial_list)
    this_player_won_session = (trigger_activators-1)*-1 if player_id == 0 else trigger_activators

    return this_player_won_session

### Extract 1D arrays for each player for the regressor values (Sandbox, applied to a single trial list)

In [70]:
trial_list = trial_lists_social[6]
trial_indices = get_indices.get_trials_trialtype(trial_list, trial_type=globals.HIGH_LOW)
trial_list = [trial_list[i] for i in trial_indices]
player_id = 0
current_fov = 110

In [71]:
len(trial_list)

93

#### Wall Separation

In [None]:

wall_sep = np.full(len(trial_list), np.nan)
for i, trial in enumerate(trial_list):
    wall_sep_this_trial = get_indices.get_wall_difference(trial=trial)
    wall_sep[i] = wall_sep_this_trial


In [14]:
np.count_nonzero(wall_sep == 4)/np.count_nonzero(wall_sep)

0.3157894736842105

#### First Seen

In [16]:
    
high_wall_first_visible_session = wall_visibility_and_choice.get_given_wall_first_visible_session(trial_list,
                                                                                                    player_id,
                                                                                                    wall_index=0,
                                                                                                    current_fov=110)

low_wall_first_visible_session = wall_visibility_and_choice.get_given_wall_first_visible_session(trial_list,
                                                                                                     player_id, 
                                                                                                     wall_index=1,
                                                                                                     current_fov=110)
low_wall_first_visible_session = low_wall_first_visible_session*2



first_visible_session = high_wall_first_visible_session + low_wall_first_visible_session

In [17]:
first_visible_session

array([2., 1., 1., 2., 0., 0., 2., 1., 1., 1., 2., 2., 1., 1., 2., 2., 2.,
       2., 2., 1., 1., 0., 0., 2., 2., 0., 2., 2., 1., 2., 2., 2., 2., 2.,
       2., 2., 0., 1., 1., 2., 2., 0., 2., 1., 0., 0., 2., 2., 1., 0., 2.,
       2., 0., 2., 0., 0., 1., 1., 1., 1., 2., 2., 2., 2., 1., 2., 2., 2.,
       2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 2., 2., 0., 2., 1., 1., 2.,
       2., 2., 2., 1., 1., 0., 2., 2., 2., 1., 1., 2., 1., 1., 0., 2., 1.,
       1., 2., 1., 2., 2., 2., 2., 1., 1., 2., 1., 0.])

#### Distance to High, Low
(Check code validity)

In [None]:
# get octagon alcove coordinates
alcove_coordinates = plot_octagon.return_alcove_centre_points()

start_positions = np.full((len(trial_list), 2), np.nan)
session_walls = np.full((len(trial_list), 2), np.nan)
distances = np.full((len(trial_list), 2), np.nan)

# get distances for each trial in the session
for i, trial in enumerate(trial_list):
    # get WALL_1 and WALL_2 for each trial in the session
    trial_walls = get_indices.get_walls(trial)
    high_wall_idx = trial_walls[0] - 1
    low_wall_idx = trial_walls[1] - 1
    trial_high_coordinates = alcove_coordinates[:,high_wall_idx]
    trial_low_coordinates = alcove_coordinates[:, low_wall_idx]

    # index trajectory at timepoint 0 to get starting position
    trajectory = trajectory_vectors.extract_trial_player_trajectory(trial=trial, player_id=player_id)
    trial_start_position = trajectory[:,0]

    # find distance between start position and WALL_1/WALL_2
    d2h = np.linalg.norm(trial_high_coordinates - trial_start_position) # WALL_1
    d2l = np.linalg.norm(trial_low_coordinates - trial_start_position)

    session_walls[i,:] = trial_walls 
    start_positions[i,:] = trial_start_position
    distances[i,:] = np.hstack((d2h, d2l))


#### Opponent visibility

In [82]:
# slice onset angle of Other from self centre FoV
orientation_angle_to_other_session = opponent_visibility.get_angle_of_opponent_from_player_session(player_id, trial_list)

# boolean array of Other visible
other_visible_session = opponent_visibility.get_other_visible_session(orientation_angle_to_other_session, current_fov)
other_visible_session = other_visible_session.astype(int) # converted to int for categorical regressor


In [81]:
other_visible_session

array([0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0])

#### Player choice

In [None]:
# array of wall numbers where player choice is confident, np.nan where player lost and choice is unconfident
player_choice = wall_visibility_and_choice.get_player_wall_choice(trial_list, player_id,
                                                                    inferred_choice=True, debug=False)

# 2 where player chose High, 0 where player chose Low, np.nan where player lost
high_wall_chosen_session = get_indices.was_given_wall_chosen(trial_list, player_choice,
                                                                given_wall_index=0)
high_wall_chosen_session = high_wall_chosen_session*2

# 1 where player chose Low, 0 where player chose High, np.nan where player lost
low_wall_chosen_session  = get_indices.was_given_wall_chosen(trial_list, player_choice,
                                                                given_wall_index=1)

# 1 or 2 where player chose Low or High respectively, np.nan where player lost
chosen_wall_session_wins_and_losses = high_wall_chosen_session + low_wall_chosen_session

### Filter trials to only include those with full information for the GLM 
- Remove trials without recorded choice (np.nan in choice array) (whether I'm using inferred-choice or not)
- Remove trials without a first visible wall (np.nan in first seen array)
- Filter HighLow trials initially

The best way to do the above may be to keep an array of 'original indices', filter this array in the same way as I do my normal trial list filtering, and then I have an indices array with preserved numbering that I can use to index valid trials to add to my dataframe

In [109]:
np.sum(one_wall_first_visible_mask)

np.int64(82)

In [110]:
np.sum(retrievable_choice_mask)

np.int64(87)

In [104]:
# identify indices of trial list with HighLow trials
high_low_trial_indices = get_indices.get_trials_trialtype(trial_list, trial_type=globals.HIGH_LOW)

# get choice and first visible wall data for one player, session
player_choice_session = extract_player_choice(trial_list, player_id)
first_wall_seen_session = extract_first_wall_seen(trial_list, player_id)


# apply masks for one visible wall and a retrievable choice to the set of HighLow trials
retrievable_choice_mask = ~np.isnan(player_choice_session[high_low_trial_indices])
one_wall_first_visible_mask = ~np.isnan(first_wall_seen_session[high_low_trial_indices])

# combine masks into one 
final_mask = retrievable_choice_mask & one_wall_first_visible_mask

In [115]:
filtered_indices = high_low_trial_indices[final_mask]
filtered_trials = [trial_list[i] for i in filtered_indices]

#### Filtering function

In [9]:
def filter_valid_trial_indices(trial_list, player_id, solo=False):
    ''' Return the indices of a filtered trial list that correspond to trials in which there is a 
        retrievable choice and an identifiable first seen wall.'''

    # identify indices of trial list with HighLow trials
    high_low_trial_indices = get_indices.get_trials_trialtype(trial_list, trial_type=globals.HIGH_LOW)

    print(f"{high_low_trial_indices.size} high_low_trial_indices for player_id {player_id}")


    # get choice and first visible wall data for one player, session
    if not solo:
        player_choice_session = extract_player_choice(trial_list, player_id)
    else:
        player_choice_session = extract_player_choice(trial_list, player_id, inferred_choice=False)
    
    first_wall_seen_session = extract_first_wall_seen(trial_list, player_id)

    # apply masks for one visible wall and a retrievable choice to the set of HighLow trials
    retrievable_choice_mask = ~np.isnan(player_choice_session[high_low_trial_indices])

    print(f"{np.sum(retrievable_choice_mask)} retrievable choices for player_id {player_id}")

    one_wall_first_visible_mask = ~np.isnan(first_wall_seen_session[high_low_trial_indices])

    print(f"{np.sum(one_wall_first_visible_mask)} first visible walls for player_id {player_id}")


    # combine masks
    final_mask = retrievable_choice_mask & one_wall_first_visible_mask

    # filter the trial list indices based on masks
    filtered_indices = high_low_trial_indices[final_mask]

    assert filtered_indices.size > 0
    
    print(f"{filtered_indices.size} filtered trials for player_id {player_id}")
    
    return filtered_indices

### Create a dictionary to hold, for each session and player, regressor values for the session, only including trials with fully-populated regessors
- Fields for each of the regressors applied to all trials
- Fields for each of the regressors with only valid trials


In [19]:
solo = False
player_ids = [0] if solo else [0,1]


In [20]:
analysis_results = {
    experiment_id: {
        player_id: {
            session_type: {

                'regressors': {
                    'wall_sep': None,
                    'first_seen': None,
                    'd2h': None,
                    'd2l': None,
                    'opponent_visible': None,
                },

                'dependent': {
                    'choice': None
                }
                
            }
            for session_type in ['solo', 'social']
        }   
        for player_id in player_ids
    }
    for experiment_id in np.arange(len(trial_lists_social))
}

In [None]:
analysis_results_solo = {
    session_id: {
        player_id: {

            # unfiltered regressors
            'regressors': {
                'wall_sep': None,
                'first_seen': None,
                'd2h': None,
                'd2l': None,
                'opponent_visible': None,
            },

            # regressors filtered for trials fully-populated regressor values
            'regressors_filtered': {
                'wall_sep': None,
                'first_seen': None,
                'd2h': None,
                'd2l': None,
                'opponent_visible': None,
            },

            'dependent': {
                'choice': None
            }

        }
        for player_id in player_ids
    }
    for session_id in np.arange(len(trial_lists_social))
}

### Populate the dictionary with data

In [52]:
inferred_choice = True

In [98]:
np.count_nonzero(analysis_results[14][1]['regressors_filtered']['outcome'] == 1)

48

In [121]:
len(trial_lists_solo)

32

In [78]:
analysis_results[0][1]['social']['dependent']['outcome'].size

78

In [21]:
for experiment_id, players in analysis_results.items():
    for player_id, data in players.items():
        
        # get the trial lists for this session and player
        trial_list_social = trial_lists_social[experiment_id]
        trial_list_solo = trial_lists_combined_solo[experiment_id*2 + player_id] # check this fits the above

        # filter the trial list for regressor valid trials
        print(f"Trial list social length for experimentId {experiment_id} and playerId {player_id}: {len(trial_list_social)}")
        trial_list_social = [trial_list_social[i] for i in filter_valid_trial_indices(trial_list_social, player_id)]
        trial_list_solo = [trial_list_solo[i] for i in filter_valid_trial_indices(trial_list_solo, player_id=0, solo=True)]
        
        # regressors social
        player_data = analysis_results[experiment_id][player_id]['social']
        distances = extract_distances_to_walls(trial_list_social, player_id)
        player_data['regressors']['wall_sep'] = extract_wall_sep(trial_list_social)
        player_data['regressors']['first_seen'] = extract_first_wall_seen(trial_list_social, player_id)
        player_data['regressors']['d2h'] = distances[:,0]
        player_data['regressors']['d2l'] = distances[:,1]
        player_data['regressors']['opponent_visible'] = extract_opponent_visibility_slice_onset(trial_list_social, player_id)

        # dependent variable social
        player_data['dependent']['outcome'] = extract_trial_outcome(trial_list_social, player_id)

        # regressors solo
        player_data = analysis_results[experiment_id][player_id]['solo']
        distances = extract_distances_to_walls(trial_list_solo, player_id=0)
        player_data['regressors']['wall_sep'] = extract_wall_sep(trial_list_solo)
        player_data['regressors']['first_seen'] = extract_first_wall_seen(trial_list_solo, player_id=0)
        player_data['regressors']['d2h'] = distances[:,0]
        player_data['regressors']['d2l'] = distances[:,1]

        # dependent variable social
        player_data['dependent']['outcome'] = extract_trial_outcome(trial_list_solo, player_id)

        # regressors solo
        player_data = analysis_results[experiment_id][player_id]['solo']





Trial list social length for experimentId 0 and playerId 0: 129
102 high_low_trial_indices for player_id 0
82 retrievable choices for player_id 0
82 first visible walls for player_id 0
66 filtered trials for player_id 0
52 high_low_trial_indices for player_id 0
52 retrievable choices for player_id 0
38 first visible walls for player_id 0
38 filtered trials for player_id 0
Trial list social length for experimentId 0 and playerId 1: 129
102 high_low_trial_indices for player_id 1
96 retrievable choices for player_id 1
84 first visible walls for player_id 1
78 filtered trials for player_id 1
51 high_low_trial_indices for player_id 0
51 retrievable choices for player_id 0
44 first visible walls for player_id 0
44 filtered trials for player_id 0
Trial list social length for experimentId 1 and playerId 0: 104
79 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


48 retrievable choices for player_id 0
50 first visible walls for player_id 0
30 filtered trials for player_id 0
52 high_low_trial_indices for player_id 0
52 retrievable choices for player_id 0
45 first visible walls for player_id 0
45 filtered trials for player_id 0
Trial list social length for experimentId 1 and playerId 1: 104
79 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


79 retrievable choices for player_id 1
59 first visible walls for player_id 1
59 filtered trials for player_id 1
50 high_low_trial_indices for player_id 0
50 retrievable choices for player_id 0
45 first visible walls for player_id 0
45 filtered trials for player_id 0
Trial list social length for experimentId 2 and playerId 0: 111
93 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


75 retrievable choices for player_id 0
66 first visible walls for player_id 0
51 filtered trials for player_id 0
39 high_low_trial_indices for player_id 0
39 retrievable choices for player_id 0
31 first visible walls for player_id 0
31 filtered trials for player_id 0
Trial list social length for experimentId 2 and playerId 1: 111
93 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


80 retrievable choices for player_id 1
69 first visible walls for player_id 1
61 filtered trials for player_id 1
50 high_low_trial_indices for player_id 0
50 retrievable choices for player_id 0
39 first visible walls for player_id 0
39 filtered trials for player_id 0
Trial list social length for experimentId 3 and playerId 0: 176
136 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


101 retrievable choices for player_id 0
118 first visible walls for player_id 0
89 filtered trials for player_id 0
56 high_low_trial_indices for player_id 0
56 retrievable choices for player_id 0
46 first visible walls for player_id 0
46 filtered trials for player_id 0
Trial list social length for experimentId 3 and playerId 1: 176
136 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


130 retrievable choices for player_id 1
115 first visible walls for player_id 1
109 filtered trials for player_id 1
67 high_low_trial_indices for player_id 0
67 retrievable choices for player_id 0
51 first visible walls for player_id 0
51 filtered trials for player_id 0
Trial list social length for experimentId 4 and playerId 0: 101
77 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


50 retrievable choices for player_id 0
36 first visible walls for player_id 0
23 filtered trials for player_id 0
56 high_low_trial_indices for player_id 0
56 retrievable choices for player_id 0
29 first visible walls for player_id 0
29 filtered trials for player_id 0
Trial list social length for experimentId 4 and playerId 1: 101
77 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


63 retrievable choices for player_id 1
58 first visible walls for player_id 1
47 filtered trials for player_id 1
51 high_low_trial_indices for player_id 0
51 retrievable choices for player_id 0
39 first visible walls for player_id 0
39 filtered trials for player_id 0
Trial list social length for experimentId 5 and playerId 0: 122
93 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


90 retrievable choices for player_id 0
71 first visible walls for player_id 0
69 filtered trials for player_id 0
77 high_low_trial_indices for player_id 0
77 retrievable choices for player_id 0
58 first visible walls for player_id 0
58 filtered trials for player_id 0
Trial list social length for experimentId 5 and playerId 1: 122
93 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


75 retrievable choices for player_id 1
64 first visible walls for player_id 1
54 filtered trials for player_id 1
76 high_low_trial_indices for player_id 0
76 retrievable choices for player_id 0
54 first visible walls for player_id 0
54 filtered trials for player_id 0
Trial list social length for experimentId 6 and playerId 0: 114
93 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


87 retrievable choices for player_id 0
82 first visible walls for player_id 0
76 filtered trials for player_id 0
39 high_low_trial_indices for player_id 0
39 retrievable choices for player_id 0
27 first visible walls for player_id 0
27 filtered trials for player_id 0
Trial list social length for experimentId 6 and playerId 1: 114
93 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


92 retrievable choices for player_id 1
76 first visible walls for player_id 1
75 filtered trials for player_id 1
46 high_low_trial_indices for player_id 0
46 retrievable choices for player_id 0
39 first visible walls for player_id 0
39 filtered trials for player_id 0
Trial list social length for experimentId 7 and playerId 0: 134
109 high_low_trial_indices for player_id 0


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


84 retrievable choices for player_id 0
81 first visible walls for player_id 0
60 filtered trials for player_id 0
41 high_low_trial_indices for player_id 0
41 retrievable choices for player_id 0
23 first visible walls for player_id 0
23 filtered trials for player_id 0
Trial list social length for experimentId 7 and playerId 1: 134
109 high_low_trial_indices for player_id 1


  cosine_similarity_this_wall = dot_product[wall_num]/(player_to_alcove_vector_norms[wall_num] * player_vector_norm)
  return np.argmax(np.nanmean(cosine_similarity_trajectory, axis=1))
  highest_alignment_val = np.max(np.nanmean(cosine_similarity_trajectory, axis=1))


103 retrievable choices for player_id 1
86 first visible walls for player_id 1
81 filtered trials for player_id 1
42 high_low_trial_indices for player_id 0
42 retrievable choices for player_id 0
28 first visible walls for player_id 0
28 filtered trials for player_id 0


#### Populate a dataframe, with a row for each trial, and fields for regressors (only including trials with fully-populated regressors)

In [22]:
glm_df_social = pd.DataFrame()

for session_id, players in analysis_results.items():
    for player_id in players:
        
        # take each filtered_regressor array and fill the relevant df field for this player
        player_data = analysis_results[session_id][player_id]['social']['regressors']
        outcome = analysis_results[session_id][player_id]['social']['dependent']['outcome']
        df_player = pd.DataFrame(
                    {
                        "SessionID" : session_id,
                        "PlayerID" : player_id,
                        "GlmPlayerID" : session_id*2 + player_id,
                        "ChooseHigh" : outcome,
                        "WallSep" : player_data['wall_sep'],
                        "FirstSeenWall" : player_data['first_seen'],
                        "D2H" : player_data['d2h'],
                        "D2L" : player_data['d2l'],
                        "OpponentVisible" : player_data['opponent_visible']
                    }
        )


        # append this smaller dataframe to the the full dataframe
        glm_df_social = pd.concat([glm_df_social, df_player], ignore_index=True)

In [25]:
glm_df_solo = pd.DataFrame()

for session_id, players in analysis_results.items():
    for player_id in players:
        
        # take each filtered_regressor array and fill the relevant df field for this player
        player_data = analysis_results[session_id][player_id]['solo']['regressors']
        outcome = analysis_results[session_id][player_id]['solo']['dependent']['outcome']
        df_player = pd.DataFrame(
                    {
                        "SessionID" : session_id,
                        "PlayerID" : player_id,
                        "GlmPlayerID" : session_id,
                        "ChooseHigh" : outcome,
                        "WallSep" : player_data['wall_sep'],
                        "FirstSeenWall" : player_data['first_seen'],
                        "D2H" : player_data['d2h'],
                        "D2L" : player_data['d2l']
                    }
        )

        # append this smaller dataframe to the the full dataframe
        glm_df_solo = pd.concat([glm_df_solo, df_player], ignore_index=True)


glm_df_solo["FirstSeenWall"] = glm_df_solo["FirstSeenWall"].astype("category")
glm_df_solo["WallSep"] = glm_df_solo["WallSep"].astype("category")

In [None]:
glm_df_solo[(glm_df_solo['SessionID'] == 2) & (glm_df_solo['PlayerID'] == 0)].iloc[-20:-10]

In [119]:
glm_df['ChooseHigh']

0       2.0
1       1.0
2       1.0
3       1.0
4       2.0
       ... 
2466    1.0
2467    1.0
2468    1.0
2469    2.0
2470    1.0
Name: ChooseHigh, Length: 2471, dtype: float64

### Build the GLM in statsmodels

In [132]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [153]:
# # Convert categorical predictors into factors
glm_df["Separation"] = glm_df["WallSep"].astype("category")
glm_df["Outcome"] = glm_df["PlayerWin"].astype("category")
glm_df["FirstSeenWall"] = glm_df["FirstSeenWall"].astype("category")

# Fit a logistic regression (GLM with binomial link)
glm_model = smf.glm(
    formula="ChooseHigh ~ WallSep + PlayerWin + FirstSeenWall",
    data=glm_df,
    family=sm.families.Binomial()
).fit()

# Display the results
print(glm_model.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:             ChooseHigh   No. Observations:                 2471
Model:                            GLM   Df Residuals:                     2467
Model Family:                Binomial   Df Model:                            3
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                   -inf
Date:                Mon, 10 Feb 2025   Deviance:                   1.2997e+05
Time:                        15:10:07   Pearson chi2:                 7.55e+18
No. Iterations:                     4   Pseudo R-squ. (CS):                nan
Covariance Type:            nonrobust                                         
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept             4.591e+15 

  special.gammaln(n - y + 1) + y * np.log(mu / (1 - mu + 1e-20)) +
  n * np.log(1 - mu + 1e-20)) * var_weights


In [None]:
smf.mixedlm("Outcome ~ X1 + X2", data, groups=data["PlayerID"])

In [None]:
glmm_model = smf.mixedlm(

    formula = "ChooseHigh ~ WallSep + PlayerWin + FirstSeenWall",
    data = glm_df,
    groups = glm_df['GlmPlayerID'],
    family = sm.families.Binomial()
).fit()

print(glmm_model.summary())

In [1]:
from pymer4.models import Lmer

model_formula = 'ChooseHigh ~ D2L + D2H + FirstSeenWall + WallSep + (1|GlmPlayerID)'

model = Lmer(model_formula, data=glm_df_solo, family='binomial')
results=model.fit()

print(results)

: 