#### Creating data for sequential choice models
The series of models I plan to use in my thesis project, inspired by habitat-selection functions from movement ecology, use environmental covariates of available resources (e.g., distance from current location, turning angle from previous bearing angle, size/point value, etc.) to predict whether a resource will be chosen next. By specifying a series of models containing different parameters, I will fit different cognitive heuristic strategies to human sequential choice data and determine parameters of significance to the foraging strategies of participants. 

To start, I must take existing foraging data in the form of logs of collected objects during play and expand it out to include the covariates of interest of all available objects. This data needs to also reflect the removal and reintroduction of collected objects during the course of play. 

In [1]:
# import libraries
import pandas as pd
import numpy as np
import re
from tqdm import tqdm

In [2]:
# read in data
forage_data = pd.read_csv(
    "../../data/piloting/march24_pilot_forage_data_w_ids.csv"
)
all_lvls_obj_location_data = pd.read_csv(
    "../../data/arrangements/object-location-data.csv"
)

In [None]:
forage_data = forage_data[forage_data.subject != 12345].index()
# need to figure out how to reset index now

##### Adding covariates and all available objects
The two covariates of interest needed to be added to this expanded data set are distance from current location and turning angle 

In [8]:
# distance matrices store distances between coconuts
def create_distance_matrices(obj_locs=pd.DataFrame):

    # create list for levels
    dist_matrix_list = []

    # for each level, create matrix of all distances
    for level in range(0, 10):
        # filter for this level
        level_locs = obj_locs[obj_locs.level == "_level_"+str(level+1)]

        # reset index for consistent indexing
        level_locs.index = [*range(0, len(level_locs))]

        # create empty matrix
        level_m = np.zeros((len(level_locs), len(level_locs)), dtype=float)

        # fill this level's matrix
        for i in range(0, len(level_locs)):
            for j in range(0, len(level_locs)):
                # check if same location
                if i == j:
                    level_m[i, j] = 0  # set to zero, save computation
                else:
                    # calc distance and store in matrix
                    level_m[i, j] = np.sqrt(
                        np.pow(level_locs.x[i]-level_locs.x[j], 2) +
                        np.pow(level_locs.y[i]-level_locs.y[j], 2)
                    )

        # add to list
        dist_matrix_list.append(level_m)

    return dist_matrix_list

In [9]:
d = create_distance_matrices(all_lvls_obj_location_data)

In [10]:
# calculate turning angles
def calculate_turning_angles(
    previous_head_angle=float, current_obj_id=int, obj_locs=pd.DataFrame
):

    # empty list for angles
    ta_list = []

    # loop through df and calculate each ta
    for obj in obj_locs.obj_ID:
        if obj == current_obj_id:
            ta_list.append(pd.NA)
        else:
            ta = np.atan2(
                obj_locs[obj_locs.obj_ID == obj].y[obj-1] -
                obj_locs[obj_locs.obj_ID ==
                         current_obj_id].y[current_obj_id-1],
                obj_locs[obj_locs.obj_ID == obj].x[obj-1] -
                obj_locs[obj_locs.obj_ID == current_obj_id].x[current_obj_id-1]
            ) - previous_head_angle

            ta_list.append(ta)

    return ta_list

In [11]:
# neighbors distance
def neighborhood_value(
    obj_ind=int, lvl_dist_matrix=list, num_neighbors=int, avail_indices=list
):
    # filter the df for currently available obj and their distances from focal obj
    active_obj_dists = lvl_dist_matrix[obj_ind][avail_indices]

    return np.sum(np.reciprocal(sorted(active_obj_dists)[1:num_neighbors]))

In [12]:
# combine together to create function to expand df
def expand_df_with_covariates(
    orig_df=pd.DataFrame,
    all_obj_locs_df=pd.DataFrame
):

    # create empty df to append
    expanded_df = pd.DataFrame({
        'subject': int,
        'level': str,
        'collection_num': int,
        'obj_ID': int,
        'point_value': int,
        'distance': float,
        'turning_angle': float,
        'neighbor_value': float,
        'used': int,
        'time': float
    }, index=[])

    # create df to track collections
    collect_df = pd.DataFrame({
        'obj_ID': int,
        'time_to_respawn': float
    }, index=[])

    # create all distance matrices
    lvl_dist_matrices = create_distance_matrices(all_obj_locs_df)

    # turn string levels into ints
    lvl_ints = [
        int(re.search(r'\d+', lvl_s).group()) for lvl_s in orig_df.level
    ]

    # loop through df and expand
    for i in tqdm(range(0, len(orig_df)-1)):

        # determine if end of level or run
        if (orig_df.iloc[i].level != orig_df.iloc[i+1].level) | (orig_df.iloc[i].subject != orig_df.iloc[i+1].subject):
            # do not add data and continue on to next level or subject
            continue

        # slice row
        row_of_interest = orig_df.iloc[i]

        # set level location distances and arrangement or reset on new level
        if (i == 0) | (orig_df.iloc[i].level != orig_df.iloc[i-1].level):
            # print('Evaluating subject ' +
            #       str(row_of_interest.subject) + ' on level '+str(lvl_ints[i]))

            # reset collection_num
            collect_num = 0

            # clear collect df
            collect_df = pd.DataFrame({
                'obj_ID': int,
                'time_to_respawn': float
            }, index=[])

            # use level from lvl_ints list to determine locations to consider
            curr_matrix = lvl_dist_matrices[lvl_ints[i]-1]

            # and level arrangement from the locations df
            curr_level_locs = all_lvls_obj_location_data[
                all_lvls_obj_location_data.level == row_of_interest.level
            ]

            # set heading angle from 0,0 to current location
            heading_angle = np.atan2(row_of_interest.y, row_of_interest.x)
        else:
            collect_num += 1

            # grab previously calculated turning angle from df
            heading_angle = expanded_df[(expanded_df.collection_num == (
                collect_num-1)) & (expanded_df.used == 1)].turning_angle.iloc[0]

        # see if there should be any respawns
        if len(collect_df) > 0:

            # see if any obj need to respawn
            obj_to_respawn = collect_df[collect_df.time_to_respawn <
                                        row_of_interest.time].obj_ID

            # filter out if there is an obj to respawn
            if len(obj_to_respawn) > 0:
                collect_df = collect_df[collect_df.time_to_respawn >
                                        row_of_interest.time]

        # find covariate values of each object available to forager
        # grab obj_ID
        curr_obj_ID = row_of_interest.obj_ID

        # distances from current object
        avail_obj_indices = [
            x for x in [*range(0, len(curr_matrix))] if x not in (collect_df.obj_ID-1)
        ]

        # get distances for available objects
        dist_from_curr = curr_matrix[curr_obj_ID-1][avail_obj_indices]

        # neighborhood influenced distance values of available objects
        nhood_val_from_curr = [
            neighborhood_value(obj_ID-1, curr_matrix, 3, avail_obj_indices) for obj_ID in avail_obj_indices
        ]

        # turning angles from current object
        tas_from_curr = calculate_turning_angles(
            heading_angle, curr_obj_ID, curr_level_locs
        )

        tas_from_curr = [
            tas_from_curr[i] for i in avail_obj_indices
        ]

        # collection, add to collect df and set respawn time
        collect_df = collect_df._append({
            'obj_ID': curr_obj_ID,
            'time_to_respawn': row_of_interest.time+5
        }, ignore_index=True)

        # set length of expansion
        rep_len = len(dist_from_curr)

        # create used/unused criterion vector
        used_list = [0]*rep_len

        # set the next collected obj to a 1
        used_list[orig_df.iloc[i+1].obj_ID-1] = 1

        # assemble into df
        row_expansion = pd.DataFrame({
            'subject': [row_of_interest.subject]*rep_len,
            'level': [row_of_interest.level]*rep_len,
            'collection_num': [collect_num]*rep_len,
            'obj_ID': [i+1 for i in avail_obj_indices],
            'point_value': [row_of_interest.point_value]*rep_len,
            'distance': dist_from_curr,
            'turning_angle': tas_from_curr,
            'neighbor_value': nhood_val_from_curr,
            'used': used_list,
            'time': [row_of_interest.time]*rep_len
        })

        # add to the new expanded df
        expanded_df = pd.concat(
            [expanded_df, row_expansion], ignore_index=True)

    # finish and return
    return expanded_df

In [13]:
forage_data.iloc[330:359]

Unnamed: 0,subject,level,object_size,x,y,points,time,obj_ID,point_value
330,1,_level_1,extra small,10.0,-20.0,660,222.626205,20,2
331,1,_level_1,extra small,15.0,-15.0,662,222.909302,26,2
332,1,_level_1,extra small,12.5,-12.5,664,223.026199,29,2
333,1,_level_1,extra small,15.0,6.5,666,224.726395,17,2
334,1,_level_1,extra small,15.0,10.0,668,225.009094,15,2
335,1,_level_1,extra small,17.5,12.5,670,225.241699,13,2
336,1,_level_1,extra small,20.0,15.0,672,225.541794,10,2
337,1,_level_1,extra small,17.5,17.5,674,225.825806,6,2
338,1,_level_1,extra small,15.0,20.0,676,226.125397,2,2
339,1,_level_1,extra small,12.5,17.5,678,226.308395,5,2


In [14]:
toy_df = forage_data.loc[0:353]
expand_df = expand_df_with_covariates(toy_df, all_lvls_obj_location_data)

  collect_df = collect_df._append({
  expanded_df = pd.concat(
100%|██████████| 349/349 [00:10<00:00, 32.45it/s]


In [None]:
len(forage_data[(forage_data.level == '_level_1')
    & (forage_data.subject == 1)])

350

In [22]:
num_obs

[68,
 67,
 66,
 65,
 64,
 63,
 62,
 61,
 60,
 59,
 58,
 57,
 57,
 57,
 57,
 56,
 57,
 56,
 55,
 54,
 54,
 58,
 57,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 57,
 57,
 57,
 56,
 57,
 57,
 57,
 57,
 56,
 56,
 57,
 56,
 56,
 58,
 58,
 57,
 56,
 55,
 54,
 53,
 53,
 53,
 52,
 52,
 52,
 56,
 55,
 54,
 54,
 53,
 54,
 54,
 54,
 54,
 59,
 58,
 57,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 56,
 55,
 54,
 53,
 52,
 52,
 52,
 53,
 52,
 52,
 52,
 57,
 56,
 55,
 55,
 55,
 55,
 56,
 56,
 56,
 56,
 58,
 57,
 57,
 56,
 56,
 55,
 55,
 55,
 55,
 54,
 54,
 53,
 56,
 55,
 54,
 55,
 54,
 56,
 56,
 56,
 56,
 56,
 58,
 57,
 56,
 55,
 55,
 54,
 55,
 55,
 55,
 55,
 54,
 57,
 56,
 55,
 54,
 55,
 57,
 57,
 57,
 57,
 57,
 56,
 57,
 56,
 56,
 56,
 56,
 55,
 55,
 55,
 54,
 59,
 58,
 57,
 56,
 55,
 56,
 57,
 57,
 58,
 59,
 58,
 57,
 57,
 57,
 57,
 57,
 56,
 55,
 58,
 58,
 57,
 56,
 55,
 54,
 54,
 55,
 54,
 53,
 53,
 53,
 56,
 55,
 54,
 54,
 54,
 54,
 54,
 55,
 54,
 59,
 58,
 57,
 56,
 55,
 55,
 56,
 56,
 55,
 55,
 56,
