#### Creating data for sequential choice models
The series of models I plan to use in my thesis project, inspired by habitat-selection functions from movement ecology, uses environmental covariates of the available resources (e.g., distance from the current location, turning angle from the previous bearing, size/point value) to predict whether a resource will be chosen next. By specifying a series of models containing different parameters, I will fit different cognitive heuristic strategies to human sequential choice data and determine which parameters are significant to participants' foraging strategies. 

To start, I must take existing foraging data in the form of logs of collected objects during play and expand it out to include the covariates of interest of all available objects. This data needs to also reflect the removal and reintroduction of collected objects during the course of play. 

In [9]:
# import libraries
import pandas as pd
import numpy as np
import re

In [2]:
# read in data
# logs of the objects each participant collected during play
forage_data = pd.read_csv(
    filepath_or_buffer="../../data/piloting/march24_pilot_forage_data_w_ids.csv"
)

# positions and point values of the objects in every level arrangement
all_lvls_obj_location_data = pd.read_csv(
    filepath_or_buffer="../../data/arrangements/object-location-data.csv"
)

##### Adding covariates and all available objects
The two covariates of interest that need to be added to this expanded data set are the distance from the current location and the turning angle relative to the previous heading.

In [3]:
# distance matrices store distances between coconuts
def create_distance_matrices(obj_locs=pd.DataFrame, num_levels=10):
    """Build one pairwise Euclidean distance matrix per level.

    Parameters
    ----------
    obj_locs : pd.DataFrame
        Object locations with the columns ``level``, ``x`` and ``y``. Rows
        are assigned to levels by matching ``level`` against the labels
        ``"_level_1"`` ... ``"_level_<num_levels>"``. The ``pd.DataFrame``
        default is only a type placeholder kept for backward compatibility;
        a real frame must always be passed.
    num_levels : int, default 10
        Number of consecutive levels (starting at 1) to build matrices for.

    Returns
    -------
    list of np.ndarray
        ``num_levels`` square float matrices. Entry ``[i, j]`` of matrix
        ``k`` is the distance between the i-th and j-th rows (in the order
        they appear in ``obj_locs``) belonging to level ``k + 1``. A level
        without any rows yields an empty ``(0, 0)`` matrix.
    """
    dist_matrix_list = []

    for level in range(1, num_levels + 1):
        # filter for this level
        level_locs = obj_locs[obj_locs.level == "_level_" + str(level)]

        # positional arrays, so the frame's index labels are irrelevant
        xs = level_locs.x.to_numpy(dtype=float)
        ys = level_locs.y.to_numpy(dtype=float)

        # broadcasting a column against a row yields every pairwise
        # difference at once; hypot turns them into Euclidean distances
        dist_matrix_list.append(
            np.hypot(xs[:, None] - xs[None, :], ys[:, None] - ys[None, :])
        )

    return dist_matrix_list

In [36]:
# calculate turning angles
def calculate_turning_angles(
    previous_head_angle=float, current_obj_id=int, obj_locs=pd.DataFrame
):
    """Turning angle from the current object to every object in ``obj_locs``.

    For each row, the bearing from the current object to that row's object
    is computed with ``arctan2(dy, dx)`` and ``previous_head_angle`` is
    subtracted from it.

    Parameters
    ----------
    previous_head_angle : float
        Heading (radians) the forager had when arriving at the current
        object.
    current_obj_id : int
        ``obj_ID`` of the object the forager is currently at.
    obj_locs : pd.DataFrame
        Locations to evaluate, with the columns ``obj_ID``, ``x`` and ``y``.
        Any index is accepted; rows are read by position.

    Returns
    -------
    list
        One entry per row of ``obj_locs``, in row order: ``pd.NA`` for the
        current object itself, otherwise the angle in radians.

    Raises
    ------
    KeyError
        If ``obj_locs`` is non-empty but contains no row for
        ``current_obj_id``.

    Notes
    -----
    The difference is not wrapped, so values can fall outside
    ``[-pi, pi]``. The type-valued defaults in the signature are only
    placeholders; all three arguments must be supplied.
    """
    obj_ids = obj_locs.obj_ID.to_numpy()
    if len(obj_ids) == 0:
        return []

    xs = obj_locs.x.to_numpy(dtype=float)
    ys = obj_locs.y.to_numpy(dtype=float)

    # locate the current object once, by value rather than by index label,
    # so frames filtered from a larger table (non-zero-based index) work
    current_rows = np.flatnonzero(obj_ids == current_obj_id)
    if current_rows.size == 0:
        raise KeyError(f"obj_ID {current_obj_id} not found in obj_locs")
    curr_x = xs[current_rows[0]]
    curr_y = ys[current_rows[0]]

    ta_list = []
    for obj, x, y in zip(obj_ids, xs, ys):
        if obj == current_obj_id:
            # no angle is defined from an object to itself
            ta_list.append(pd.NA)
        else:
            ta_list.append(
                np.arctan2(y - curr_y, x - curr_x) - previous_head_angle
            )

    return ta_list

In [5]:
# one pairwise distance matrix per level, built from the object-location table
lvls = create_distance_matrices(obj_locs=all_lvls_obj_location_data)

In [29]:
# neighbors distance
def neighborhood_value(
    obj_ind=int, lvl_ind=int, lvl_matrices=list, num_neighbors=int, already_collected=list
):
    """Sum of inverse distances from one object to its nearest neighbours.

    Parameters
    ----------
    obj_ind : int
        Zero-based row index of the focal object in its level's matrix.
    lvl_ind : int
        Zero-based index of the level in ``lvl_matrices``.
    lvl_matrices : list of np.ndarray
        Per-level pairwise distance matrices (e.g. the output of
        ``create_distance_matrices``).
    num_neighbors : int
        Number of nearest available neighbours to include.
    already_collected : list of int
        Zero-based indices of objects that are currently unavailable and
        must be ignored. May be empty.

    Returns
    -------
    float
        ``sum(1 / d)`` over the ``num_neighbors`` smallest distances from
        the focal object to the available objects, excluding the focal
        object itself. ``0.0`` when no other object is available.

    Notes
    -----
    The focal object is excluded explicitly instead of dropping the first
    sorted distance: when the focal object is itself listed in
    ``already_collected`` its zero self-distance is already gone, and
    dropping the first entry would discard the nearest real neighbour.
    Two objects at the same location give a zero distance and therefore an
    infinite contribution. The type-valued defaults in the signature are
    only placeholders; all arguments must be supplied.
    """
    # distances from the focal object to every object in this level
    dists_from_obj = np.asarray(lvl_matrices[lvl_ind][obj_ind], dtype=float)

    # set membership keeps the availability filter linear in the level size
    excluded = set(already_collected)
    excluded.add(obj_ind)
    available_indices = [
        j for j in range(len(dists_from_obj)) if j not in excluded
    ]

    nearest_dists = np.sort(dists_from_obj[available_indices])[:num_neighbors]

    return np.sum(np.reciprocal(nearest_dists))

In [None]:
# combine together to create function to expand df
def expand_df_with_covariates(
    orig_df=pd.DataFrame,
    all_obj_locs_df=pd.DataFrame,
    num_neighbors=3
):
    """Compute per-step covariates for expanding a collection log.

    WORK IN PROGRESS: for every collection record the covariates are
    computed, but they are not yet appended to ``expanded_df``, the
    collection/respawn tracker is never filled, and nothing is returned.

    Parameters
    ----------
    orig_df : pd.DataFrame
        Collection log, one row per collected object, with the columns
        ``subject``, ``level`` (a label containing the level number, e.g.
        ``"_level_1"``), ``obj_ID``, ``x`` and ``y``.
    all_obj_locs_df : pd.DataFrame
        Object locations for all levels, with the columns ``level``,
        ``obj_ID``, ``x`` and ``y``.
    num_neighbors : int, default 3
        Neighbour count forwarded to ``neighborhood_value``.

    Notes
    -----
    ``obj_ID - 1`` is used as the row index into a level's distance matrix,
    which assumes the objects of each level are numbered 1..n in the order
    they appear in ``all_obj_locs_df`` -- TODO confirm against the data.
    The ``pd.DataFrame`` defaults are only type placeholders; both frames
    must be supplied.
    """
    # create empty, typed df to append to (a dict of bare types is rejected
    # by the DataFrame constructor, so each column is an empty typed Series)
    expanded_schema = {
        'subject': int,
        'level': str,
        'collection_num': int,
        'obj_ID': int,
        'point_value': int,
        'distance': float,
        'turning_angle': float,
        'neighbor_value': float,
        'used': int,
        'time': float
    }
    expanded_df = pd.DataFrame({
        col: pd.Series(dtype=col_type)
        for col, col_type in expanded_schema.items()
    })

    # create df to track collections
    # TODO: never filled yet, so no object is currently treated as removed
    collect_df = pd.DataFrame({
        'obj_ID': pd.Series(dtype=int),
        'time_to_respawn': pd.Series(dtype=float)
    })

    # create all distance matrices
    lvl_dist_matrices = create_distance_matrices(all_obj_locs_df)

    # turn string levels into ints
    lvl_ints = [
        int(re.search(r'\d+', lvl_s).group()) for lvl_s in orig_df.level
    ]

    # each level's location table is filtered once and re-indexed from 0
    level_locs_cache = {}

    # loop through df and expand
    for i in range(0, len(orig_df)):

        # slice row
        row_of_interest = orig_df.iloc[i]

        # use level from lvl_ints list to determine locations to consider
        lvl_ind = lvl_ints[i] - 1
        curr_matrix = lvl_dist_matrices[lvl_ind]

        # and from the locations df passed in (not the notebook global)
        if row_of_interest.level not in level_locs_cache:
            level_locs_cache[row_of_interest.level] = all_obj_locs_df[
                all_obj_locs_df.level == row_of_interest.level
            ].reset_index(drop=True)
        curr_level_locs = level_locs_cache[row_of_interest.level]

        # find covariate values of each object available to forager
        # grab obj_ID
        curr_obj_ID = row_of_interest.obj_ID

        # distances from current object
        dist_from_curr = curr_matrix[curr_obj_ID-1]

        # neighborhood influenced distance values; the helper expects the
        # full list of matrices plus zero-based indices of removed objects
        already_collected = [int(obj_id) - 1 for obj_id in collect_df.obj_ID]
        hood_val_from_curr = neighborhood_value(
            curr_obj_ID-1, lvl_ind, lvl_dist_matrices, num_neighbors,
            already_collected
        )

        # need to determine if this is the start of a new level; the first
        # row has no predecessor, and a change of subject also restarts
        previous_row = orig_df.iloc[i-1] if i > 0 else None
        starts_new_level = (
            previous_row is None
            or previous_row.level != row_of_interest.level
            or previous_row.subject != row_of_interest.subject
        )

        if starts_new_level:
            # starting from 0,0, so heading is determined by angle
            # formed from there
            heading_angle = np.arctan2(row_of_interest.y, row_of_interest.x)
        else:
            # heading on arrival is the bearing from the previously
            # collected object to the current one
            heading_angle = np.arctan2(
                row_of_interest.y - previous_row.y,
                row_of_interest.x - previous_row.x
            )

        # turning angles from current object
        tas_from_curr = calculate_turning_angles(
            heading_angle, curr_obj_ID, curr_level_locs
        )

        # TODO: append one row per available object to expanded_df, update
        # collect_df with respawn times, and return expanded_df when done

In [37]:
# display the object-location table (level, obj_ID, point_value, x, y per object)
all_lvls_obj_location_data

Unnamed: 0,level,obj_ID,point_value,x,y
0,_level_1,1,2,15.0,23.5
1,_level_1,2,2,15.0,20.0
2,_level_1,3,2,10.0,20.0
3,_level_1,4,2,20.0,20.0
4,_level_1,5,2,12.5,17.5
...,...,...,...,...,...
665,_level_10,62,2,-25.0,-25.0
666,_level_10,63,2,-15.0,-25.0
667,_level_10,64,2,-30.0,-30.0
668,_level_10,65,2,-20.0,-30.0


In [30]:
# show the first collection record to see which fields each log row carries
forage_data.iloc[0]

subject                  1
level             _level_1
object_size    extra small
x                     15.0
y                    -10.0
points                   0
time             91.403702
obj_ID                  32
point_value              2
Name: 0, dtype: object

In [None]:
# pull the numeric part out of every level label in the collection log
lvl_ints = [
    int(re.search(r'\d+', level_label).group())
    for level_label in forage_data.level
]