# PR Study 1 - Sample Size = 90

### 'calc_action_values' generates action values via SR, PR 
- Input data: instructed goals, instructed reward, the SR & PR matrices, the action (0 or 1), mapping of states to columns of SR/PR matrix
- Input algorithm: how planning is carried out 
    - one_or_many = is one goal or all goals used in the computation
        - one_type refers to how the single goal is selected
            - options are 'rwd','ev','prob','bin'
    - sr_or_pr = is sr or pr used. Answers are "sr", "pr" or "both", the latter reflecting hybridization
    - neglect_reward = neglect small reward
    - neglect_rare_transition = neglect rare transitions
    - inverse_prior_confidence = strength of prior that state transition is close to 0
        - ratios = relative frequencies of transitions in dictionary
            - to get ratios, use the "get_relative_frequencies" function (input: data file from study)

In [83]:
def individual_av_components(action=0,goals_reward={},goals_ratio={},sr_matrix=[],pr_matrix=[],state_map={},
                       one_or_many='many',one_type='rwd',sr_or_pr='sr',neglect_reward='neg_rwd',
                      neglect_rare_transition='neg_rare',
                       inverse_prior_confidence=0,sr_pr_weighting=1,range_adapt='norm',nr_t=0.1,round_av=1,pr_comp='average'):
    """Return the per-goal components of the value of one action.

    Instead of a single summed action value, this returns two parallel lists
    with one entry per recorded goal: the transition estimate and the reward
    weight that belongs to it.

    Processing steps:
      1. ``neglect_reward == 'neg_rwd'``: drop every goal whose reward is
         below ``nr_t`` times the largest reward.
      2. ``one_or_many == 'one'``: keep a single goal, chosen by ``one_type``
         ('rwd' = largest reward, 'prob' = largest ``goals_ratio`` entry,
         'ev' = largest matrix entry times reward).
      3. ``range_adapt == 'norm'``: divide every reward by the largest reward.
      4. For each remaining goal record one (estimate, reward weight) pair.

    Args:
        action: 0 or 1; selects the matrix row that is read.
        goals_reward: mapping goal -> reward. Never modified.
        goals_ratio: mapping goal -> sequence whose first two items are the
            numerator and denominator of a relative frequency. Never modified.
        sr_matrix: path of the ``.npy`` file holding the SR matrix.
        pr_matrix: path of the ``.npy`` file holding the PR matrix; it is
            transposed after loading.
        state_map: mapping goal -> column index in the (transposed) matrices.
        one_or_many: 'one' to keep a single goal, anything else keeps all.
        one_type: selection rule used when ``one_or_many == 'one'``.
        sr_or_pr: 'sr', 'pr' or 'both' (hybrid).
        neglect_reward: 'neg_rwd' filters small rewards and reports a reward
            weight of 1 for non-zero estimates; '' reports the reward itself.
        neglect_rare_transition: 'neg_rare' records a goal only when the
            matrix entry for ``action`` exceeds the one for the other action;
            '' records every goal.
        inverse_prior_confidence: accepted for interface compatibility, unused.
        sr_pr_weighting: weight of the PR term in hybrid mode (SR gets
            ``1 - sr_pr_weighting``).
        range_adapt: 'norm' enables reward normalisation.
        nr_t: fraction of the largest reward used as the neglect threshold.
        round_av: 1 rounds every estimate to two decimals.
        pr_comp: accepted for interface compatibility, unused.

    Returns:
        ``(action_value, reward_info)``: two lists of equal length, padded
        with zeros to at least four entries. ``None`` if ``action`` is not
        0 or 1.

    Raises:
        ValueError: if ``sr_or_pr`` is not 'sr', 'pr' or 'both', or if a
            single goal must be selected from an empty goal set.
        KeyError: if a goal is missing from ``goals_ratio`` or ``state_map``
            where it is needed.
        ZeroDivisionError: if normalisation is requested and the largest
            reward is 0.
    """
    import numpy as np

    if action not in (0, 1):
        print('wrong input for action -- can only be 0 or 1!')
        return
    if sr_or_pr not in ('sr', 'pr', 'both'):
        raise ValueError("sr_or_pr must be 'sr', 'pr' or 'both', got {!r}".format(sr_or_pr))
    other_action = int(np.abs(action - 1))

    # Work on copies so the caller's dictionaries are never modified.
    goals_rewards = goals_reward.copy()
    goals_ratios = goals_ratio.copy()

    # The PR matrix is transposed so both matrices are read as [action, state].
    pr_mat = np.transpose(np.load(pr_matrix))
    sr_mat = np.load(sr_matrix)

    def strength(act, goal):
        """Matrix entry linking `act` to `goal` under the chosen representation."""
        column = state_map[goal]
        if sr_or_pr == 'sr':
            return sr_mat[act, column]
        if sr_or_pr == 'pr':
            return pr_mat[act, column]
        # NOTE(review): hybrid mode had no usable definition before; the same
        # PR/SR weighting as the hybrid estimate is used here.
        return sr_pr_weighting * pr_mat[act, column] + (1 - sr_pr_weighting) * sr_mat[act, column]

    # Step 1: drop goals with negligible reward.
    if neglect_reward == 'neg_rwd' and goals_rewards:
        cutoff = nr_t * max(goals_rewards.values())
        negligible = [goal for goal, reward in goals_rewards.items() if reward < cutoff]
        for goal in negligible:
            goals_rewards.pop(goal)
            goals_ratios.pop(goal)

    # Step 2: optionally focus on one goal.
    if one_or_many == 'one':
        candidates = None
        if one_type == 'rwd':
            candidates = goals_rewards
            kept = max(goals_rewards, key=goals_rewards.get)
        elif one_type == 'prob':
            # NOTE(review): this compares the raw goals_ratio entries
            # (sequences), not numerator/denominator -- confirm that is intended.
            candidates = goals_ratios
            kept = max(goals_ratios, key=goals_ratios.get)
        elif one_type == 'ev':
            candidates = goals_ratios
            kept = max(goals_rewards, key=lambda g: strength(action, g) * goals_rewards[g])
        if candidates is not None:
            dropped = {goal for goal in candidates if goal != kept}
            goals_rewards = {g: r for g, r in goals_rewards.items() if g not in dropped}
            goals_ratios = {g: r for g, r in goals_ratios.items() if g not in dropped}

    # Step 3: rescale rewards relative to the largest one.
    if range_adapt == 'norm' and goals_rewards:
        largest = max(goals_rewards.values())
        goals_rewards = {goal: reward / largest for goal, reward in goals_rewards.items()}

    num_goals = len(goals_rewards)
    ignore_reward = neglect_reward == 'neg_rwd'
    skip_rare = neglect_rare_transition == 'neg_rare'
    # Any other flag value matches no computation branch: nothing is recorded.
    flags_recognised = (neglect_reward in ('', 'neg_rwd')
                        and neglect_rare_transition in ('', 'neg_rare'))

    # Step 4: one (estimate, reward weight) pair per recorded goal.
    action_value = []
    reward_info = []
    if flags_recognised:
        for goal, reward in goals_rewards.items():
            if sr_or_pr == 'both':
                # NOTE(review): reconstructed hybrid path -- each goal
                # contributes the weighted mix of the loaded SR and PR entries.
                column = state_map[goal]
                sr_probability = 0
                pr_probability = 0
                if not skip_rare or sr_mat[action, column] > sr_mat[other_action, column]:
                    sr_probability = sr_mat[action, column]
                if not skip_rare or pr_mat[action, column] > pr_mat[other_action, column]:
                    pr_probability = pr_mat[action, column]
                action_value.append(sr_pr_weighting * pr_probability
                                    + (1 - sr_pr_weighting) * sr_probability)
                reward_info.append(1 if ignore_reward else reward)
                continue

            if sr_or_pr == 'pr':
                # PR rewards are shared equally among the retained goals.
                reward = reward / num_goals
            if skip_rare and not strength(action, goal) > strength(other_action, goal):
                continue

            numerator, denominator = goals_ratios[goal][0], goals_ratios[goal][1]
            if denominator == 0:
                # No observations: record a zero estimate instead of dividing.
                # The slot reports the raw reward even under 'neg_rwd',
                # matching the existing 'neg_rare' handling.
                action_value.append(0)
                reward_info.append(reward)
            else:
                action_value.append(numerator / denominator)
                reward_info.append(1 if ignore_reward else reward)

    # Callers receive at least four slots; unused ones stay 0.
    padding = [0] * max(0, 4 - len(action_value))
    action_value = action_value + padding
    reward_info = reward_info + padding

    if round_av == 1:
        action_value = [round(x, 2) for x in action_value]

    return action_value, reward_info
        

In [84]:
def calc_action_values(action=0,goals_reward={},goals_ratio={},sr_matrix=[],pr_matrix=[],state_map={},
                       one_or_many='many',one_type='rwd',sr_or_pr='sr',neglect_reward='neg_rwd',
                      neglect_rare_transition='neg_rare',
                       inverse_prior_confidence=0,sr_pr_weighting=1,range_adapt='norm',nr_t=0.1,round_av=1,pr_comp='average'):
    """Compute the signed value of one action from instructed goals.

    Processing steps:
      1. ``neglect_reward == 'neg_rwd'``: drop every goal whose reward is
         below ``nr_t`` times the largest reward.
      2. ``one_or_many == 'one'``: keep a single goal, chosen by ``one_type``
         ('rwd' = largest reward, 'prob' = largest ``goals_ratio`` entry,
         'ev' = largest matrix entry times reward).
      3. ``range_adapt == 'norm'``: divide every reward by the largest reward.
      4. Sum, over the remaining goals, the transition estimate times the
         reward (or times 1 when ``neglect_reward == 'neg_rwd'``).
      5. Negate the sum for ``action == 1``, round it to two decimals when
         ``round_av == 1`` and, when ``pr_comp == 'average'``, divide it by
         the number of remaining goals (in that order, so the averaged result
         itself is not rounded).

    In 'sr' and 'pr' mode the transition estimate is the relative frequency
    ``goals_ratio[goal][0] / goals_ratio[goal][1]`` (a zero denominator
    contributes nothing). In 'both' mode it is
    ``sr_pr_weighting * PR + (1 - sr_pr_weighting) * SR`` read from the loaded
    matrices.

    Args:
        action: 0 or 1; selects the matrix row that is read.
        goals_reward: mapping goal -> reward. Never modified.
        goals_ratio: mapping goal -> sequence whose first two items are the
            numerator and denominator of a relative frequency. Never modified.
        sr_matrix: path of the ``.npy`` file holding the SR matrix.
        pr_matrix: path of the ``.npy`` file holding the PR matrix; it is
            transposed after loading.
        state_map: mapping goal -> column index in the (transposed) matrices.
        one_or_many: 'one' to keep a single goal, anything else keeps all.
        one_type: selection rule used when ``one_or_many == 'one'``.
        sr_or_pr: 'sr', 'pr' or 'both' (hybrid).
        neglect_reward: 'neg_rwd' or ''.
        neglect_rare_transition: 'neg_rare' counts a goal (or, in hybrid mode,
            a matrix term) only when the matrix entry for ``action`` exceeds
            the one for the other action; '' counts everything.
        inverse_prior_confidence: accepted for interface compatibility, unused.
        sr_pr_weighting: weight of the PR term in hybrid mode.
        range_adapt: 'norm' enables reward normalisation.
        nr_t: fraction of the largest reward used as the neglect threshold.
        round_av: 1 rounds the value to two decimals before averaging.
        pr_comp: 'average' divides the value by the number of goals.

    Returns:
        The action value (positive coding for action 0, negative for
        action 1), or ``None`` if ``action`` is not 0 or 1.

    Raises:
        ValueError: if ``sr_or_pr`` is not 'sr', 'pr' or 'both', or if a
            single goal must be selected from an empty goal set.
        KeyError: if a goal is missing from ``goals_ratio`` or ``state_map``
            where it is needed.
        ZeroDivisionError: if normalisation is requested and the largest
            reward is 0.
    """
    import numpy as np

    if action not in (0, 1):
        print('wrong input for action -- can only be 0 or 1!')
        return
    if sr_or_pr not in ('sr', 'pr', 'both'):
        raise ValueError("sr_or_pr must be 'sr', 'pr' or 'both', got {!r}".format(sr_or_pr))
    other_action = int(np.abs(action - 1))

    # Work on copies so the caller's dictionaries are never modified.
    goals_rewards = goals_reward.copy()
    goals_ratios = goals_ratio.copy()

    # The PR matrix is transposed so both matrices are read as [action, state].
    pr_mat = np.transpose(np.load(pr_matrix))
    sr_mat = np.load(sr_matrix)

    def strength(act, goal):
        """Matrix entry linking `act` to `goal` under the chosen representation."""
        column = state_map[goal]
        if sr_or_pr == 'sr':
            return sr_mat[act, column]
        if sr_or_pr == 'pr':
            return pr_mat[act, column]
        # NOTE(review): hybrid mode had no usable definition before; the same
        # PR/SR weighting as the hybrid estimate is used here.
        return sr_pr_weighting * pr_mat[act, column] + (1 - sr_pr_weighting) * sr_mat[act, column]

    # Step 1: drop goals with negligible reward (threshold is the nr_t parameter).
    if neglect_reward == 'neg_rwd' and goals_rewards:
        cutoff = nr_t * max(goals_rewards.values())
        negligible = [goal for goal, reward in goals_rewards.items() if reward < cutoff]
        for goal in negligible:
            goals_rewards.pop(goal)
            goals_ratios.pop(goal)

    # Step 2: optionally focus on one goal.
    if one_or_many == 'one':
        candidates = None
        if one_type == 'rwd':
            candidates = goals_rewards
            kept = max(goals_rewards, key=goals_rewards.get)
        elif one_type == 'prob':
            # NOTE(review): this compares the raw goals_ratio entries
            # (sequences), not numerator/denominator -- confirm that is intended.
            candidates = goals_ratios
            kept = max(goals_ratios, key=goals_ratios.get)
        elif one_type == 'ev':
            candidates = goals_ratios
            kept = max(goals_rewards, key=lambda g: strength(action, g) * goals_rewards[g])
        if candidates is not None:
            dropped = {goal for goal in candidates if goal != kept}
            goals_rewards = {g: r for g, r in goals_rewards.items() if g not in dropped}
            goals_ratios = {g: r for g, r in goals_ratios.items() if g not in dropped}

    # Step 3: rescale rewards relative to the largest one.
    if range_adapt == 'norm' and goals_rewards:
        largest = max(goals_rewards.values())
        goals_rewards = {goal: reward / largest for goal, reward in goals_rewards.items()}

    num_goals = len(goals_rewards)
    ignore_reward = neglect_reward == 'neg_rwd'
    skip_rare = neglect_rare_transition == 'neg_rare'
    # Any other flag value matches no computation branch: the value stays 0.
    flags_recognised = (neglect_reward in ('', 'neg_rwd')
                        and neglect_rare_transition in ('', 'neg_rare'))

    # Step 4: accumulate estimate * reward weight over the goals.
    action_value = 0
    if flags_recognised:
        for goal, reward in goals_rewards.items():
            weight = 1 if ignore_reward else reward

            if sr_or_pr == 'both':
                # Read the loaded arrays (not the file paths) for both terms.
                column = state_map[goal]
                sr_probability = 0
                pr_probability = 0
                if not skip_rare or sr_mat[action, column] > sr_mat[other_action, column]:
                    sr_probability = sr_mat[action, column]
                if not skip_rare or pr_mat[action, column] > pr_mat[other_action, column]:
                    pr_probability = pr_mat[action, column]
                combined_probability = (sr_pr_weighting * pr_probability
                                        + (1 - sr_pr_weighting) * sr_probability)
                action_value += weight * combined_probability
                continue

            if skip_rare and not strength(action, goal) > strength(other_action, goal):
                continue
            numerator, denominator = goals_ratios[goal][0], goals_ratios[goal][1]
            if denominator != 0:
                action_value += (numerator / denominator) * weight

    # Step 5: action 1 is coded as negative, action 0 as positive.
    if action == 1:
        action_value = action_value * -1
    if round_av == 1:
        action_value = round(action_value, 2)
    # With no goals left there is nothing to average over.
    if pr_comp == 'average' and num_goals:
        action_value = action_value / num_goals

    return action_value
        

In [85]:
def calc_relative_frequencies(action=0,goals_rewards={},goals_ratios={},sr_matrix=[],pr_matrix=[],state_map={},
                       one_or_many='many',one_type='rwd',sr_or_pr='sr',neglect_reward=1,
                      neglect_rare_transition=1,inverse_prior_confidence=0,sr_pr_weighting=1):
    """Return the PR and SR relative frequencies of the last retained goal.

    Goals are first filtered (``neglect_reward == 1`` drops goals whose reward
    is below 10% of the largest reward) and optionally reduced to a single
    goal (``one_or_many == 'one'`` with ``one_type`` 'rwd' or 'prob'). The
    remaining goals are then visited in order and, for each, the relative
    frequency ``numerator / (denominator + inverse_prior_confidence)`` is
    computed. The pair belonging to the goal visited last is returned.

    Layout of ``goals_ratios[goal]``:
      * 'sr' or 'pr' mode: ``[numerator, denominator]`` for that representation.
      * 'both' mode: ``[sr_numerator, sr_denominator, pr_numerator, pr_denominator]``.

    Args:
        action: 0 or 1; selects the matrix row that is read.
        goals_rewards: mapping goal -> reward. Never modified.
        goals_ratios: mapping goal -> frequency counts (see above). Never
            modified.
        sr_matrix: path of the ``.npy`` file holding the SR matrix.
        pr_matrix: path of the ``.npy`` file holding the PR matrix; it is
            transposed after loading.
        state_map: mapping goal -> column index in the (transposed) matrices.
        one_or_many: 'one' to keep a single goal, anything else keeps all.
        one_type: 'rwd' or 'prob'; selection rule for a single goal.
        sr_or_pr: 'sr', 'pr' or 'both'.
        neglect_reward: 1 enables the small-reward filter, 0 disables it.
        neglect_rare_transition: 1 reports a frequency only when the matrix
            entry for ``action`` exceeds the one for the other action (else
            0 is reported); 0 always reports the frequency.
        inverse_prior_confidence: constant added to every denominator.
        sr_pr_weighting: accepted for interface compatibility, unused.

    Returns:
        ``(pr_probability, sr_probability)`` -- note the order. In 'sr' mode
        the PR entry is 0, in 'pr' mode the SR entry is 0. Both are 0 when no
        goal is left or when either neglect flag is neither 0 nor 1. ``None``
        if ``action`` is not 0 or 1.

    Raises:
        ValueError: if ``sr_or_pr`` is not 'sr', 'pr' or 'both', or if a
            single goal must be selected from an empty goal set.
        KeyError: if a goal is missing from ``goals_ratios`` or ``state_map``
            where it is needed.
    """
    import numpy as np

    if action not in (0, 1):
        print('wrong input for action -- can only be 0 or 1!')
        return
    if sr_or_pr not in ('sr', 'pr', 'both'):
        raise ValueError("sr_or_pr must be 'sr', 'pr' or 'both', got {!r}".format(sr_or_pr))
    other_action = int(np.abs(action - 1))

    # Work on copies so the caller's dictionaries are never modified.
    rewards = goals_rewards.copy()
    ratios = goals_ratios.copy()

    # Drop goals with negligible reward.
    if neglect_reward == 1 and rewards:
        cutoff = 0.1 * max(rewards.values())
        negligible = [goal for goal, reward in rewards.items() if reward < cutoff]
        for goal in negligible:
            rewards.pop(goal)
            ratios.pop(goal)

    # Optionally focus on one goal.
    if one_or_many == 'one':
        candidates = None
        if one_type == 'rwd':
            candidates = rewards
            kept = max(rewards, key=rewards.get)
        elif one_type == 'prob':
            # NOTE(review): this compares the raw count lists, not
            # numerator/denominator -- confirm that is intended.
            candidates = ratios
            kept = max(ratios, key=ratios.get)
        if candidates is not None:
            dropped = {goal for goal in candidates if goal != kept}
            rewards = {g: r for g, r in rewards.items() if g not in dropped}
            ratios = {g: r for g, r in ratios.items() if g not in dropped}

    # The PR matrix is transposed for every mode so it is always read as
    # [action, state], consistent with the SR matrix.
    pr_mat = np.transpose(np.load(pr_matrix))
    sr_mat = np.load(sr_matrix)

    def frequency(numerator, denominator):
        """Relative frequency with the prior added; 0 when nothing was observed."""
        total = denominator + inverse_prior_confidence
        return numerator / total if total != 0 else 0

    def dominates(matrix, goal):
        """True when `action` leads to `goal` more strongly than the other action."""
        column = state_map[goal]
        return matrix[action, column] > matrix[other_action, column]

    skip_rare = neglect_rare_transition == 1
    # Flag values other than 0/1 match no computation branch.
    flags_recognised = neglect_reward in (0, 1) and neglect_rare_transition in (0, 1)

    sr_probability = 0
    pr_probability = 0
    for goal in rewards:
        # Reset per goal: the returned pair always describes the last goal.
        sr_probability = 0
        pr_probability = 0
        if not flags_recognised:
            continue

        counts = ratios[goal]
        if sr_or_pr == 'both':
            if not skip_rare or dominates(sr_mat, goal):
                sr_probability = frequency(counts[0], counts[1])
            if not skip_rare or dominates(pr_mat, goal):
                pr_probability = frequency(counts[2], counts[3])
        elif sr_or_pr == 'sr':
            if not skip_rare or dominates(sr_mat, goal):
                sr_probability = frequency(counts[0], counts[1])
        else:
            if not skip_rare or dominates(pr_mat, goal):
                pr_probability = frequency(counts[0], counts[1])

    return pr_probability, sr_probability
        

# Get the relative frequencies for SR and PR learned during training

In [86]:
def get_relative_frequencies(csv_file):
    """Count how often each starting image co-occurred with each later image.

    Reads `csv_file` and, for every distinct non-missing value of `s1_image`
    paired with every distinct non-missing value of `s2_image` and then of
    `s3_image`, counts the rows in which both cells contain the respective
    value. Keys are '<start>_<later>' with the last four characters of each
    image name removed (presumably a file extension such as '.png').

    Returns:
        dict mapping '<start>_<later>' to the integer row count.
    """
    import pandas as pd

    trials = pd.read_csv(csv_file)
    start_images = trials.s1_image.dropna().unique()
    later_images_by_column = (
        ('s2_image', trials.s2_image.dropna().unique()),
        ('s3_image', trials.s3_image.dropna().unique()),
    )

    counts = {}
    # s2 pairs are filled in first, then s3 pairs (a repeated key is overwritten).
    for column, later_images in later_images_by_column:
        for later in later_images:
            for start in start_images:
                started_here = trials.s1_image.str.contains(start, na=False)
                arrived_there = trials[column].str.contains(later, na=False)
                pair_key = f'{start[:-4]}_{later[:-4]}'
                counts[pair_key] = int((started_here & arrived_there).sum())

    return counts

# Retrieve specific relative frequencies from dictionary of all relative frequencies

In [87]:
def get_specific_ratio(ratio_dict,action_name,goals,sr_or_pr='sr'):
    """Look up the SR and/or PR frequency counts for each requested goal.

    `ratio_dict` maps '<start>_<state>' keys to counts.

    SR counts for a goal:
      * numerator: count of '<action_name>_<goal>'
      * denominator: sum of '<action_name>_<state>' over all states of the
        goal's own group (immediate or distal)
      Both are 0 when the goal belongs to neither group.

    PR counts for a goal:
      * numerator: count of '<action_name>_<goal>'
      * denominator: that count summed over the starts 'trident' and 'planet'

    Returns:
        dict goal -> [sr_num, sr_den] for 'sr', [pr_num, pr_den] for 'pr',
        [sr_num, sr_den, pr_num, pr_den] for 'both'. Any other `sr_or_pr`
        yields an empty dict.
    """
    immediates = ['tree', 'bell', 'fox', 'watch']
    distals = ['compass', 'houses', 'train', 'thermometer', 'microphone', 'snorkel', 'tophat', 'north']

    def observed(start, state):
        return ratio_dict['{}_{}'.format(start, state)]

    result = {}
    for goal in goals:
        # States sharing the goal's stage form the SR denominator.
        if goal in immediates:
            same_stage = immediates
        elif goal in distals:
            same_stage = distals
        else:
            same_stage = []

        sr_total = 0
        sr_hits = 0
        for state in same_stage:
            sr_total += observed(action_name, state)
            if state == goal:
                sr_hits = observed(action_name, state)

        pr_total = 0
        for start in ('trident', 'planet'):
            pr_total += observed(start, goal)
        pr_hits = observed(action_name, goal)

        if sr_or_pr == 'sr':
            result[goal] = [sr_hits, sr_total]
        elif sr_or_pr == 'pr':
            result[goal] = [pr_hits, pr_total]
        elif sr_or_pr == 'both':
            result[goal] = [sr_hits, sr_total, pr_hits, pr_total]

    return result

# Data Study 1, get action values

In [105]:
neg_rwd_thresh=0.0000001
import os
import pandas as pd

import warnings

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
subs = [x for x in os.listdir(os.curdir) if x.startswith('5') or x.startswith('6')]

# Query type number 1 -- vanilla PR vs. vanilla SR, latter has EV of 0 
PRvsGuess={2:[['fox','houses','train'],{'fox':138,'houses':215,'train':168}],
          11:[['fox','houses','train'],{'fox':138,'houses':215,'train':168}],
          13:[['fox','houses','train'],{'fox':138,'houses':215,'train':168}],
          15:[['fox','houses','train'],{'fox':138,'houses':215,'train':168}],
          1:[['fox','houses','north'],{'fox':156,'houses':208,'north':137}],
          3:[['fox','houses','north'],{'fox':156,'houses':208,'north':137}],
          4:[['fox','houses','north'],{'fox':156,'houses':208,'north':137}],
          5:[['fox','houses','north'],{'fox':156,'houses':208,'north':137}]}


# check if participants have a bias for distal rewards
DistalBias={17:[['tree','north'],{'tree':11,'north':130}],
18:[['fox','houses'],{'fox':11,'houses':130}],
21:[['watch','compass'],{'watch':50,'compass':4}],
22:[['bell','snorkel'],{'bell':50,'snorkel':4}],
25:[['tree','train'],{'tree':48,'train':4}],
26:[['fox','thermometer'],{'fox':48,'thermometer':4}],
29:[['bell','compass','snorkel','watch'],{'bell':10,'compass':126,'snorkel':1,'watch':1}],
30:[['bell','compass','snorkel','watch'],{'watch':10,'snorkel':126,'compass':1,'bell':1}]
}

# reward revaluation queries
RewardRevaluation={1:[['houses'],{'houses':100}],
                   2:[['train'],{'train':100}],
                   3:[['snorkel'],{'snorkel':100}],
                   4:[['tophat'],{'tophat':100}]}

# State name -> index, handed to individual_av_components as `state_map`
# (the mapping of states to columns of the SR/PR matrices).
# starting state
state_dictionary = {'start': 0}
# second stage ('trident', 'planet') followed by all later states, numbered
# consecutively from 2.
# NOTE(review): index 1 is not assigned to any state -- confirm intentional.
state_dictionary.update(
    (state, index)
    for index, state in enumerate(
        (
            'trident', 'planet',
            'bell', 'tree', 'watch', 'fox',
            'houses', 'compass', 'train', 'thermometer',
            'microphone', 'tophat', 'north', 'snorkel',
        ),
        start=2,
    )
)

# File names of the saved SR and PR matrices; they are passed as-is to
# individual_av_components, which loads them with np.load.
sr_matrix, pr_matrix = 'SR_matrix.npy', 'PR_matrix.npy'

# NOTE(review): not referenced in the visible part of the loop below, which
# reads 'key_resp_31.keys' for the reward-revaluation choices -- confirm use.
rr_key = 'key_resp_33.keys'

# Grid of model variants; the per-participant loop iterates over these lists.
one_many = ['many', 'one']                     # all goals vs. a single goal
one_type = ['rwd', 'prob', 'ev']               # how the single goal is chosen
neglect_rwd = ['', 'neg_rwd']                  # '' leaves the option off
neglect_rare_transition = ['', 'neg_rare']     # '' leaves the option off
round_ev = [0, 1]
normalize_max = ['', 'norm']                   # passed on as range_adapt

# Accumulators filled while looping over participants.
all_dfs = dict()
counter = 0  # written to the 'sub_num' column of each participant's frames

for sub in subs:
    print(sub)
    df=pd.read_csv(sub)
    rd=get_relative_frequencies(sub)
    df_queries=df[df['trial_num'].notnull() & df['PRG_vs_Guessing.thisIndex'].notnull()]
    df_queries=df_queries[df_queries['trial_num'].isin(list(PRvsGuess.keys()))]
    df_queries=df_queries.reset_index(drop=True)
    df_queries['sub_num']=counter

    df_queries2=df[df['trial_num'].notnull() & df['PRG_vs_Guessing.thisIndex'].notnull()]
    df_queries2=df_queries2[df_queries2['trial_num'].isin(list(DistalBias.keys()))]
    df_queries2=df_queries2.reset_index(drop=True)
    df_queries2['sub_num']=counter


    df_queries3=df[df['trial_num'].notnull()]
    df_queries3=df_queries3[df_queries3['reward_reval_real.thisTrialN'].isin([int(x)-1 for x in list(RewardRevaluation.keys())])]
    df_queries3=df_queries3.reset_index(drop=True)
    df_queries3['sub_num']=counter

    for o_m in one_many:
        for n_rwd in neglect_rwd:
                for n_rare in neglect_rare_transition:
                    for norm in normalize_max:

                        if o_m=='one':
                            for one_t in one_type:

                                ratios_goals_pr={}
                                ratios_goals_sr={}
                                goal_cols=['r1','r2','r4']

                                pr_ev_diff=[]
                                sr_ev_diff=[]
                                pr_a1p=[]
                                pr_a0p=[]
                                pr_a1r=[]
                                pr_a0r=[]

                                sr_a1p=[]
                                sr_a0p=[]
                                sr_a1r=[]
                                sr_a0r=[]

                                choices=[0 if x==9 else x for x in df_queries['key_resp_25.keys']]
                                df_queries['choices']=choices


                                #tn is trial number
                                for tn in df_queries['trial_num']:

                                    rewards=PRvsGuess[tn][1]
                                    
                                    ratios_action1_pr=get_specific_ratio(rd,'planet',PRvsGuess[tn][0],'pr')
                                    ratios_action0_pr=get_specific_ratio(rd,'trident',PRvsGuess[tn][0],'pr')
                                    ratios_action1_sr=get_specific_ratio(rd,'planet',PRvsGuess[tn][0],'sr')
                                    ratios_action0_sr=get_specific_ratio(rd,'trident',PRvsGuess[tn][0],'sr')





                                    action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a1p.append(action1_sr_probabilities)
                                    sr_a1r.append(action1_sr_rewards)
                                    action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a0p.append(action0_sr_probabilities)
                                    sr_a0r.append(action0_sr_rewards)
                                    action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    pr_a1p.append(action1_pr_probabilities)
                                    pr_a1r.append(action1_pr_rewards)

                                    action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)


                                    pr_a0p.append(action0_pr_probabilities)
                                    pr_a0r.append(action0_pr_rewards)
                                for i in range(4):
                                    df_queries['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(8)]
                                    df_queries['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(8)]
                                    df_queries['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(8)]
                                    df_queries['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(8)]

                                    df_queries['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(8)]
                                    df_queries['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(8)]
                                    df_queries['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(8)]
                                    df_queries['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(8)]






                                ratios_goals_pr={}
                                ratios_goals_sr={}
                                goal_cols=['r1','r2','r4']
                                pr_ev_diff=[]
                                sr_ev_diff=[]
                                pr_a1p=[]
                                pr_a0p=[]
                                pr_a1r=[]
                                pr_a0r=[]

                                sr_a1p=[]
                                sr_a0p=[]
                                sr_a1r=[]
                                sr_a0r=[]

                                choices=[0 if x==9 else x for x in df_queries2['key_resp_25.keys']]
                                df_queries2['choices']=choices
                                #tn is trial number
                                for tn in df_queries2['trial_num']:


                                    rewards=DistalBias[tn][1]
                                    ratios_action1_pr=get_specific_ratio(rd,'planet',DistalBias[tn][0],'pr')
                                    ratios_action0_pr=get_specific_ratio(rd,'trident',DistalBias[tn][0],'pr')
                                    ratios_action1_sr=get_specific_ratio(rd,'planet',DistalBias[tn][0],'sr')
                                    ratios_action0_sr=get_specific_ratio(rd,'trident',DistalBias[tn][0],'sr')
                                    action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a1p.append(action1_sr_probabilities)
                                    sr_a1r.append(action1_sr_rewards)
                                    action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a0p.append(action0_sr_probabilities)
                                    sr_a0r.append(action0_sr_rewards)
                                    action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    pr_a1p.append(action1_pr_probabilities)
                                    pr_a1r.append(action1_pr_rewards)

                                    action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)


                                    pr_a0p.append(action0_pr_probabilities)
                                    pr_a0r.append(action0_pr_rewards)
                                for i in range(4):
                                    df_queries2['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(8)]
                                    df_queries2['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(8)]
                                    df_queries2['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(8)]
                                    df_queries2['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(8)]

                                    df_queries2['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(8)]
                                    df_queries2['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(8)]
                                    df_queries2['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(8)]
                                    df_queries2['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(8)]


                                ratios_goals_pr={}
                                ratios_goals_sr={}
                                goal_cols=['r1','r2','r4']
                                pr_ev_diff=[]
                                sr_ev_diff=[]
                                choices=[0 if x==9 else x for x in df_queries3['key_resp_31.keys']]
                                df_queries3['choices']=choices
                                conversion_dict={1:1,2:2,3:3,4:4,5:1,6:2,7:3,8:4}
                                pr_a1p=[]
                                pr_a0p=[]
                                pr_a1r=[]
                                pr_a0r=[]

                                sr_a1p=[]
                                sr_a0p=[]
                                sr_a1r=[]
                                sr_a0r=[]
                                #tn is trial number
                                for tn in df_queries3['trial_num']:
                                    tn=conversion_dict[tn]
                                    rewards=RewardRevaluation[tn][1]
                                    ratios_action1_pr=get_specific_ratio(rd,'planet',RewardRevaluation[tn][0],'pr')
                                    ratios_action0_pr=get_specific_ratio(rd,'trident',RewardRevaluation[tn][0],'pr')
                                    ratios_action1_sr=get_specific_ratio(rd,'planet',RewardRevaluation[tn][0],'sr')
                                    ratios_action0_sr=get_specific_ratio(rd,'trident',RewardRevaluation[tn][0],'sr')


                                    action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a1p.append(action1_sr_probabilities)
                                    sr_a1r.append(action1_sr_rewards)
                                    action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    sr_a0p.append(action0_sr_probabilities)
                                    sr_a0r.append(action0_sr_rewards)
                                    action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                    pr_a1p.append(action1_pr_probabilities)
                                    pr_a1r.append(action1_pr_rewards)

                                    action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                           pr_matrix=pr_matrix,state_map=state_dictionary,
                                                   one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                                  neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)


                                    pr_a0p.append(action0_pr_probabilities)
                                    pr_a0r.append(action0_pr_rewards)
                                for i in range(4):
                                    df_queries3['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(4)]
                                    df_queries3['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(4)]
                                    df_queries3['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(4)]
                                    df_queries3['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(4)]

                                    df_queries3['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(4)]
                                    df_queries3['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(4)]
                                    df_queries3['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(4)]
                                    df_queries3['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(4)]


                        else:

                            one_t='ev'
                            ratios_goals_pr={}
                            ratios_goals_sr={}
                            goal_cols=['r1','r2','r4']

                            pr_ev_diff=[]
                            sr_ev_diff=[]

                            choices=[0 if x==9 else x for x in df_queries['key_resp_25.keys']]
                            df_queries['choices']=choices
                            pr_a1p=[]
                            pr_a0p=[]
                            pr_a1r=[]
                            pr_a0r=[]
                            
                            sr_a1p=[]
                            sr_a0p=[]
                            sr_a1r=[]
                            sr_a0r=[]
                            #tn is trial number
                            for tn in df_queries['trial_num']:

                                rewards=PRvsGuess[tn][1]
                                ratios_action1_pr=get_specific_ratio(rd,'planet',PRvsGuess[tn][0],'pr')
                                ratios_action0_pr=get_specific_ratio(rd,'trident',PRvsGuess[tn][0],'pr')
                                ratios_action1_sr=get_specific_ratio(rd,'planet',PRvsGuess[tn][0],'sr')
                                ratios_action0_sr=get_specific_ratio(rd,'trident',PRvsGuess[tn][0],'sr')





                                action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                action1_sr_probabilities=[0 for x in action1_sr_probabilities]
                                sr_a1p.append(action1_sr_probabilities)
                                sr_a1r.append(action1_sr_rewards)
                                action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                action0_sr_probabilities=[0 for x in action0_sr_probabilities]
                                sr_a0p.append(action0_sr_probabilities)
                                sr_a0r.append(action0_sr_rewards)
                                action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                pr_a1p.append(action1_pr_probabilities)
                                pr_a1r.append(action1_pr_rewards)
                                
                                action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)

                         
                                pr_a0p.append(action0_pr_probabilities)
                                pr_a0r.append(action0_pr_rewards)

                            for i in range(4):
                                df_queries['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(8)]
                                df_queries['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(8)]
                                df_queries['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(8)]
                                df_queries['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(8)]

                                df_queries['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(8)]
                                df_queries['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(8)]
                                df_queries['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(8)]
                                df_queries['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(8)]




                            ratios_goals_pr={}
                            ratios_goals_sr={}
                            goal_cols=['r1','r2','r4']
                            pr_ev_diff=[]
                            sr_ev_diff=[]

                            choices=[0 if x==9 else x for x in df_queries2['key_resp_25.keys']]
                            df_queries2['choices']=choices
                            pr_a1p=[]
                            pr_a0p=[]
                            pr_a1r=[]
                            pr_a0r=[]
                            
                            sr_a1p=[]
                            sr_a0p=[]
                            sr_a1r=[]
                            sr_a0r=[]
                            #tn is trial number
                            for tn in df_queries2['trial_num']:

                                rewards=DistalBias[tn][1]
                                ratios_action1_pr=get_specific_ratio(rd,'planet',DistalBias[tn][0],'pr')
                                ratios_action0_pr=get_specific_ratio(rd,'trident',DistalBias[tn][0],'pr')
                                ratios_action1_sr=get_specific_ratio(rd,'planet',DistalBias[tn][0],'sr')
                                ratios_action0_sr=get_specific_ratio(rd,'trident',DistalBias[tn][0],'sr')

                                action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                sr_a1p.append(action1_sr_probabilities)
                                sr_a1r.append(action1_sr_rewards)
                                action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                sr_a0p.append(action0_sr_probabilities)
                                sr_a0r.append(action0_sr_rewards)
                                action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                pr_a1p.append(action1_pr_probabilities)
                                pr_a1r.append(action1_pr_rewards)
                                
                                action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)

                         
                                pr_a0p.append(action0_pr_probabilities)
                                pr_a0r.append(action0_pr_rewards)
                            for i in range(4):
                                df_queries2['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(8)]
                                df_queries2['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(8)]
                                df_queries2['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(8)]
                                df_queries2['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(8)]

                                df_queries2['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(8)]
                                df_queries2['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(8)]
                                df_queries2['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(8)]
                                df_queries2['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(8)]




                            ratios_goals_pr={}
                            ratios_goals_sr={}
                            goal_cols=['r1','r2','r4']
                            pr_ev_diff=[]
                            sr_ev_diff=[]
                            choices=[0 if x==9 else x for x in df_queries3['key_resp_31.keys']]
                            df_queries3['choices']=choices
                            conversion_dict={1:1,2:2,3:3,4:4,5:1,6:2,7:3,8:4}
                            pr_a1p=[]
                            pr_a0p=[]
                            pr_a1r=[]
                            pr_a0r=[]
                            
                            sr_a1p=[]
                            sr_a0p=[]
                            sr_a1r=[]
                            sr_a0r=[]
                            #tn is trial number
                            for tn in df_queries3['trial_num']:
                                tn=conversion_dict[tn]
                                rewards=RewardRevaluation[tn][1]
                                ratios_action1_pr=get_specific_ratio(rd,'planet',RewardRevaluation[tn][0],'pr')
                                ratios_action0_pr=get_specific_ratio(rd,'trident',RewardRevaluation[tn][0],'pr')
                                ratios_action1_sr=get_specific_ratio(rd,'planet',RewardRevaluation[tn][0],'sr')
                                ratios_action0_sr=get_specific_ratio(rd,'trident',RewardRevaluation[tn][0],'sr')

                                action1_sr_probabilities,action1_sr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                sr_a1p.append(action1_sr_probabilities)
                                sr_a1r.append(action1_sr_rewards)
                                action0_sr_probabilities,action0_sr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_sr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='sr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                sr_a0p.append(action0_sr_probabilities)
                                sr_a0r.append(action0_sr_rewards)
                                action1_pr_probabilities,action1_pr_rewards=individual_av_components(action=1,goals_reward=rewards,goals_ratio=ratios_action1_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)
                                pr_a1p.append(action1_pr_probabilities)
                                pr_a1r.append(action1_pr_rewards)
                                
                                action0_pr_probabilities,action0_pr_rewards=individual_av_components(action=0,goals_reward=rewards,goals_ratio=ratios_action0_pr,sr_matrix=sr_matrix,
                                                       pr_matrix=pr_matrix,state_map=state_dictionary,
                                               one_or_many=o_m,one_type=one_t,sr_or_pr='pr',neglect_reward=n_rwd,
                                              neglect_rare_transition=n_rare,inverse_prior_confidence=0,sr_pr_weighting=0.5,range_adapt=norm,nr_t=neg_rwd_thresh)

                         
                                pr_a0p.append(action0_pr_probabilities)
                                pr_a0r.append(action0_pr_rewards)
                        
                            for i in range(4):
                                df_queries3['pr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1p[j][i] for j in range(4)]
                                df_queries3['pr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0p[j][i] for j in range(4)]
                                df_queries3['pr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a1r[j][i] for j in range(4)]
                                df_queries3['pr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[pr_a0r[j][i] for j in range(4)]

                                df_queries3['sr_{}_{}_{}_{}_{}_prob{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1p[j][i] for j in range(4)]
                                df_queries3['sr_{}_{}_{}_{}_{}_prob{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0p[j][i] for j in range(4)]
                                df_queries3['sr_{}_{}_{}_{}_{}_rwd{}_action1'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a1r[j][i] for j in range(4)]
                                df_queries3['sr_{}_{}_{}_{}_{}_rwd{}_action0'.format(o_m,n_rwd,n_rare,norm,one_t,i)]=[sr_a0r[j][i] for j in range(4)]




    all_dfs['sub_{}'.format(counter)]=pd.concat([df_queries,df_queries2,df_queries3])
    counter+=1

# Stack every subject's table into a single frame with a fresh 0..n-1 index.
dall = pd.concat(list(all_dfs.values())).reset_index(drop=True)

# Keep only the model-derived columns (names starting with 'pr_' or 'sr_'),
# followed by the subject identifier and the recorded choices.
retain_cols = ['sub_num', 'choices']
good_cols = [col for col in dall.columns if col.startswith(('pr_', 'sr_'))]
good_cols.extend(['sub_num', 'choices'])
dall = dall[good_cols]

# Split the rows on the value of the first PR reward component for action 1:
# rows where it equals 46 or 52 go to the first file, all others to the second.
# NOTE(review): 46 and 52 presumably identify the "PR vs. guessing" queries --
# confirm against the task's reward schedule.
is_46_or_52 = dall['pr_many____ev_rwd0_action1'].isin([46, 52])
filtered_df1 = dall.loc[is_46_or_52, :]
filtered_df2 = dall.loc[~is_46_or_52, :]

# Export both subsets for the downstream model fitting.
filtered_df1.to_csv('all_subs_Study1_actionvaluesrounded_PRvsGuessing.csv')
filtered_df2.to_csv('all_subs_Study1_actionvaluesrounded_restofqueries.csv', encoding="utf-8")


61460488fc1bdf4d2c8ce1db_pr_task_fullstudy1_2022-08-24_19h15.49.919.csv
5e57a0020c3c6a14a1624031_pr_task_fullstudy1_2022-08-24_20h29.44.549.csv
5b222aff59f9620001c109cb_pr_task_fullstudy1_2022-08-25_00h45.42.398.csv
60ca1c436511b9fc8ab35615_pr_task_fullstudy1_2022-08-24_21h21.17.861.csv
5e0fac24500f066c547f6376_pr_task_fullstudy1_2022-08-24_14h16.08.280.csv
58404fd8ad98e40001ce915f_pr_task_fullstudy1_2022-08-24_19h18.19.531.csv
5d811e6ab395350018786d78_pr_task_fullstudy1_2022-08-24_21h16.20.244.csv
60951a228692926022c15fbd_pr_task_fullstudy1_2022-08-24_19h15.32.838.csv
62e965d4ab0caa404c95a439_pr_task_fullstudy1_2022-08-24_19h15.35.456.csv
5f07759f859dcb38b8d4c6b5_pr_task_fullstudy1_2022-08-24_21h15.46.754.csv
5f4e97aae4fe380a7bad99c7_pr_task_fullstudy1_2022-08-24_20h07.57.755.csv
5cc7443b836ddc0016fec706_pr_task_fullstudy1_2022-08-24_19h24.23.461.csv
5fd7782dee03dc08d3f3f491_pr_task_fullstudy1_2022-08-25_01h04.37.751.csv
60fd5adad03767cff6dddda8_pr_task_fullstudy1_2022-08-24_19h17.14.

# Run Analysis in R

- Use the file model_fitting.R and then, immediately afterwards, model_fitting_rest.R (both in this directory) to run the model comparison. This uses BRM's bridgesampling algorithm to obtain reliable estimates of each model's log likelihood.
- This will need to be done externally in an R terminal
