In [None]:
"""
========================================================
Author:  Sevan Harootonian
Affiliation: Princeton University
Date: 2025-08-20
========================================================

Description:
------------
This notebook includes the following for experiment 3,
  - Model simulations to compute utility (about 45 min) [will use about 20gb of ram]
  - Model fitting to the data (about 30 seconds)
  - Model simulation using the fitted parameters (100 iteration ~5min)
  - Model fitting for model recovery (100 iteration about 2 hours)
  
========================================================
"""

In [None]:
# model simulation for exp 3 (this will take about 35 min and use about 20gb of ram)
import pandas as pd
import numpy as np
from functions.mdp_params import create_random_mdp_params,create_true_mdp_params
from functions.mentor import OptimalBayesianMentor, NaiveBayesianMentor, PriorOnlyMentor
from functions.graphWorld import coordToint, intTocoord
from functions.features import get_feature_reward_sum_rmTraj
from frozendict import frozendict
from collections import defaultdict

# Load the exp-3 simulation trials into a DataFrame.
sim = pd.DataFrame(pd.read_pickle('data/tasksetup/exp3_simtrials.pkl'))

# Pre-create every result column with a [0, 0] placeholder in each row; the
# per-trial loop further down overwrites each of these cells.
# (presumably the list placeholders exist to force object-dtype columns so a
# dict can later be stored per cell -- TODO confirm)
for result_col in ('feature_reward', 'U_obm', 'U_nbm', 'U_fr',
                   'U_rand', 'U_obm_max', 'inf_prob'):
    sim[result_col] = [[0,0]]*len(sim)

def softmax_probs(inversetemp,dict):
    """Return softmax probabilities over the entries of a value mapping.

    Parameters
    ----------
    inversetemp : float
        Inverse temperature multiplying every value before exponentiation.
        0 gives a uniform distribution; large values concentrate the mass on
        the highest-valued key(s).
    dict : mapping
        Maps each option to a numeric value.  The parameter name shadows the
        builtin but is kept so existing keyword callers keep working.

    Returns
    -------
    dict
        Maps every key of the input to its probability.  An empty input
        returns an empty dict.
    """
    if len(dict) == 0:
        return {}

    scaled = {e: inversetemp*val for e,val in dict.items()}
    # Subtract the largest exponent before calling np.exp.  The shift cancels
    # in the normalisation, but without it np.exp overflows to inf for large
    # inverse temperatures and inf/inf turns the probabilities into nan.
    shift = max(scaled.values())
    probs = {e: np.exp(s - shift) for e,s in scaled.items()}
    norm = sum(probs.values())
    return {e: p/norm for e,p in probs.items()}

def inference_values(data,keys):
    """Collapse weights over key-sets into a normalised per-key distribution.

    Parameters
    ----------
    data : mapping
        Maps an iterable of keys (a "key set") to a numeric weight.  Each
        weight is added to every key contained in its key set.
    keys : iterable
        Keys whose accumulated weight ``w`` is replaced by ``1 - w``.  A key
        that never appeared in ``data`` starts from 0 and therefore becomes 1.

    Returns
    -------
    collections.defaultdict
        Maps each key to its value divided by the total over all keys, so the
        values sum to 1.  Empty inputs give an empty mapping.
    """
    # Uses only the `data` argument; no notebook-level variable is read here.
    new_dict = defaultdict(float)
    for key_set, value in data.items():
        for key_tuple in key_set:
            new_dict[key_tuple] += value

    for key in keys:
        new_dict[key] = 1 - new_dict[key]

    # normalize: the total is taken once, before any entry is rescaled, so
    # every entry is divided by the same denominator.
    total = sum(new_dict.values())
    for key in new_dict:
        new_dict[key] = new_dict[key]/total
    return new_dict


# Per-trial simulation: for every row of `sim`, build the trial's MDP
# parameters, query both mentor models, and write the resulting utilities and
# policy-weighted utilities back into the frame.
for i, row in sim.iterrows():
    # Unique nodes appearing in the trial's connections, ordered by their
    # second coordinate in descending order.
    nodes = {node for edge in intTocoord(row.connections) for node in edge}
    traj = tuple(sorted(nodes, key=lambda x: x[1], reverse=True))
    goal_values = frozendict(intTocoord(row.goal_values))

    trial_param = create_true_mdp_params(0, 4)
    trial_param['goal_values'] = goal_values

    # Advice utilities under the optimal and the naive Bayesian mentor.
    obm_mentor = OptimalBayesianMentor(trial_param)
    OBM_advice_utility = obm_mentor.advice_dist([traj])

    nbm_mentor = NaiveBayesianMentor(trial_param)
    NBM_advice_utility = nbm_mentor.advice_dist([traj])

    graph_post = obm_mentor.mentee_graph_posterior([traj])
    inf_prob = inference_values(graph_post, trial_param['connections'])

    feature_reward_sum = get_feature_reward_sum_rmTraj(
        goal_values, trial_param['connections'], traj
    )

    sim.at[i, 'inf_prob'] = coordToint(inf_prob)
    sim.at[i, 'U_obm'] = coordToint(OBM_advice_utility)
    sim.at[i, 'U_nbm'] = coordToint(NBM_advice_utility)
    sim.at[i, 'feature_reward'] = coordToint(feature_reward_sum)

    # Read the stored dicts back once; everything below is derived from them.
    u_obm = sim.U_obm[i]
    feat_reward = sim.feature_reward[i]

    # Rescale each value dict so its entries sum to 1 before the softmax.
    U_obm_norm = sum(u_obm.values())
    U_obm_rescale = {key: val / U_obm_norm for key, val in u_obm.items()}

    U_fr_norm = sum(feat_reward.values())
    U_fr_rescale = {key: val / U_fr_norm for key, val in feat_reward.items()}

    # Three choice policies over the same options: near-greedy on OBM utility,
    # near-greedy on feature reward, and uniform (inverse temperature 0).
    obm_max_probs = softmax_probs(1000, U_obm_rescale)
    fr_probs = softmax_probs(1000, U_fr_rescale)
    rand_probs = softmax_probs(0, U_obm_rescale)

    # Each policy's probabilities are weighted by the (unscaled) OBM utility.
    sim.at[i, 'U_obm_max'] = {key: prob * u_obm[key] for key, prob in obm_max_probs.items()}
    sim.at[i, 'U_fr'] = {key: prob * u_obm[key] for key, prob in fr_probs.items()}
    sim.at[i, 'U_rand'] = {key: prob * u_obm[key] for key, prob in rand_probs.items()}


pd.to_pickle(sim, 'data/sim/exp3/exp3_modelsim.pickle')

In [None]:
import pandas as pd
import numpy as np
from functions.fitting import fitting_choices

# Fit the choice models to the test-block trials and store the fits.
preprocessed = pd.read_pickle('data/preprocessed/exp3/preprocessed_exp3.pkl')

# model label -> list of names handed to the fitting routine
# (presumably predictor column names -- TODO confirm against fitting_choices)
models = {
    "Reward": ["feature_reward_sum"],
    "OBM": ["U_obm"],
}

# Keep only rows whose block is 'test', with a fresh 0..n-1 index.
is_test_block = preprocessed.block == 'test'
test_data = preprocessed.loc[is_test_block].reset_index(drop=True)

df_fits_exp3 = fitting_choices(test_data, models, exp=3)
df_fits_exp3.to_pickle('data/preprocessed/exp3/df_fits_exp3.pkl')

100%|██████████| 759/759 [01:18<00:00,  9.71it/s]


In [1]:
import pandas as pd
import numpy as np
from functions.model_comparison import sample_multinomial_logit
from collections import defaultdict
from functions.functions import max_value_keys

In [None]:
# Posterior simulation: for every fitted subject and model, sample choices on
# the test trials with the fitted weights and score each choice against the
# best available OBM utility.  Results are collected as one record per
# (iteration, subject, model, trial) and pickled.
test_trials_id = ['test_0','test_1','test_2','test_3','test_4']
# Read from the same path the model-simulation cell writes its pickle to.
simdata = pd.read_pickle('data/sim/exp3/exp3_modelsim.pickle')

# Restrict to the test trials and rename the value columns to model labels.
rename_test_simdata_map = {
    "feature_reward": "Reward",
    "U_obm": "OBM",
}
test_simdata = (
    simdata[simdata.trial_id.isin(test_trials_id)]
    .reset_index(drop=True)
    .rename(columns=rename_test_simdata_map)
)

# Give the fitted-weight columns the same model labels.
rename_df_fits_map = {
    'feature_reward_sum': "Reward",
    "U_obm": "OBM",
}
df_fits = pd.read_pickle('data/preprocessed/exp3/df_fits_exp3.pkl').rename(
    columns=rename_df_fits_map
)

# model label -> columns (in both frames) that belong to that model
models = {
    'Reward': ["Reward"],
    "OBM": ["OBM"],
}

Subjects = df_fits.subjID.unique()
K = df_fits.model.unique()
iteration = 100

# The fitted weights depend only on (subject, model), so look them up once
# instead of re-filtering df_fits inside every iteration.
fitted_weights = {}
for subj in Subjects:
    subj_fits = df_fits[df_fits.subjID == subj].reset_index(drop=True)
    for k in K:
        fitted_weights[(subj, k)] = subj_fits[subj_fits.model == k][models[k]].values.flatten()

# NOTE(review): no random seed is set in this cell; if sample_multinomial_logit
# draws from a global RNG the simulated choices differ between runs -- confirm
# and seed if reproducibility is required.
simrows = []
for s in range(0,iteration):
    for subj in Subjects:
        for k in K:
            feature_weights = fitted_weights[(subj, k)]
            for i, row in test_simdata.iterrows():
                feature_value_dict = row[models[k]]
                choice = sample_multinomial_logit(feature_weights,feature_value_dict)
                # Utility of the sampled choice relative to the best OBM option.
                teaching_score = row.OBM[choice]/ row.OBM[max_value_keys(row.OBM)[0]]

                simrow= {
                    "iteration": s,
                    "subjID": subj,
                    "sim_model": k,
                    'trial_id' : row.trial_id,
                    "feature_weights": feature_weights.tolist(),
                    "choice": choice,
                    "teaching_score": teaching_score,
                }
                # Carry each model's per-option values along with the record.
                for model_name in models:
                    simrow[model_name] = row[model_name]

                simrows.append(simrow)

# Build the frame once from the accumulated records.
posterior_sim = pd.DataFrame.from_records(simrows)

posterior_sim.to_pickle('data/sim/exp3/exp3_posterior_sim_100.pkl')

In [None]:
# Reload the fitting module before importing from it, so the freshly loaded
# definition of model_recovery_fitting is the one used below.
import importlib
import functions.fitting
importlib.reload(functions.fitting)
from functions.fitting import model_recovery_fitting

# Model-recovery fit on the simulated data (uses `posterior_sim` and `models`
# defined by the previous cell).
sim_fits = model_recovery_fitting(posterior_sim, models)

# NOTE(review): the `_75` suffix differs from the `_100` suffix of the
# posterior-sim pickle and from `iteration = 100` -- confirm which iteration
# count this file is meant to reflect.
sim_fits.to_pickle('data/sim/exp3/exp3_simfits_75.pkl')

iterations: 100%|██████████| 75/75 [1:33:05<00:00, 74.47s/it]
