# Generate simulated data
This notebook simulates data from cognive computaional models: 4-P RL, HRL, and GLM-HMM

To simulate data from meta RL with dyanmic noise, we use the open source [code](https://github.com/jl3676/dynamic_noise_estimation/tree/main/Dynamic_Foraging/code) from [Li et.al. 2024](https://doi.org/10.1016/j.jmp.2024.102842)

In [1]:
import numpy as np
import pandas as pd
import tqdm
import random

from prl.simulate_4prl import simulate_4prl
from hrl.simulate_hrl import simulate_hrl

# 4-P RL Simulation

Simulate data from a 4-parameter reinforcement learning model on two-armed bandit task
 

In [2]:
N_AGENTS = 100
num_trials = 720

train_data_list = []
for a in tqdm.tqdm(range(N_AGENTS)):
    rand_beta = random.uniform(1e-4, 10)
    rand_alpha = random.uniform(1e-4, 1)
    rand_neg_alpha = random.uniform(1e-4, 1)
    rand_stickiness = random.uniform(0, 1)

    parameters = [rand_beta, rand_alpha, rand_neg_alpha, rand_stickiness]
    data = simulate_4prl(parameters, num_trials, 0.8, 15, 2, a)
    train_data_list.append(data)

train_df = pd.concat(train_data_list)
train_df.head(3)

100%|██████████| 100/100 [00:01<00:00, 62.54it/s]


Unnamed: 0,agentid,actions,correct_actions,rewards,isswitch,iscorrectaction,trials,rpe_history,unchosen_rpe_history,alpha,beta,neg_alpha,stickiness
0,0,1,1,1,0,1,0,0.5,-0.5,0.79334,8.908567,0.779529,0.975354
1,0,1,1,1,0,1,1,0.10333,-0.10333,0.79334,8.908567,0.779529,0.975354
2,0,1,1,1,0,1,2,0.021354,-0.021354,0.79334,8.908567,0.779529,0.975354


In [None]:
# Save the data
train_df.to_csv(f"../data/4prl_{N_AGENTS}a_{num_trials}t.csv")

# HRL simulation

Simulate a hierarchical reinforcement learning model (HRL) with a dynamic decision making task.

In [3]:
N_AGENTS = 100
num_trials = 720

train_data_list = []
for a in tqdm.tqdm(range(N_AGENTS)):
    # simulate N_AGENTS agents with randomly sampled alphas and betas
    rand_alpha = random.uniform(0.4, 0.7)
    rand_beta = random.uniform(1, 10)
    # Fixed stickiness to 0
    parameters = [rand_beta, rand_alpha, 0, 0]
    data = simulate_hrl(parameters, num_trials, [0.1, 0.9], 0.05, 3, a)
    train_data_list.append(data)

train_df = pd.concat(train_data_list)
train_df.head(3)

100%|██████████| 100/100 [00:02<00:00, 38.95it/s]


Unnamed: 0,agentid,correctcue,rewards,isswitch,iscorrectcue,trials,rpe_history,chosen_qv,chosenside,chosencue,correctruleiteration,alpha,stickiness,allstims0,allstims1,allstims2,beta,qv0,qv1,qv2
0,0,1,1,0,1,0,0.666667,0.333333,0,1,0,0.463772,0,1,0,0,2.897685,0.333333,0.333333,0.333333
1,0,1,1,0,1,1,0.357486,0.642514,0,1,1,0.463772,0,0,0,1,2.897685,0.178743,0.642514,0.178743
2,0,1,1,0,1,2,0.191694,0.808306,1,1,2,0.463772,0,0,1,1,2.897685,0.095847,0.808306,0.095847


In [None]:
# Save the data
train_df.to_csv(f"../data/hrl_{N_AGENTS}a_{num_trials}t.csv")