In [36]:
%load_ext autoreload
%autoreload 2

import numpy as np
from models.td_model import TdModel, calc_values_for_td_model
from model_fit_utils import fit_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Test TD-model values look reasonable

In [14]:
# Hand-written four-trial sequence: one stimulus index and one reward per trial.
stimuli_idxs = np.asarray([0, 2, 2, 0])
rewards = np.asarray([-3, 0, -3, -3])

# Run the TD model over the toy sequence with alpha = 0.2.
model = calc_values_for_td_model(
    alpha=0.2,
    stimuli_idxs=stimuli_idxs,
    rewards=rewards,
)

In [15]:
# Pin the expected value table instead of relying on eyeballing the output.
# These numbers correspond to the four-trial toy input (alpha=0.2) defined in
# the cell above: the table starts as a row of zeros and gains one row per
# trial, with one column per stimulus index.
expected_values = np.array([
    [ 0.  ,  0.  ,  0.  ],
    [-0.6 ,  0.  ,  0.  ],
    [-0.6 ,  0.  ,  0.  ],
    [-0.6 ,  0.  , -0.6 ],
    [-1.08,  0.  , -0.6 ],
])
np.testing.assert_allclose(model.values, expected_values, rtol=1e-6, atol=1e-12)

# Still display the table for visual inspection.
model.values

array([[ 0.  ,  0.  ,  0.  ],
       [-0.6 ,  0.  ,  0.  ],
       [-0.6 ,  0.  ,  0.  ],
       [-0.6 ,  0.  , -0.6 ],
       [-1.08,  0.  , -0.6 ]])

### Test TD-model fitting recovers the learning rate

In [18]:
# Ground-truth alpha; the fitting step below should recover this value.
alpha = 0.2

# Simulate 100 trials with a seeded generator so the run is repeatable:
# each trial gets a stimulus index from {0, 1, 2} and a reward of -3 or 0.
rng = np.random.default_rng(10)
stimuli_idxs = rng.choice([0, 1, 2], size=100)
rewards = rng.choice([-3, 0], size=100)

# Build the ground-truth model from the simulated trials.
ground_truth_model = calc_values_for_td_model(
    alpha=alpha,
    stimuli_idxs=stimuli_idxs,
    rewards=rewards,
)

In [38]:
# Build the behavioral response to fit on from the ground-truth value table:
# drop its last row, then for every remaining row pick the column of the
# stimulus index recorded for that trial.
trial_vals = ground_truth_model.values[:-1]
behavior = trial_vals[np.arange(trial_vals.shape[0]), stimuli_idxs]

In [43]:
# Fit the model to the simulated behavior, starting from an initial guess of 0.5.
res = fit_model(
    stimuli_idxs,
    rewards,
    behavior,
    0.5,
)

In [42]:
# Check, rather than only display, that the optimizer reported success and
# that the fitted parameter (res.x) matches the ground-truth learning rate.
assert res.success, res.message
np.testing.assert_allclose(res.x, [alpha], atol=1e-3)

# show the full fit result; the recovered learning rate is the x param
res

 final_simplex: (array([[0.2       ],
       [0.20007813]]), array([6.50179312e-30, 2.83319564e-08]))
           fun: 6.501793121575534e-30
       message: 'Optimization terminated successfully.'
          nfev: 32
           nit: 16
        status: 0
       success: True
             x: array([0.2])