**Results differ slightly**

In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [2]:
def softmax(x):
    exp = np.exp(x)
    return exp / np.sum(exp)

In [3]:
def rescorla_wagner(q_val, epsilon, reward):
    return q_val + epsilon * (reward - q_val)

In [4]:
def reward(a_t, r_t, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth):
    if a_t == 1 and r_t > 0: return rho_rew_app
    if a_t == 1 and r_t < 0: return -rho_pun_app
    
    if a_t == 0 and r_t > 0: return rho_rew_wth
    if a_t == 0 and r_t < 0: return -rho_pun_wth

    return 0

In [5]:
def bias(a, bias_app, bias_wth):
    return bias_app if a == 1 else bias_wth

In [6]:
def log_likelihood(cues, actions, rewards, epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app, bias_wth):
    n_stimuli = len(set(cues))
    n_actions = len(set(actions))

    q_vals = np.zeros((n_stimuli, n_actions))
    w_vals = np.zeros((n_stimuli, n_actions))

    log_likelihood = 0

    for t, a_t in enumerate(actions):
        s_t = cues[t] - 1
        r_t = reward(a_t, rewards[t], rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth)

        probs = softmax(w_vals[s_t])
        log_likelihood += np.log(probs[a_t])

        # Update the Q-values using Rescorla-Wagner, add action bias
        q_vals[s_t, a_t] = rescorla_wagner(
            q_val = q_vals[s_t, a_t],
            epsilon = epsilon,
            reward = r_t
        )

        w_vals[s_t, a_t] = q_vals[s_t, a_t] + bias(a_t, bias_app, bias_wth)
        w_vals[s_t, 1 - a_t] = q_vals[s_t, 1 - a_t] + bias(1 - a_t, bias_app, bias_wth)

    return log_likelihood

In [7]:
data = pd.read_csv("gen_data.csv")

In [8]:
def loss(params, cues, actions, rewards):
    epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app, bias_wth = params
    return -log_likelihood(cues, actions, rewards, epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app, bias_wth)

In [9]:
epsilon_bounds = (0.0000001, 0.99999)
beta_bounds = (0.0001, 9.9999)
bias_bounds = (-0.99999, 0.99999)

In [10]:
min_loss = []

for subject_id in data.ID.unique():
    subject = data[data.ID == subject_id]

    cues = subject.cue.tolist()
    actions = subject.pressed.tolist()
    rewards = subject.outcome.tolist()

    res = minimize(
        fun = loss,
        x0 = [0.5, 5, 5, 5, 5, 0, 0],
        bounds = [epsilon_bounds, beta_bounds, beta_bounds, beta_bounds, beta_bounds, bias_bounds, bias_bounds],
        args = (cues, actions, rewards),
        method = "Nelder-Mead"
    )
    
    print(res.x)
    min_loss.append(res.fun)

[ 0.24028048  1.94550321  2.63023797  3.03945437  1.18688565  0.70708722
 -0.61957934]
[ 0.31112032  3.93078425  1.61406785  2.50512631  2.25464739  0.19501821
 -0.09355719]
[0.28190804 3.08006806 2.49137707 3.50583053 2.00679922 0.95287406
 0.17043185]
[1.98451896e-01 4.32918898e+00 2.07067780e+00 1.87008634e+00
 2.47761686e+00 4.10640116e-03 5.52650077e-03]
[ 2.62592929e-01  3.94045856e+00  2.92705829e+00  3.05948873e+00
  2.22086849e+00  5.18886383e-01 -5.53390371e-04]
[ 0.1706286   4.07407681  2.93859629  3.296823    1.6697872   0.58208497
 -0.35201723]
[ 0.13726235  4.08685257  2.73562809  3.66084878  1.86453297 -0.28576216
 -0.99966522]
[ 0.27818418  3.56588713  2.19045576  2.75430613  1.99274451  0.17910234
 -0.32301122]
[ 0.28869302  3.85631377  3.24173536  3.03229108  1.87838331  0.76571965
 -0.17687139]
[ 0.31291659  2.80966551  2.00913213  2.62868546  2.38778215  0.14744933
 -0.36984791]


In [11]:
np.sum(min_loss) # 2663

2654.852286109379