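**Results differ slightly:** the summed minimum loss computed in the last cell is ≈ 2655.63, whereas the value noted in that cell's comment is 2663.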

In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [2]:
def softmax(x):
    """Return the softmax of `x`, normalised over all of its elements.

    Parameters
    ----------
    x : array-like of numbers
        Unnormalised scores. Lists are accepted and converted to an array.

    Returns
    -------
    numpy.ndarray
        ``exp(x) / sum(exp(x))``, same shape as `x`. The entries sum to 1
        for finite, non-empty input.
    """
    x = np.asarray(x)
    if x.size == 0:
        # np.max raises on empty input; return an empty float array instead.
        return np.exp(x)

    # Subtracting the maximum does not change the result mathematically
    # (the common factor cancels in the ratio) but keeps np.exp from
    # overflowing to inf, which would otherwise turn the output into nan.
    exp = np.exp(x - np.max(x))
    return exp / np.sum(exp)

In [3]:
def rescorla_wagner(q_val, epsilon, reward):
    """Move `q_val` towards `reward` by a fraction `epsilon` of their difference.

    Returns the updated value ``q_val + epsilon * (reward - q_val)``.
    """
    prediction_error = reward - q_val
    return q_val + epsilon * prediction_error

In [4]:
def reward(a_t, r_t, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth):
    """Map a raw outcome to the scaled value used for learning.

    The scaling depends on the action taken and on the sign of the outcome:

    * action 1: ``rho_rew_app`` if ``r_t > 0``, ``-rho_pun_app`` if ``r_t < 0``
    * action 0: ``rho_rew_wth`` if ``r_t > 0``, ``-rho_pun_wth`` if ``r_t < 0``

    Any other combination (zero outcome, or an action other than 0 or 1)
    yields 0.
    """
    # Select the (positive-outcome, negative-outcome) scale pair for this action.
    if a_t == 1:
        gain, penalty = rho_rew_app, rho_pun_app
    elif a_t == 0:
        gain, penalty = rho_rew_wth, rho_pun_wth
    else:
        return 0

    if r_t > 0:
        return gain
    if r_t < 0:
        return -penalty
    return 0

In [5]:
def log_likelihood(cues, actions, rewards, epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app, bias_wth):
    """Log-likelihood of a sequence of binary choices under a Q-learning model.

    For every trial the two action values of the current cue, each shifted by
    its bias (`bias_wth` for action 0, `bias_app` for action 1), are passed
    through `softmax`; the log-probability of the action actually taken is
    accumulated. The chosen action's value is then updated with
    `rescorla_wagner` using the outcome rescaled by `reward`.

    Parameters
    ----------
    cues : sequence of int
        1-based cue identifier per trial (row ``cue - 1`` of the value table).
    actions : sequence of int
        Action taken per trial; must be 0 or 1.
    rewards : sequence of numbers
        Raw outcome per trial; only its sign is used by `reward`.
    epsilon : float
        Learning rate passed to `rescorla_wagner`.
    rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth : float
        Outcome scaling factors passed to `reward`.
    bias_app, bias_wth : float
        Additive biases for action 1 and action 0 respectively.

    Returns
    -------
    float
        Sum over trials of the log-probability of the chosen action
        (0 when there are no trials).
    """
    # The choice rule below always compares exactly two actions (0 and 1), so
    # the table needs two columns even if this subject only ever used one of
    # them; sizing it from the observed actions would make column 1 missing.
    n_actions = 2
    # Rows are addressed by `cue - 1`, so size the table by the largest cue id
    # rather than by the number of distinct cues (which is too small when the
    # observed ids are not contiguous from 1).
    n_stimuli = int(max(cues, default=0))

    q_vals = np.zeros((n_stimuli, n_actions))

    total_log_lik = 0

    for t, a_t in enumerate(actions):
        s_t = cues[t] - 1
        r_t = reward(a_t, rewards[t], rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth)

        # Biased action values for the current cue, ordered by action index.
        qs = [
            q_vals[s_t, 0] + bias_wth,
            q_vals[s_t, 1] + bias_app
        ]

        probs = softmax(qs)
        total_log_lik += np.log(probs[a_t])

        # Update the Q-values using Rescorla-Wagner (chosen action only)
        q_vals[s_t, a_t] = rescorla_wagner(
            q_val = q_vals[s_t, a_t],
            epsilon = epsilon,
            reward = r_t
        )

    return total_log_lik

In [6]:
# Load the trial data from gen_data.csv (path relative to the working directory).
# The fitting cell below reads its ID, cue, pressed and outcome columns.
data = pd.read_csv("gen_data.csv")

In [7]:
def loss(params, cues, actions, rewards):
    """Objective for the optimizer: the negative log-likelihood of the data.

    `params` must unpack into exactly seven values, in this order:
    epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app,
    bias_wth.
    """
    (
        epsilon,
        rho_rew_app,
        rho_rew_wth,
        rho_pun_app,
        rho_pun_wth,
        bias_app,
        bias_wth,
    ) = params

    ll = log_likelihood(
        cues,
        actions,
        rewards,
        epsilon=epsilon,
        rho_rew_app=rho_rew_app,
        rho_rew_wth=rho_rew_wth,
        rho_pun_app=rho_pun_app,
        rho_pun_wth=rho_pun_wth,
        bias_app=bias_app,
        bias_wth=bias_wth,
    )
    return -ll

In [8]:
# (lower, upper) box constraints passed to the optimizer, one pair per
# parameter kind.
epsilon_bounds = (1e-7, 0.99999)      # learning rate epsilon
beta_bounds = (1e-4, 9.9999)          # applied to each of the four rho_* scaling parameters
bias_bounds = (-0.99999, 0.99999)     # applied to bias_app and bias_wth

In [9]:
# Fit the seven model parameters separately for every subject and collect
# each subject's minimised loss (negative log-likelihood).
min_loss = []

for subject_id in data["ID"].unique():
    subject = data[data["ID"] == subject_id]

    # This subject's trial columns as plain Python lists.
    cues, actions, rewards = (
        subject[column].tolist() for column in ("cue", "pressed", "outcome")
    )

    # Parameter order matches `loss`: epsilon, four rho_* values, two biases.
    res = minimize(
        loss,
        [0.5, 5, 5, 5, 5, 0, 0],
        args=(cues, actions, rewards),
        method="Nelder-Mead",
        bounds=[epsilon_bounds] + [beta_bounds] * 4 + [bias_bounds] * 2,
    )

    print(res.x)
    min_loss.append(res.fun)

[ 0.23126032  2.35014527  2.30986205  2.78507054  1.52872593  0.67549142
 -0.31348123]
[ 0.31118584  3.96672608  1.57475076  2.47550941  2.27967553 -0.02567013
 -0.28261337]
[ 0.28249697  3.05124282  2.52912831  3.5406952   1.97720311  0.20598825
 -0.61083067]
[0.23649403 3.58446342 2.54614625 2.35614193 2.03546492 0.8886435
 0.397028  ]
[0.26886781 3.82725318 2.99440204 3.11309162 2.13773637 0.64524068
 0.05388905]
[ 0.16879175  4.1968775   2.81954982  3.17251544  1.76025512  0.79204917
 -0.03500612]
[ 0.14921641  3.78877198  2.98769066  3.93449501  1.63411243  0.85632597
 -0.11195161]
[ 0.2778251   3.50588213  2.24718734  2.80416015  1.94464143 -0.35128919
 -0.90558005]
[ 0.28503391  3.98673635  3.10357915  2.91499872  1.99049676  0.65622433
 -0.15573681]
[0.31116105 2.89384516 1.93251131 2.56168422 2.45152467 0.61237053
 0.16921546]


In [10]:
# Total of the per-subject minimised losses collected in min_loss.
# NOTE(review): 2663 is presumably the reference total this fit is being
# compared against — confirm where that number comes from.
np.sum(min_loss) # 2663

2655.6331605212113