In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [2]:
def softmax(x):
    """Return the softmax of ``x``: ``exp(x_i) / sum_j exp(x_j)``.

    The maximum of ``x`` is subtracted before exponentiating. This leaves the
    result mathematically unchanged but prevents ``np.exp`` from overflowing to
    ``inf`` (and the ratio from becoming ``nan``) when the inputs are large.

    Parameters
    ----------
    x : array-like of numbers
        Unnormalised scores. Normalisation is over all elements of ``x``.

    Returns
    -------
    numpy.ndarray
        Non-negative values with the same shape as ``x`` that sum to 1.
        An empty input yields an empty array.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return x

    # Shift so the largest exponent is exp(0) == 1, which cannot overflow.
    exp = np.exp(x - np.max(x))
    return exp / np.sum(exp)

In [3]:
def rescorla_wagner(q_val, epsilon_app, epsilon_wth, action, reward):
    """Move ``q_val`` towards ``reward`` by an action-dependent learning rate.

    The step size is ``epsilon_app`` when ``action == 1`` and ``epsilon_wth``
    for any other action. Returns ``q_val + rate * (reward - q_val)``.
    """
    learning_rate = epsilon_app if action == 1 else epsilon_wth
    prediction_error = reward - q_val
    return q_val + learning_rate * prediction_error

In [4]:
def reward(r_t, rho_rew, rho_pun):
    """Map a raw outcome to a signed magnitude based only on its sign.

    Returns ``rho_rew`` for a positive ``r_t``, ``-rho_pun`` for a negative
    ``r_t``, and ``0`` otherwise.
    """
    if r_t > 0:
        scaled = rho_rew
    elif r_t < 0:
        scaled = -rho_pun
    else:
        scaled = 0
    return scaled

In [5]:
def log_likelihood(cues, actions, rewards, epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth, p):
    """Log-likelihood of an action sequence under the Q-learning model of this notebook.

    For every trial the Q-values of the current cue are turned into action
    probabilities (Q + per-action bias + conditional bonus ``p``, then softmax),
    the log-probability of the action actually taken is accumulated, and the
    Q-value of that (cue, action) pair is updated with ``rescorla_wagner``.

    Parameters
    ----------
    cues : sequence of int
        1-based cue identifier per trial (``cue - 1`` is used as the row index).
    actions : sequence of int
        Action per trial, used as a column index (0 or 1). Index 0 receives
        ``bias_wth`` and index 1 receives ``bias_app``.
    rewards : sequence of numbers
        Raw outcome per trial; it is rescaled by ``reward`` before the update.
    epsilon_app, epsilon_wth : float
        Learning rates forwarded to ``rescorla_wagner``.
    rho_rew, rho_pun : float
        Outcome magnitudes forwarded to ``reward``.
    bias_app, bias_wth : float
        Constants added to the Q-value of action 1 / action 0 before the softmax.
    p : float
        Bonus added to action 0 when the cue's largest Q-value is negative and
        to action 1 when it is positive.

    Returns
    -------
    float
        Sum over trials of the log-probability of the chosen action
        (``0`` when there are no trials).
    """
    # The model always has exactly two actions (the bias vector below has two
    # entries). Deriving the count from the observed actions would create a
    # one-column table for a subject who only ever chose one action, and
    # indexing column 1 would then fail.
    n_actions = 2
    # Rows are addressed by ``cue - 1``, so the table must reach the largest
    # cue id even when some ids are missing from this subject's data.
    n_stimuli = max(cues, default=0)

    q_vals = np.zeros((n_stimuli, n_actions))
    biases = np.array([bias_wth, bias_app])

    total_log_lik = 0

    for t, a_t in enumerate(actions):
        s_t = cues[t] - 1
        r_t = reward(rewards[t], rho_rew, rho_pun)

        # Fresh array: the biases and bonus must not leak into the stored Q-values.
        qs = q_vals[s_t] + biases

        # The bonus depends on the sign of the best *unbiased* Q-value for this cue.
        max_q = np.max(q_vals[s_t])
        if max_q < 0: qs[0] += p
        if max_q > 0: qs[1] += p

        probs = softmax(qs)
        total_log_lik += np.log(probs[a_t])

        # Update the Q-values using Rescorla-Wagner
        q_vals[s_t, a_t] = rescorla_wagner(
            q_val = q_vals[s_t, a_t],
            epsilon_app = epsilon_app,
            epsilon_wth = epsilon_wth,
            action = a_t,
            reward = r_t
        )

    return total_log_lik

In [6]:
# Load the trial-level data set. The fitting cell further down reads the
# columns ID, cue, pressed and outcome from this frame.
data = pd.read_csv("gen_data.csv")

In [7]:
def loss(params, cues, actions, rewards):
    """Objective for the optimiser: the negated ``log_likelihood``.

    ``params`` must hold exactly seven values in the order
    (epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth, p).
    """
    epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth, p = params
    log_lik = log_likelihood(
        cues=cues,
        actions=actions,
        rewards=rewards,
        epsilon_app=epsilon_app,
        epsilon_wth=epsilon_wth,
        rho_rew=rho_rew,
        rho_pun=rho_pun,
        bias_app=bias_app,
        bias_wth=bias_wth,
        p=p,
    )
    return -log_lik

In [8]:
# (lower, upper) search bounds handed to the optimiser in the fitting cell.
# Applied to both learning rates (epsilon_app, epsilon_wth).
epsilon_bounds = (0.0000001, 0.99999)
# NOTE(review): despite the name, the fitting cell applies this to rho_rew and rho_pun.
beta_bounds = (0.0001, 9.9999)
# Applied to both action biases (bias_app, bias_wth).
bias_bounds = (-0.99999, 0.99999)
# Applied to the bonus parameter p.
p_bounds = (0.0001, 0.9999)

In [9]:
# Fit the seven model parameters separately for every subject and collect the
# minimised loss (negative log-likelihood) of each fit.
min_loss = []

# Order matches the unpacking in loss():
# epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth, p
initial_guess = [0.5, 0.5, 5, 5, 0, 0, 0.5]
param_bounds = [
    epsilon_bounds, epsilon_bounds,
    beta_bounds, beta_bounds,
    bias_bounds, bias_bounds,
    p_bounds,
]

for subject_id in data["ID"].unique():
    # All trials belonging to the current subject.
    subject = data.loc[data["ID"] == subject_id]

    cues = subject["cue"].tolist()
    actions = subject["pressed"].tolist()
    rewards = subject["outcome"].tolist()

    res = minimize(
        loss,
        initial_guess,
        args=(cues, actions, rewards),
        method="Nelder-Mead",
        bounds=param_bounds,
    )

    print(res.x)
    min_loss.append(res.fun)

[ 0.234688    0.21058039  2.34413325  2.26446764  0.18954365 -0.48864192
  0.3291652 ]
[ 0.35486004  0.3096695   3.06289315  2.26497166  0.42622588 -0.29440229
  0.77712429]
[ 0.2522245   0.30378664  2.88209545  2.71712513  0.02925667 -0.50757849
  0.60095064]
[ 0.31059788  0.21926427  2.96933118  2.19702767 -0.11332759 -0.7445545
  0.30444236]
[ 0.44644547  0.1039132   3.65664642  2.95614314 -0.09607697 -0.73031728
  0.15432255]
[ 0.32934028  0.18952329  3.69544525  2.33112058  0.33213251 -0.51053524
  0.78812315]
[ 0.15373457  0.16022389  3.3673331   2.55373356  0.10329098 -0.55173282
  0.70028521]
[ 0.34105913  0.16383747  2.85200691  2.50958413 -0.36488258 -0.99088649
  0.3653765 ]
[ 0.3235892   0.24311728  3.46181829  2.4590799   0.12881534 -0.62029656
  0.38871371]
[ 0.06790924  0.25666651  2.56885061  3.08415409  0.29902516 -0.17291557
  0.42423712]


In [10]:
# Total minimised loss (negative log-likelihood) summed over all fitted subjects.
# NOTE(review): an earlier inline note here read "2652", which does not match
# the recorded output of this cell (about 2653.68).
np.sum(min_loss)

2653.68236585408