In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from abc import ABC, abstractmethod

In [2]:
class Model(ABC):
    """Abstract base for per-subject Rescorla-Wagner models with softmax choice.

    Subclasses implement ``loss`` (objective for one subject's trials) and
    ``minimize_loss`` (the optimiser call). ``fit`` runs ``minimize_loss`` once
    per subject and stacks the fitted parameters into one DataFrame.
    """

    def __init__(self):
        # Init model fit bounds
        self.epsilon_bounds = (0.0000001, 0.99999)
        self.beta_bounds = (0.0001, 9.9999)
        self.bias_bounds = (-0.99999, 0.99999)
        self.p_bounds = (0.0001, 0.9999)

    @staticmethod
    def softmax(x):
        """Return the softmax of a 1-D array of action values.

        The maximum is subtracted before exponentiating so that large inputs
        cannot overflow ``np.exp`` and turn the result into NaN; the result is
        mathematically unchanged.
        """
        x = np.asarray(x, dtype=float)
        if x.size == 0:
            return x
        exp = np.exp(x - np.max(x))
        return exp / np.sum(exp)

    @staticmethod
    def rescorla_wagner(q_val, epsilon_rew, epsilon_pun, epsilon_omi, reward):
        """Move ``q_val`` towards ``reward`` by a sign-dependent learning rate.

        ``epsilon_rew`` is used when ``reward > 0``, ``epsilon_pun`` when
        ``reward < 0`` and ``epsilon_omi`` when ``reward == 0``.
        """
        if reward > 0:
            return q_val + epsilon_rew * (reward - q_val)
        if reward < 0:
            return q_val + epsilon_pun * (reward - q_val)
        return q_val + epsilon_omi * (reward - q_val)

    @staticmethod
    def reward(r_t, rho_rew, rho_pun):
        """Map a raw outcome to its subjective value by sign.

        Returns ``rho_rew`` for positive outcomes, ``rho_pun`` for negative
        outcomes (callers pass it already signed) and 0 otherwise.
        """
        if r_t > 0:
            return rho_rew
        if r_t < 0:
            return rho_pun
        return 0

    def log_likelihood(self, cues, actions, rewards, epsilon_rew, epsilon_pun, epsilon_omi, rho_rew, rho_pun, bias_wth, bias_app):
        """Log-likelihood of one subject's action sequence under the model.

        Args:
            cues: per-trial cue labels (any hashable values).
            actions: per-trial chosen action, used as a column index; index 0
                receives ``bias_wth`` and index 1 receives ``bias_app``.
            rewards: per-trial raw outcomes; only their sign is used.
            epsilon_rew, epsilon_pun, epsilon_omi: learning rates for positive,
                negative and zero outcomes.
            rho_rew, rho_pun: subjective values of positive / negative outcomes.
            bias_wth, bias_app: additive biases on the two action values.

        Returns:
            Sum over trials of the log probability of the chosen action.
        """
        # Give every distinct cue label its own Q-table row. Indexing rows by
        # ``cue - 1`` into ``len(set(cues))`` rows breaks as soon as a
        # subject's cue labels are not exactly 1..n.
        cue_rows = {cue: row for row, cue in enumerate(dict.fromkeys(cues))}

        # The bias vector has two entries, so the table must always have two
        # action columns. Sizing it from the actions a subject happened to use
        # makes a one-column table broadcast to both actions (or raise
        # IndexError) when only one action was ever chosen.
        biases = np.array([bias_wth, bias_app], dtype=float)
        q_vals = np.zeros((len(cue_rows), biases.size))

        log_likelihood = 0.0

        for t, a_t in enumerate(actions):
            s_t = cue_rows[cues[t]]
            r_t = self.reward(rewards[t], rho_rew, rho_pun)

            probs = self.softmax(q_vals[s_t] + biases)
            log_likelihood += np.log(probs[a_t])

            # Update the Q-values using Rescorla-Wagner
            q_vals[s_t, a_t] = self.rescorla_wagner(
                q_val = q_vals[s_t, a_t],
                epsilon_rew = epsilon_rew,
                epsilon_pun = epsilon_pun,
                epsilon_omi = epsilon_omi,
                reward = r_t
            )

        return log_likelihood

    @abstractmethod
    def loss(self, params, cues, actions, rewards):
        """Return the objective value (to be minimised) for ``params``."""
        pass

    @abstractmethod
    def minimize_loss(self, cues, actions, rewards):
        """Return ``(best_loss, one-row DataFrame of fitted parameters)``."""
        pass

    def fit(self, data):
        """Fit the model separately to every subject in ``data``.

        Args:
            data: DataFrame with columns ``ID``, ``cue``, ``pressed`` and
                ``outcome``.

        Returns:
            DataFrame with one row per subject holding the columns produced by
            ``minimize_loss`` plus ``ID`` and ``loss``; empty (with ``ID`` and
            ``loss`` columns) when ``data`` has no rows.
        """
        fit_result = []

        for subject_id in data.ID.unique():
            subject_data = data[data.ID == subject_id]

            loss, x = self.minimize_loss(
                subject_data.cue.tolist(),
                subject_data.pressed.tolist(),
                subject_data.outcome.tolist(),
            )

            x["ID"] = subject_id
            x["loss"] = loss
            fit_result.append(x)

        # pd.concat raises on an empty list, so handle "no subjects" explicitly.
        if not fit_result:
            return pd.DataFrame(columns=["ID", "loss"])

        return pd.concat(fit_result, ignore_index=True)

In [3]:
class Model1(Model):
    """Model with one learning rate and one symmetric outcome sensitivity.

    Parameters, in optimiser order: ``epsilon``, ``beta``. Both action biases
    are fixed at 0.
    """

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood of the trials for ``params = (epsilon, beta)``."""
        learning_rate, sensitivity = params
        model_args = dict(
            epsilon_rew=learning_rate,
            epsilon_pun=learning_rate,
            epsilon_omi=learning_rate,
            rho_rew=sensitivity,
            rho_pun=-sensitivity,
            bias_wth=0,
            bias_app=0,
        )
        return -self.log_likelihood(cues=cues, actions=actions, rewards=rewards, **model_args)

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 5)``.

        Returns:
            ``(minimum loss, one-row DataFrame with columns epsilon, beta)``.
        """
        start = [0.5, 5]
        search_box = [self.epsilon_bounds, self.beta_bounds]

        result = minimize(
            self.loss,
            start,
            args=(cues, actions, rewards),
            method="Nelder-Mead",
            bounds=search_box,
        )

        return result.fun, pd.DataFrame([result.x], columns=["epsilon", "beta"])

In [4]:
class Model2(Model):
    """Model with one learning rate and separate reward / punishment sensitivities.

    Parameters, in optimiser order: ``epsilon``, ``rho_rew``, ``rho_pun``. The
    optimiser searches a positive ``rho_pun``; the likelihood receives its
    negation and the reported value is negated as well.
    """

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for ``params = (epsilon, rho_rew, rho_pun)``."""
        learning_rate, reward_value, punish_magnitude = params
        model_args = dict(
            epsilon_rew=learning_rate,
            epsilon_pun=learning_rate,
            epsilon_omi=learning_rate,
            rho_rew=reward_value,
            rho_pun=-punish_magnitude,
            bias_wth=0,
            bias_app=0,
        )
        return -self.log_likelihood(cues=cues, actions=actions, rewards=rewards, **model_args)

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 5, 5)``.

        Returns:
            ``(minimum loss, one-row DataFrame)`` with columns ``epsilon``,
            ``rho_rew`` and ``rho_pun`` (the latter sign-flipped).
        """
        search_box = [self.epsilon_bounds, self.beta_bounds, self.beta_bounds]

        result = minimize(
            self.loss,
            [0.5, 5, 5],
            args=(cues, actions, rewards),
            method="Nelder-Mead",
            bounds=search_box,
        )

        # Report the punishment value with the sign the likelihood actually used.
        epsilon, rho_rew, rho_pun = result.x
        fit_params = pd.DataFrame(
            [[epsilon, rho_rew, -rho_pun]],
            columns=["epsilon", "rho_rew", "rho_pun"],
        )

        return result.fun, fit_params

In [5]:
class Model3(Model):
    """Model with outcome-specific learning rates and one symmetric sensitivity.

    Parameters, in optimiser order: ``epsilon_rew``, ``epsilon_pun``,
    ``epsilon_omi``, ``beta``. Both action biases are fixed at 0.
    """

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for the four-parameter vector ``params``."""
        rate_rew, rate_pun, rate_omi, sensitivity = params
        model_args = dict(
            epsilon_rew=rate_rew,
            epsilon_pun=rate_pun,
            epsilon_omi=rate_omi,
            rho_rew=sensitivity,
            rho_pun=-sensitivity,
            bias_wth=0,
            bias_app=0,
        )
        return -self.log_likelihood(cues=cues, actions=actions, rewards=rewards, **model_args)

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 0.5, 0.5, 5)``.

        Returns:
            ``(minimum loss, one-row DataFrame)`` with columns ``epsilon_rew``,
            ``epsilon_pun``, ``epsilon_omi`` and ``beta``.
        """
        # Three learning rates share the epsilon bounds; beta has its own.
        search_box = [self.epsilon_bounds] * 3 + [self.beta_bounds]

        result = minimize(
            self.loss,
            [0.5, 0.5, 0.5, 5],
            args=(cues, actions, rewards),
            method="Nelder-Mead",
            bounds=search_box,
        )

        column_names = ["epsilon_rew", "epsilon_pun", "epsilon_omi", "beta"]
        return result.fun, pd.DataFrame([result.x], columns=column_names)

In [6]:
class Model4(Model):
    """Model with one learning rate, one symmetric sensitivity and two action biases.

    Parameters, in optimiser order: ``epsilon``, ``beta``, ``bias_app``,
    ``bias_wth``.
    """

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for ``params = (epsilon, beta, bias_app, bias_wth)``."""
        learning_rate, sensitivity, approach_bias, withhold_bias = params
        model_args = dict(
            epsilon_rew=learning_rate,
            epsilon_pun=learning_rate,
            epsilon_omi=learning_rate,
            rho_rew=sensitivity,
            rho_pun=-sensitivity,
            bias_wth=withhold_bias,
            bias_app=approach_bias,
        )
        return -self.log_likelihood(cues=cues, actions=actions, rewards=rewards, **model_args)

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 5, 0, 0)``.

        Returns:
            ``(minimum loss, one-row DataFrame)`` with columns ``epsilon``,
            ``beta``, ``bias_app`` and ``bias_wth``.
        """
        search_box = [
            self.epsilon_bounds,
            self.beta_bounds,
            self.bias_bounds,
            self.bias_bounds,
        ]

        result = minimize(
            self.loss,
            [0.5, 5, 0, 0],
            args=(cues, actions, rewards),
            method="Nelder-Mead",
            bounds=search_box,
        )

        column_names = ["epsilon", "beta", "bias_app", "bias_wth"]
        return result.fun, pd.DataFrame([result.x], columns=column_names)

In [7]:
class Model5(Model):
    """Model with one learning rate, separate reward / punishment sensitivities
    and two action biases.

    Parameters, in optimiser order: ``epsilon``, ``rho_rew``, ``rho_pun``,
    ``bias_app``, ``bias_wth``. The optimiser searches a positive ``rho_pun``;
    the likelihood receives its negation.
    """

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for the five-parameter vector ``params``."""
        epsilon, rho_rew, rho_pun, bias_app, bias_wth = params
        return -self.log_likelihood(
            cues = cues,
            actions = actions,
            rewards = rewards,
            epsilon_rew = epsilon,
            epsilon_pun = epsilon,
            epsilon_omi = epsilon,
            rho_rew = rho_rew,
            rho_pun = -rho_pun,
            bias_wth = bias_wth,
            bias_app = bias_app
        )

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 5, 5, 0, 0)``.

        Returns:
            ``(minimum loss, one-row DataFrame)`` with columns ``epsilon``,
            ``rho_rew``, ``rho_pun``, ``bias_app`` and ``bias_wth``;
            ``rho_pun`` is reported with the (negative) sign the likelihood
            actually used.
        """
        result = minimize(
            fun = self.loss,
            x0 = [0.5, 5, 5, 0, 0],
            bounds = [
                self.epsilon_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.bias_bounds,
                self.bias_bounds,
            ],
            args = (cues, actions, rewards),
            method = "Nelder-Mead"
        )

        fit_params = pd.DataFrame([result.x])
        fit_params.columns = ["epsilon", "rho_rew", "rho_pun", "bias_app", "bias_wth"]
        # loss() evaluates the model at -rho_pun, so report that signed value,
        # as Model2, Model7 and Model8 do for the same parameter.
        fit_params["rho_pun"] = -fit_params["rho_pun"]

        return result.fun, fit_params

In [8]:
class Model6(Model):
    """Model with one learning rate, action-specific reward / punishment
    sensitivities and two action biases.

    Parameters, in optimiser order: ``epsilon``, ``rho_rew_app``,
    ``rho_rew_wth``, ``rho_pun_app``, ``rho_pun_wth``, ``bias_app``,
    ``bias_wth``.
    """

    @staticmethod
    def reward(a_t, r_t, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth):
        """Map a raw outcome to its subjective value by action and sign.

        Positive outcomes return the matching ``rho_rew_*``; negative outcomes
        return the negated matching ``rho_pun_*``; anything else returns 0.
        Action 1 selects the ``*_app`` values and action 0 the ``*_wth`` values.
        """
        if a_t == 1 and r_t > 0: return rho_rew_app
        if a_t == 1 and r_t < 0: return -rho_pun_app

        if a_t == 0 and r_t > 0: return rho_rew_wth
        if a_t == 0 and r_t < 0: return -rho_pun_wth

        return 0

    def log_likelihood(self, cues, actions, rewards, epsilon_rew, epsilon_pun, epsilon_omi, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_wth, bias_app):
        """Log-likelihood of one subject's action sequence under the model.

        ``actions`` are used as column indices: index 0 receives ``bias_wth``
        and index 1 receives ``bias_app``. Returns the sum over trials of the
        log probability of the chosen action.
        """
        # Give every distinct cue label its own Q-table row. Indexing rows by
        # ``cue - 1`` into ``len(set(cues))`` rows breaks as soon as a
        # subject's cue labels are not exactly 1..n.
        cue_rows = {cue: row for row, cue in enumerate(dict.fromkeys(cues))}

        # The bias vector has two entries, so the table must always have two
        # action columns. Sizing it from the actions a subject happened to use
        # makes a one-column table broadcast to both actions (or raise
        # IndexError) when only one action was ever chosen.
        biases = np.array([bias_wth, bias_app], dtype=float)
        q_vals = np.zeros((len(cue_rows), biases.size))

        log_likelihood = 0.0

        for t, a_t in enumerate(actions):
            s_t = cue_rows[cues[t]]
            r_t = self.reward(a_t, rewards[t], rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth)

            probs = self.softmax(q_vals[s_t] + biases)
            log_likelihood += np.log(probs[a_t])

            # Update the Q-values using Rescorla-Wagner
            q_vals[s_t, a_t] = self.rescorla_wagner(
                q_val = q_vals[s_t, a_t],
                epsilon_rew = epsilon_rew,
                epsilon_pun = epsilon_pun,
                epsilon_omi = epsilon_omi,
                reward = r_t
            )

        return log_likelihood

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for the seven-parameter vector ``params``."""
        epsilon, rho_rew_app, rho_rew_wth, rho_pun_app, rho_pun_wth, bias_app, bias_wth = params
        return -self.log_likelihood(
            cues = cues,
            actions = actions,
            rewards = rewards,
            epsilon_rew = epsilon,
            epsilon_pun = epsilon,
            epsilon_omi = epsilon,
            rho_rew_app = rho_rew_app,
            rho_rew_wth = rho_rew_wth,
            rho_pun_app = rho_pun_app,
            rho_pun_wth = rho_pun_wth,
            bias_wth = bias_wth,
            bias_app = bias_app
        )

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 5, 5, 5, 5, 0, 0)``.

        Returns:
            ``(minimum loss, one-row DataFrame of fitted parameters)``; the two
            ``rho_pun_*`` columns are reported negated, matching the sign that
            ``reward`` applies to them.
        """
        result = minimize(
            fun = self.loss,
            x0 = [0.5, 5, 5, 5, 5, 0, 0],
            bounds = [
                self.epsilon_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.bias_bounds,
                self.bias_bounds
            ],
            args = (cues, actions, rewards),
            method = "Nelder-Mead"
        )

        fit_params = pd.DataFrame([result.x])
        fit_params.columns = ["epsilon", "rho_rew_app", "rho_rew_wth", "rho_pun_app", "rho_pun_wth", "bias_app", "bias_wth"]
        fit_params["rho_pun_app"] = -fit_params["rho_pun_app"]
        fit_params["rho_pun_wth"] = -fit_params["rho_pun_wth"]

        return result.fun, fit_params

In [9]:
class Model7(Model):
    """Model with action-specific learning rates, separate reward / punishment
    sensitivities and two action biases.

    Parameters, in optimiser order: ``epsilon_app``, ``epsilon_wth``,
    ``rho_rew``, ``rho_pun``, ``bias_app``, ``bias_wth``.
    """

    @staticmethod
    def rescorla_wagner(q_val, epsilon_app, epsilon_wth, action, reward):
        """Move ``q_val`` towards ``reward`` by an action-dependent learning rate.

        ``epsilon_app`` is used when ``action == 1``, ``epsilon_wth`` otherwise.
        """
        if action == 1:
            return q_val + epsilon_app * (reward - q_val)

        return q_val + epsilon_wth * (reward - q_val)

    def log_likelihood(self, cues, actions, rewards, epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_wth, bias_app):
        """Log-likelihood of one subject's action sequence under the model.

        ``actions`` are used as column indices: index 0 receives ``bias_wth``
        and index 1 receives ``bias_app``. Returns the sum over trials of the
        log probability of the chosen action.
        """
        # Give every distinct cue label its own Q-table row. Indexing rows by
        # ``cue - 1`` into ``len(set(cues))`` rows breaks as soon as a
        # subject's cue labels are not exactly 1..n.
        cue_rows = {cue: row for row, cue in enumerate(dict.fromkeys(cues))}

        # The bias vector has two entries, so the table must always have two
        # action columns. Sizing it from the actions a subject happened to use
        # makes a one-column table broadcast to both actions (or raise
        # IndexError) when only one action was ever chosen.
        biases = np.array([bias_wth, bias_app], dtype=float)
        q_vals = np.zeros((len(cue_rows), biases.size))

        log_likelihood = 0.0

        for t, a_t in enumerate(actions):
            s_t = cue_rows[cues[t]]
            r_t = self.reward(rewards[t], rho_rew, rho_pun)

            probs = self.softmax(q_vals[s_t] + biases)
            log_likelihood += np.log(probs[a_t])

            # Update the Q-values using Rescorla-Wagner
            q_vals[s_t, a_t] = self.rescorla_wagner(
                q_val = q_vals[s_t, a_t],
                epsilon_app = epsilon_app,
                epsilon_wth = epsilon_wth,
                action = a_t,
                reward = r_t
            )

        return log_likelihood

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for the six-parameter vector ``params``."""
        epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth = params
        return -self.log_likelihood(
            cues = cues,
            actions = actions,
            rewards = rewards,
            epsilon_app = epsilon_app,
            epsilon_wth = epsilon_wth,
            rho_rew = rho_rew,
            rho_pun = -rho_pun,
            bias_wth = bias_wth,
            bias_app = bias_app
        )

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 0.5, 5, 5, 0, 0)``.

        Returns:
            ``(minimum loss, one-row DataFrame of fitted parameters)``;
            ``rho_pun`` is reported negated, i.e. with the sign passed to the
            likelihood.
        """
        result = minimize(
            fun = self.loss,
            x0 = [0.5, 0.5, 5, 5, 0, 0],
            bounds = [
                self.epsilon_bounds,
                self.epsilon_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.bias_bounds,
                self.bias_bounds
            ],
            args = (cues, actions, rewards),
            method = "Nelder-Mead"
        )

        fit_params = pd.DataFrame([result.x])
        fit_params.columns = ["epsilon_app", "epsilon_wth", "rho_rew", "rho_pun", "bias_app", "bias_wth"]
        fit_params["rho_pun"] = -fit_params["rho_pun"]

        return result.fun, fit_params

In [10]:
class Model8(Model):
    """Model with action-specific learning rates, separate reward / punishment
    sensitivities, two action biases and a value-dependent bonus ``p``.

    Parameters, in optimiser order: ``epsilon_app``, ``epsilon_wth``,
    ``rho_rew``, ``rho_pun``, ``bias_app``, ``bias_wth``, ``p``.
    """

    @staticmethod
    def rescorla_wagner(q_val, epsilon_app, epsilon_wth, action, reward):
        """Move ``q_val`` towards ``reward`` by an action-dependent learning rate.

        ``epsilon_app`` is used when ``action == 1``, ``epsilon_wth`` otherwise.
        """
        if action == 1:
            return q_val + epsilon_app * (reward - q_val)

        return q_val + epsilon_wth * (reward - q_val)

    def log_likelihood(self, cues, actions, rewards, epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_wth, bias_app, p):
        """Log-likelihood of one subject's action sequence under the model.

        ``actions`` are used as column indices: index 0 receives ``bias_wth``
        and index 1 receives ``bias_app``. On each trial ``p`` is added to
        action 0 when the largest learned Q-value for the cue is negative and
        to action 1 when it is positive. Returns the sum over trials of the
        log probability of the chosen action.
        """
        # Give every distinct cue label its own Q-table row. Indexing rows by
        # ``cue - 1`` into ``len(set(cues))`` rows breaks as soon as a
        # subject's cue labels are not exactly 1..n.
        cue_rows = {cue: row for row, cue in enumerate(dict.fromkeys(cues))}

        # The bias vector has two entries, so the table must always have two
        # action columns. Sizing it from the actions a subject happened to use
        # makes a one-column table broadcast to both actions (or raise
        # IndexError) when only one action was ever chosen.
        biases = np.array([bias_wth, bias_app], dtype=float)
        q_vals = np.zeros((len(cue_rows), biases.size))

        log_likelihood = 0.0

        for t, a_t in enumerate(actions):
            s_t = cue_rows[cues[t]]
            r_t = self.reward(rewards[t], rho_rew, rho_pun)

            # The sum is a fresh array, so the bonus below never leaks into q_vals.
            qs = q_vals[s_t] + biases

            # The bonus depends on the learned values only, not on the biases.
            max_q = np.max(q_vals[s_t])
            if max_q < 0:
                qs[0] += p
            elif max_q > 0:
                qs[1] += p

            probs = self.softmax(qs)
            log_likelihood += np.log(probs[a_t])

            # Update the Q-values using Rescorla-Wagner
            q_vals[s_t, a_t] = self.rescorla_wagner(
                q_val = q_vals[s_t, a_t],
                epsilon_app = epsilon_app,
                epsilon_wth = epsilon_wth,
                action = a_t,
                reward = r_t
            )

        return log_likelihood

    def loss(self, params, cues, actions, rewards):
        """Negative log-likelihood for the seven-parameter vector ``params``."""
        epsilon_app, epsilon_wth, rho_rew, rho_pun, bias_app, bias_wth, p = params
        return -self.log_likelihood(
            cues = cues,
            actions = actions,
            rewards = rewards,
            epsilon_app = epsilon_app,
            epsilon_wth = epsilon_wth,
            rho_rew = rho_rew,
            rho_pun = -rho_pun,
            bias_wth = bias_wth,
            bias_app = bias_app,
            p = p
        )

    def minimize_loss(self, cues, actions, rewards):
        """Run bounded Nelder-Mead from ``(0.5, 0.5, 5, 5, 0, 0, 0.5)``.

        Returns:
            ``(minimum loss, one-row DataFrame of fitted parameters)``;
            ``rho_pun`` is reported negated, i.e. with the sign passed to the
            likelihood.
        """
        result = minimize(
            fun = self.loss,
            x0 = [0.5, 0.5, 5, 5, 0, 0, 0.5],
            bounds = [
                self.epsilon_bounds,
                self.epsilon_bounds,
                self.beta_bounds,
                self.beta_bounds,
                self.bias_bounds,
                self.bias_bounds,
                self.p_bounds
            ],
            args = (cues, actions, rewards),
            method = "Nelder-Mead"
        )

        fit_params = pd.DataFrame([result.x])
        fit_params.columns = ["epsilon_app", "epsilon_wth", "rho_rew", "rho_pun", "bias_app", "bias_wth", "p"]
        fit_params["rho_pun"] = -fit_params["rho_pun"]

        return result.fun, fit_params

In [11]:
# Load the trial-level dataset. Model.fit reads its ID, cue, pressed and
# outcome columns; the path is relative to the notebook's working directory.
data = pd.read_csv("gen_data.csv")

In [12]:
# Fit Model1 (epsilon, beta) to every subject and total the per-subject losses.
model1 = Model1()
res = model1.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2866

2866.060590541449

In [13]:
# Fit Model2 (epsilon, rho_rew, rho_pun) to every subject and total the losses.
model2 = Model2()
res = model2.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2862

2862.9286280091674

In [14]:
# Fit Model3 (epsilon_rew, epsilon_pun, epsilon_omi, beta) to every subject
# and total the losses.
model3 = Model3()
res = model3.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2793

2792.919405767867

In [15]:
# Fit Model4 (epsilon, beta, bias_app, bias_wth) to every subject and total
# the losses.
model4 = Model4()
res = model4.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2736

2736.4129286559555

In [16]:
# Fit Model5 (epsilon, rho_rew, rho_pun, bias_app, bias_wth) to every subject
# and total the losses.
model5 = Model5()
res = model5.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2732

2732.855646686895

In [17]:
# Fit Model6 (action-specific rho_rew / rho_pun plus biases) to every subject
# and total the losses.
# NOTE(review): two totals were noted here, 2663 and 2655; the saved output is
# 2655.65 - confirm which run 2663 refers to.
model6 = Model6()
res = model6.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded: 2663 (2655)

2655.651541220959

In [18]:
# Fit Model7 (action-specific learning rates, rho_rew, rho_pun, biases) to
# every subject and total the losses.
model7 = Model7()
res = model7.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded run: 2682

2682.4008837954207

In [19]:
# Fit Model8 (Model7's parameters plus the bonus p) to every subject and total
# the losses.
# NOTE(review): the noted total is 2652 but the saved output is 2653.68 -
# confirm whether the note or the output is stale.
model8 = Model8()
res = model8.fit(data)
res["loss"].sum() # summed negative log-likelihood over subjects; recorded: 2652

2653.68236585408