### Behavioral modelling

#### Assumptions:

1. experienced utility is some function of displayed reward magnitude: 
    - simple model: $u(x) = x$ 
    - complex model: $u(x) = x^{\delta} \quad \text{if} \quad x>0$ or $u(x) = -\gamma |x|^{\delta} \quad \text{if} \quad x<0$
2. values (reflecting beliefs about probability) are learned with simple delta learning rule (TD model):
    - $V(a_t)=V(a_{t-1})+ \alpha [R-V(a_{t-1})]$
3. choice probabilities are probabilistic functions of expected utilities:
    - degenerate model: $p(a)=\frac{EV(a)}{EV(a)+EV(b)}$ (parsimonious approach introduced by *Summerfield et al. 2011*)
    - full softmax model: $p(a)=\frac{\exp(\theta EV(a))}{\exp(\theta EV(a)) + \exp(\theta EV(b))}$

#### Free parameters:

- $\alpha \in [0, 1]$: learning rate (a learning rate above one half would make the model simply select the previously rewarded / not punished option, which is not realistic)
- $\gamma \in [0, \infty)$: loss aversion parameter
- $\delta \in [0, 1]$: risk aversion parameter
- $\theta \in [0, \infty)$: inverse temperature for softmax function

#### Model variations:

- model 1: (k=1 parameter) simple utility model, degenerate model for choice probabilities
- model 2: (k=2 parameters) simple utility model, full softmax model
- model 3: (k=4 parameters) complex utility model, full softmax model


In [None]:
def estimate_values(df, info, alpha):
    '''Implements TD learning model on experienced probabilistic outcomes.

    Both columns start at 0.5 and are updated after every trial with the
    delta rule V <- V + alpha * (target - V). The target for column 1 is
    (rwd + 1) / 2 and the target for column 0 is (-rwd + 1) / 2, so the two
    columns are driven towards complementary values.

    Args:
        df (pd.Dataframe): clean behavioral responses; only the 'rwd' column
            is read (presumably coded as -1 / +1 so that targets are 0 / 1
            -- confirm against the data loader)
        info (dict): task description; only info['n_trials'] is read and it
            sets the number of rows of the output
        alpha (float): learning rate

    Returns:
        val (np.array): reflects algorithm trialwise beliefs about
            probabilities that box will be rewarded / punished,
            shape (n_trials, 2)
    '''

    val = np.zeros((info['n_trials'], 2))
    val[0] = [.5, .5] # Initial beliefs (agnostic)

    # Iterate positionally over the outcomes: Series.iteritems() no longer
    # exists in pandas >= 2.0, and using index labels as row numbers only
    # works when the frame carries a 0..n-1 index. The last outcome is
    # skipped because there is no following trial to update.
    outcomes = df['rwd'].to_numpy()[:-1]

    for trial, rwd in enumerate(outcomes):
        val[trial+1, 1] = val[trial, 1] + alpha * ((rwd + 1)/2 - val[trial, 1])
        val[trial+1, 0] = val[trial, 0] + alpha * ((-rwd + 1)/2 - val[trial, 0])

    return val

def estimate_utilities(df, info, gamma=1, delta=1):
    '''Turns displayed reward magnitudes into experienced utilities.

    The utility of each box is |magnitude| ** delta, multiplied by -gamma
    when info['condition'] is 'pun' and by 1 for any other condition.

    Args:
        df (pd.Dataframe): clean behavioral responses; the 'magn_left' and
            'magn_right' columns are read
        info (dict): task description; 'n_trials' and 'condition' are read
        gamma (float): loss aversion parameter
        delta (float): risk aversion parameter

    Returns:
        util (np.array): trialwise utility estimates, shape (n_trials, 2),
            column 0 for the left box and column 1 for the right box
    '''

    # Punishment trials are scaled by -gamma, everything else is unscaled
    sign = (-1) * gamma if info['condition'] == 'pun' else 1

    util = np.zeros((info['n_trials'], 2))
    for col, magnitude in enumerate(('magn_left', 'magn_right')):
        util[:, col] = sign * np.power(np.abs(df[magnitude]), delta)

    return util

def estimate_choice_probability(df, val, util, kind='simple', theta=None):
    '''Implements decision rule converting expected values to choice
    probabilities.

    Expected value is the elementwise product of utility and learned value.
    Two decision rules are available:
        - 'simple': p = EV / (EV_left + EV_right); when the sum of all
          expected values is negative the columns are swapped, so that the
          less negative option receives the higher probability
        - 'softmax': p = exp(theta * EV) / sum(exp(theta * EV))

    Args:
        df (pd.Dataframe): clean behavioral responses (not read by this
            function)
        val (np.array): trialwise values, shape (n_trials, 2)
        util (np.array): trialwise utilities, shape (n_trials, 2)
        kind (str): decision rule, either 'simple' or 'softmax'
        theta (float): inverse temperature, required for kind='softmax'

    Returns:
        p (np.array): trialwise choice probabilities, shape (n_trials, 2)

    Raises:
        ValueError: if kind is not recognised, or if kind='softmax' is
            requested without theta
    '''

    # Calculate expected value for both options
    ev = np.multiply(util, val)

    if kind == 'simple':
        # NOTE: rows whose expected values sum to zero yield nan / inf here
        p = ev / np.sum(ev, axis=1)[:, np.newaxis]
        if np.sum(ev) < 0:
            p = np.fliplr(p)

    elif kind == 'softmax':
        if theta is None:
            raise ValueError("theta is required when kind='softmax'")
        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # exp() from overflowing to inf (and the ratio from becoming nan)
        # when theta * ev is large.
        scaled = theta * ev
        scaled = scaled - np.max(scaled, axis=1)[:, np.newaxis]
        weights = np.exp(scaled)
        p = weights / np.sum(weights, axis=1)[:, np.newaxis]

    else:
        raise ValueError(
            "kind must be 'simple' or 'softmax', got {!r}".format(kind))

    return p

def g_square(df, p):
    '''Calculate badness-of-fit quality measure. G-square is inversely
    related to log likelihood.

    The log likelihood is the sum, over trials, of the log probability the
    model assigned to the option that was actually chosen. A response of -1
    selects column 0 of p and a response of 1 selects column 1; trials with
    any other response value do not contribute.

    Args:
        df (pd.Dataframe): clean behavioral responses; only the 'response'
            column is read
        p (np.array): trialwise choice probabilities, one row per row of df,
            two columns

    Returns:
        (float): G-square, i.e. -2 times the log likelihood
    '''

    # Work on positions rather than index labels: Series.iteritems() no
    # longer exists in pandas >= 2.0, and labels only equal row numbers when
    # the frame carries a 0..n-1 index.
    resp = df['response'].to_numpy()
    p = np.asarray(p)

    chose_first = np.flatnonzero(resp == -1)
    chose_second = np.flatnonzero(resp == 1)

    ll = (np.sum(np.log(p[chose_first, 0]))
          + np.sum(np.log(p[chose_second, 1])))

    return (-2) * ll

### Behavioral Models #######################################################
def model1(df, info, alpha):
    '''One-parameter model: learning rate only.

    Values are learned with rate alpha, utilities use the default gamma and
    delta, and choice probabilities come from the 'simple' decision rule.
    '''

    return estimate_choice_probability(
        df,
        estimate_values(df, info, alpha),
        estimate_utilities(df, info),
        kind='simple',
    )
    
    
def model2(df, info, alpha, theta):
    '''Two-parameter model: learning rate and inverse temperature.

    Values are learned with rate alpha, utilities use the default gamma and
    delta, and choice probabilities come from the 'softmax' decision rule
    with inverse temperature theta.
    '''

    return estimate_choice_probability(
        df,
        estimate_values(df, info, alpha),
        estimate_utilities(df, info),
        kind='softmax',
        theta=theta,
    )

def model3(df, info, alpha, theta, gamma, delta):
    '''Four-parameter model: learning rate, inverse temperature, loss
    aversion and risk aversion.

    Params:
        df (pd.Dataframe): clean behavioral responses
        info (dict): task description forwarded to the estimation helpers
        alpha (float): learning rate
        theta (float): inverse softmax temperature
        gamma (float): loss aversion
        delta (float): risk aversion

    Returns:
        choice probabilities produced by the 'softmax' decision rule
    '''

    return estimate_choice_probability(
        df,
        estimate_values(df, info, alpha),
        estimate_utilities(df, info, gamma, delta),
        kind='softmax',
        theta=theta,
    )

### Show example model fit for models 1 and 2
Models are fitted to subject responses using $G^2$, which is a measure of badness-of-fit derived from the log likelihood. In this section models 1 and 2 are evaluated on a grid of parameter values sampled from the parameter space. Both task conditions are fitted separately for a single subject. Finally, the $G^2$ function is visualised in the parameter space.

In [None]:
N_grid = 100

# Evenly spaced grids over which G-square is evaluated
alpha = np.linspace(0, 1, N_grid)
theta = np.linspace(0, .5, N_grid)

# Row 0 of every fit array holds the reward condition, row 1 punishment
conditions = ((df_rew, info_rew), (df_pun, info_pun))

### Model 1 #################################################################
# fit1[c, i]: G-square of model 1 in condition c at alpha[i]
fit1 = np.zeros((2, N_grid))

for i, a in enumerate(alpha):
    for c, (df_c, info_c) in enumerate(conditions):
        fit1[c, i] = g_square(df_c, model1(df_c, info_c, a))

### Model 2 #################################################################
av, tv = np.meshgrid(alpha, theta)

# fit2[c, i, j]: G-square of model 2 in condition c at (alpha[i], theta[j])
fit2 = np.zeros((2, N_grid, N_grid))
for i, a in enumerate(alpha):
    for j, t in enumerate(theta):
        for c, (df_c, info_c) in enumerate(conditions):
            fit2[c, i, j] = g_square(df_c, model2(df_c, info_c, a, t))

In [None]:
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in
# matplotlib 3.6; use whichever name this installation provides.
style_name = 'seaborn-v0_8-ticks'
if style_name not in plt.style.available:
    style_name = 'seaborn-ticks'
plt.style.use(style_name)

# Figure 1: G-square of model 1 as a function of alpha, one line per condition
fig1, ax = plt.subplots(facecolor='w', figsize=(10, 5))
ax.plot(alpha, fit1[0,:], linewidth=2, color='g', label='reward')
ax.plot(alpha, fit1[1,:], linewidth=2, color='r', label='punishment')

ax.set_ylabel('$G^2$')
ax.set_xlabel('alpha')
ax.legend()
ax.grid()

plt.tight_layout()

# Figure 2: G-square of model 2 over the (alpha, theta) grid, one panel per
# condition. fit2 is indexed [condition, alpha, theta], so it is transposed
# to put theta on the rows (y axis) as contourf expects.
fig2, (ax2r, ax2p) = plt.subplots(nrows=1, ncols=2, facecolor='w', figsize=(10, 5))
im2r = ax2r.contourf(alpha, theta, fit2[0].T, levels=50, cmap='Greens_r')
im2p = ax2p.contourf(alpha, theta, fit2[1].T, levels=50, cmap='Reds_r')

ax2r.set_xlabel('alpha')
ax2r.set_ylabel('theta')
ax2p.set_xlabel('alpha')
ax2p.set_ylabel('theta')

fig2.colorbar(im2r, ax=ax2r)
fig2.colorbar(im2p, ax=ax2p)

plt.tight_layout()

### Model fitting for single subject responses
In this section model parameters are optimized to explain subject responses. Behavioral responses are pooled across both task conditions.  

In [None]:
from scipy.optimize import minimize
from scipy.optimize import Bounds

In [None]:
# One entry per model: k is the number of free parameters; g_square and x
# are placeholders filled in once the model has been fitted.
results = {
    'model1': {'k': 1, 'g_square': None, 'x': None},
    'model2': {'k': 2, 'g_square': None, 'x': None},
    'model3': {'k': 4, 'g_square': None, 'x': None},
}

### Model 1 #################################################################
# alpha is constrained to [0, 1]
bounds1 = Bounds([0], [1])

def cost_model1(x):
    '''Optimization function for model 1.

    Args:
        x (np.array): parameter vector holding a single element, alpha

    Returns:
        G-square of model 1 summed over the reward and punishment conditions
    '''
    # The optimizer passes a 1-element array; hand the model a scalar so
    # that value updates do not rely on implicit array-to-scalar conversion
    # (deprecated in recent NumPy releases). A plain scalar is accepted too.
    alpha = np.atleast_1d(x)[0]
    g_rew = g_square(df_rew, model1(df_rew, info_rew, alpha))
    g_pun = g_square(df_pun, model1(df_pun, info_pun, alpha))
    return g_rew  + g_pun

x0 = np.array([.5])

res1 = minimize(cost_model1, x0, method='SLSQP', bounds=bounds1)
results['model1']['g_square'] = res1['fun']
results['model1']['x'] = res1['x']

### Model 2 #################################################################
# alpha in [0, 1], theta in [0, inf)
bounds2 = Bounds([0, 0], [1, np.inf])

def cost_model2(x):
    '''Cost minimized when fitting model 2.

    x holds (alpha, theta); the returned value is the G-square of model 2
    summed over the reward and punishment conditions.
    '''
    a, t = x[0], x[1]
    per_condition = (
        g_square(df_rew, model2(df_rew, info_rew, a, t)),
        g_square(df_pun, model2(df_pun, info_pun, a, t)),
    )
    return per_condition[0] + per_condition[1]

# Starting point of the search
x0 = np.array([.5, .5])

res2 = minimize(cost_model2, x0, method='SLSQP', bounds=bounds2)
results['model2'].update(g_square=res2.fun, x=res2.x)

### Model 3 #################################################################
# alpha in [0, 1], theta in [0, inf), gamma in [0, inf), delta in [0, 1]
bounds3 = Bounds([0, 0, 0, 0], [1, np.inf, np.inf, 1])

def cost_model3(x):
    '''Cost minimized when fitting model 3.

    x holds (alpha, theta, gamma, delta); the returned value is the
    G-square of model 3 summed over the reward and punishment conditions.
    '''
    a, t, g, d = x[0], x[1], x[2], x[3]
    per_condition = (
        g_square(df_rew, model3(df_rew, info_rew, a, t, g, d)),
        g_square(df_pun, model3(df_pun, info_pun, a, t, g, d)),
    )
    return per_condition[0] + per_condition[1]

# Starting point of the search
x0 = np.array([.5, .5, 1, 1])

res3 = minimize(cost_model3, x0, method='SLSQP', bounds=bounds3)
results['model3'].update(g_square=res3.fun, x=res3.x)

### Model comparison for single subject
In this section three models are compared with respect to their Akaike Information Criterion (AIC score). AIC enables comparison of models with different numbers of free parameters. AIC is defined as:
$$AIC = G^2 + 2k$$ 

In [None]:
# Derive the model list from the results dict so that the AIC array, the
# bars and their tick labels always refer to the same models in the same
# order.
model_names = list(results)
aic = np.zeros(len(model_names))

# AIC = G^2 + 2k, with k the number of free parameters of the model
for i, model in enumerate(model_names):
    aic[i] = results[model]['g_square'] + 2*results[model]['k']

# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in
# matplotlib 3.6; use whichever name this installation provides.
style_name = 'seaborn-v0_8-ticks'
if style_name not in plt.style.available:
    style_name = 'seaborn-ticks'
plt.style.use(style_name)

fig, ax = plt.subplots(facecolor='w', figsize=(10, 3))

# Highlight the model with the lowest AIC
color = ['#01579B' for _ in model_names]
color[np.argmin(aic)] = '#A67C00'
ax.barh(range(len(model_names)), aic, color=color, alpha=.75)
ax.set_xlabel('Akaike Information Criterion')
ax.set_yticks(range(len(model_names)))
ax.set_yticklabels(model_names)
ax.grid()