In [2]:
import numpy as np
import matplotlib as plt

# Rescorla-Wagner Model of Classical Conditioning

### Step 1. Initializing association weights $w \in \mathbb{R}^n$

In [5]:
# Initialise the association weights w with tiny random values, one per feature.
# A dedicated, seeded generator makes "Restart Kernel -> Run All" reproduce the
# same weights and leaves NumPy's global random state untouched.
SEED = 0
n_feat = 2  # dimension n of the weight vector w
rng = np.random.default_rng(SEED)
w = rng.uniform(high=0.0001, size=n_feat)  # uniform draws from [0, 0.0001)
print("w:", np.round(w,2))

w: [0. 0.]


### Step 2. Defining value of a Stimulus vector $x$
Given a weight vector $w$ and a stimulus vector $x$, the value of the stimulus is $V(x) = w^\top x = \sum_i w_i x_i$.

In [24]:
# define a function, call it value (receives as input w and x)
def value(w, x):
    """Return the value of stimulus ``x`` under weights ``w``: the dot product of the two.

    Parameters
    ----------
    w : 1-D sequence of numbers
        Association weights.
    x : 1-D sequence of numbers
        Stimulus vector; must have the same length as ``w``.

    Returns
    -------
    NumPy scalar
        ``sum_i w[i] * x[i]``.

    Raises
    ------
    ValueError
        If ``w`` and ``x`` have different lengths. A mismatch is rejected
        instead of being silently truncated to the shorter vector.
    """
    return np.dot(w, x)

# Sanity check of value(): with w = [1, 0] and x = [1, 0] the element-wise
# products are 1*1 and 0*0, so the displayed result should be 1.
value(w=[1,0],x=[1,0])

1

### Step 3. Defining associative weight update $\Delta w$
$x$: stimulus vector, $r$: reward, $\alpha$: learning rate, n_times: number of updates. Each update applies $\Delta w = \alpha \, (r - w^\top x) \, x$.

In [8]:
# define the weight update 
def update_rw(w, x, r=1, alpha=0.5, n_times=1):
    """Apply the Rescorla-Wagner weight update ``n_times`` in a row.

    Every pass computes the prediction error ``r - value(w, x)`` and shifts
    the weights by ``alpha * error * x``.

    Parameters
    ----------
    w : array-like
        Current association weights. Each pass builds a new object, so the
        argument passed in is not modified in place.
    x : array-like
        Stimulus vector; converted with ``np.array`` before use.
    r : number, default 1
        Reward.
    alpha : number, default 0.5
        Learning rate.
    n_times : int, default 1
        Number of consecutive updates.

    Returns
    -------
    The weights after the last update; ``w`` itself, unchanged, when the
    loop body never runs.
    """
    stimulus = np.array(x)
    for _ in range(n_times):
        prediction_error = r - value(w, stimulus)
        step_size = alpha * prediction_error
        w = w + step_size * stimulus
    return w

### Step 4. Define Classical Conditioning paradigms

In [25]:
# Forward blocking (A -> +, then AB -> +, then probe B) with the Rescorla-Wagner model:
# 1) start from zero weights with stimulus A presented alone
# 2) run 10 updates of the A -> + association
# 3) run 10 updates of the compound AB -> + association
# 4) read out the value the model assigns to B on its own

# Phase 1: A -> +
w = np.zeros(2, dtype=int)
x = [1, 0]
w = update_rw(w, x, r=1, n_times=10)
print("> w after A->+:", w.round(4))

# Phase 2: AB -> +
x = [1, 1]
w = update_rw(w, x, r=1, n_times=10)
print("> w after AB->+", w.round(3))

# Probe: B alone
print("Value of B->?", np.round(value(w, [0, 1]), 2))

> w after A->+: [0.999 0.   ]
> w after AB->+ [1. 0.]
Value of B->? 0.0


# Temporal Difference Learning

### Step 1. Future Reward prediction: $V_t \in \mathbb{R}$
Function approximation: $V_t = w^\top \phi(s_t)$. The indicator features $x_t = \phi(s_t) \in \mathbb{R}^n$ set a 1 at the corresponding state index (one-hot encoding): $x_t(t) = 1$  

In [9]:
# Our aim is to test Second Order Conditioning: 
# B->+,   A->B,   A->?

# define V for 3 states: A, B, reward delivery
# define r for 3 states: We get reward in the last one
# define alpha: learning rate
# define gamma: discount factor

### Step 2. Define TD error and $V_t$ update

In [7]:
# 1) define the td error
# 2) define the weight update 

### Step 3. Test Second Order Conditioning