In [28]:
import numpy as np
import pandas as pd

In [29]:
np.random.seed(0)

In [30]:
data = pd.read_csv('../data/clean_weather.csv')

In [31]:
i_weight = np.random.rand(1,2)
h_weight = np.random.rand(2,2)
o_weight = np.random.rand(2,1)

In [32]:
temps = data['tmax'].tail(3).to_numpy()
temps

array([66., 70., 62.])

In [33]:
x0 = temps[0].reshape(1,1)
x1 = temps[1].reshape(1,1)
x2 = temps[2].reshape(1,1)


## Forward at time 0

In [34]:
xi_0 = x0 @ i_weight
xi_0

array([[36.22169126, 47.20249818]])

In [35]:
xh_0 = np.maximum(0, xi_0)
xi_0

array([[36.22169126, 47.20249818]])

In [36]:
xo_0 = xh_0 @ o_weight
xo_0

array([[57.94406231]])

## Forward time 1

In [37]:
xi_1 = x1 @ i_weight

xh = xi_1 @ h_weight

xh_1 = np.maximum(0, xh+xi_1)

xo_1 = xh_1 @ o_weight
xo_1

array([[128.37299954]])

## Time step 2

In [38]:
xi_2 = x2 @ i_weight
xh = xh_1 @ h_weight
xh_2 = np.maximum(xi_0, xh+xi_2)

xo_2 = xh_2 @ o_weight
xo_2

array([[195.16665582]])

## Full Forward Step

In [46]:
np.random.seed(0)
i_weight = np.random.rand(1, 5) / 5 - 0.1 
h_weight = np.random.rand(5, 5) / 5 - 0.1 
o_weight = np.random.rand(5, 1)  * 50

h_bias = np.random.rand(1, 5) / 5 - 0.1
o_bias = np.random.rand(1,1)

In [47]:
outputs = np.zeros(3)
hidden = np.zeros((3,5))
sequence  = data['tmax'].tail(3).to_numpy()
sequence

array([66., 70., 62.])

In [48]:
prev_hidden = None 
for i  in range(len(sequence)):
    x = sequence[i].reshape(1, 1)
    
    xi = x @ i_weight
    
    if prev_hidden is None:
        xh = xi
    else :
        xh = xi + prev_hidden @ h_weight + h_bias
        
    xh = np.tanh(xh)
    prev_hidden = xh
    hidden[i,] = xh
    
    xo = xh @ o_weight +o_bias
    
    outputs[i] = xo
print(outputs)

[80.68122178 87.882189   85.74845223]


In [49]:
hidden

array([[ 0.56784618,  0.99320288,  0.87557333,  0.53166114, -0.76483255],
       [ 0.62831266,  0.99539815,  0.90626505,  0.73036504, -0.79820907],
       [ 0.59019575,  0.99104942,  0.87265444,  0.70451068, -0.75540773]])

## Loss Function

In [16]:
# mean square error
def mse(actual, predict):
    return np.mean((predict - actual)**2)

def mse_grad(actual, predict):
    return predict - actual


In [17]:
actual = [80, 87, 85]
loss = mse_grad(actual, outputs)
loss

array([0.68122178, 0.882189  , 0.74845223])

## Backward propagation

In [18]:
next_hidden = None
loss_grad = mse_grad(actual, outputs)
o_weight_grad, o_bias_grad, h_weight_grad, h_bias_grad, i_weight_grad = [0]*5

for i in range(2, -1, -1):
    l_grad = loss_grad[i].reshape(1, 1)
    
    o_weight_grad += hidden[i][:, np.newaxis ]@ l_grad
    o_bias_grad += np.mean(l_grad)
    
    o_grad = l_grad @ o_weight.T
    
    if next_hidden is None:
        h_grad = o_grad
    else:
        h_grad = o_grad +next_hidden @ h_weight.T
        
    tanh_dervi = 1 - hidden[i,:][np.newaxis, :]
    
    h_grad = np.multiply(h_grad, tanh_dervi)
    
    if i > 0 :
        h_weight_grad += hidden[i-1, :][:, np.newaxis] @ h_grad
        h_bias += np.mean(h_grad)
    i_weight_grad += sequence[i].reshape(1, 1 ).T @ h_grad

In [19]:
i_weight_grad

array([[ 812.17749263,   38.9100749 ,  394.38955612, 1461.43064731,
         255.40151145]])

In [20]:
lr = 0.00001
i_weight -= i_weight_grad*lr
h_weight -= h_weight_grad*lr
o_weight -= o_weight_grad*lr
o_bias -= o_bias_grad*lr
h_bias -= h_bias_grad*lr


In [21]:
i_weight

array([[ 0.00164093,  0.04264877,  0.01660878, -0.00563767, -0.01782306]])

In [22]:
from sklearn.preprocessing import StandardScaler

#Define predictors and target
PREDICTORS = list(data.columns)[1:-1]
TARGET = list(data.columns)[-1]

#Scale data to have mean 0 
scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])