In [42]:
import numpy as np

#**Intialize Random arrays**

In [43]:
input_dim = 1
hidden_dim = 3
output_dim = 1

wx = np.random.randn(hidden_dim, input_dim) * np.sqrt(1/input_dim)
wh = np.random.randn(hidden_dim, hidden_dim) * np.sqrt(1/hidden_dim)
wo = np.random.randn(output_dim,hidden_dim) * np.sqrt(1/hidden_dim)

print(f"input->hidden Weights :\n {wx } \n")
print(f"hidden->hidden weights :\n {wh } \n")
print(f"hidden->output weights :\n {wo } \n")

input->hidden Weights :
 [[-1.12842626]
 [ 0.13693645]
 [-0.01567228]] 

hidden->hidden weights :
 [[ 0.40316286  0.37694561 -0.00980171]
 [-1.47310901  0.65082041 -0.17920119]
 [ 0.92837611 -0.36532238  0.31541256]] 

hidden->output weights :
 [[ 0.44378544  0.25123416 -0.23855756]] 



In [44]:
bx = np.zeros((hidden_dim, 1))
by = np.zeros((output_dim, 1))

#**Activation Functions**

In [45]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x))
def dissigmoid(x):
  s = sigmoid(x)
  return s * (1-s)

#**Forward Prop Function**

In [46]:
def forward_prop(x_seq):
  T = len(x_seq)
  h = {}
  z = {}
  y = {}
  h[-1] = np.zeros((hidden_dim,1))
  for t in range(T):
    x_t = x_seq[t].reshape(-1, 1)
    z[t] = np.dot(wx,x_t) + np.dot(wh,h[t-1]) + bx
    h[t] = sigmoid(z[t])
    y[t] = np.dot(wo,h[t]) + by
  return h, y , z

#**Loss Function**

In [47]:
def loss(y, y_seq):
  loss = 0.0
  for t in range(len(y_seq)):
    loss += 0.5 * (y[t]-y_seq[t])**2
  return loss

#**Backward Prop**

In [48]:
def backward_prob(x_seq, y_seq, h, y, z):
  T = len(x_seq)
  dWx = np.zeros_like(wx)
  dWh = np.zeros_like(wh)
  dWy = np.zeros_like(wo)
  dbx = np.zeros_like(bx)
  dby = np.zeros_like(by)
  dh_next = np.zeros((hidden_dim, 1))
  for t in reversed(range(T)):
    x_t = x_seq[t].reshape(-1,1)
    y_t = y_seq[t]
    dy = ( y[t] - y_t)
    dWy = np.dot(dy,h[t].T)
    dby += dy
    dz = (np.dot(wo.T,dy)+np.dot(wh.T,dh_next))*dissigmoid(z[t])
    dWx += np.dot(dz,x_t.T)
    dWh += np.dot(dz,h[t-1].T)
    dbx += dz
    dh_next = dz
  return dWx, dWh, dWy, dbx, dby

#**Update param function**

In [49]:
def update_param(dWx,dWh,dWy,dbx,dby,lr=0.01):
  global wx,wy,wh,bx,by
  wx -= lr*dWx
  wo -= lr*dWy
  wh -= lr*dWh
  bx -= lr*dbx
  by -= lr*dby

In [50]:
x = [np.array([1]), np.array([2])]
target = [np.array([[0.5]]), np.array([[0.8]])]

In [51]:
x

[array([1]), array([2])]

#**Train Loop**

In [52]:
for epoch in range(2000):
  h,y,z = forward_prop(x)
  current_loss = loss(y,target)
  dWx, dWh, dWy, dbx, dby = backward_prob(x,target,h,y,z)
  update_param(dWx,dWh,dWy,dbx,dby)
  if epoch % 100 == 0:
    print(f"Epoch {epoch} Loss {current_loss}")

Epoch 0 Loss [[0.34250106]]
Epoch 100 Loss [[0.03610157]]
Epoch 200 Loss [[0.03159182]]
Epoch 300 Loss [[0.03063116]]
Epoch 400 Loss [[0.02972769]]
Epoch 500 Loss [[0.0286496]]
Epoch 600 Loss [[0.02734849]]
Epoch 700 Loss [[0.02582265]]
Epoch 800 Loss [[0.02409911]]
Epoch 900 Loss [[0.0222216]]
Epoch 1000 Loss [[0.02024075]]
Epoch 1100 Loss [[0.01820815]]
Epoch 1200 Loss [[0.0161737]]
Epoch 1300 Loss [[0.01418451]]
Epoch 1400 Loss [[0.01228353]]
Epoch 1500 Loss [[0.01050741]]
Epoch 1600 Loss [[0.00888419]]
Epoch 1700 Loss [[0.00743158]]
Epoch 1800 Loss [[0.00615671]]
Epoch 1900 Loss [[0.0050573]]
