In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    return x * (1 - x)

def RNN(X, W_xh, W_hh, W_hy, b_h, b_y):
    # X: Input sequence (batch_size, seq_len, input_dim)
    # W_xh: Weight matrix from input to hidden layer (input_dim, hidden_dim)
    # W_hh: Recurrent weight matrix (hidden_dim, hidden_dim)
    # W_hy: Weight matrix from hidden layer to output layer (hidden_dim, output_dim)
    # b_h: Bias for the hidden layer (hidden_dim,)
    # b_y: Bias for the output layer (output_dim,)

    hidden_state = np.zeros((X.shape[0], X.shape[1], W_hh.shape[0]))
    output = np.zeros((X.shape[0], X.shape[1], W_hy.shape[1]))

    for t in range(X.shape[1]):
        hidden_state[:, t, :] = sigmoid(np.dot(X[:, t, :], W_xh) + np.dot(hidden_state[:, t-1, :], W_hh) + b_h)
        output[:, t, :] = sigmoid(np.dot(hidden_state[:, t, :], W_hy) + b_y)

    return output

# Example usage:
input_dim = 10
hidden_dim = 5
output_dim = 2
seq_len = 4
batch_size = 2

X = np.random.rand(batch_size, seq_len, input_dim)
W_xh = np.random.rand(input_dim, hidden_dim)
W_hh = np.random.rand(hidden_dim, hidden_dim)
W_hy = np.random.rand(hidden_dim, output_dim)
b_h = np.zeros((hidden_dim,))
b_y = np.zeros((output_dim,))

output = RNN(X, W_xh, W_hh, W_hy, b_h, b_y)
print(output)

[[[0.91366476 0.8217149 ]
  [0.92195952 0.83426007]
  [0.92144408 0.83371297]
  [0.92143352 0.8336093 ]]

 [[0.88545374 0.78760779]
  [0.92029224 0.83140036]
  [0.92159804 0.83358482]
  [0.92088715 0.83251319]]]


In [1]:
import numpy as np
# Initialize Parameters
input_size = 2
hidden_size = 3
output_size = 1
Wx = np.random.rand(hidden_size, input_size) * 0.01
Wh = np.random.rand(hidden_size, hidden_size) * 0.01
Wy = np.random.rand(output_size, hidden_size) * 0.01
bh = np.zeros((hidden_size, 1))
by = np.zeros((output_size, 1))
# Define Activation Function
def tanh(x):
    return np.tanh(x)
def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2
 # Forward Pass
def rnn_forward(inputs):
    h = np.zeros((hidden_size, 1))
    for x in inputs:
        x = x.reshape(-1, 1)
        h = tanh(np.dot(Wx, x) + np.dot(Wh, h) + bh)
    y = np.dot(Wy, h) + by
    return y, h
# Backward Pass
def rnn_backward(inputs, target, y, h):
    loss = (y - target) ** 2
    dWy = (y - target) * h.T
    dby = (y - target)
    dh = np.dot(Wy.T, (y - target))
    
    dWx_total = np.zeros_like(Wx)
    dWh_total = np.zeros_like(Wh)
    dbh_total = np.zeros_like(bh)
    for t in reversed(range(len(inputs))):
        x = inputs[t].reshape(-1, 1)
        dhraw = dh * tanh_derivative(h)
        dWx = np.dot(dhraw, x.T)
        dWh = np.dot(dhraw, h.T)
        dbh = dhraw
        dh = np.dot(Wh.T, dhraw)
        
        dWx_total += dWx
        dWh_total += dWh
        dbh_total += dbh
    return dWx_total, dWh_total, dWy, dbh_total, dby

# Update Weights
def update_parameters(learning_rate, dWx, dWh, dWy, dbh, dby):
    global Wx, Wh, Wy, bh, by
    Wx -= learning_rate * dWx
    Wh -= learning_rate * dWh
    Wy -= learning_rate * dWy
    bh -= learning_rate * dbh
    by -= learning_rate * dby
inputs = [np.array([1, 0]), np.array([0, 1]), np.array([1, 1])]
target = np.array([[1]])
learning_rate = 0.01

for epoch in range(100):
    y, h = rnn_forward(inputs)
    dWx, dWh, dWy, dbh, dby = rnn_backward(inputs, target, y, h)     
    update_parameters(learning_rate, dWx, dWh, dWy, dbh, dby)
    if epoch % 10 == 0:
       print(f'Epoch {epoch}, Loss: {np.mean((y - target) ** 2)}')

Epoch 0, Loss: 0.9995626668420855
Epoch 10, Loss: 0.8173901496644235
Epoch 20, Loss: 0.6683731130141761
Epoch 30, Loss: 0.5464768997241599
Epoch 40, Loss: 0.4467667561357531
Epoch 50, Loss: 0.36520702508032155
Epoch 60, Loss: 0.29849700365063636
Epoch 70, Loss: 0.24393678246407355
Epoch 80, Loss: 0.19931760772663204
Epoch 90, Loss: 0.16283229743959962
