In [1]:
import numpy as np

In [2]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Value(s) in the closed interval [0, 1], same shape as *x*.
    """
    # The textbook form evaluates np.exp(-x), which overflows (RuntimeWarning,
    # intermediate inf) for large negative x.  Using the identity
    #     sigmoid(x) = exp(-log(1 + exp(-x))) = exp(-logaddexp(0, -x))
    # keeps every intermediate finite for any finite x.
    return np.exp(-np.logaddexp(0, -x))

In [3]:
def dsigmoid(s):
    """Return the sigmoid derivative expressed via the sigmoid *output* ``s``.

    The argument is the already-activated value ``s = sigmoid(x)``, not the
    pre-activation ``x``; the result is ``s * (1 - s)``.
    """
    complement = 1 - s
    return s * complement

In [4]:
def tanh(x):
    """Return the element-wise hyperbolic tangent of *x* (delegates to NumPy)."""
    activated = np.tanh(x)
    return activated

In [5]:
def dtanh(x):
    """Return the derivative of tanh evaluated at the *pre-activation* ``x``.

    Computes ``1 - tanh(x)**2``.  Note that the argument is the raw input to
    tanh, not an already-activated value.
    """
    activated = np.tanh(x)
    return 1.0 - activated ** 2

In [None]:
class RNN():
    """Minimal many-to-one vanilla RNN with a tanh hidden layer.

    The network consumes a sequence ``x[0..T-1]``, updates its hidden state at
    every step and emits a single linear output from the final hidden state.
    It is trained with the squared-error loss ``0.5 * sum((y - y_true)**2)``
    and plain gradient descent.

    Each ``x[t]`` is expected to be a column vector of shape
    ``(input_size, 1)`` so that it lines up with ``Wxh`` and ``bh``.
    """

    def __init__(self, input_size, hidden_size, output_size, lr) -> None:
        """Create randomly initialised weights and zero biases.

        Parameters
        ----------
        input_size : int
            Number of features in each ``x[t]``.
        hidden_size : int
            Number of hidden units.
        output_size : int
            Number of output units.
        lr : float
            Learning rate used by :meth:`update`.
        """
        # forward() needs hidden_size to build the initial state, so the
        # sizes are stored on the instance.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Wxh = np.random.randn(hidden_size, input_size) * 0.25
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.25
        # Why multiplies the hidden state, so its column count is hidden_size.
        self.Why = np.random.randn(output_size, hidden_size) * 0.25
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))
        self.lr = lr

    def _step(self, x_t, h_prev):
        """Return the next hidden state given input ``x_t`` and state ``h_prev``."""
        return np.tanh(np.dot(self.Wxh, x_t) + np.dot(self.Whh, h_prev) + self.bh)

    def forward(self, x, return_h=False):
        """Run the sequence through the network.

        Parameters
        ----------
        x : sequence
            Time-ordered inputs; ``len(x)`` is the number of time steps.
        return_h : bool, optional
            When true, also return the final hidden state.

        Returns
        -------
        y_hat or (y_hat, h)
            ``y_hat`` is the linear read-out of the final hidden state; ``h``
            is that final hidden state.
        """
        h = np.zeros((self.hidden_size, 1))
        for t in range(len(x)):
            h = self._step(x[t], h)
        y_hat = np.dot(self.Why, h) + self.by
        if return_h:
            return y_hat, h
        return y_hat

    @staticmethod
    def loss(y, y_true):
        """Return the squared-error loss ``0.5 * sum((y - y_true)**2)``.

        Declared static so it can be called both as ``RNN.loss(...)`` and on
        an instance.
        """
        return 0.5 * (np.sum((y - y_true) ** 2))

    def backward(self, x, y, y_true, h):
        """Compute loss gradients with backpropagation through time.

        Parameters
        ----------
        x : sequence
            The same inputs that were passed to :meth:`forward`.
        y : numpy.ndarray
            Network output produced by :meth:`forward` for ``x``.
        y_true : numpy.ndarray
            Target output, same shape as ``y``.
        h : numpy.ndarray
            Final hidden state returned by ``forward(x, return_h=True)``.

        Returns
        -------
        tuple
            ``(dWxh, dWhh, dWhy, dbh, dby)``, each shaped like the matching
            parameter.
        """
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dbh = np.zeros_like(self.bh)

        # Output layer: d(loss)/dy for the squared-error loss is (y - y_true).
        dy = y - y_true
        dWhy = np.dot(dy, h.T)
        dby = dy

        # Only the final hidden state is handed in, but BPTT needs the state
        # at every step, so the trajectory is recomputed from x.
        # states[t] is the state *before* step t; states[t + 1] the one after.
        states = [np.zeros((self.hidden_size, 1))]
        for t in range(len(x)):
            states.append(self._step(x[t], states[-1]))

        # The output depends on the last hidden state only, so the output
        # gradient enters the recurrence exactly once.
        dh = np.dot(self.Why.T, dy)
        for t in reversed(range(len(x))):
            h_prev, h_t = states[t], states[t + 1]

            # h_t is already tanh(z_t), hence tanh'(z_t) = 1 - h_t**2.
            dz = (1.0 - h_t ** 2) * dh

            dbh += dz
            dWxh += np.dot(dz, x[t].T)
            dWhh += np.dot(dz, h_prev.T)

            # Gradient flowing into the previous hidden state.
            dh = np.dot(self.Whh.T, dz)

        return dWxh, dWhh, dWhy, dbh, dby

    def update(self, dWxh, dWhh, dWhy, dbh, dby):
        """Apply one gradient-descent step using the stored learning rate."""
        # The gradients point uphill on the loss, so they are subtracted.
        self.Wxh -= self.lr * dWxh
        self.Whh -= self.lr * dWhh
        self.Why -= self.lr * dWhy
        self.bh -= self.lr * dbh
        self.by -= self.lr * dby

    @staticmethod
    def predict(y_hat):
        """Return the raw network output unchanged (identity post-processing)."""
        return y_hat