In [4]:
import numpy as np
from tqdm import tqdm

class RNN:
    def __init__(self, x, y, hidden_units):
        self.x = x # shape [samples, timesteps, features]
        self.y = y # shape [samples, outputs]
        self.hidden_units = hidden_units
        self.Wx = np.random.randn(self.hidden_units, self.x.shape[2])
        self.Wh = np.random.randn(self.hidden_units, self.hidden_units)
        self.Wy = np.random.randn(self.y.shape[1],self.hidden_units)
    
    def cell(self, xt, ht_1):
        ht = np.tanh(np.dot(self.Wx,xt.reshape(1,1)) + np.dot(self.Wh,ht_1))
        yt = np.dot(self.Wy,ht)
        return ht, yt
        
    def forward(self, sample):
        sample_x, sample_y = self.x[sample], self.y[sample]
        ht = np.zeros((self.hidden_units,1)) # first hidden state is zeros vector
        self.hidden_states = [ht] # collection of hidden states for each sample
        self.inputs = [] # collection of inputs for each sample
        for step in range(len(sample_x)):
            ht, yt = self.cell(sample_x[step],ht)
            self.inputs.append(sample_x[step].reshape(1,1))
            self.hidden_states.append(ht)
        self.error = yt - sample_y
        self.loss = 0.5*self.error**2
        self.yt = yt

    
    def backward(self):
        n = len(self.inputs)
        dyt = self.error.reshape(-1, 1)  # Reshape dyt to column vector
        ht_last = self.hidden_states[-1].reshape(-1, 1)  # Reshape last hidden state to column vector
        dWy = np.dot(dyt, ht_last.T)  # dyt/dWy
        dht = np.dot(self.Wy.T, dyt)  # dL/dht = dL/dyt * dyt/dht, where ht = tanh(Wx*xt + Wh*ht))
        dWx = np.zeros(self.Wx.shape)
        dWh = np.zeros(self.Wh.shape)
        # BPTT
        for step in reversed(range(n)):
            temp = (1 - self.hidden_states[step + 1] ** 2) * dht  # dL/dtanh = dL/dyt * dyt/dht * dht/dtanh
            dWx += np.dot(temp, self.inputs[step].T)  # dL/dWx = dL/dyt * dyt/dht * dht/dtanh * dtanh/dWx
            dWh += np.dot(temp, self.hidden_states[step].T)  # dL/dWh = dL/dyt * dyt/dht * dht/dtanh * dtanh/dWh

            dht = np.dot(self.Wh, temp)  # dL/dht-1 = dL/dht * (1 - ht+1^2) * Whh
        dWy = np.clip(dWy, -1, 1)
        dWx = np.clip(dWx, -1, 1)
        dWh = np.clip(dWh, -1, 1)
        self.Wy -= self.lr * dWy
        self.Wx -= self.lr * dWx
        self.Wh -= self.lr * dWh

        
    def train(self, epochs, learning_rate):
        self.Ovr_loss = []
        self.lr = learning_rate
        for epoch in tqdm(range(epochs)):
            for sample in range(self.x.shape[0]):
                self.forward(sample)
                self.backward()
            self.Ovr_loss.append(np.squeeze(self.loss / self.x.shape[0]))
            self.loss = 0
            
    def test(self,x,y):
        self.x = x
        self.y = y
        self.outputs = []
        for sample in range(len(x)):
            self.forward(sample)
            self.outputs.append(self.yt)
            
# Create sequence data
sequence = np.array([1,3,2,3,4,3,8])
X = sequence[:-1].reshape(1, -1, 1)  # Reshape to [samples, timesteps, features]
y = sequence[1:].reshape(1, -1)      # Reshape to [samples, outputs]

# Initialize and train RNN
rnn = RNN(X, y, hidden_units=6)
rnn.train(epochs=5, learning_rate=0.01)

# Print weights for each layer in the last iteration
print("Weights Wx for each layer:")
for i, wx in enumerate(rnn.Wx):
    print(f"Layer {i+1}: {wx}")

print("\nWeights Wh for each layer:")
for i, wh in enumerate(rnn.Wh):
    print(f"Layer {i+1}: {wh}")

print("\nWeights Wy:")
print(rnn.Wy)


  0%|          | 0/5 [00:00<?, ?it/s]


ValueError: shapes (6,6) and (1,6) not aligned: 6 (dim 1) != 1 (dim 0)