In [3]:
import numpy as np

In [7]:
class RNN:
    """Minimal many-to-one vanilla RNN classifier implemented with NumPy.

    The network consumes a batch of sequences, updates a tanh hidden state at
    every time step, and maps the hidden state of the last step to class
    probabilities through a softmax layer. Training uses full-batch gradient
    descent with backpropagation through time (BPTT).
    """

    def __init__(self, input_size, hidden_size, output_size, sequence_length):
        """Create the parameters of the network.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the recurrent hidden state.
            output_size: Number of output classes.
            sequence_length: Number of time steps processed by ``forward``.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.sequence_length = sequence_length

        # Small random weights; biases start at zero.
        self.Wxh = np.random.randn(input_size, hidden_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(hidden_size, output_size) * 0.01

        self.bh = np.zeros((1, hidden_size))
        self.by = np.zeros((1, output_size))

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted first so ``np.exp`` cannot overflow.
        """
        e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e_x / np.sum(e_x, axis=1, keepdims=True)

    def tanh(self, x):
        """Element-wise hyperbolic tangent."""
        return np.tanh(x)

    def tanh_derivative(self, x):
        """Derivative of tanh evaluated at the *pre-activation* value ``x``.

        Note: ``backward`` does not call this, because the cached hidden
        states are already tanh outputs (see the comment there).
        """
        return 1 - np.tanh(x)**2

    def forward(self, inputs):
        """Run the network over a batch and return class probabilities.

        Args:
            inputs: Array indexed as ``inputs[:, t, :]``, i.e. laid out as
                (batch, time, features). Only the first
                ``self.sequence_length`` time steps are read.

        Returns:
            Array of shape (batch, output_size) whose rows sum to 1.

        Side effects:
            Caches ``self.inputs`` and the per-step hidden states ``self.hs``
            (a dict keyed by time step, with ``-1`` holding the zero initial
            state) for use by ``backward``.
        """
        self.inputs = inputs
        self.hs = {}
        self.hs[-1] = np.zeros((inputs.shape[0], self.hidden_size))

        for t in range(self.sequence_length):
            x_t = inputs[:, t, :]
            self.hs[t] = self.tanh(x_t @ self.Wxh + self.hs[t-1] @ self.Whh + self.bh)

        output = self.hs[self.sequence_length - 1] @ self.Why + self.by
        return self.softmax(output)

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy between predictions and integer class labels.

        Args:
            y_true: 1-D integer array of class indices, one per sample.
            y_pred: Array of predicted probabilities, shape (batch, classes).

        Returns:
            The average negative log-probability assigned to the true class.
        """
        m = y_true.shape[0]
        picked = y_pred[np.arange(m), y_true]
        # Guard against log(0) when a softmax probability underflows to zero.
        correct_logprobs = -np.log(np.clip(picked, 1e-12, None))
        return np.sum(correct_logprobs) / m

    def backward(self, y_true, y_pred, learning_rate=0.01):
        """Backpropagate through time and apply one gradient-descent step.

        Must be called after ``forward`` on the same batch, since it relies on
        the cached ``self.inputs`` and ``self.hs``.

        Args:
            y_true: 1-D integer array of class indices, one per sample.
            y_pred: Probabilities returned by ``forward``; left unmodified.
            learning_rate: Step size of the parameter update.
        """
        m = y_true.shape[0]
        dWhy = np.zeros_like(self.Why)
        dby = np.zeros_like(self.by)
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dbh = np.zeros_like(self.bh)

        # Gradient of mean cross-entropy w.r.t. the logits: (p - onehot) / m.
        # Work on a copy so the caller's prediction array is not corrupted.
        dy = y_pred.copy()
        dy[np.arange(m), y_true] -= 1
        dy /= m

        # Output layer only sees the hidden state of the final time step.
        last_h = self.hs[self.sequence_length - 1]
        dWhy += last_h.T @ dy
        dby += np.sum(dy, axis=0, keepdims=True)

        dh_next = dy @ self.Why.T

        # Backpropagate through time.
        for t in reversed(range(self.sequence_length)):
            # self.hs[t] is already tanh(z_t), so d tanh / dz = 1 - h_t**2.
            # Applying tanh again here would give the wrong derivative.
            dtanh = (1.0 - self.hs[t] ** 2) * dh_next
            dWxh += self.inputs[:, t, :].T @ dtanh
            dWhh += self.hs[t-1].T @ dtanh
            dbh += np.sum(dtanh, axis=0, keepdims=True)
            dh_next = dtanh @ self.Whh.T

        # Update weights
        self.Wxh -= learning_rate * dWxh
        self.Whh -= learning_rate * dWhh
        self.Why -= learning_rate * dWhy
        self.bh -= learning_rate * dbh
        self.by -= learning_rate * dby

    def train(self, X, y, epochs=100, learning_rate=0.01):
        """Train with full-batch gradient descent, logging every 10th epoch.

        Args:
            X: Input batch, laid out as (batch, time, features).
            y: 1-D integer array of class indices.
            epochs: Number of full passes (one update per pass).
            learning_rate: Step size forwarded to ``backward``.
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            # Loss is measured before this epoch's parameter update.
            loss = self.compute_loss(y, y_pred)
            self.backward(y, y_pred, learning_rate)
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the most probable class index for every sample in ``X``."""
        probs = self.forward(X)
        return np.argmax(probs, axis=1)

In [9]:
if __name__ == "__main__":
    # Seed NumPy's global RNG first: both the synthetic data below and the
    # RNN's weight initialisation draw from it, in this order.
    np.random.seed(42)

    # Synthetic dataset laid out as (batch, time, features) with random
    # binary labels (0 or 1).
    n_samples, n_steps, n_features = 100, 5, 3
    X = np.random.randn(n_samples, n_steps, n_features)
    y = np.random.randint(0, 2, size=(n_samples,))

    rnn = RNN(
        input_size=n_features,
        hidden_size=8,
        output_size=2,
        sequence_length=n_steps,
    )
    rnn.train(X, y, epochs=100, learning_rate=0.05)

    # Accuracy is measured on the training data itself.
    preds = rnn.predict(X)
    acc = (preds == y).mean()
    print("Training accuracy:", acc)


Epoch 0, Loss: 0.6932
Epoch 10, Loss: 0.6924
Epoch 20, Loss: 0.6920
Epoch 30, Loss: 0.6917
Epoch 40, Loss: 0.6916
Epoch 50, Loss: 0.6915
Epoch 60, Loss: 0.6914
Epoch 70, Loss: 0.6914
Epoch 80, Loss: 0.6914
Epoch 90, Loss: 0.6914
Training accuracy: 0.53
