In [14]:
import numpy as np

def train_xor_rnn(X, Y, hidden_size=2, epochs=25, learning_rate=0.1):
    """
    Train an RNN on the XOR problem.
    
    :param X: Input data - sequence of bits.
    :param Y: Target output data.
    :param hidden_size: Number of neurons in the hidden layer.
    :param epochs: Number of epochs to train for.
    :param learning_rate: Learning rate for the optimizer.
    :return: Trained weight and bias parameters.
    """
    
    # Initialize weights
    Wxh = np.random.randn(hidden_size, 1)  # Weight matrix for input to hidden
    Whh = np.random.randn(hidden_size, hidden_size)  # Weight matrix for hidden to hidden
    Why = np.random.randn(1, hidden_size)  # Weight matrix for hidden to output
    bh = np.zeros((hidden_size, 1))  # Bias for hidden layer
    by = np.zeros((1, 1))  # Bias for output layer

    # Sigmoid activation function and its derivative
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(x):
        return sigmoid(x) * (1 - sigmoid(x))

    # Training loop
    for epoch in range(epochs):
        total_loss = 0

        for i in range(len(X)):
            # Forward pass
            h = np.zeros((hidden_size, 1))  # Reset the hidden state at the start of each sequence
            y_pred = None

            for t in range(2):  # Loop through the sequence
                x = X[i][t].reshape(-1, 1)  # Get the input for this timestep

                # Update the hidden state
                h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)

                # Calculate the output
                y_pred = sigmoid(np.dot(Why, h) + by)

            # Calculate the loss using mean squared error
            loss = (y_pred - Y[i][0]) ** 2 / 2
            total_loss += loss[0]
            
            # Backward pass
            # Gradients for each parameter
            dWhy = np.zeros_like(Why)
            dWxh = np.zeros_like(Wxh)
            dWhh = np.zeros_like(Whh)
            dbh = np.zeros_like(bh)
            dby = np.zeros_like(by)
            dhnext = np.zeros_like(h)
            
            # Derivative of loss with respect to y_pred
            dy = (y_pred - Y[i][0]) * sigmoid_prime(y_pred)
            
            # Backpropagate through the network
            dWhy += np.dot(dy, h.T)
            dby += dy
            dh = np.dot(Why.T, dy)  # Backprop into h
            
            # Backprop through time (BPTT)
            for t in reversed(range(2)):
                dhraw = (1 - h * h) * dh  # Backprop through tanh nonlinearity
                dbh += dhraw
                dWxh += np.dot(dhraw, X[i][t].reshape(1, -1))
                dWhh += np.dot(dhraw, h.T)
                dhnext = np.dot(Whh.T, dhraw)

            # Clip gradients to mitigate exploding gradients
            for dparam in [dWhy, dWxh, dWhh, dbh, dby]:
                np.clip(dparam, -5, 5, out=dparam)

            # Update parameters using gradient descent
            Wxh -= learning_rate * dWxh
            Whh -= learning_rate * dWhh
            Why -= learning_rate * dWhy
            bh -= learning_rate * dbh
            by -= learning_rate * dby

        # Print loss every epoch
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(X)}")

# Example usage:
# XOR input and output sequences
X = np.array([[[0], [0]], [[0], [1]], [[1], [0]], [[1], [1]]], dtype=np.float32)
Y = np.array([[0], [1], [1], [0]], dtype=np.float32)

train_xor_rnn(X, Y)

Epoch 1/25, Loss: [0.12336938]
Epoch 2/25, Loss: [0.1229217]
Epoch 3/25, Loss: [0.12248081]
Epoch 4/25, Loss: [0.12204565]
Epoch 5/25, Loss: [0.12161534]
Epoch 6/25, Loss: [0.1211891]
Epoch 7/25, Loss: [0.12076631]
Epoch 8/25, Loss: [0.12034646]
Epoch 9/25, Loss: [0.11992915]
Epoch 10/25, Loss: [0.11951406]
Epoch 11/25, Loss: [0.11910097]
Epoch 12/25, Loss: [0.11868973]
Epoch 13/25, Loss: [0.11828027]
Epoch 14/25, Loss: [0.11787256]
Epoch 15/25, Loss: [0.11746664]
Epoch 16/25, Loss: [0.11706258]
Epoch 17/25, Loss: [0.1166605]
Epoch 18/25, Loss: [0.11626054]
Epoch 19/25, Loss: [0.11586288]
Epoch 20/25, Loss: [0.1154677]
Epoch 21/25, Loss: [0.11507521]
Epoch 22/25, Loss: [0.11468563]
Epoch 23/25, Loss: [0.11429916]
Epoch 24/25, Loss: [0.11391604]
Epoch 25/25, Loss: [0.11353648]
