In [1]:
import numpy as np

class ScratchSimpleRNNClassifier:
    """Forward-only simple RNN followed by a softmax output layer."""

    def __init__(self, n_features, n_nodes, n_output, activation=np.tanh):
        """
        Store the layer sizes and create the parameters.

        Parameters:
        - n_features: Size of each per-step input vector
        - n_nodes: Size of the recurrent hidden state
        - n_output: Number of classes produced by the output layer
        - activation: Element-wise function applied to the pre-activation
          (default: np.tanh)
        """
        self.n_features, self.n_nodes, self.n_output = n_features, n_nodes, n_output
        self.activation = activation

        # Weights are small Gaussian noise, biases start at zero.
        # The draws happen in the order Wx, Wh, W_out, so a seeded global
        # RNG always yields the same parameters.
        init_scale = 0.01
        self.Wx = np.random.randn(n_features, n_nodes) * init_scale
        self.Wh = np.random.randn(n_nodes, n_nodes) * init_scale
        self.B = np.zeros(n_nodes)
        self.W_out = np.random.randn(n_nodes, n_output) * init_scale
        self.B_out = np.zeros(n_output)

    def _recurrent_step(self, step_input, previous_hidden):
        """Advance the hidden state by one time step and return the new state."""
        pre_activation = (
            np.dot(step_input, self.Wx)
            + np.dot(previous_hidden, self.Wh)
            + self.B
        )
        return self.activation(pre_activation)

    def forward(self, X):
        """
        Run the sequence through the recurrent layer and classify it.

        Parameters:
        - X: Input data of shape (batch_size, n_sequences, n_features)

        Returns:
        - Class probabilities of shape (batch_size, n_output), computed from
          the hidden state of the final time step

        Side effects:
        - self.h_states is replaced by the list of hidden states, one entry
          per time step
        """
        n_samples, n_steps, _ = X.shape

        # The recurrence starts from an all-zero hidden state.
        hidden = np.zeros((n_samples, self.n_nodes))
        self.h_states = []

        for step in range(n_steps):
            hidden = self._recurrent_step(X[:, step, :], hidden)
            self.h_states.append(hidden)

        # Only the final hidden state feeds the output layer.
        logits = np.dot(self.h_states[-1], self.W_out) + self.B_out
        return self._softmax(logits)

    def _softmax(self, x):
        """
        Row-wise softmax.

        Parameters:
        - x: Logits, one row per sample

        Returns:
        - Array of the same shape whose rows sum to 1
        """
        # Subtracting the row maximum keeps the exponentials from overflowing
        # without changing the result.
        row_max = np.max(x, axis=1, keepdims=True)
        unnormalized = np.exp(x - row_max)
        return unnormalized / np.sum(unnormalized, axis=1, keepdims=True)

# Example usage:
# The model weights and the dummy batch both come from NumPy's global RNG.
# Seeding it first makes the printed probabilities identical on every
# Restart & Run All.
np.random.seed(0)

n_features = 3  # Number of input features
n_nodes = 5     # Number of RNN nodes
n_output = 2    # Number of output classes
batch_size = 4
n_sequences = 6

# Initialize the model
rnn = ScratchSimpleRNNClassifier(n_features, n_nodes, n_output)

# Dummy input data (batch_size, n_sequences, n_features)
X = np.random.randn(batch_size, n_sequences, n_features)

# Forward propagation: one row of class probabilities per sample
output = rnn.forward(X)
print("Output probabilities:", output)


Output probabilities: [[0.49995477 0.50004523]
 [0.50000342 0.49999658]
 [0.50001503 0.49998497]
 [0.49996735 0.50003265]]


In [2]:
import numpy as np

# Hand-checkable example: run the tanh recurrence on a tiny fixed input.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # input-to-hidden, (n_features, n_nodes)
w_h = np.array([
    [1, 3, 5, 7],
    [2, 4, 6, 8],
    [3, 5, 7, 8],
    [4, 6, 8, 10],
]) / 100  # hidden-to-hidden, (n_nodes, n_nodes)
b = np.array([1, 1, 1, 1])  # (n_nodes,)

# Sizes are read off the arrays rather than typed in again.
batch_size, n_sequences, n_features = x.shape  # (1, 3, 2)
n_nodes = w_x.shape[1]  # 4

# The recurrence starts from an all-zero hidden state of shape (batch_size, n_nodes).
h = np.zeros((batch_size, n_nodes))

# One recurrence update per time step; h carries over between iterations.
for t in range(n_sequences):
    x_t = x[:, t, :]
    a_t = x_t.dot(w_x) + h.dot(w_h) + b
    h = np.tanh(a_t)

# Hidden state after the last time step (displayed as the cell result)
h


array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

In [3]:
import numpy as np

class ScratchSimpleRNNClassifier:
    """
    Simple RNN classifier with a softmax output layer.

    forward() caches the per-step pre-activations and hidden states, and
    backward() uses them to run backpropagation through time followed by one
    plain gradient-descent update of every parameter.
    """

    def __init__(self, n_features, n_nodes, n_output, activation=np.tanh,
                 learning_rate=0.01, activation_derivative=None):
        """
        Initialize the RNN classifier.

        Parameters:
        - n_features: Number of input features
        - n_nodes: Number of nodes in the RNN layer
        - n_output: Number of output classes
        - activation: Element-wise activation function (default: tanh)
        - learning_rate: Learning rate for weight updates
        - activation_derivative: Optional callable mapping the pre-activation
          array to the element-wise derivative of `activation`. When omitted,
          the analytic tanh derivative is used if `activation` is np.tanh;
          for any other activation the derivative is estimated with a central
          finite difference.
        """
        self.n_features = n_features
        self.n_nodes = n_nodes
        self.n_output = n_output
        self.activation = activation
        self.activation_derivative = activation_derivative
        self.learning_rate = learning_rate

        # Initialize weights with small Gaussian noise and biases with zeros
        self.Wx = np.random.randn(n_features, n_nodes) * 0.01
        self.Wh = np.random.randn(n_nodes, n_nodes) * 0.01
        self.B = np.zeros(n_nodes)
        self.W_out = np.random.randn(n_nodes, n_output) * 0.01
        self.B_out = np.zeros(n_output)

    def forward(self, X):
        """
        Perform forward propagation through the RNN.

        Parameters:
        - X: Input data of shape (batch_size, n_sequences, n_features)

        Returns:
        - Output probabilities of shape (batch_size, n_output), computed from
          the hidden state of the last time step

        Side effects:
        - self.h_states and self.a_states are replaced by the per-step hidden
          states and pre-activations; backward() reads both.
        """
        batch_size, n_sequences, n_features = X.shape

        # Initialize hidden state h0 to zeros
        h_t = np.zeros((batch_size, self.n_nodes))
        self.h_states = []  # Hidden state after each time step
        self.a_states = []  # Pre-activation at each time step

        for t in range(n_sequences):
            x_t = X[:, t, :]  # Extract input at time t

            # Compute pre-activation and hidden state
            a_t = np.dot(x_t, self.Wx) + np.dot(h_t, self.Wh) + self.B
            h_t = self.activation(a_t)

            # Cache both for backpropagation
            self.h_states.append(h_t)
            self.a_states.append(a_t)

        # Only the last hidden state feeds the classifier
        h_last = self.h_states[-1]

        # Fully connected layer to output
        z_out = np.dot(h_last, self.W_out) + self.B_out

        # Softmax for output probabilities
        y_pred = self._softmax(z_out)

        return y_pred

    def _activation_grad(self, a_t, h_t):
        """
        Element-wise derivative of the activation at pre-activation `a_t`.

        Parameters:
        - a_t: Pre-activation values for one time step
        - h_t: Cached activation output for the same step, i.e. activation(a_t)

        Returns:
        - Array shaped like `a_t` holding d activation / d a
        """
        if self.activation_derivative is not None:
            return self.activation_derivative(a_t)
        if self.activation is np.tanh:
            # d/da tanh(a) = 1 - tanh(a)^2; h_t already holds tanh(a_t)
            return 1 - h_t ** 2
        # Unknown activation and no derivative supplied: estimate it with a
        # central difference instead of silently assuming tanh.
        eps = 1e-6
        return (self.activation(a_t + eps) - self.activation(a_t - eps)) / (2 * eps)

    def backward(self, X, y_true, y_pred):
        """
        Perform backpropagation through time and update all parameters in place.

        Must be called after forward(X) on the same X, because it reads the
        hidden states and pre-activations cached by that call.

        Parameters:
        - X: Input data of shape (batch_size, n_sequences, n_features)
        - y_true: True labels of shape (batch_size, n_output)
        - y_pred: Predicted probabilities from forward propagation
        """
        batch_size, n_sequences, n_features = X.shape

        # Gradient of the loss w.r.t. the output logits, averaged over the batch
        # (the softmax + cross-entropy form: prediction minus target)
        dL_dz_out = (y_pred - y_true) / batch_size

        # Gradients for W_out and B_out
        h_last = self.h_states[-1]
        dL_dW_out = np.dot(h_last.T, dL_dz_out)
        dL_dB_out = np.sum(dL_dz_out, axis=0)

        # Gradient reaching the last hidden state from the output layer
        dL_dh_t = np.dot(dL_dz_out, self.W_out.T)

        # Accumulators for the recurrent-layer gradients (summed over time)
        dL_dWx = np.zeros_like(self.Wx)
        dL_dWh = np.zeros_like(self.Wh)
        dL_dB = np.zeros_like(self.B)

        for t in reversed(range(n_sequences)):
            x_t = X[:, t, :]
            a_t = self.a_states[t]
            h_t = self.h_states[t]
            # The state before the first step is the all-zero h0
            h_prev = self.h_states[t - 1] if t > 0 else np.zeros_like(self.h_states[0])

            # Chain rule through the activation actually configured on the model
            dL_da_t = dL_dh_t * self._activation_grad(a_t, h_t)

            # Gradients for Wx, Wh, and B
            dL_dWx += np.dot(x_t.T, dL_da_t)
            dL_dWh += np.dot(h_prev.T, dL_da_t)
            dL_dB += np.sum(dL_da_t, axis=0)

            # Propagate the gradient to the previous time step; uses the
            # not-yet-updated Wh, since all updates happen after the loop
            dL_dh_t = np.dot(dL_da_t, self.Wh.T)

        # Update weights and biases
        self.Wx -= self.learning_rate * dL_dWx
        self.Wh -= self.learning_rate * dL_dWh
        self.B -= self.learning_rate * dL_dB
        self.W_out -= self.learning_rate * dL_dW_out
        self.B_out -= self.learning_rate * dL_dB_out

    def _softmax(self, x):
        """
        Compute the softmax function for the output layer.

        Parameters:
        - x: Input logits, one row per sample

        Returns:
        - Softmax probabilities with the same shape as x; each row sums to 1
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # Stability improvement
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

# Example usage:
# Weight initialization, the dummy batch and the dummy labels all draw from
# NumPy's global RNG. Seeding it first makes the printed weights identical
# on every Restart & Run All.
np.random.seed(0)

n_features = 3  # Number of input features
n_nodes = 5     # Number of RNN nodes
n_output = 2    # Number of output classes
batch_size = 4
n_sequences = 6

# Initialize the model
rnn = ScratchSimpleRNNClassifier(n_features, n_nodes, n_output)

# Dummy input data (batch_size, n_sequences, n_features)
X = np.random.randn(batch_size, n_sequences, n_features)

# Dummy true labels (one-hot encoded): pick a random class per sample and
# index the identity matrix with it
y_true = np.eye(n_output)[np.random.choice(n_output, batch_size)]

# Forward propagation (also caches the states that backward() needs)
output = rnn.forward(X)

# Backward propagation: one gradient-descent step, applied in place
rnn.backward(X, y_true, output)

print("Updated weights Wx:", rnn.Wx)
print("Updated weights Wh:", rnn.Wh)


Updated weights Wx: [[-0.00906779 -0.03115919 -0.00489831  0.02224527  0.01096823]
 [ 0.00556174 -0.01704299  0.00776704 -0.0189186   0.02670235]
 [-0.00237759 -0.01454756 -0.00153554  0.00618662  0.00561791]]
Updated weights Wh: [[-1.54403697e-03 -9.75842758e-03 -4.82742345e-03 -1.17631678e-03
  -1.83959392e-02]
 [-2.38838167e-04  2.21249572e-03  9.32489828e-04 -9.32711316e-03
   8.65597328e-03]
 [-2.85852162e-03 -6.65136276e-04  7.08011433e-03  2.52844326e-02
  -6.26597809e-03]
 [-6.42058883e-04 -1.17437681e-03  3.55737406e-03 -7.27818626e-03
  -2.98792290e-03]
 [-1.07230527e-02 -2.15891955e-03 -3.52296271e-04 -1.62774863e-02
  -1.29308031e-05]]
