In [1]:
import  numpy as np

RNN Layer

In [8]:
# Dense layer
class Layer_RNN:
    # Layer initialization
    def __init__(self, n_inputs, n_neurons, n_outputs):
        # Initialize weights and biases
        self.weights_input = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.weights_hidden = 0.01 * np.random.randn(n_neurons, n_neurons)
        self.weights_output = 0.01 * np.random.randn(n_neurons, n_outputs)

        # Bias vectors
        self.bias_hidden = np.zeros((1, n_neurons))
        self.bias_output = np.zeros((1, n_outputs))

        # Store hidden state
        self.hidden_state = np.zeros((1, n_neurons))

       

    # Forward pass
    def forward(self, inputs):
        self.inputs = inputs
        self.hidden_state = np.tanh(
            np.dot(inputs, self.weights_input) +
            np.dot(self.hidden_state, self.weights_hidden) +
            self.bias_hidden
        )
        self.output = np.dot(self.hidden_state, self.weights_output) + self.bias_output

    # Backward pass
    def backward(self, dvalues):
        # Gradient for output layer
        self.dweights_output = np.dot(self.hidden_state.T, dvalues)
        self.dbias_output = np.sum(dvalues, axis=0, keepdims=True)

        # Gradient for hidden layer
        dhidden = np.dot(dvalues, self.weights_output.T) * (1 - self.hidden_state ** 2)
        self.dweights_input = np.dot(self.inputs.T, dhidden)
        self.dweights_hidden = np.dot(self.hidden_state.T, dhidden)
        self.dbias_hidden = np.sum(dhidden, axis=0, keepdims=True)

        # Gradient for next time step
        self.dinputs = np.dot(dhidden, self.weights_input.T)


Tanh for RNN hidden states

In [3]:
class Activation_Tanh:
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.tanh(inputs)

    def backward(self, dvalues):
        self.dinputs = dvalues * (1 - self.output ** 2)

In [4]:
class Activation_Softmax:
    def forward(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        self.dinputs = dvalues.copy()

In [5]:
class Loss_CategoricalCrossentropy:
    def forward(self, y_pred, y_true):
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        labels = len(dvalues[0])
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        self.dinputs = -y_true / dvalues
        self.dinputs /= samples

In [6]:
class Activation_Softmax_Loss_CategoricalCrossentropy:
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    def forward(self, inputs, y_true):
        self.activation.forward(inputs)
        self.output = self.activation.output
        return self.loss.forward(self.output, y_true)

    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)

        self.dinputs = dvalues.copy()
        self.dinputs[range(samples), y_true] -= 1
        self.dinputs /= samples

In [7]:
class Optimizer_Adam:
    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, beta_1=0.9, beta_2=0.999):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    def update_params(self, layer):
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights_input)
            layer.weight_cache = np.zeros_like(layer.weights_input)
            layer.bias_momentums = np.zeros_like(layer.bias_hidden)
            layer.bias_cache = np.zeros_like(layer.bias_hidden)

        layer.weight_momentums = self.beta_1 * layer.weight_momentums + (1 - self.beta_1) * layer.dweights_input
        layer.bias_momentums = self.beta_1 * layer.bias_momentums + (1 - self.beta_1) * layer.dbias_hidden

        weight_momentums_corrected = layer.weight_momentums / (1 - self.beta_1 ** (self.iterations + 1))
        bias_momentums_corrected = layer.bias_momentums / (1 - self.beta_1 ** (self.iterations + 1))

        layer.weight_cache = self.beta_2 * layer.weight_cache + (1 - self.beta_2) * layer.dweights_input ** 2
        layer.bias_cache = self.beta_2 * layer.bias_cache + (1 - self.beta_2) * layer.dbias_hidden ** 2

        weight_cache_corrected = layer.weight_cache / (1 - self.beta_2 ** (self.iterations + 1))
        bias_cache_corrected = layer.bias_cache / (1 - self.beta_2 ** (self.iterations + 1))

        layer.weights_input -= self.current_learning_rate * weight_momentums_corrected / (np.sqrt(weight_cache_corrected) + self.epsilon)
        layer.bias_hidden -= self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected) + self.epsilon)

    def post_update_params(self):
        self.iterations += 1

In [13]:
# Create RNN layer
rnn_layer = Layer_RNN(n_inputs=3, n_neurons=5, n_outputs=2)

# Activation & Loss
activation_softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()

# Optimizer
optimizer = Optimizer_Adam(learning_rate=0.01, decay=1e-3)

# Dummy data
X = np.random.randn(10, 3)  # 10 samples, 3 features
y = np.random.randint(0, 2, size=(10,))  # 10 target labels (0 or 1)

for epoch in range(1000):
    # Forward pass
    rnn_layer.forward(X)
    loss = activation_softmax_loss.forward(rnn_layer.output, y)

    # Get predictions (class with the highest probability)
    predictions = np.argmax(activation_softmax_loss.output, axis=1)

    # Calculate accuracy
    accuracy = np.mean(predictions == y)

    # Backward pass
    activation_softmax_loss.backward(activation_softmax_loss.output, y)
    rnn_layer.backward(activation_softmax_loss.dinputs)

    # Update weights
    optimizer.pre_update_params()
    optimizer.update_params(rnn_layer)
    optimizer.post_update_params()

    print(f"Epoch:{epoch}, accuracy: {accuracy:.4f}, Loss: {loss.mean()}")


Epoch:0, accuracy: 0.1000, Loss: 0.6932619461532649
Epoch:1, accuracy: 0.5000, Loss: 0.6930317637548876
Epoch:2, accuracy: 0.6000, Loss: 0.6928021588520837
Epoch:3, accuracy: 0.7000, Loss: 0.6925736288067998
Epoch:4, accuracy: 0.7000, Loss: 0.6923464080641659
Epoch:5, accuracy: 0.7000, Loss: 0.6921207991532565
Epoch:6, accuracy: 0.7000, Loss: 0.6918971260156808
Epoch:7, accuracy: 0.7000, Loss: 0.6916756916810521
Epoch:8, accuracy: 0.7000, Loss: 0.6914567799975709
Epoch:9, accuracy: 0.7000, Loss: 0.6912406396393361
Epoch:10, accuracy: 0.7000, Loss: 0.6910274483096227
Epoch:11, accuracy: 0.7000, Loss: 0.6908173069142809
Epoch:12, accuracy: 0.7000, Loss: 0.6906102809059232
Epoch:13, accuracy: 0.7000, Loss: 0.6904063529682511
Epoch:14, accuracy: 0.7000, Loss: 0.6902053909294052
Epoch:15, accuracy: 0.7000, Loss: 0.6900072050889414
Epoch:16, accuracy: 0.7000, Loss: 0.6898115822240688
Epoch:17, accuracy: 0.7000, Loss: 0.6896182967894782
Epoch:18, accuracy: 0.7000, Loss: 0.689427133378737
Epoc

In [17]:
X[2]

array([ 0.62614319,  0.75376038, -0.03483344])

In [18]:
y[2]

1

In [19]:
rnn_layer.forward(np.array([0.7, 0.7, -0.2]))
loss = activation_softmax_loss.forward(rnn_layer.output, y)

# Get predictions (class with the highest probability)
predictions = np.argmax(activation_softmax_loss.output, axis=1)

print(predictions)

[1 1 1 1 1 1 1 1 1 1]
