- At the heart of an LSTM network is the memory cell, which retains information over time.
- It is manipulated by three types of gates:
  - Forget gate: Decides what information to discard from the cell state.
  - Input gate: Determines which new information to add to the cell state.
  - The candidate itself represents what the LSTM might add to the cell state.
  - Output gate: Controls the output based on the updated cell state.
  - The tanh function ensures that the values of the candidate cell state range between -1 and 1  

In [7]:
import numpy as np

class LSTM:
    """A single LSTM cell implemented with NumPy (forward pass only).

    The cell keeps one weight matrix and one bias vector per gate (forget,
    input, candidate cell, output). Each weight matrix acts on the vertical
    concatenation of the previous hidden state and the current input, so it
    has shape ``(hidden_size, hidden_size + input_size)``.
    """

    def __init__(self, input_size, hidden_size):
        """Create the cell parameters.

        Args:
            input_size: Number of input features (dimensionality of each
                input vector).
            hidden_size: Number of hidden units (size of the hidden state
                and of the cell state).
        """
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Every gate sees [h_prev; x], hence hidden_size + input_size columns.
        # Weights are drawn from a standard normal distribution; the draw
        # order (Wf, Wi, WC, Wo) is kept so seeded runs stay reproducible.
        concat_size = hidden_size + input_size
        self.Wf = np.random.randn(hidden_size, concat_size)  # Forget gate
        self.Wi = np.random.randn(hidden_size, concat_size)  # Input gate
        self.WC = np.random.randn(hidden_size, concat_size)  # Candidate cell state
        self.Wo = np.random.randn(hidden_size, concat_size)  # Output gate

        # Biases for each gate, initialized to zeros (column vectors).
        self.bf = np.zeros((hidden_size, 1))
        self.bi = np.zeros((hidden_size, 1))
        self.bC = np.zeros((hidden_size, 1))
        self.bo = np.zeros((hidden_size, 1))

    def sigmoid(self, z):
        """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

        The value is computed as ``exp(-log(1 + exp(-z)))`` with
        ``np.logaddexp`` so that large negative ``z`` does not overflow
        inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def tanh(self, z):
        """Return the element-wise hyperbolic tangent of ``z``."""
        return np.tanh(z)

    def forward(self, x, h_prev, C_prev):
        """Compute one LSTM time step.

        Args:
            x: Current input at time step t, with ``input_size`` rows
                (e.g. a column vector of shape ``(input_size, 1)``).
            h_prev: Hidden state from the previous step (t-1), with
                ``hidden_size`` rows and the same number of columns as ``x``.
            C_prev: Cell state from the previous step (t-1), same shape as
                ``h_prev``.

        Returns:
            A tuple ``(h_t, C_t)`` with the next hidden state and the next
            cell state.
        """
        # Stack the previous hidden state on top of the input so that each
        # gate needs a single matrix product.
        concat = np.vstack((h_prev, x))

        # Forget gate: how much of the previous cell state is kept.
        f_t = self.sigmoid(np.dot(self.Wf, concat) + self.bf)

        # Input gate: how much of the candidate is written to the cell state.
        i_t = self.sigmoid(np.dot(self.Wi, concat) + self.bi)

        # Candidate cell state; tanh keeps its values between -1 and 1.
        C_tilda = self.tanh(np.dot(self.WC, concat) + self.bC)

        # Cell state update: keep part of the old state, add part of the candidate.
        C_t = f_t * C_prev + i_t * C_tilda

        # Output gate: which parts of the cell state are exposed as output.
        o_t = self.sigmoid(np.dot(self.Wo, concat) + self.bo)

        # Hidden state update.
        h_t = o_t * self.tanh(C_t)

        return h_t, C_t

In [8]:
# Example usage: 3 input features feeding an LSTM with a hidden state of size 5
input_size, hidden_size = 3, 5
lstm = LSTM(input_size=input_size, hidden_size=hidden_size)

In [9]:
# Dummy data for one time step: a random input column vector and
# all-zero previous hidden and cell states.
state_shape = (hidden_size, 1)
x_t = np.random.randn(input_size, 1)
h_prev = np.zeros(state_shape)
C_prev = np.zeros(state_shape)



In [10]:
# Forward pass: run one LSTM step on the dummy input and print both states
h_t, C_t = lstm.forward(x=x_t, h_prev=h_prev, C_prev=C_prev)

for label, state in (("Hidden state: ", h_t), ("Cell state: ", C_t)):
    print(label, state)

Hidden state:  [[-0.08083578]
 [-0.59070795]
 [ 0.37315557]
 [ 0.29706343]
 [ 0.10785178]]
Cell state:  [[-0.64143477]
 [-0.9282291 ]
 [ 0.75818771]
 [ 0.83166596]
 [ 0.12467049]]
