In [15]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

**LSTM from scratch**


In [16]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def disgmoid(x):
    s = sigmoid(x)
    return s * (1-s)
def tanh(x):
    return np.tanh(x)
def dtanh(x):
    return 1 - np.tanh(x) ** 2

In [17]:
class LSTM():
    def __init__(self,input_size,hidden_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.W = np.random.randn(4*hidden_size,input_size)*0.1
        self.U = np.random.randn(4*hidden_size, hidden_size) * 0.1
        self.b = np.zeros((4 * hidden_size, 1))
    def forward(self,x):
        self.cache = []
        h = np.zeros((self.hidden_size, 1))        
        c = np.zeros((self.hidden_size,1))
        for x_seq in x:
            x_seq = x_seq.reshape(-1, 1)
            z = np.dot(self.W,x_seq) + np.dot(self.U,h) + self.b
            i = sigmoid(z[0:self.hidden_size])
            f = sigmoid(z[self.hidden_size:self.hidden_size*2])
            o = sigmoid(z[self.hidden_size*2 : self.hidden_size*3])
            g = tanh(z[self.hidden_size*3 : self.hidden_size*4])
            c = f * c + i * g
            h = o * tanh(c)
            self.cache.append((h,c,i,f,o,g,x_seq))
        return h,c
    def backward(self,dh_next, dc_next, learning_rate=0.01):
        dW = np.zeros_like(self.W)
        dU = np.zeros_like(self.U)
        db = np.zeros_like(self.b)
        for t in reversed(range(len(self.cache))):
            h,c,i,f,o,g,x_seq = self.cache[t]
            do = dh_next * tanh(c)
            dc = dh_next * o * (1 - np.tanh(c)**2) + dc_next
            di = dc * g
            dg = dc * i
            df = dc * (self.cache[t-1][1] if t > 0 else 0)
            dwi = di * i * (1-i)
            dwf = df * f * (1-f)
            dwo = do * o * (1-o)
            dwg = dg * (1-g**2)
            dz = np.vstack((dwi, dwf, dwo, dwg))
            dW += dz @ x_seq.T
            dU += dz @ (self.cache[t-1][0].T if t > 0 else np.zeros_like(h).T)
            db += dz
            dh_next = self.U.T @ dz
            dc_next = dc * (self.cache[t][3])
        self.W = self.W - learning_rate * dW 
        self.U = self.U - learning_rate * dU 
        self.b = self.b - learning_rate * db
        

**Train Loop**

In [None]:
np.random.seed(0)
lstm = LSTM(input_size=1, hidden_size=20)

def generate_data():
    seq = np.random.randint(0, 2, size=(5, 1))
    target = np.sum(seq)  
    return seq, target

for epoch in range(500):
    x, target = generate_data()
    h, c = lstm.forward(x)
    pred = h[0][0]
    loss = (pred - target)**2
    dloss_dh = np.zeros_like(h)
    dloss_dh[0][0] = 2 * (pred - target)
    lstm.backward(dloss_dh, np.zeros_like(c), learning_rate=0.01)
    if epoch % 50 == 0:
        print(f"epoch={epoch}, loss={loss:.4f}, pred={pred:.2f}, target={target}")
print("\nTraining Complete!")
C:\Users\tabarak\Downloads\Ai project\LSTM from scratch.ipynb

epoch=0, loss=4.0743, pred=-0.02, target=2
epoch=50, loss=6.6531, pred=0.42, target=3
epoch=100, loss=1.9873, pred=0.59, target=2
epoch=150, loss=0.0669, pred=0.74, target=1
epoch=200, loss=9.9354, pred=0.85, target=4
epoch=250, loss=0.0193, pred=0.86, target=1
epoch=300, loss=0.0126, pred=0.89, target=1
epoch=350, loss=1.1772, pred=0.91, target=2
epoch=400, loss=0.0031, pred=0.94, target=1
epoch=450, loss=0.0054, pred=0.93, target=1

Training Complete!
