<a href="https://colab.research.google.com/github/michaelharold/ML-/blob/main/RNNXLSTMXGRU_Understanding_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

RNN Scratch

In [None]:
import numpy as np
def tanh(x):return np.tanh(x)
def dtanh(x):return 1.0-np.tanh(x)**2

In [None]:
seq=np.array([1.0,2.0,3.0,4.0])

In [None]:
wx=np.random.randn(1)*0.1
wh=np.random.randn(1)*0.1
bh=np.zeros(1)
Wy = np.random.randn(1)*0.1
by = np.zeros(1)
lr = 1e-2

In [None]:
def forward(xs):
  h_prev=np.zeros(1)
  hs,ys,as_=[],[],[]
  for x in xs:
    a=wx*x+wh*h_prev+bh
    h=tanh(a)
    y=Wy*h+by

    hs.append(h)
    ys.append(y)
    as_.append(a)
    h_prev=h
  return np.array(hs),np.array(ys),np.array(as_)




In [None]:
for epoch in range(200):
  hs,ys,as_=forward(seq[:-1])
  targets=seq[1:]
  loss=0.5*np.mean((ys.flatten()-targets)**2)
  dwx=0;dwh=0;dbh=0;dwy=0;dby=0
  dh_next=0
  for t in reversed(range(len(ys))):
    dy=(ys[t]-targets[t])
    dwy+=dy*hs[t]
    dby+=dy
    dh=dy*Wy+dh_next
    da=dh*dtanh(as_[t])
    dwx+=da*seq[t]
    dwh+=da*hs[t-1] if t>0 else 0.0
    dbh+=da
    dh_next=da*wh
    for param,dparam in [(wx,dwx),(wh,dwh),(bh,dbh),(Wy,dwy),(by,dby)]:
      param-=lr*dparam
    if epoch%50 ==0:
      print("epoch", epoch, "loss", loss)




epoch 0 loss 4.846201105237669
epoch 0 loss 4.846201105237669
epoch 0 loss 4.846201105237669
epoch 50 loss 0.15056756471476979
epoch 50 loss 0.15056756471476979
epoch 50 loss 0.15056756471476979
epoch 100 loss 0.0373963411731432
epoch 100 loss 0.0373963411731432
epoch 100 loss 0.0373963411731432
epoch 150 loss 0.005252438842250678
epoch 150 loss 0.005252438842250678
epoch 150 loss 0.005252438842250678


# LSTM

In [None]:
import numpy as np

In [None]:
def sigmoid(x): return 1/(1+np.exp(-x))
def dsig(x): return sigmoid(x)*(1-sigmoid(x))
def dtanh(x): return 1.0-np.tanh(x)**2

In [None]:
D=3
H=4

In [None]:
Wf = np.random.randn(D,H)*0.1; Uf = np.random.randn(H,H)*0.1; bf = np.zeros(H)
Wi = np.random.randn(D,H)*0.1; Ui = np.random.randn(H,H)*0.1; bi = np.zeros(H)
Wo = np.random.randn(D,H)*0.1; Uo = np.random.randn(H,H)*0.1; bo = np.zeros(H)
Wc = np.random.randn(D,H)*0.1; Uc = np.random.randn(H,H)*0.1; bc = np.zeros(H)


In [None]:
def lstm_frwd(x,h_prev,c_prev):
  af=x.dot(Wf)+h_prev.dot(Uf)+bf
  ai=x.dot(Wi)+h_prev.dot(Ui)+bi
  ao = x.dot(Wo) + h_prev.dot(Uo) + bo
  ag = x.dot(Wc) + h_prev.dot(Uc) + bc
  f=sigmoid(af)
  i=sigmoid(ai)
  o=sigmoid(ao)
  g=np.tanh(ag)
  c=f*c_prev+i*g
  h=o*np.tanh(c)
  cache = (x, h_prev, c_prev, f, i, o, g, c, af, ai, ao, ag)
  return h,c,cache



In [None]:
def lstm_backwrd(dh,dc,cache):
  (x, h_prev, c_prev, f, i, o, g, c, af, ai, ao, ag) = cache
  dtanh_c=o*(1-np.tanh(c)**2)
  # h = o * tanh(c)
  do=dh*np.tanh(c)
  dc_total=dc+dh*dtanh_c
  # c = f*c_prev + i*g
  df=dc_total*c_prev
  di=dc_total*g
  dg=dc_total*i
  dc_prev=dc_total*f
  daf=dsig(af)*df
  dai=di*dsig(ai)
  dao=do*dsig(ao)
  dag=dg*dtanh(ag)

  dwf=np.outer(x,daf)
  dwi=np.outer(x,dai)
  dwo=np.outer(x,dao)
  dwc=np.outer(x,dag)

  duf=np.outer(h_prev,daf)
  dui=np.outer(h_prev,dai)
  duo=np.outer(h_prev,dao)
  duc=np.outer(h_prev,dag)

  dbf=daf
  dbi=dai
  dbo=dao
  dbc=dag

  dx=(
      daf.dot(Wf.T)
      +dai.dot(Wi.T)
      +dao.dot(Wo.T)
      +dag.dot(Wc.T)
  )

  dh_prev=(
      daf.dot(Uf.T)
      +dai.dot(Ui.T)
      +dao.dot(Uo.T)
      +dag.dot(Uc.T)
  )
  return dx, dh_prev, dc_prev, \
           dwf, dwi, dwo, dwc, \
           duf, dui, duo, duc, \
           dbf, dbi, dbo, dbc


x = np.random.randn(D)
h0 = np.zeros(H)
c0 = np.zeros(H)

h1, c1, cache = lstm_frwd(x, h0, c0)

print("h1 shape:", h1.shape)
print("c1 shape:", c1.shape)


dh = np.random.randn(H)
dc = np.random.randn(H)

grads = lstm_backwrd(dh, dc, cache)

print("\nBackward pass outputs:")
print("dx shape:", grads[0].shape)
print("dh_prev shape:", grads[1].shape)
print("dc_prev shape:", grads[2].shape)
print("dWf shape:", grads[3].shape)


lr = 0.01

(dx, dh_prev, dc_prev,
 dWf, dWi, dWo, dWc,
 dUf, dUi, dUo, dUc,
 dbf, dbi, dbo, dbc) = grads


Wf -= lr * dWf
Wi -= lr * dWi
Wo -= lr * dWo
Wc -= lr * dWc

Uf -= lr * dUf
Ui -= lr * dUi
Uo -= lr * dUo
Uc -= lr * dUc

bf -= lr * dbf
bi -= lr * dbi
bo -= lr * dbo
bc -= lr * dbc












h1 shape: (4,)
c1 shape: (4,)

Backward pass outputs:
dx shape: (3,)
dh_prev shape: (4,)
dc_prev shape: (4,)
dWf shape: (3, 4)


# GRU

In [3]:
import numpy as np
def sig(x):
  return 1/(1+np.exp(-x))
def dsig(x):
  return sig(x)*(1-sig(x))
def dtanh(x):
  return 1-np.tanh(x)**2

class GRUCell:
  def __init__(self,input_size,hidden_size):
    self.input_size=input_size
    self.hidden_size=hidden_size
    self.H=H

    self.Wz = np.random.randn(H, D) * 0.1
    self.Uz = np.random.randn(H, H) * 0.1
    self.bz = np.zeros((H, 1))

    self.Wr = np.random.randn(H, D) * 0.1
    self.Ur = np.random.randn(H, H) * 0.1
    self.br = np.zeros((H, 1))

    self.Wh = np.random.randn(H, D) * 0.1
    self.Uh = np.random.randn(H, H) * 0.1
    self.bh = np.zeros((H, 1))

    def forward(self,x,h_prev):
      self.x=x
      self.h_prev=h_prev

      self.az=self.Wz.dot(x)+self.Uz.dot(h_prev)+self.bz
      self.ar=self.Wr.dot(x)+self.Ur.dot(h_prev)+self.br
      self.ah=self.Wh.dot(x)+self.Uh.dot(h_prev)+self.bh
      self.z=sig(self.az)
      self.r=sig(self.ar)
      self.h_hat=np.tanh(self.ah)
      self.h=self.z*h_prev+(1-self.z)*self.h_hat
      return self.h

    def backward(self,dh):
      H=self.H
      x=self.x
      h_prev=self.h_prev
      dz = dh * (self.h_tilde - h_prev)
      da_z = dz * dsig(self.az)

      dh_tilde = dh * self.z
      da_h = dh_tilde * dtanh(self.ah)
      dr = (self.Uh.T @ da_h) * h_prev
      da_r = dr * dsig(self.ar)

      dh_prev=(
          dh*(1-self.z)
          +(self.Uh.T @ da_h)*self.r
          +self.Wr.T @ da_r
          +self.Wz.T @ da_z
      )
      self.dWz = da_z @ x.T
      self.dUz = da_z @ h_prev.T
      self.dbz = da_z

      self.dWr = da_r @ x.T
      self.dUr = da_r @ h_prev.T
      self.dbr = da_r

      self.dWh = da_h @ x.T
      self.dUh = da_h @ (self.r * h_prev).T
      self.dbh = da_h

      return dh_prev

    class GRUModel:
      def __init__(self,D,H,O):
        self.gru=GRUCell(D,H)
        self.Wy=np.random.randn(H,O)*0.1
        self.by=np.zeros((O,1))

      def forward(self,xs):
        h=np.zeros((self.gru.H,1))
        self.cache=[]
        for x in xs:
          h=self.gru.forward(x,h)
          y=self.Wy@h+self.by
          self.cache.append((h,y))
        return [c[1] for c in self.cache]

      def backward(self,dys,lr=0.01):

        dWy = np.zeros_like(self.Wy)
        dby = np.zeros_like(self.by)

        dh = np.zeros((self.gru.H, 1))

        for t in reversed(range(len(dys))):
            h, y = self.cache[t]

            dy = dys[t]
            dWy += dy @ h.T
            dby += dy
            dh += self.Wy.T @ dy
            dh = self.gru.backward(dh)
        self.Wy -= lr * dWy
        self.by -= lr * dby


        for param in ["Wz","Uz","bz","Wr","Ur","br","Wh","Uh","bh"]:
            p = getattr(self.gru, param)
            dp = getattr(self.gru, "d"+param)
            setattr(self.gru, param, p - lr * dp)





