<a href="https://colab.research.google.com/github/shusank8/SEQUENCEModels/blob/main/GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Banner identifying the notebook and its author.
print('SIMPLE Gated Recurrent Network; BY SHUSANKET BASYAL')

SIMPLE Gated Recurrent Network; BY SHUSANKET BASYAL


In [2]:
# LOADING THE DATASET
# THE DATASET IS THE "SHORT JOKES" COLLECTION FROM KAGGLE, FETCHED WITH KAGGLEHUB
import kagglehub
# `path` IS REUSED BY THE LATER CELLS TO LOCATE THE DOWNLOADED FILES
path = kagglehub.dataset_download("abhinavmoudgil95/short-jokes")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/abhinavmoudgil95/short-jokes/versions/1


In [3]:
# IMPORTING THE NECESSARY LIBRARIES

import os
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import functional as F

In [4]:
# LOOKING AT WHERE THE FILES HAVE BEEN DOWNLOADED
os.listdir(path)

['shortjokes.csv']

In [5]:
# READ THE CSV INTO A DATAFRAME
df = pd.read_csv(path+"/shortjokes.csv")
# CONCATENATE EVERY ENTRY OF THE 'Joke' COLUMN INTO ONE LONG STRING (NO SEPARATOR)
text = "".join(df['Joke'].values)
# VOCABULARY: THE SORTED UNIQUE CHARACTERS; VOCAB_SIZE IS HOW MANY THERE ARE
char = sorted(set(text))
vocab_size = len(char)
# LOOKUP TABLES: ID -> CHARACTER AND CHARACTER -> ID
idtostring = dict(enumerate(char))
stringtoid = {sti: i for i, sti in idtostring.items()}


def encode(x):
  """Map each character of `x` to its integer id."""
  return [stringtoid[i] for i in x]


def decode(x):
  """Turn an iterable of ids back into a string."""
  return "".join(idtostring[i] for i in x)


# ENCODE THE WHOLE CORPUS AS A 1-D LONG TENSOR
text = torch.tensor(encode(text), dtype=torch.long)
# FIRST 80% IS THE TRAIN SPLIT, THE REMAINDER IS THE VALIDATION SPLIT
n = int(0.8*len(text))
train, val = text[:n], text[n:]

In [6]:
# This function creates batches of data for training or validation.
# It selects random starting points, extracts sequences of a given length (block_size), and prepares input (x) and target (y) tensors for a model.

def generate_batch(split, batch_size, block_size):
  """Sample a random batch of (input, target) windows from one split.

  `split == 'train'` reads from the module-level `train` tensor; any other
  value reads from `val`. Each target row is its input row shifted one
  position to the right. For a 1-D split tensor both results have shape
  (batch_size, block_size).
  """
  source = val if split != 'train' else train
  starts = torch.randint(0, len(source)-block_size, (batch_size, ))
  # Broadcast the start offsets against 0..block_size-1 so every window is
  # gathered with a single indexing operation.
  window = starts.unsqueeze(1) + torch.arange(block_size)
  return source[window], source[window+1]


In [7]:
# This function estimates the model's loss on the validation set by running 64 mini-batches through it.
# It calculates cross-entropy loss for each batch and returns the average loss, temporarily switching the model to evaluation mode for accurate assessment.

def estimate_loss(model, vocab_size, batch_size, block_size, eval_iters=64):
  """Return the mean cross-entropy of `model` over random validation batches.

  Args:
    model: module mapping a batch of token ids to logits that can be
      reshaped to (-1, vocab_size).
    vocab_size: number of classes in the last dimension of the logits.
    batch_size: sequences per sampled batch.
    block_size: tokens per sequence.
    eval_iters: number of validation batches to average over (default 64).

  Returns:
    A 0-dim tensor holding the average loss.
  """
  # Evaluate on whatever device the model lives on; fall back to CUDA (the
  # previous hard-coded behaviour) for a model without parameters.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cuda')

  was_training = model.training
  model.eval()
  losses = torch.zeros(eval_iters)
  try:
    # No gradients are needed here; skipping the autograd graph saves memory
    # and time on every evaluation batch.
    with torch.no_grad():
      for i in range(eval_iters):
        x, y = generate_batch('val', batch_size, block_size)
        logits = model(x.to(device)).reshape(-1, vocab_size)
        losses[i] = F.cross_entropy(logits, y.to(device).view(-1)).item()
  finally:
    # Put the model back in the mode the caller had it in, even on error.
    model.train(was_training)
  return losses.mean()


In [8]:
# Hyperparameters shared by the cells below.
embdim = 64  # width of each character embedding
block_size = 64  # number of characters per sampled sequence
hidim = 64  # size of the GRU hidden state
# outdim = 32
batch_size = 128  # sequences per mini-batch
# Bare expression: the notebook displays the vocabulary size as the cell output.
vocab_size

97

In [9]:
class SimpleGRU(nn.Module):
  """Single-layer, bias-free GRU language model unrolled step by step.

  Token ids are embedded, passed through a hand-written GRU cell one time
  step at a time, and every hidden state is projected to vocabulary logits.
  Layer sizes are read from the module-level `vocab_size`, `embdim` and
  `hidim` when the model is constructed.
  """

  def __init__(self):
    super().__init__()

    self.embeddings = nn.Embedding(vocab_size, embdim)

    # reset gate: input and hidden projections
    self.reset_x = nn.Linear(embdim, hidim, bias=False)
    self.reset_hid = nn.Linear(hidim, hidim, bias=False)

    # update gate: input and hidden projections
    self.update_gate_x = nn.Linear(embdim, hidim, bias=False)
    self.update_gate_hid = nn.Linear(hidim, hidim, bias=False)

    # candidate ("new") state: input and hidden projections
    self.new_gate_x = nn.Linear(embdim, hidim, bias=False)
    self.new_gate_hid = nn.Linear(hidim, hidim, bias=False)

    # hidden state -> vocabulary logits
    self.out = nn.Linear(hidim, vocab_size, bias=False)

  def forward(self, x, hid=None):
    """Compute logits for every position of a batch of sequences.

    Args:
      x: (B, T) tensor of token ids.
      hid: optional (B, hidden) initial hidden state; zeros when omitted.

    Returns:
      (B, T, vocab) tensor of logits, one row per input position.
    """
    # (B, T) -> (B, T, C)
    x = self.embeddings(x)
    B = x.shape[0]
    # time-major layout so the loop walks over time steps: (T, B, C)
    x = x.transpose(0, 1)
    if hid is None:
      # Take device, dtype and width from the model's own tensors instead of
      # a hard-coded 'cuda' / global hidim, so the module runs wherever its
      # parameters live.
      hid = x.new_zeros(B, self.reset_hid.in_features)

    res = []
    for xi in x.unbind(0):
      reset_gate = torch.sigmoid(self.reset_x(xi) + self.reset_hid(hid))
      update_gate = torch.sigmoid(self.update_gate_x(xi) + self.update_gate_hid(hid))
      # The reset gate scales the already-projected hidden state.
      new_gate = torch.tanh(self.new_gate_x(xi) + reset_gate * self.new_gate_hid(hid))
      # Blend the candidate state with the previous state.
      hid = ((1 - update_gate) * new_gate) + (update_gate * hid)
      res.append(self.out(hid))

    # Stack the per-step logits along the time axis: (B, T, vocab)
    return torch.stack(res, dim=1)


In [10]:
model = SimpleGRU()
# Xavier-normal initialisation for every parameter with two or more
# dimensions; lower-rank parameters keep their default initialisation.
for p in model.parameters():
  if p.dim() < 2:
    continue
  nn.init.xavier_normal_(p)
model = model.to("cuda")
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)


In [21]:
epoches = 10000
for step in range(epoches):
  # Draw a fresh random training batch and move both tensors to the GPU.
  x, y = (t.to("cuda") for t in generate_batch('train', batch_size, block_size))

  # Flatten to (B*T, vocab) / (B*T,) so cross-entropy scores every position.
  logits = model(x).reshape(-1, vocab_size)
  loss = F.cross_entropy(logits, y.view(-1))

  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()

  # Report the validation loss every 200 steps.
  if step % 200 != 0:
    continue
  l = estimate_loss(model, vocab_size, batch_size, block_size)
  print("step:", step, "loss=>", l.item())

step: 0 loss=> 1.776522159576416
step: 200 loss=> 1.7784684896469116
step: 400 loss=> 1.770607352256775
step: 600 loss=> 1.7663456201553345
step: 800 loss=> 1.7740823030471802
step: 1000 loss=> 1.7692002058029175
step: 1200 loss=> 1.7708969116210938
step: 1400 loss=> 1.765285611152649
step: 1600 loss=> 1.775416612625122
step: 1800 loss=> 1.7654868364334106
step: 2000 loss=> 1.766884684562683
step: 2200 loss=> 1.7665541172027588
step: 2400 loss=> 1.7586684226989746
step: 2600 loss=> 1.7624338865280151
step: 2800 loss=> 1.7682567834854126
step: 3000 loss=> 1.7639693021774292
step: 3200 loss=> 1.7569485902786255
step: 3400 loss=> 1.756165623664856
step: 3600 loss=> 1.7585806846618652
step: 3800 loss=> 1.7591888904571533
step: 4000 loss=> 1.7562925815582275
step: 4200 loss=> 1.755536675453186
step: 4400 loss=> 1.759547233581543
step: 4600 loss=> 1.758110761642456
step: 4800 loss=> 1.7585285902023315
step: 5000 loss=> 1.753968596458435
step: 5200 loss=> 1.7531100511550903
step: 5400 loss=> 

In [22]:

# This function generates tokens using the trained model.
# Starting from a given input, it predicts the next token, samples from the probability distribution, appends it to the sequence,
# and continues for max_tok steps without updating gradients.

def generatetok(model, start, max_tok, context_len=None):
  """Autoregressively sample `max_tok` new tokens after `start`.

  Args:
    model: callable mapping a (B, T) tensor of token ids to (B, T, vocab)
      logits.
    start: (B, T0) long tensor of prompt ids.
    max_tok: number of tokens to append to every row.
    context_len: how many trailing tokens are fed to the model at each step;
      defaults to the module-level `block_size`.

  Returns:
    A (B, T0 + max_tok) tensor: `start` followed by the sampled ids.
  """
  if context_len is None:
    context_len = block_size
  with torch.no_grad():
    for _ in range(max_tok):
      # Only the most recent `context_len` tokens are shown to the model.
      context = start[:, -context_len:]
      # Logits of the last position predict the next token.
      next_logits = model(context)[:, -1, :]
      prob = F.softmax(next_logits, dim=-1)
      # Sample instead of taking the argmax so the output varies per run.
      nxt = torch.multinomial(prob, num_samples=1)
      start = torch.cat([start, nxt], dim=1)
  return start


In [23]:
# Six prompts of length one, each holding the token id 2.
start = torch.full((6, 1), 2, dtype=torch.long, device='cuda')

In [24]:
# SAMPLE 156 NEW TOKENS FOR EVERY PROMPT ROW IN `start`
out = generatetok(model, start, 156)
# DECODE EACH GENERATED ROW BACK INTO TEXT
res = [decode(out[row].tolist()) for row in range(start.shape[0])]
# PRINT THE SAMPLES, EACH FOLLOWED BY A DIVIDER
for x in res:
  print(x)
  print("--------------\n")


 is the firin, I bear a feel bower M. Hamking arrestentionts not just turner president it!Me: Hi, Jakulaking have alar better that a lagdy" me pleguainty goo
--------------

 down somaighed cows of Finlsians work hamper luizinade with he was say to sumes with about so you."Neverd"I reall it.sparnsWhy kis] call It all the song use
--------------

 the forprying her to bave addil aners That's have seriartion buybed while have a girl" "Im man.The brandier their at hergeton overstahrons mole. If my favil
--------------

 fine? TheyWhat do a understically thresthil more sat, to be them?"Some carry depprasy!Roler bana Gues too.If "hidn your womeun with builds if Just is say I'
--------------

 you got.What, "Your wife lefd today shoppy anything and soghtrose screw on fine of nothem!" because you can the mictict: It is start saidan eat how to qureb
--------------

 coments drink for pay too? Co people you candether types arrective what woman that chricer, W-maromirds in the nnecty a hoody, an