<a href="https://colab.research.google.com/github/supraja777/ATS-Tracker/blob/main/Small_Language_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U datasets
!pip install tiktoken
!pip install numpy matplotlib tqdm
!pip install accelerate wandb tensorboard

In [6]:
from datasets import load_dataset

# Fetch the TinyStories dataset from the Hugging Face Hub repo "roneneldan/TinyStories".
# NOTE(review): despite the name `df`, this holds whatever load_dataset returns
# (presumably a DatasetDict keyed by split), not a pandas DataFrame — confirm.
df = load_dataset("roneneldan/TinyStories")
#print(df)

In [None]:
# Tokenization with BPE

import tiktoken
import os
import numpy as np
from tqdm.auto import tqdm

# tiktoken's "gpt2" encoding; module-level so `processing` can reuse it.
encoding = tiktoken.get_encoding("gpt2")

def processing(sample_text):
  """Tokenize one dataset record with the module-level `encoding`.

  Args:
    sample_text: mapping whose 'text' entry holds the raw string to encode.

  Returns:
    dict with 'ids' (the token ids produced by encode_ordinary) and
    'len' (how many ids were produced).
  """
  token_ids = encoding.encode_ordinary(sample_text['text'])
  return {'ids': token_ids, 'len': len(token_ids)}



In [5]:
# Transformer Model Architecture

from dataclasses import dataclass
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

@dataclass
class GPTConfig:
  """Hyperparameters read by the GPT model when it builds its layers."""
  # Rows in the position-embedding table (wpe); presumably the maximum
  # sequence length the model accepts — confirm against the forward pass.
  block_size: int = 128
  # Rows in the token-embedding table (wte); 50257 is the GPT-2 BPE vocabulary size.
  vocab_size: int = 50257
  # Number of Block modules stacked in the model.
  n_layer: int = 6
  # Presumably the number of attention heads per block; not read by the
  # visible code — confirm in Block.
  n_head: int = 6
  # Width of each embedding vector and of the LayerNorm applied via ln_f.
  n_embd: int = 384
  # Probability passed to nn.Dropout.
  dropout: float = 0.1
  # Forwarded to LayerNorm as its `bias` argument.
  bias: bool = True

In [7]:
# Implementing Layer Normalization
class LayerNorm(nn.Module):
  """Layer normalization over the last dimension with an optional bias.

  Args:
    ndim: size of the trailing dimension that is normalized.
    bias: when truthy, a learnable bias initialized to zeros is created;
      when falsy, no bias parameter exists and None is handed to F.layer_norm.
  """
  def __init__(self, ndim, bias):
    super().__init__()
    self.weight = nn.Parameter(torch.ones(ndim))
    # Only allocate the bias when requested; F.layer_norm accepts bias=None.
    self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None

  def forward(self, x):
    """Normalize `x` over its trailing `ndim` features using eps=1e-5."""
    return F.layer_norm(x, self.weight.shape, self.weight, self.bias, 1e-5)


In [None]:
class GPT(nn.Module):
  """GPT-style model container assembled from a configuration object."""
  def __init__(self, config):
    """Create the embedding tables, dropout, block stack and LayerNorm.

    Args:
      config: object exposing vocab_size, block_size, n_embd, dropout,
        n_layer and bias attributes (e.g. a GPTConfig instance).
    """
    super().__init__()
    # Keep the config object itself. A trailing comma after `config` would
    # store a 1-tuple and break attribute access such as self.config.block_size.
    self.config = config
    self.transformer = nn.ModuleDict(dict(
        wte = nn.Embedding(config.vocab_size, config.n_embd),  # one n_embd vector per vocabulary id
        wpe = nn.Embedding(config.block_size, config.n_embd),  # one n_embd vector per index in [0, block_size)
        drop = nn.Dropout(config.dropout),
        h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),  # n_layer Block modules
        ln_f = LayerNorm(config.n_embd, config.bias)
    ))