<a href="https://colab.research.google.com/github/sauravsharm/lab-DL/blob/main/DL_assign05_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# utils_text.py
import re
from collections import Counter
from typing import List, Tuple, Dict

def simple_tokenize(text: str) -> List[str]:
    """
    Basic word-level tokenizer:
    - lowercases
    - keeps words and basic punctuation as separate tokens
    """
    text = text.lower()
    # separate punctuation
    text = re.sub(r"([.,!?;:()\"'])", r" \1 ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text.split()


def load_poems(path: str) -> str:
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def build_vocab(tokens: List[str], min_freq: int = 1) -> Tuple[Dict[str, int], Dict[int, str]]:
    counts = Counter(tokens)
    vocab = ["<pad>", "<unk>", "<bos>", "<eos>"]
    for w, c in counts.items():
        if c >= min_freq and w not in vocab:
            vocab.append(w)
    stoi = {w: i for i, w in enumerate(vocab)}
    itos = {i: w for w, i in stoi.items()}
    return stoi, itos


def tokens_to_ids(tokens: List[str], stoi: Dict[str, int]) -> List[int]:
    unk = stoi["<unk>"]
    return [stoi.get(t, unk) for t in tokens]


def make_sequences(ids: List[int], seq_len: int) -> Tuple[List[List[int]], List[List[int]]]:
    """
    Make (X, Y) where Y is next-token targets.
    """
    X, Y = [], []
    for i in range(0, len(ids) - seq_len):
        x = ids[i:i+seq_len]
        y = ids[i+1:i+seq_len+1]
        X.append(x)
        Y.append(y)
    return X, Y


In [21]:
!pip install utils



In [22]:
import time
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# from utils import load_poems, simple_tokenize, build_vocab, tokens_to_ids, make_sequences

DEVICE = "mps" if torch.backends.mps.is_available(
) else "cuda" if torch.cuda.is_available() else "cpu"


class SeqDatasetOneHot(Dataset):
    def __init__(self, X, Y, vocab_size):
        self.X = torch.tensor(X, dtype=torch.long)
        self.Y = torch.tensor(Y, dtype=torch.long)
        self.V = vocab_size

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        x_ids = self.X[idx]                    # [T]
        y_ids = self.Y[idx]                    # [T]
        # one-hot: [T, V]
        x_oh = torch.zeros(x_ids.size(0), self.V, dtype=torch.float32)
        x_oh.scatter_(1, x_ids.unsqueeze(1), 1.0)
        return x_oh, y_ids


class OneHotRNNLM(nn.Module):
    def __init__(self, vocab_size, hidden=256):
        super().__init__()
        self.rnn = nn.RNN(input_size=vocab_size,
                          hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(hidden, vocab_size)

    def forward(self, x_oh, h0=None):
        out, hn = self.rnn(x_oh, h0)        # out: [B,T,H]
        logits = self.fc(out)               # [B,T,V]
        return logits, hn


@torch.no_grad()
def generate(model, stoi, itos, seed_text="<bos>", max_new=40, temperature=1.0):
    model.eval()
    tokens = seed_text.split()
    ids = [stoi.get(t, stoi["<unk>"]) for t in tokens]
    V = len(stoi)

    h = None
    for _ in range(max_new):
        x = torch.tensor(ids[-1:], dtype=torch.long,
                         device=DEVICE)  # last token
        x_oh = torch.zeros(1, 1, V, device=DEVICE)
        x_oh.scatter_(2, x.view(1, 1, 1), 1.0)

        logits, h = model(x_oh, h)
        next_logits = logits[0, -1] / max(temperature, 1e-6)
        probs = torch.softmax(next_logits, dim=0)
        nxt = torch.multinomial(probs, 1).item()
        ids.append(nxt)

    words = [itos[i] for i in ids]
    return " ".join(words)


def main():
    text = load_poems("poems.txt")
    tokens = ["<bos>"] + simple_tokenize(text) + ["<eos>"]
    stoi, itos = build_vocab(tokens, min_freq=1)
    ids = tokens_to_ids(tokens, stoi)

    seq_len = 25
    X, Y = make_sequences(ids, seq_len)
    ds = SeqDatasetOneHot(X, Y, vocab_size=len(stoi))
    dl = DataLoader(ds, batch_size=64, shuffle=True, drop_last=True)

    model = OneHotRNNLM(vocab_size=len(stoi), hidden=256).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    print("Training One-Hot RNN on", DEVICE)
    start = time.time()

    for epoch in range(25):
        model.train()
        total = 0.0
        steps = 0
        for x_oh, y in dl:
            x_oh = x_oh.to(DEVICE)      # [B,T,V]
            y = y.to(DEVICE)            # [B,T]

            logits, _ = model(x_oh)     # [B,T,V]
            loss = loss_fn(logits.reshape(-1, logits.size(-1)), y.reshape(-1))

            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()

            total += loss.item()
            steps += 1

        print(f"Epoch {epoch+1} | loss: {total/steps:.4f}")
        print("Sample:", generate(model, stoi, itos,
              seed_text="<bos>", max_new=40, temperature=0.9))
        print("-"*80)

    elapsed = time.time() - start
    print(f"Total training time (one-hot): {elapsed:.2f}s")


if __name__ == "__main__":
    main()

Training One-Hot RNN on cuda
Epoch 1 | loss: 6.1549
Sample: <bos> fastenings the retire , and other , where a sold of had , and the childhood and service , and all some , i every the ages who snake the succeed ' opposite from the leaving of deep into and
--------------------------------------------------------------------------------
Epoch 2 | loss: 5.0038
Sample: <bos> be contradict art that fraction no spring brown , where yellow-crown greatest s his early a prepared few around like myself rubs one i’d spring , and elaborate shelf your devour as the prepared sunlight and higher at the beats
--------------------------------------------------------------------------------
Epoch 3 | loss: 3.8625
Sample: <bos> blow whether madest for a love-chant to exactly , enwrought with tale ' look each , the moon of the moon in all great or bad a hollow done with the passage to loss , in her moon , (
--------------------------------------------------------------------------------
Epoch 4 | loss: 2.6

In [23]:
# train_torch_embedding.py
import time
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# from utils import load_poems, simple_tokenize, build_vocab, tokens_to_ids, make_sequences

DEVICE = "mps" if torch.backends.mps.is_available(
) else "cuda" if torch.cuda.is_available() else "cpu"


class SeqDatasetIdx(Dataset):
    def __init__(self, X, Y):
        self.X = torch.tensor(X, dtype=torch.long)
        self.Y = torch.tensor(Y, dtype=torch.long)

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]     # both [T]


class EmbRNNLM(nn.Module):
    def __init__(self, vocab_size, emb=128, hidden=256):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb)
        self.rnn = nn.RNN(input_size=emb, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(hidden, vocab_size)

    def forward(self, x_ids, h0=None):
        x = self.emb(x_ids)         # [B,T,E]
        out, hn = self.rnn(x, h0)   # [B,T,H]
        logits = self.fc(out)       # [B,T,V]
        return logits, hn


@torch.no_grad()
def generate(model, stoi, itos, seed_text="<bos>", max_new=40, temperature=1.0):
    model.eval()
    tokens = seed_text.split()
    ids = [stoi.get(t, stoi["<unk>"]) for t in tokens]

    h = None
    for _ in range(max_new):
        x = torch.tensor([[ids[-1]]], dtype=torch.long, device=DEVICE)  # [1,1]
        logits, h = model(x, h)  # logits: [1,1,V]
        next_logits = logits[0, -1] / max(temperature, 1e-6)
        probs = torch.softmax(next_logits, dim=0)
        nxt = torch.multinomial(probs, 1).item()
        ids.append(nxt)

    return " ".join(itos[i] for i in ids)


def main():
    text = load_poems("poems.txt")
    tokens = ["<bos>"] + simple_tokenize(text) + ["<eos>"]
    stoi, itos = build_vocab(tokens, min_freq=1)
    ids = tokens_to_ids(tokens, stoi)

    seq_len = 25
    X, Y = make_sequences(ids, seq_len)
    ds = SeqDatasetIdx(X, Y)
    dl = DataLoader(ds, batch_size=64, shuffle=True, drop_last=True)

    model = EmbRNNLM(vocab_size=len(stoi), emb=128, hidden=256).to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    print("Training Embedding RNN on", DEVICE)
    start = time.time()

    for epoch in range(25):
        model.train()
        total = 0.0
        steps = 0
        for x_ids, y in dl:
            x_ids = x_ids.to(DEVICE)  # [B,T]
            y = y.to(DEVICE)          # [B,T]

            logits, _ = model(x_ids)  # [B,T,V]
            loss = loss_fn(logits.reshape(-1, logits.size(-1)), y.reshape(-1))

            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()

            total += loss.item()
            steps += 1

        print(f"Epoch {epoch+1} | loss: {total/steps:.4f}")
        print("Sample:", generate(model, stoi, itos,
              seed_text="<bos>", max_new=40, temperature=0.9))
        print("-"*80)

    elapsed = time.time() - start
    print(f"Total training time (embedding): {elapsed:.2f}s")


if __name__ == "__main__":
    main()


Training Embedding RNN on cuda
Epoch 1 | loss: 5.0168
Sample: <bos> floor-men this delightful estate , i celebrate the morning of convicts , ( your collapses with soft jerks , and twist poured was born with me at last , but two well as myself ! it seem to stand as
--------------------------------------------------------------------------------
Epoch 2 | loss: 2.4437
Sample: <bos> thee well the lights . . and loafe there ' d while i always to earth springs . i take my place among a life ' s playmate , we watch his hip-band under its couch , and massacred them
--------------------------------------------------------------------------------
Epoch 3 | loss: 1.1032
Sample: <bos> the park will the man or woman , and cease , and naked manifold no , where bee-hives range on a bank lounged the bayou ; the winds that the heart ? when i hear you , stallion , why
--------------------------------------------------------------------------------
Epoch 4 | loss: 0.6023
Sample: <bos> the boy i love , t

In [24]:
# scratch_rnn_numpy.py
import numpy as np
# from utils import load_poems, simple_tokenize, build_vocab, tokens_to_ids


def softmax(x):
    x = x - np.max(x)
    e = np.exp(x)
    return e / np.sum(e)


def one_hot(idx, V):
    v = np.zeros((V, 1))
    v[idx] = 1.0
    return v


class ScratchRNN:
    def __init__(self, vocab_size, hidden_size=64, lr=1e-2, seed=42):
        rng = np.random.default_rng(seed)
        self.V = vocab_size
        self.H = hidden_size
        self.lr = lr

        # weights
        self.Wxh = rng.normal(0, 0.01, (self.H, self.V))
        self.Whh = rng.normal(0, 0.01, (self.H, self.H))
        self.Why = rng.normal(0, 0.01, (self.V, self.H))
        self.bh = np.zeros((self.H, 1))
        self.by = np.zeros((self.V, 1))

    def forward(self, inputs, hprev):
        """
        inputs: list of token indices length T
        returns cache for backprop
        """
        xs, hs, ys, ps = {}, {}, {}, {}
        hs[-1] = hprev

        for t, idx in enumerate(inputs):
            xs[t] = one_hot(idx, self.V)                         # [V,1]
            hs[t] = np.tanh(self.Wxh @ xs[t] + self.Whh @
                            hs[t-1] + self.bh)  # [H,1]
            ys[t] = self.Why @ hs[t] + self.by                  # [V,1]
            ps[t] = softmax(ys[t].ravel()).reshape(-1, 1)       # [V,1]
        return xs, hs, ps

    def loss_and_grads(self, inputs, targets, hprev):
        xs, hs, ps = self.forward(inputs, hprev)

        loss = 0.0
        for t in range(len(inputs)):
            loss += -np.log(ps[t][targets[t], 0] + 1e-12)

        # grads init
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)

        dhnext = np.zeros((self.H, 1))

        for t in reversed(range(len(inputs))):
            dy = ps[t].copy()
            # softmax CE gradient
            dy[targets[t]] -= 1.0
            dWhy += dy @ hs[t].T
            dby += dy

            dh = self.Why.T @ dy + dhnext
            dhraw = (1 - hs[t] * hs[t]) * dh                    # tanh'
            dbh += dhraw
            dWxh += dhraw @ xs[t].T
            dWhh += dhraw @ hs[t-1].T
            dhnext = self.Whh.T @ dhraw

        # clip
        for d in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(d, -5, 5, out=d)

        hlast = hs[len(inputs)-1]
        return loss, (dWxh, dWhh, dWhy, dbh, dby), hlast

    def step(self, grads):
        dWxh, dWhh, dWhy, dbh, dby = grads
        self.Wxh -= self.lr * dWxh
        self.Whh -= self.lr * dWhh
        self.Why -= self.lr * dWhy
        self.bh -= self.lr * dbh
        self.by -= self.lr * dby

    def sample(self, start_idx, itos, length=30, temperature=1.0):
        h = np.zeros((self.H, 1))
        x = one_hot(start_idx, self.V)
        out = []

        for _ in range(length):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            y = self.Why @ h + self.by
            p = softmax((y.ravel() / max(temperature, 1e-6)))
            idx = np.random.choice(range(self.V), p=p)
            out.append(itos[idx])
            x = one_hot(idx, self.V)
        return " ".join(out)


def main():
    text = load_poems("poems.txt")
    tokens = ["<bos>"] + simple_tokenize(text) + ["<eos>"]
    stoi, itos = build_vocab(tokens, min_freq=1)
    ids = tokens_to_ids(tokens, stoi)

    rnn = ScratchRNN(vocab_size=len(stoi), hidden_size=128, lr=0.05)
    seq_len = 25
    h = np.zeros((rnn.H, 1))

    # train a bit
    for epoch in range(25):
        total_loss = 0.0
        n = 0
        for i in range(0, len(ids) - seq_len - 1, seq_len):
            inp = ids[i:i+seq_len]
            tgt = ids[i+1:i+seq_len+1]
            loss, grads, h = rnn.loss_and_grads(inp, tgt, h)
            rnn.step(grads)
            total_loss += loss
            n += 1

        avg = total_loss / max(n, 1)
        print(f"Epoch {epoch+1} | avg loss: {avg:.4f}")
        print("Sample:", rnn.sample(
            stoi["<bos>"], itos, length=30, temperature=0.9))
        print("-"*80)


if __name__ == "__main__":
    main()


Epoch 1 | avg loss: 328.6132
Sample: something calculation in the this of other , , snow-flakes by to bird the this tardy ' , , could by to and in idle will edge , , to
--------------------------------------------------------------------------------
Epoch 2 | avg loss: 270.7710
Sample: tops for , love- have in it immigrants , , , spur in in it fancy , , , , in in pursue by , , , swing appeared in
--------------------------------------------------------------------------------
Epoch 3 | avg loss: 275.1941
Sample: for : straying to it awakes this , the the and to by branches , eager the the majesty and it grave , , confusion the song to birch in
--------------------------------------------------------------------------------
Epoch 4 | avg loss: 275.3231
Sample: us it mount by follow about things higher resist to it grave , , they barr spirit to it it , name me talk round to it . this ,
--------------------------------------------------------------------------------
Epoch 5 | avg loss: 27