In [None]:
# Short BLEU + Perplexity example for Bigram and RNN (paste into one Colab cell)
# Uncomment if needed:
# !pip install --quiet torch nltk

import math, random
from collections import defaultdict, Counter

import numpy as np
import torch, torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# --- tiny dataset ---
corpus = [
    "The quick brown fox jumps over the lazy dog",
    "I love natural language processing",
    "Language models predict the next word",
    "RNNs and n-gram models are classic",
    "This is a small test corpus",
]
# Shuffle with a dedicated, seeded generator so the train/val/test split is
# the same on every run; the global `random` state is left untouched.
SPLIT_SEED = 0
random.Random(SPLIT_SEED).shuffle(corpus)
# 3 training sentences, 1 validation sentence, the remainder for testing.
train, val, test = corpus[:3], corpus[3:4], corpus[4:]


def tok(s):
    """Lowercase ``s`` and split it into tokens with NLTK's ``word_tokenize``."""
    return word_tokenize(s.lower())


train_t = [tok(s) for s in train]
val_t = [tok(s) for s in val]
test_t = [tok(s) for s in test]

# --- vocab ---
# Reserved symbols take the lowest ids (so '<pad>' is id 0); corpus words
# follow in the order they are first seen across train, val and test.
special = ['<pad>', '<unk>', '<bos>', '<eos>']
counter = Counter()
counter.update(word for sentence in (train_t + val_t + test_t) for word in sentence)
itos = special + list(counter)
stoi = dict(zip(itos, range(len(itos))))

def encode(sent):
    """Turn a token list into vocabulary ids framed by ``<bos>`` and ``<eos>``.

    Tokens that are not keys of ``stoi`` are mapped to the id of ``<unk>``.
    """
    framed = ['<bos>'] + sent + ['<eos>']
    unk_id = stoi['<unk>']
    ids = []
    for token in framed:
        ids.append(stoi.get(token, unk_id))
    return ids

# ---------------- Bigram LM (add-k smoothing) ----------------
class Bigram:
    """Word-bigram language model with add-k smoothing.

    Sentences are lists of tokens; ``<bos>`` and ``<eos>`` markers are added
    internally. Querying the model (``prob``, ``sent_logprob``,
    ``perplexity``, ``generate``) never modifies the collected counts.
    """

    def __init__(self, add_k=0.1):
        self.counts = defaultdict(Counter)  # counts[a][b]: times b followed a
        self.ctx = Counter()                # ctx[a]: times a was a left context
        self.add_k = add_k                  # smoothing constant added to every count
        self.vocab = set()                  # all tokens seen in training, markers included

    def train(self, sents):
        """Accumulate bigram counts from an iterable of token lists."""
        for s in sents:
            s2 = ['<bos>'] + s + ['<eos>']
            for a, b in zip(s2, s2[1:]):
                self.counts[a][b] += 1
                self.ctx[a] += 1
                self.vocab.update((a, b))

    def prob(self, a, b):
        """Return the add-k smoothed probability of ``b`` following ``a``.

        Returns 0.0 when the smoothed denominator is zero (e.g. an untrained
        model, or ``add_k == 0`` with an unseen context).
        """
        V = len(self.vocab)
        # Use .get so that looking up an unseen context does not insert an
        # empty Counter into the defaultdict.
        followers = self.counts.get(a)
        seen = followers[b] if followers is not None else 0
        denom = self.ctx[a] + self.add_k * V
        if denom == 0:
            return 0.0
        return (seen + self.add_k) / denom

    def sent_logprob(self, s):
        """Return the natural-log probability of token list ``s``.

        Returns ``-inf`` if any transition has probability zero.
        """
        s2 = ['<bos>'] + s + ['<eos>']
        lp = 0.0
        for a, b in zip(s2, s2[1:]):
            p = self.prob(a, b)
            if p <= 0.0:
                return float('-inf')
            lp += math.log(p)
        return lp

    def perplexity(self, sents):
        """Return per-token perplexity over ``sents``.

        Each sentence contributes ``len(s) + 1`` predictions (its tokens plus
        ``<eos>``). Returns ``inf`` when there is nothing to score or when a
        sentence has zero probability.
        """
        N = 0
        lp = 0.0
        for s in sents:
            N += len(s) + 1
            lp += self.sent_logprob(s)
        if N == 0 or lp == float('-inf'):
            return float('inf')
        return math.exp(-lp / N)

    def generate(self, maxlen=15):
        """Sample up to ``maxlen`` tokens, stopping early at ``<eos>``.

        The next token is drawn from the successors observed after the current
        token, weighted by their smoothed probabilities (renormalised over
        those successors). If the current token has no observed successors the
        whole vocabulary is used instead, in sorted order so the draw does not
        depend on set iteration order. An untrained model yields ``[]``.
        """
        cur = '<bos>'
        out = []
        for _ in range(maxlen):
            choices = list(self.counts.get(cur, ())) or sorted(self.vocab)
            if not choices:
                break
            probs = np.array([self.prob(cur, c) for c in choices], dtype=float)
            total = probs.sum()
            if total > 0:
                probs /= total
            else:
                # All candidates have zero mass (add_k == 0): sample uniformly.
                probs = np.full(len(choices), 1.0 / len(choices))
            # np.random.choice returns np.str_; hand back plain Python strings.
            nxt = str(np.random.choice(choices, p=probs))
            if nxt == '<eos>':
                break
            out.append(nxt)
            cur = nxt
        return out

# Fit the bigram model on the training sentences, then report its perplexity
# on the held-out test sentences.
bg = Bigram(add_k=0.1)
bg.train(train_t)
bigram_ppl = bg.perplexity(test_t)
print("Bigram perplexity on test:", round(bigram_ppl, 3))

# ---------------- Small RNN LM ----------------
class LMDataset(Dataset):
    """Next-token prediction examples built from tokenised sentences.

    Each sentence is encoded with ``encode`` and contributes ``len(seq) - 1``
    examples. Example ``i`` of a sequence is the prefix ``seq[:i + 1]`` paired
    with the target ``seq[i + 1]``. Examples from all sentences are numbered
    consecutively.
    """

    def __init__(self, sents):
        self.data = [encode(s) for s in sents]

    def __len__(self):
        return sum(len(seq) - 1 for seq in self.data)

    def __getitem__(self, idx):
        """Return ``(prefix_ids, target_id)`` as long tensors for example ``idx``.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        pos = idx + total if idx < 0 else idx
        if not 0 <= pos < total:
            raise IndexError(f"index {idx} out of range for {total} examples")
        # Walk the sequences, subtracting each one's example count until the
        # remaining offset falls inside the current sequence.
        for seq in self.data:
            n = len(seq) - 1
            if pos < n:
                prefix = torch.tensor(seq[:pos + 1], dtype=torch.long)
                target = torch.tensor(seq[pos + 1], dtype=torch.long)
                return prefix, target
            pos -= n
        raise IndexError(f"index {idx} out of range for {total} examples")
def collate(batch):
    """Batch ``(prefix, target)`` pairs into a padded matrix and a target vector.

    Prefixes are right-aligned: shorter ones get ``<pad>`` ids on the left, so
    the last column always holds each prefix's final token.
    """
    prefixes, targets = zip(*batch)
    width = max(len(prefix) for prefix in prefixes)
    padded = torch.full((len(prefixes), width), stoi['<pad>'], dtype=torch.long)
    # Iterating a 2-D tensor yields row views, so writing into `row` fills `padded`.
    for row, prefix in zip(padded, prefixes):
        row[-len(prefix):] = prefix
    return padded, torch.stack(targets)

class RNNLM(nn.Module):
    """Embedding -> LSTM -> linear model that scores the next token.

    Args:
        vocab_size: number of embedding rows and of output logits.
        emb: embedding dimension.
        hidden: LSTM hidden size.
    """

    def __init__(self, vocab_size, emb=64, hidden=128):
        super().__init__()
        # Index 0 is the padding id; its embedding stays at zero.
        self.e = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb, padding_idx=0)
        self.rnn = nn.LSTM(input_size=emb, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(in_features=hidden, out_features=vocab_size)

    def forward(self, x):
        """Return logits over the vocabulary computed from the last time step."""
        states, _ = self.rnn(self.e(x))
        last_step = states[:, -1, :]
        return self.fc(last_step)

vocab_size = len(itos)
train_ds = LMDataset(train_t)
val_ds = LMDataset(val_t)  # built here; no loader in this block consumes it
test_ds = LMDataset(test_t)
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, collate_fn=collate)
test_loader = DataLoader(test_ds, batch_size=8, collate_fn=collate)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = RNNLM(vocab_size).to(device)
opt = torch.optim.Adam(model.parameters(), lr=0.01)
crit = nn.CrossEntropyLoss()
# Summed (not averaged) loss for evaluation, created once instead of per batch.
sum_crit = nn.CrossEntropyLoss(reduction='sum')

# tiny training
for epoch in range(3):
    model.train()
    total = 0.0  # summed training loss over all examples this epoch
    seen = 0     # number of training examples this epoch
    for X, y in train_loader:
        X = X.to(device)
        y = y.to(device)
        opt.zero_grad()
        logits = model(X)
        loss = crit(logits, y)
        loss.backward()
        opt.step()
        # `loss` is a batch mean; weight it by batch size so the reported
        # figure is a per-example average regardless of how batches fall.
        total += loss.item() * y.size(0)
        seen += y.size(0)
    train_loss = total / seen if seen > 0 else float('nan')

    # per-epoch perplexity on the test loader
    model.eval()
    nll = 0.0
    toks = 0
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            nll += sum_crit(model(X), y).item()
            toks += y.numel()  # one target token per example
    ppl = math.exp(nll / toks) if toks > 0 else float('inf')
    print(f"Epoch {epoch+1} train_loss={train_loss:.4f} test_ppl={ppl:.3f}")

# RNN generation helper
def gen_rnn(model, maxlen=12):
    """Sample a sentence from ``model`` one token at a time.

    Starts from ``<bos>`` and draws each next token from the softmax over the
    model's logits using ``np.random.choice``. Stops after ``maxlen`` tokens
    or when ``<eos>`` is drawn (``<eos>`` is not included in the result).

    Returns:
        The sampled tokens as a list of strings from ``itos``.
    """
    model.eval()
    cur = stoi['<bos>']
    hidden = None  # None makes the LSTM start from a zero state on the first step
    out = []
    with torch.no_grad():
        for _ in range(maxlen):
            # Feed only the newest token: `hidden` already encodes everything
            # before it. Re-feeding the whole prefix on top of the carried
            # state would process earlier tokens more than once.
            x = torch.tensor([[cur]], dtype=torch.long, device=device)
            out_r, hidden = model.rnn(model.e(x), hidden)
            logits = model.fc(out_r[:, -1, :])
            probs = torch.softmax(logits, dim=-1).squeeze(0).cpu().numpy()
            # Renormalise in float64 so the probabilities sum to 1 as closely
            # as possible before sampling.
            probs = probs.astype(np.float64)
            nxt = int(np.random.choice(len(probs), p=probs / probs.sum()))
            token = itos[nxt]
            if token == '<eos>':
                break
            out.append(token)
            cur = nxt
    return out

# generate examples
n_samples = 3
gen_bg = [' '.join(bg.generate()) for _ in range(n_samples)]
# Keep the samples under their own name: assigning them to `gen_rnn` would
# replace the function with a list and break any later call.
rnn_samples = [' '.join(gen_rnn(model)) for _ in range(n_samples)]
print("Bigram generations:", gen_bg)
print("RNN generations:", rnn_samples)

# --- BLEU: compare generated sentences to test references ---
# The samples are unconditional, so no sample belongs to a particular test
# sentence. Every hypothesis is therefore scored against all test sentences
# as its reference set, and no hypothesis is dropped.
hyps_bg = [g.split() for g in gen_bg]
hyps_rnn = [g.split() for g in rnn_samples]
refs = [test_t for _ in range(n_samples)]  # one list of references per hypothesis
# Smoothing keeps BLEU from collapsing to 0 when a higher-order n-gram has no
# overlap, which is the usual case for texts this short.
smooth = SmoothingFunction().method1
print("Bigram BLEU:", round(corpus_bleu(refs, hyps_bg, smoothing_function=smooth), 4))
print("RNN BLEU:", round(corpus_bleu(refs, hyps_rnn, smoothing_function=smooth), 4))

Bigram perplexity on test: 24.929
Epoch 1 train_loss=10.4700 test_ppl=35.817
Epoch 2 train_loss=7.8277 test_ppl=49.695
Epoch 3 train_loss=4.6931 test_ppl=105.430
Bigram generations: ['the next word', 'the quick brown fox jumps over the next word', 'the lazy dog']
RNN generations: ['the quick brown language models fox jumps the the jumps over word', 'n-gram rnns are', 'and models the lazy word']
Bigram BLEU: 0
RNN BLEU: 0.0


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
