In [None]:
!pip install transformers datasets torch torchvision torchtext

# 1. Transformer

## Import Packages

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import math

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Load Transformer Model (GPT-2)

In [None]:
# Tokenizer and weights both come from the same "gpt2" checkpoint name.
gpt_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
gpt_model = GPT2LMHeadModel.from_pretrained("gpt2")
gpt_model = gpt_model.to(device)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the model's repr.
gpt_model.eval()

## Load Data

In [None]:
# --- Load WikiText2 test dataset ---
def get_text():
    """Return the WikiText2 test split as produced by torchtext's ``WikiText2``."""
    test_split = WikiText2(split='test')
    return test_split

# Materialize the test split into a list so it can be sliced.
test_text = [entry for entry in get_text()]

# Build one sample from the first 1000 entries. Truncation with max_length=1024
# keeps at most 1024 tokens, so only the beginning of the joined text is scored.
sample_text = " ".join(test_text[:1000])
inputs = gpt_tokenizer(
    sample_text,
    return_tensors="pt",
    truncation=True,
    max_length=1024,
).to(device)

## Evaluate Transformer

In [None]:
# Score the sample with GPT-2. The input ids double as the labels, and the
# model reports its loss on them in `outputs.loss`.
with torch.no_grad():
    outputs = gpt_model(labels=inputs["input_ids"], **inputs)

# Perplexity is the exponential of that loss.
loss = outputs.loss
gpt2_ppl = math.exp(loss.item())

print(f"Transformer (GPT-2) Perplexity: {gpt2_ppl:.2f}")

# 2. RNN-Based Language Model

## Import Packages

In [None]:
from torchtext.models import RobertaBundle
from torchtext.models.lstm_lm import LSTMLanguageModel
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader

## Build Vocab from Training Data

In [None]:
# Tokenizer shared by vocabulary building and test-set encoding.
tokenizer = get_tokenizer("basic_english")


def yield_tokens(data_iter):
    """Lazily yield the tokenized form of each text in ``data_iter``.

    Args:
        data_iter: iterable of raw text strings.

    Yields:
        The result of applying the module-level ``tokenizer`` to each text,
        in the order the texts are produced.
    """
    yield from (tokenizer(raw_line) for raw_line in data_iter)

# Build the vocabulary from the tokenized training split, reserving "<unk>"
# as a special token.
train_tokens = yield_tokens(WikiText2(split='train'))
vocab = build_vocab_from_iterator(train_tokens, specials=["<unk>"])
# Tokens missing from the vocabulary resolve to the "<unk>" index.
vocab.set_default_index(vocab["<unk>"])

## Prepare test dataset

In [None]:
def data_process(raw_text_iter):
    """Encode raw text items into a single 1-D tensor of vocabulary ids.

    Each item is tokenized with the module-level ``tokenizer`` and mapped to
    ids with the module-level ``vocab``. Items that produce no tokens are
    dropped before concatenation.

    Args:
        raw_text_iter: iterable of raw text strings.

    Returns:
        A ``torch.long`` tensor holding the ids of all non-empty items,
        concatenated in order.
    """
    encoded_items = []
    for raw_item in raw_text_iter:
        ids = torch.tensor(vocab(tokenizer(raw_item)), dtype=torch.long)
        if ids.numel() > 0:
            encoded_items.append(ids)
    return torch.cat(tuple(encoded_items))

# Encode the whole test split into one id tensor, then move it to the
# evaluation device.
test_data = data_process(WikiText2(split='test'))
test_data = test_data.to(device)

# Batchify
def batchify(data, bsz):
    """Arrange a 1-D sequence into ``bsz`` parallel columns.

    The sequence is cut into ``bsz`` equal, consecutive pieces; elements that
    do not fit evenly at the end are discarded. Each piece becomes one column
    of the result.

    Args:
        data: 1-D tensor to split.
        bsz: number of columns (parallel streams).

    Returns:
        A contiguous tensor of shape ``(data.size(0) // bsz, bsz)``.
    """
    steps = data.size(0) // bsz
    trimmed = data[: steps * bsz]
    # view() lays each piece out as a row; transposing turns pieces into columns.
    return trimmed.view(bsz, steps).t().contiguous()

# Column counts for batching; only eval_batch_size is used in this cell.
batch_size, eval_batch_size = 20, 10
# Reshape the flat id tensor into eval_batch_size parallel columns.
test_data = batchify(data=test_data, bsz=eval_batch_size)

## Load pretrained LSTM model

In [None]:
# Instantiate the LSTM language model; the sizes here must match the checkpoint
# loaded below, otherwise load_state_dict will reject the weights.
# NOTE(review): confirm that LSTMLanguageModel is importable from
# torchtext.models.lstm_lm in the installed torchtext version.
lstm_model = LSTMLanguageModel(vocab_size=len(vocab), emsize=200, nhid=200, nlayers=2).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
lstm_model.load_state_dict(
    torch.load("path_to_pretrained_lstm_model.pt", map_location=device)
)  # <-- You need this checkpoint
lstm_model.eval()

## Evaluate RNN Model


In [None]:
 bptt = 35
def get_batch(source, i):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].reshape(-1)
    return data, target

# Accumulate the cross-entropy over the test set in bptt-sized chunks.
# NOTE(review): this perplexity is over basic_english tokens, while the GPT-2
# figure above is over GPT-2 tokenizer tokens -- the two numbers are probably
# not directly comparable; confirm before drawing conclusions.
ntokens = len(vocab)
criterion = torch.nn.CrossEntropyLoss()
total_loss = 0.

with torch.no_grad():
    for offset in range(0, test_data.size(0) - 1, bptt):
        data, targets = get_batch(test_data, offset)
        output = lstm_model(data)
        # Flatten to (positions, vocabulary) to line up with the 1-D targets.
        chunk_loss = criterion(output.view(-1, ntokens), targets).item()
        # Weight the chunk's mean loss by its number of time steps.
        total_loss += len(data) * chunk_loss

# Average over the time steps that have a target, then exponentiate.
mean_loss = total_loss / (len(test_data) - 1)
lstm_ppl = math.exp(mean_loss)
print(f"RNN (LSTM) Perplexity: {lstm_ppl:.2f}")