In [2]:
!pip install torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.9.0-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.0-cp313-cp313-win_amd64.whl.metadata (5.9 kB)
Collecting torchaudio
  Downloading torchaudio-2.9.0-cp313-cp313-win_amd64.whl.metadata (6.9 kB)
Collecting filelock (from torch)
  Using cached filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec>=0.8.5 (from torch)
  Downloading fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.9.0-cp313-cp313-win_amd64.whl (109.3 MB)
   ---------------------------------------- 0.0/109.3 M


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import torch
print(torch.__version__)

2.9.0+cpu


In [2]:
import nltk, torch
from nltk.tokenize import word_tokenize
nltk.download('punkt')

text = open("sample.txt").read().lower()
tokens = word_tokenize(text)
vocab = sorted(set(tokens))
stoi = {w:i for i,w in enumerate(vocab)}
itos = {i:w for w,i in stoi.items()}

data = torch.tensor([stoi[w] for w in tokens])
block = 40
def get_batch(bs=32):
    ix = torch.randint(len(data)-block, (bs,))
    x = torch.stack([data[i:i+block] for i in ix])
    y = torch.stack([data[i+1:i+block+1] for i in ix])
    return x,y

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\OAOyede\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
import torch.nn as nn
class RNNModel(nn.Module):
    def __init__(self, vocab_size, hidden=128):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden)
        self.rnn = nn.RNN(hidden, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, vocab_size)
    def forward(self,x,h=None):
        x = self.embed(x)
        out,h = self.rnn(x,h)
        return self.fc(out),h

In [4]:
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, hidden=128):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden)
        self.lstm = nn.LSTM(hidden, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, vocab_size)
    def forward(self,x,h=None):
        x = self.embed(x)
        out,h = self.lstm(x,h)
        return self.fc(out),h

In [5]:
import torch.optim as optim
def train(model,epochs=10):
    opt = optim.Adam(model.parameters(),lr=0.003)
    loss_fn = nn.CrossEntropyLoss()
    for e in range(epochs):
        x,y = get_batch()
        logits,_ = model(x)
        loss = loss_fn(logits.view(-1,len(vocab)),y.view(-1))
        opt.zero_grad(); loss.backward(); opt.step()
        if (e+1)%2==0:
            print(f"Epoch {e+1}, Loss={loss.item():.3f}")

In [6]:
# Make sure text exists (if you used a tiny snippet)
MIN_TOKENS = 400   # increase if you want longer contexts
if len(data) < MIN_TOKENS:
    # repeat the text to reach a safe length
    reps = (MIN_TOKENS // max(1, len(data))) + 1
    data = data.repeat(reps)

# Auto-adjust block so it's always valid
block = min(block, max(10, len(data) - 2))  # keep at least 10
print("Data length:", len(data), " | block:", block)

Data length: 418  | block: 40


In [7]:
def get_batch(bs=32):
    # guarantee we can sample
    high = len(data) - block - 1
    if high <= 0:
        # fallback: shrink block temporarily if text is still too short
        b = max(5, len(data) - 2)
        x = data[:b].unsqueeze(0).repeat(bs, 1)
        y = data[1:b+1].unsqueeze(0).repeat(bs, 1)
        return x, y

    ix = torch.randint(high, (bs,))
    x = torch.stack([data[i:i+block] for i in ix])
    y = torch.stack([data[i+1:i+block+1] for i in ix])
    return x, y

In [8]:
model = RNNModel(len(vocab))
train(model)

Epoch 2, Loss=3.329
Epoch 4, Loss=2.978
Epoch 6, Loss=2.594
Epoch 8, Loss=2.173
Epoch 10, Loss=1.735


In [13]:
def generate(model,start='the',length=50):
    model.eval()
    ids=[stoi[start]] if start in stoi else [0]
    h=None
    for _ in range(length):
        x=torch.tensor([[ids[-1]]])
        logits,h=model(x,h)
        probs=torch.softmax(logits[0,-1],dim=0)
        next_id=torch.multinomial(probs,1).item()
        ids.append(next_id)
    return ' '.join(itos[i] for i in ids)

print(generate(model,'the student'))

. word a rnn was about an an a student student there was student in an rnn to soon . a networks there remember there they learning long could remember remember remember how remember could realised word learning rnn how in predict context built upon much upon . they built long
