<a href="https://colab.research.google.com/github/kareemullah123456789/NLP/blob/main/RNN_LSTM_Text_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# nothing to install
import torch, torch.nn as nn, matplotlib.pyplot as plt, numpy as np, random
from ipywidgets import interact, IntSlider

In [7]:
# ---- Character-level vocabulary and codec ----
text = "hello world, how art thou? hello world, how art thou? "

# Every distinct character, in sorted order, is one vocabulary entry.
chars = sorted(set(text))
itos = dict(enumerate(chars))                    # index -> character
stoi = {ch: i for i, ch in itos.items()}         # character -> index


def encode(s):
    """Return the list of vocabulary indices for the characters of `s`."""
    return [stoi[c] for c in s]


def decode(idx):
    """Turn an iterable of indices back into a string.

    Elements may be plain ints or objects exposing `.item()` (e.g. 0-d
    tensors); the latter are unwrapped before the lookup.
    """
    pieces = []
    for token in idx:
        key = token.item() if hasattr(token, 'item') else token
        pieces.append(itos[key])
    return ''.join(pieces)


print("📜 Vocab:", ''.join(chars))
print("Example encoded:", encode("hello")[:3])

📜 Vocab:  ,?adehlortuw
Example encoded: [6, 5, 7]


In [8]:
def make_chunks(seq_len=20):
    """Draw one random (input, target) training pair from the global `text`.

    The target is the input shifted one character to the right, so position
    `t` of the target holds the character that follows position `t` of the
    input.

    If `seq_len + 1` exceeds the length of `text`, the corpus is treated as
    cyclic and tiled end-to-end, so windows longer than the text still work
    instead of failing inside `random.randint` with an empty range.

    Args:
        seq_len: number of characters in each of the two returned sequences.

    Returns:
        Tuple `(x, y)` of `torch.long` tensors, each of shape `(1, seq_len)`.

    Raises:
        ValueError: if `text` is empty.
    """
    data = torch.tensor(encode(text), dtype=torch.long)
    n = len(data)
    if n == 0:
        raise ValueError("make_chunks needs a non-empty `text`")

    if seq_len + 1 <= n:
        # Window fits: any start that leaves room for the shifted target.
        # (random.randint includes both endpoints.)
        last_start = n - seq_len - 1
    else:
        # Window is longer than the corpus: tile it so that every start
        # position 0..n-1 has seq_len + 1 characters available.
        repeats = -(-(n + seq_len) // n)   # ceil((n + seq_len) / n)
        data = data.repeat(repeats)
        last_start = n - 1

    ix = random.randint(0, last_start)
    x = data[ix:ix+seq_len]
    y = data[ix+1:ix+seq_len+1]
    return x.unsqueeze(0), y.unsqueeze(0)   # (1, seq_len)

# Draw one random training pair and show both halves as text.
x, y = make_chunks()
for label, chunk in (("Input :", x), ("Target:", y)):
    print(label, decode(chunk[0].tolist()))

Input : art thou? hello worl
Target: rt thou? hello world


In [10]:
def make_chunks(seq_len=20):
    """Sample a random window of `text` and its one-step-ahead target.

    `y` is `x` shifted right by one character, i.e. `y[0, t]` is the
    character that follows `x[0, t]` in the corpus.

    A window that does not fit (`seq_len + 1 > len(text)`) no longer triggers
    an empty-range error from `random.randint`: the corpus is tiled end-to-end
    and read cyclically instead.

    Args:
        seq_len: number of characters in each of the two returned sequences.

    Returns:
        Tuple `(x, y)` of `torch.long` tensors, each of shape `(1, seq_len)`.

    Raises:
        ValueError: if `text` is empty.
    """
    data = torch.tensor(encode(text), dtype=torch.long)
    n = len(data)
    if n == 0:
        raise ValueError("make_chunks needs a non-empty `text`")

    if seq_len + 1 <= n:
        # In-range case: leave room for the shifted target at the end.
        # random.randint is inclusive on both ends.
        last_start = n - seq_len - 1
    else:
        # Oversize case: repeat the corpus until every start in 0..n-1
        # can supply seq_len + 1 characters.
        repeats = -(-(n + seq_len) // n)   # ceil((n + seq_len) / n)
        data = data.repeat(repeats)
        last_start = n - 1

    ix = random.randint(0, last_start)
    x = data[ix:ix+seq_len]
    y = data[ix+1:ix+seq_len+1]
    return x.unsqueeze(0), y.unsqueeze(0)   # (1, seq_len)

# Sample another pair; the target is the input advanced by one character.
x, y = make_chunks()
print("Input :", decode(x.squeeze(0).tolist()))
print("Target:", decode(y.squeeze(0).tolist()))

Input : ? hello world, how a
Target:  hello world, how ar


In [11]:
# Sizes shared by the two models defined below.
hidden = 32                  # embedding width and recurrent state width
vocab_size = len(chars)      # one embedding row / output logit per distinct character

class TextRNN(nn.Module):
    """Character model: embedding -> single-layer vanilla RNN -> linear read-out.

    Layer sizes come from the module-level `vocab_size` and `hidden`.
    """

    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden)
        self.rnn = nn.RNN(input_size=hidden, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(in_features=hidden, out_features=vocab_size)

    def forward(self, x):
        """Return per-position logits over the vocabulary for token ids `x`.

        The final hidden state returned by the RNN is discarded; only the
        per-step outputs are projected.
        """
        embedded = self.embed(x)
        step_outputs, _last_state = self.rnn(embedded)
        return self.fc(step_outputs)

class TextLSTM(nn.Module):
    """Character model: embedding -> single-layer LSTM -> linear read-out.

    Layer sizes come from the module-level `vocab_size` and `hidden`.
    """

    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden)
        self.lstm = nn.LSTM(input_size=hidden, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(in_features=hidden, out_features=vocab_size)

    def forward(self, x):
        """Return per-position logits over the vocabulary for token ids `x`.

        The LSTM's final (hidden, cell) state is discarded; only the per-step
        outputs are projected.
        """
        embedded = self.embed(x)
        step_outputs, _last_state = self.lstm(embedded)
        return self.fc(step_outputs)

In [12]:
def play(seq_len=20, model_type="LSTM", epochs=200):
    """Train a fresh character model on random chunks, then print a sampled continuation.

    Args:
        seq_len: length of every training chunk, and of the sliding context
            window fed to the model while sampling.
        model_type: "LSTM" selects `TextLSTM`; any other value selects `TextRNN`.
        epochs: number of optimisation steps; each step uses one freshly
            drawn chunk from `make_chunks`.

    Returns:
        None. The seed chunk followed by 40 sampled characters is printed.
    """
    Model = TextLSTM if model_type == "LSTM" else TextRNN
    model = Model()
    opt = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()

    for _ in range(epochs):
        xb, yb = make_chunks(seq_len)
        logits = model(xb)
        # Flatten the batch and time axes so each position is one classification example.
        loss = loss_fn(logits.reshape(-1, vocab_size), yb.reshape(-1))
        opt.zero_grad()
        loss.backward()
        opt.step()

    # sample next 40 chars
    model.eval()
    seed, _ = make_chunks(seq_len)
    generated = seed[0].tolist()          # plain ints throughout, seed included
    with torch.no_grad():
        inp = seed
        for _ in range(40):
            logits = model(inp)
            # Only the prediction at the last position is used for the next character.
            probs = torch.softmax(logits[0, -1], dim=0)
            next_id = torch.multinomial(probs, num_samples=1)
            generated.append(next_id.item())
            # Slide the window: drop the oldest token, append the sampled one.
            inp = torch.cat([inp[:, 1:], next_id.view(1, 1)], dim=1)
    result = decode(generated)
    print(f"{model_type} ({seq_len} chars) →", result)

# Interactive front-end: a slider for the chunk length, a dropdown for the architecture.
seq_len_slider = IntSlider(value=20, min=15, max=80, step=5)
interact(play, seq_len=seq_len_slider, model_type=["RNN", "LSTM"])

interactive(children=(IntSlider(value=20, description='seq_len', max=80, min=15, step=5), Dropdown(description…