In [3]:
!pip install torch==2.2.0 torch_xla==2.2.0 cloud-tpu-client==0.10 torchtext torchvision torchaudio portalocker==2.8.2



In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import math
from torchtext.data.utils import get_tokenizer
from torchtext.datasets import PennTreebank
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import random



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

tokenizer = get_tokenizer("basic_english")

def yield_tokens(data_iter):
    for text in data_iter:
        yield tokenizer(text)

train_iter = iter(PennTreebank (split="train"))
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>", "<pad>"], min_freq=3)
vocab.set_default_index(vocab["<unk>"])

def data_process(raw_text_iter, seq_len=35):
    sequences = []
    for item in raw_text_iter:
        tokens = torch.tensor([vocab[token] for token in tokenizer(item)], dtype=torch.long)
        if len(tokens) >= 5:
            sequences.append(tokens[:seq_len])
    return sequences

train_data = data_process(iter(PennTreebank (split="train")))

def collate_fn(batch):
    batch = sorted(batch, key=lambda x: len(x), reverse=True)
    original_lengths = [len(seq) for seq in batch]
    padded_batch = pad_sequence(batch, batch_first=True, padding_value=vocab["<pad>"])
    input_seq = padded_batch[:, :-1]
    target_seq = padded_batch[:, 1:]
    input_lengths = torch.tensor([l - 1 for l in original_lengths], dtype=torch.long)
    return input_seq, target_seq, input_lengths  # Pass corrected lengths to model

train_loader = DataLoader(train_data, batch_size=64, shuffle=True, collate_fn=collate_fn, pin_memory=True)

class RNN_LM(nn.Module):
    def __init__(self, vocab_size, embed_dim=256, hidden_size=512, num_layers=3, dropout=0.3):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=vocab["<pad>"])
        self.lstm = nn.LSTM(embed_dim, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, lengths, hidden=None):
        embedded = self.dropout(self.embed(src))
        packed_input = pack_padded_sequence(embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_output, hidden = self.lstm(packed_input, hidden)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)

        output = self.fc(output)
        return output.contiguous(), hidden

model = RNN_LM(len(vocab)).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=vocab["<pad>"])
optimizer = optim.AdamW(model.parameters(), lr=0.001)
scaler = torch.cuda.amp.GradScaler()

epochs = 15
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data, target, lengths in train_loader:
        data, target, lengths = data.to(device), target.to(device), lengths.to(device)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output, _ = model(data, lengths)

            loss = criterion(output.view(-1, len(vocab)), target.reshape(-1))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()

    perplexity = math.exp(total_loss / len(train_loader))
    print(f"Epoch {epoch+1} | Perplexity: {perplexity:.2f}")

def generate_text(seed_text, max_len=50, temperature=1.0, top_k=5):
    model.eval()
    tokens = [vocab[token] for token in tokenizer(seed_text)]
    input_tensor = torch.tensor(tokens).unsqueeze(0).to(device)

    generated = tokens
    hidden = None

    with torch.no_grad():
        for _ in range(max_len):
            output, hidden = model(input_tensor, torch.tensor([len(input_tensor[0])]), hidden)
            logits = output[:, -1, :] / temperature

            top_k_logits, top_k_indices = torch.topk(logits, top_k)
            probs = torch.softmax(top_k_logits, dim=-1)
            next_token = top_k_indices[0, torch.multinomial(probs, 1).item()].item()

            if next_token == vocab["<pad>"]:
                break

            generated.append(next_token)
            input_tensor = torch.cat([input_tensor, torch.tensor([[next_token]]).to(device)], dim=1)

    return " ".join([vocab.lookup_token(idx) for idx in generated])

print("\n",generate_text("The economy is improving", max_len=30))

Using device: cuda
Epoch 1 | Perplexity: 728.97
Epoch 2 | Perplexity: 295.86
Epoch 3 | Perplexity: 185.81
Epoch 4 | Perplexity: 143.35
Epoch 5 | Perplexity: 119.63
Epoch 6 | Perplexity: 103.55
Epoch 7 | Perplexity: 91.58
Epoch 8 | Perplexity: 82.31
Epoch 9 | Perplexity: 74.59
Epoch 10 | Perplexity: 68.29
Epoch 11 | Perplexity: 62.85
Epoch 12 | Perplexity: 58.32
Epoch 13 | Perplexity: 54.27
Epoch 14 | Perplexity: 50.68
Epoch 15 | Perplexity: 47.63

 the economy is improving by the <unk> of a market economy in ual shares and has a big impact in actual earnings per share from a year earlier and the yen from the end


In [5]:
!pip install gradio



In [6]:
import gradio as gr
import torch

# Load the trained model and vocab (assuming they are saved and available)
# model = ...
# vocab = ...
# tokenizer = ...

def generate_text(seed_text, max_len=50, temperature=1.0, top_k=5):
    model.eval()
    tokens = [vocab[token] for token in tokenizer(seed_text)]
    input_tensor = torch.tensor(tokens).unsqueeze(0).to(device)

    generated = tokens
    hidden = None

    with torch.no_grad():
        for _ in range(max_len):
            output, hidden = model(input_tensor, torch.tensor([len(input_tensor[0])]), hidden)
            logits = output[:, -1, :] / temperature

            top_k_logits, top_k_indices = torch.topk(logits, top_k)
            probs = torch.softmax(top_k_logits, dim=-1)
            next_token = top_k_indices[0, torch.multinomial(probs, 1).item()].item()

            if next_token == vocab["<pad>"]:
                break

            generated.append(next_token)
            input_tensor = torch.cat([input_tensor, torch.tensor([[next_token]]).to(device)], dim=1)

    return " ".join([vocab.lookup_token(idx) for idx in generated])

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Seed Text"),
        gr.Slider(10, 100, value=50, label="Max Length"),
        gr.Slider(0.5, 2.0, value=1.0, label="Temperature"),
        gr.Slider(1, 10, value=5, label="Top-K Sampling")
    ],
    outputs="text",
    title="LSTM Language Model",
    description="Enter a seed text to generate new text using the trained LSTM model."
)

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://42e38d19f08b51ea8c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


