In [3]:
!pip install torch


Collecting torch
  Using cached torch-2.3.1-cp310-cp310-win_amd64.whl.metadata (26 kB)
Collecting sympy (from torch)
  Using cached sympy-1.12.1-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2024.6.0-py3-none-any.whl.metadata (11 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Using cached mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Using cached tbb-2021.12.0-py3-none-win_amd64.whl.metadata (1.1 kB)
Collecting mpmath<1.4.0,>=1.1.0 (from sympy->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.3.1-cp310-cp310-win_amd64.whl (159.8 MB)
   ---------------------------------------- 15

ERROR: Could not install packages due to an OSError: [Errno 28] No space left on device



In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import re
import string

# Two-sentence toy corpus; the vocabulary and the training windows are both derived from it.
# Built from adjacent literals so the embedded newlines and indentation are explicit.
text_corpus = (
    "\n"
    "    Transformers are a type of model architecture used for natural language processing tasks.\n"
    "    They have become the model of choice for many NLP tasks due to their performance and flexibility.\n"
    "    "
)

# Preprocessing the text
def preprocess_text(text):
    """Normalize raw text for whitespace tokenization.

    Lower-cases the text, deletes every character in ``string.punctuation`` and
    collapses each run of whitespace (spaces, tabs, newlines) into one space.

    Args:
        text: The raw input string.

    Returns:
        The normalized string. Leading/trailing whitespace is collapsed to a
        single space rather than removed.
    """
    text = text.lower()
    # re.escape keeps characters such as "]" and "^" literal inside the character class.
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    # Raw string: in a normal literal the backslash-s pair is an invalid escape
    # sequence, which newer Python versions warn about.
    text = re.sub(r"\s+", " ", text)
    return text

# Tokenizing the text
def tokenize(text):
    """Return the whitespace-separated tokens of ``text`` as a list of strings."""
    tokens = text.split()
    return tokens

# Preprocess and tokenize the corpus
processed_text = preprocess_text(text_corpus)
tokens = tokenize(processed_text)

# Create a vocabulary.
# sorted() pins the word -> index mapping: the iteration order of a set of strings depends on
# per-process hash randomization, so list(set(...)) would assign different ids on every kernel
# restart and make trained weights / encoded data non-reproducible.
vocab = sorted(set(tokens))
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))

# Encode the tokens (every token is in the vocabulary by construction)
encoded_tokens = [word_to_idx[word] for word in tokens]


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class TransformerModel(nn.Module):
    """Encoder-decoder Transformer over token ids with learned positional embeddings.

    Args:
        vocab_size: Number of distinct token ids (embedding rows and output logits).
        d_model: Embedding / hidden width passed to ``nn.Transformer``.
        nhead: Number of attention heads.
        num_encoder_layers: Number of encoder layers.
        num_decoder_layers: Number of decoder layers.
        dim_feedforward: Width of the feed-forward sublayers.
        max_seq_length: Longest sequence (source or target) the learned
            positional table can cover.
    """

    def __init__(self, vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_length):
        super().__init__()
        self.max_seq_length = max_seq_length
        self.embedding = nn.Embedding(vocab_size, d_model)
        # Learned positional table, initialised to zeros and broadcast over the batch dimension.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_length, d_model))
        # batch_first=True lets the transformer consume [batch, seq, d_model] directly,
        # so no permutes are needed around the call.
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt):
        """Compute next-token logits for every target position.

        Args:
            src: Integer tensor of token ids, shape [batch_size, src_len].
            tgt: Integer tensor of token ids, shape [batch_size, tgt_len].

        Returns:
            Float tensor of logits, shape [batch_size, tgt_len, vocab_size].

        Raises:
            ValueError: If either sequence is longer than ``max_seq_length``.
        """
        src_len, tgt_len = src.size(1), tgt.size(1)
        if max(src_len, tgt_len) > self.max_seq_length:
            # Without this check the positional slice is silently shorter than the
            # sequence and the addition fails with an opaque broadcasting error.
            raise ValueError(
                f"sequence length {max(src_len, tgt_len)} exceeds max_seq_length={self.max_seq_length}"
            )

        src = self.embedding(src) + self.positional_encoding[:, :src_len, :]
        tgt = self.embedding(tgt) + self.positional_encoding[:, :tgt_len, :]

        # Causal mask (0 on/below the diagonal, -inf above): decoder position i may only
        # attend to target positions <= i. Without it the decoder can read the very tokens
        # it is trained to predict.
        causal_mask = torch.triu(
            torch.full((tgt_len, tgt_len), float("-inf"), device=tgt.device, dtype=tgt.dtype),
            diagonal=1,
        )
        output = self.transformer(src, tgt, tgt_mask=causal_mask)
        return self.fc_out(output)  # [batch_size, tgt_len, vocab_size]

# Hyperparameters
vocab_size = len(vocab)
d_model = 64            # must be divisible by nhead (64 / 8 = 8 dims per head)
nhead = 8
num_encoder_layers = 3
num_decoder_layers = 3
dim_feedforward = 256
max_seq_length = 10     # upper bound on source/target length fed to the model

# Seed torch's global RNG so weight initialisation (and anything else drawing from it later)
# is reproducible after Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

# Instantiate the model
model = TransformerModel(
    vocab_size=vocab_size,
    d_model=d_model,
    nhead=nhead,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    dim_feedforward=dim_feedforward,
    max_seq_length=max_seq_length,
)


In [None]:
import torch.optim as optim

class TextDataset(Dataset):
    """Sliding-window dataset over a flat list of token ids.

    Item ``i`` is the pair ``(tokens[i : i + seq_length], tokens[i + 1 : i + seq_length + 1])``,
    i.e. a window and the same window shifted one token to the right.

    Args:
        encoded_tokens: Sequence of integer token ids.
        seq_length: Window length; must be positive.

    Raises:
        ValueError: If ``seq_length`` is not positive.
    """

    def __init__(self, encoded_tokens, seq_length):
        if seq_length <= 0:
            raise ValueError(f"seq_length must be positive, got {seq_length}")
        self.encoded_tokens = encoded_tokens
        self.seq_length = seq_length

    def __len__(self):
        # Clamp at zero: a corpus shorter than the window has no samples, and a negative
        # value would make len() itself raise.
        return max(0, len(self.encoded_tokens) - self.seq_length)

    def __getitem__(self, idx):
        size = len(self)
        if idx < 0:
            idx += size  # conventional negative indexing
        if not 0 <= idx < size:
            # Slicing past the end would otherwise return truncated (or empty) tensors silently.
            raise IndexError(f"index out of range for dataset of size {size}")
        end = idx + self.seq_length
        return (
            torch.tensor(self.encoded_tokens[idx:end], dtype=torch.long),
            torch.tensor(self.encoded_tokens[idx + 1:end + 1], dtype=torch.long),
        )

# Dataset and DataLoader
seq_length = 5
dataset = TextDataset(encoded_tokens, seq_length)
if len(dataset) == 0:
    # Fail early with a clear message instead of training on nothing.
    raise ValueError("corpus is too short to build any training window of length seq_length")
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
# Re-enable training mode explicitly: the prediction cell calls model.eval(), so re-running
# this cell afterwards would otherwise train with dropout disabled.
model.train()
for epoch in range(epochs):
    epoch_loss = 0.0
    num_batches = 0
    for src, tgt in dataloader:
        optimizer.zero_grad()
        # Teacher forcing: the decoder is fed tgt without its last token and is scored
        # against tgt without its first token.
        output = model(src, tgt[:, :-1])
        # reshape (not view) works regardless of memory layout; the class count is read from
        # the logits instead of relying on a global.
        loss = criterion(output.reshape(-1, output.size(-1)), tgt[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    if (epoch + 1) % 10 == 0 and num_batches:
        # Report the mean over the epoch rather than whichever batch happened to come last.
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / num_batches:.4f}')


In [None]:
def predict_next_word(model, sentence, word_to_idx, idx_to_word, max_seq_length):
    """Predict the word that follows ``sentence`` using greedy (argmax) decoding.

    The sentence is normalised with ``preprocess_text`` and split with ``tokenize``;
    words missing from ``word_to_idx`` are dropped. The model is switched to eval
    mode and left in that mode.

    Args:
        model: Callable as ``model(src, tgt)`` returning logits of shape
            [batch, seq_len, vocab_size].
        sentence: Raw input text.
        word_to_idx: Mapping from word to token id.
        idx_to_word: Mapping from token id to word.
        max_seq_length: Maximum number of tokens fed to the model; longer input
            keeps only its most recent tokens.

    Returns:
        The predicted next word as a string.

    Raises:
        ValueError: If no word of ``sentence`` is in the vocabulary.
    """
    model.eval()
    words = tokenize(preprocess_text(sentence))
    input_ids = [word_to_idx[word] for word in words if word in word_to_idx]
    if not input_ids:
        # An empty list would become an empty float tensor and fail inside the embedding.
        raise ValueError("sentence contains no words from the vocabulary")
    if max_seq_length and max_seq_length > 0:
        # Keep the most recent context so the input never exceeds the model's positional table.
        input_ids = input_ids[-max_seq_length:]
    src = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0)  # [1, seq_len]

    with torch.no_grad():
        output = model(src, src)
        next_word_logits = output[0, -1, :]  # logits at the last position
        predicted_word_idx = torch.argmax(next_word_logits).item()
        return idx_to_word[predicted_word_idx]

# Demo: ask the model for the word following a short prompt taken from the corpus.
sentence = "Transformers are a type"
predicted_word = predict_next_word(
    model=model,
    sentence=sentence,
    word_to_idx=word_to_idx,
    idx_to_word=idx_to_word,
    max_seq_length=max_seq_length,
)
print(f"Next word prediction: {predicted_word}")
