In [None]:
!git clone https://github.com/podderSoykot/Transformer-Basic-to-Advance-Project.git

Cloning into 'Transformer-Basic-to-Advance-Project'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (3/3), done.


In [None]:
!pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [21]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import re
import string

# Sample text corpus
text_corpus = """
    Transformers are a type of model architecture used for natural language processing tasks.
    They have become the model of choice for many NLP tasks due to their performance and flexibility.
    """

# Preprocessing the text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    text = re.sub("\s+", " ", text)
    return text

# Tokenizing the text
def tokenize(text):
    return text.split()

# Preprocess and tokenize the corpus
processed_text = preprocess_text(text_corpus)
tokens = tokenize(processed_text)

# Create a vocabulary
vocab = list(set(tokens))
word_to_idx = {word: idx for idx, word in enumerate(vocab)}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}

# Encode the tokens
encoded_tokens = [word_to_idx[word] for word in tokens]


In [22]:
import torch.nn as nn
import torch.nn.functional as F

class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_length):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_length, d_model))
        self.transformer = nn.Transformer(d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward)
        self.fc_out = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src) + self.positional_encoding[:, :src.size(1), :]
        tgt = self.embedding(tgt) + self.positional_encoding[:, :tgt.size(1), :]
        src = src.permute(1, 0, 2)  # [seq_len, batch_size, d_model]
        tgt = tgt.permute(1, 0, 2)  # [seq_len, batch_size, d_model]
        output = self.transformer(src, tgt)
        output = self.fc_out(output.permute(1, 0, 2))  # [batch_size, seq_len, vocab_size]
        return output

# Hyperparameters
vocab_size = len(vocab)
d_model = 64
nhead = 8
num_encoder_layers = 3
num_decoder_layers = 3
dim_feedforward = 256
max_seq_length = 10

# Instantiate the model
model = TransformerModel(vocab_size, d_model, nhead, num_encoder_layers, num_decoder_layers, dim_feedforward, max_seq_length)




In [23]:
import torch.optim as optim

class TextDataset(Dataset):
    def __init__(self, encoded_tokens, seq_length):
        self.encoded_tokens = encoded_tokens
        self.seq_length = seq_length

    def __len__(self):
        return len(self.encoded_tokens) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.tensor(self.encoded_tokens[idx:idx + self.seq_length]),
            torch.tensor(self.encoded_tokens[idx + 1:idx + self.seq_length + 1])
        )

# Dataset and DataLoader
seq_length = 5
dataset = TextDataset(encoded_tokens, seq_length)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
for epoch in range(epochs):
    for src, tgt in dataloader:
        optimizer.zero_grad()
        output = model(src, tgt[:, :-1])
        loss = criterion(output.view(-1, vocab_size), tgt[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.1401
Epoch [20/100], Loss: 0.0386
Epoch [30/100], Loss: 0.0257
Epoch [40/100], Loss: 0.0121
Epoch [50/100], Loss: 0.0072
Epoch [60/100], Loss: 0.0066
Epoch [70/100], Loss: 0.0041
Epoch [80/100], Loss: 0.0038
Epoch [90/100], Loss: 0.0038
Epoch [100/100], Loss: 0.0025


In [24]:
def predict_next_word(model, sentence, word_to_idx, idx_to_word, max_seq_length):
    model.eval()
    words = tokenize(preprocess_text(sentence))
    input_ids = [word_to_idx[word] for word in words if word in word_to_idx]
    src = torch.tensor(input_ids).unsqueeze(0)

    with torch.no_grad():
        output = model(src, src)
        next_word_logits = output[0, -1, :]
        predicted_word_idx = torch.argmax(next_word_logits).item()
        return idx_to_word[predicted_word_idx]

# Example prediction
sentence = "Transformers are a type"
predicted_word = predict_next_word(model, sentence, word_to_idx, idx_to_word, max_seq_length)
print(f"Next word prediction: {predicted_word}")

Next word prediction: of
