## Library Import

In [None]:
import pandas as pd
import re
import nltk
import torch
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from torchtext.vocab import build_vocab_from_iterator
from torch.nn.utils.rnn import pad_sequence
from torch.nn import Embedding
from gensim.models import KeyedVectors
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import random
import torch.optim as optim
from nltk import pos_tag
import torch.nn.functional as F
!pip install banglanltk
!pip install rouge
from google.colab import drive
drive.mount('/content/drive')

Collecting banglanltk
  Downloading banglanltk-0.0.4-py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.3/462.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: banglanltk
Successfully installed banglanltk-0.0.4
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Mounted at /content/drive


In [None]:
# Load the Bangla -> English parallel summaries from the mounted Drive folder.
data = pd.read_csv('/content/drive/MyDrive/CSE400 Dataset/bangla_to_english.csv')
# Show the first rows as the cell output.
data.head()

Unnamed: 0,Summary,Translated_Summary
0,ভারতের বাজার নিয়ন্ত্রক আইপিও নথির যাচাই-বাছাই...,India markets regulator ups scrutiny of IPO do...
1,আইএমএফের জর্জিভা নরম অবতরণে 'খুব আত্মবিশ্বাসী'...,IMF's Georgieva 'very confident' on soft landi...
2,UK নিয়োগকারীরা 2024-এর জন্য ছোট বেতন বৃদ্ধির ...,UK employers plan smaller pay rises for 2024: ...
3,"EU ঋণ কমাতে, বিনিয়োগ বাড়াতে শিথিল আর্থিক নিয...","EU agrees on looser fiscal rules to cut debt, ..."
4,রাশিয়ার তেলের মূল্যসীমা লঙ্ঘনের জন্য যুক্তরাষ...,US imposes sanctions for violations of Russia ...


In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45149 entries, 0 to 45148
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Summary             45149 non-null  object
 1   Translated_Summary  45149 non-null  object
dtypes: object(2)
memory usage: 705.6+ KB


## Data Preprocessing


Text Cleaning

In [None]:
import banglanltk as bn
def clean_text(text):
    """Normalize one English sentence for tokenization.

    Removes ``<...>`` tag-like spans, drops every character that is neither
    a word character nor whitespace, lower-cases the result and collapses
    all whitespace runs into single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, without leading or trailing whitespace.
    """
    without_tags = re.sub(r"<.*?>", "", text)
    without_punctuation = re.sub(r"[^\w\s]", "", without_tags)
    # split() with no argument discards leading/trailing/repeated whitespace.
    words = without_punctuation.lower().split()
    return " ".join(words)

# Bangla column: cleaned with banglanltk's clean_text.
# NOTE(review): the cell output still shows upper-case Latin tokens such as
# "UK" on this side, so it does not appear to lower-case — confirm if the
# two columns are expected to be normalized the same way.
data['Summary'] = data['Summary'].apply(bn.clean_text)

# English column: cleaned with the local clean_text defined in this cell.
data["Translated_Summary"] = data["Translated_Summary"].apply(clean_text)

data.head()

Unnamed: 0,Summary,Translated_Summary
0,ভারতের বাজার নিয়ন্ত্রক আইপিও নথির যাচাইবাছাই ...,india markets regulator ups scrutiny of ipo do...
1,আইএমএফের জর্জিভা নরম অবতরণে খুব আত্মবিশ্বাসী র...,imfs georgieva very confident on soft landing ...
2,UK নিয়োগকারীরা 2024এর জন্য ছোট বেতন বৃদ্ধির প...,uk employers plan smaller pay rises for 2024 c...
3,EU ঋণ কমাতে বিনিয়োগ বাড়াতে শিথিল আর্থিক নিয়...,eu agrees on looser fiscal rules to cut debt b...
4,রাশিয়ার তেলের মূল্যসীমা লঙ্ঘনের জন্য যুক্তরাষ...,us imposes sanctions for violations of russia ...


Tokenization

In [None]:
# Tokenizer data required by nltk.tokenize.word_tokenize.
nltk.download('punkt')

# Each cell of the Bangla column becomes a list of tokens (banglanltk tokenizer).
data['Summary'] = data['Summary'].apply(bn.word_tokenize)

# Each cell of the English column becomes a list of tokens (NLTK tokenizer).
data["Translated_Summary"] = data["Translated_Summary"].apply(word_tokenize)

data.head()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Unnamed: 0,Summary,Translated_Summary
0,"[ভারতের, বাজার, নিয়ন্ত্রক, আইপিও, নথির, যাচাই...","[india, markets, regulator, ups, scrutiny, of,..."
1,"[আইএমএফের, জর্জিভা, নরম, অবতরণে, খুব, আত্মবিশ্...","[imfs, georgieva, very, confident, on, soft, l..."
2,"[UK, নিয়োগকারীরা, 2024এর, জন্য, ছোট, বেতন, বৃ...","[uk, employers, plan, smaller, pay, rises, for..."
3,"[EU, ঋণ, কমাতে, বিনিয়োগ, বাড়াতে, শিথিল, আর্থ...","[eu, agrees, on, looser, fiscal, rules, to, cu..."
4,"[রাশিয়ার, তেলের, মূল্যসীমা, লঙ্ঘনের, জন্য, যু...","[us, imposes, sanctions, for, violations, of, ..."


Removing Stopwords

In [None]:
nltk.download('stopwords')

# English stop words. The list is kept under its original name; a frozenset
# copy is used for the membership tests so each lookup is O(1).
stop_words = stopwords.words("english")
english_stopword_set = frozenset(stop_words)
data["Translated_Summary"] = data["Translated_Summary"].apply(
    lambda tokens: [token for token in tokens if token not in english_stopword_set]
)

# Bangla stop words, one per line in the file. They are stored under their
# own name: binding them to `stopwords` would replace the imported
# nltk.corpus.stopwords object and make this cell fail when it is re-run.
with open("/content/drive/MyDrive/CSE400 Dataset/ranksnl-bengali.txt", "r", encoding="utf-8") as f:
    bangla_stopwords = {line.strip() for line in f}


def remove_stopwords(text_list, stopwords):
    """Return the tokens of ``text_list`` that are not stop words.

    Args:
        text_list: Sequence of tokens.
        stopwords: Container of tokens to drop; a set gives O(1) lookups.

    Returns:
        A new list with the surviving tokens in their original order.
    """
    return [word for word in text_list if word not in stopwords]


data["Summary"] = data["Summary"].apply(remove_stopwords, stopwords=bangla_stopwords)

data.head()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Unnamed: 0,Summary,Translated_Summary
0,"[ভারতের, বাজার, নিয়ন্ত্রক, আইপিও, নথির, যাচাই...","[india, markets, regulator, ups, scrutiny, ipo..."
1,"[আইএমএফের, জর্জিভা, নরম, অবতরণে, আত্মবিশ্বাসী,...","[imfs, georgieva, confident, soft, landing, se..."
2,"[UK, নিয়োগকারীরা, 2024এর, ছোট, বেতন, বৃদ্ধির,...","[uk, employers, plan, smaller, pay, rises, 202..."
3,"[EU, ঋণ, কমাতে, বিনিয়োগ, বাড়াতে, শিথিল, আর্থ...","[eu, agrees, looser, fiscal, rules, cut, debt,..."
4,"[রাশিয়ার, তেলের, মূল্যসীমা, লঙ্ঘনের, যুক্তরাষ...","[us, imposes, sanctions, violations, russia, o..."


Lemmatization

In [None]:
# WordNet data required by WordNetLemmatizer.
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

def lemmatize_tokens(tokens):
    """Return a list with the WordNet lemma of every token, in order."""
    # NOTE(review): lemmatize() is called without a part-of-speech argument.
    # The cell output shows "us" becoming "u" and "agrees" left unchanged, so
    # POS-aware lemmatization may be worth considering — confirm intent.
    return [lemmatizer.lemmatize(token) for token in tokens]

# Only the English column is lemmatized; the Bangla column is left as is.
data["Translated_Summary"] = data["Translated_Summary"].apply(lemmatize_tokens)

data.head()

[nltk_data] Downloading package wordnet to /root/nltk_data...


Unnamed: 0,Summary,Translated_Summary
0,"[ভারতের, বাজার, নিয়ন্ত্রক, আইপিও, নথির, যাচাই...","[india, market, regulator, ups, scrutiny, ipo,..."
1,"[আইএমএফের, জর্জিভা, নরম, অবতরণে, আত্মবিশ্বাসী,...","[imf, georgieva, confident, soft, landing, see..."
2,"[UK, নিয়োগকারীরা, 2024এর, ছোট, বেতন, বৃদ্ধির,...","[uk, employer, plan, smaller, pay, rise, 2024,..."
3,"[EU, ঋণ, কমাতে, বিনিয়োগ, বাড়াতে, শিথিল, আর্থ...","[eu, agrees, looser, fiscal, rule, cut, debt, ..."
4,"[রাশিয়ার, তেলের, মূল্যসীমা, লঙ্ঘনের, যুক্তরাষ...","[u, imposes, sanction, violation, russia, oil,..."


Vocabulary Creation

In [None]:
# One vocabulary per language, each built from the token lists of its column.
# Both receive the same four special tokens in the same order.
summary_vocab = build_vocab_from_iterator(data["Summary"], specials=["<pad>", "<sos>", "<eos>", "<unk>"])
translation_vocab = build_vocab_from_iterator(data["Translated_Summary"], specials=["<pad>", "<sos>", "<eos>", "<unk>"])

# index -> token lists and token -> index dicts used by the rest of the notebook.
summary_itos = summary_vocab.get_itos()
summary_stoi = summary_vocab.get_stoi()
translation_itos = translation_vocab.get_itos()
translation_stoi = translation_vocab.get_stoi()

Padding

In [None]:
# Fixed sequence length per language: the token count below which
# `length_quantile` of the rows fall. Longer rows are truncated later.
length_quantile = 0.70
summary_max_len = int(data['Summary'].str.len().quantile(length_quantile))
translation_max_len = int(data['Translated_Summary'].str.len().quantile(length_quantile))

# The label is derived from the quantile actually used, so the printed text
# cannot drift from the computation (it previously said "95th").
percentile = round(length_quantile * 100)
print(f"{percentile}th percentile length for Summary:", summary_max_len)
print(f"{percentile}th percentile length for Translation:", translation_max_len)

95th percentile length for Summary: 18
95th percentile length for Translation: 18


In [None]:
def pad_tokens(tokens, vocab_stoi, max_len):
    """Convert tokens to a fixed-length list of vocabulary indices.

    Args:
        tokens: Sequence of string tokens.
        vocab_stoi: Mapping token -> index containing "<pad>" and "<unk>".
        max_len: Length of the returned list.

    Returns:
        A list of ``max_len`` indices: the first ``max_len`` tokens mapped
        through ``vocab_stoi`` (unknown tokens become "<unk>"), followed by
        "<pad>" indices when there are fewer tokens than ``max_len``.
    """
    filler = vocab_stoi["<pad>"]
    indices = []
    for token in tokens[:max_len]:
        indices.append(vocab_stoi.get(token, vocab_stoi["<unk>"]))
    missing = max_len - len(indices)
    indices += [filler] * missing
    return indices

# Pad/truncate every row to the per-language length computed from the data
# rather than repeating its current value (18) as a literal.
summary_sequences = [pad_tokens(tokens, summary_stoi, summary_max_len) for tokens in data["Summary"]]
translation_sequences = [pad_tokens(tokens, translation_stoi, translation_max_len) for tokens in data["Translated_Summary"]]

# All rows of a column share one length, so each list stacks into a 2-D tensor.
summary_tensor = torch.tensor(summary_sequences)
translation_tensor = torch.tensor(translation_sequences)

## Word Embedding

Summary Embedding

In [None]:
# Pretrained Bangla GloVe vectors, read with gensim.
# no_header=True: presumably the text file has no "<count> <dim>" first line — TODO confirm.
bangla_glove = KeyedVectors.load_word2vec_format("/content/drive/MyDrive/CSE400 Dataset/bn_glove.39M.300d.txt",  no_header=True)
summary_vocab_size = len(summary_vocab)
summary_embedding_dim = 300

# One row per vocabulary index: the pretrained vector when the word is known
# to GloVe, otherwise a random normal vector (this includes the special tokens).
bangla_pretrained = torch.zeros(summary_vocab_size, summary_embedding_dim)
for word, i in summary_vocab.get_stoi().items():
    if word in bangla_glove.key_to_index:
        bangla_pretrained[i] = torch.tensor(bangla_glove[word])
    else:
        bangla_pretrained[i] = torch.randn(summary_embedding_dim)
# The padding row is forced back to zeros after the loop.
pad_idx = summary_vocab.get_stoi()["<pad>"]
bangla_pretrained[pad_idx] = torch.zeros(summary_embedding_dim)
bangla_pretrained.requires_grad = False

# freeze=True: the embedding weights are not updated during training.
summary_embedding = Embedding.from_pretrained(bangla_pretrained, freeze=True)

Translation Embedding

In [None]:
# Pretrained English GloVe vectors, read with gensim.
# no_header=True: presumably the text file has no "<count> <dim>" first line — TODO confirm.
glove_vectors = KeyedVectors.load_word2vec_format("/content/drive/MyDrive/CSE400 Dataset/glove.6B.300d.txt",  no_header=True)
translation_vocab_size = len(translation_vocab)
translation_embedding_dim = 300

# One row per vocabulary index: the pretrained vector when the word is known
# to GloVe, otherwise a random normal vector (this includes the special tokens).
english_pretrained = torch.zeros(translation_vocab_size, translation_embedding_dim)
for word, i in translation_vocab.get_stoi().items():
    if word in glove_vectors.key_to_index:
        english_pretrained[i] = torch.tensor(glove_vectors[word])
    else:
        english_pretrained[i] = torch.randn(translation_embedding_dim)
# The padding row is forced back to zeros after the loop.
# This assignment rebinds the module-level name pad_idx.
pad_idx = translation_vocab.get_stoi()["<pad>"]
english_pretrained[pad_idx] = torch.zeros(translation_embedding_dim)
english_pretrained.requires_grad = False

# freeze=True: the embedding weights are not updated during training.
translation_embedding = Embedding.from_pretrained(english_pretrained, freeze=True)

## Train Test Split and Dataset Loader

In [None]:
# Hold out 10% of the sentence pairs for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(summary_tensor, translation_tensor, test_size=0.1, random_state=42)

In [None]:
class CustomDataset(Dataset):
    """Pairs each source summary with its translation by position.

    Attributes are stored exactly as given; item ``idx`` is the tuple
    ``(summaries[idx], translations[idx])``.
    """

    def __init__(self, summaries, translations):
        # Two parallel, index-aligned sequences.
        self.summaries, self.translations = summaries, translations

    def __len__(self):
        # The dataset size is taken from the source side only.
        size = len(self.summaries)
        return size

    def __getitem__(self, idx):
        source_item = self.summaries[idx]
        target_item = self.translations[idx]
        return source_item, target_item

# Wrap the split tensors as (source, target) datasets.
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

# Batches of 64 pairs; only the training data is reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

## Seq2Seq Bahdanau Attention

In [None]:
print("Summary Vocabulary Size: ", len(summary_vocab))
print("Translation Vocabulary Size:", len(translation_vocab))
# These indices are only used on the target side of the notebook (first
# decoder input, loss ignore_index, decoding of predicted ids), so they are
# read from the translation vocabulary. Both vocabularies are built with the
# same specials list, so the values do not change.
SOS_token = translation_stoi["<sos>"]
PAD_token = translation_stoi["<pad>"]
EOS_token = translation_stoi["<eos>"]
UNK_token = translation_stoi["<unk>"]

Summary Vocabulary Size:  53933
Translation Vocabulary Size: 38954


In [None]:
# Model hyper-parameters shared by the model definitions below.
embedding_dim = 300  # width of the pretrained embedding rows
hidden_dim = 512  # LSTM hidden size (per direction)
output_dim = len(translation_vocab)  # one logit per target-vocabulary entry
n_layers = 2
dropout = 0.30
# Use the GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Unidirectional Implementation

In [None]:
class Seq2SeqModel(nn.Module):
    """LSTM encoder-decoder with additive attention (unidirectional encoder).

    Args:
        embedding_dim: Width of both embedding tables.
        hidden_dim: Hidden size of the encoder and decoder LSTMs.
        output_dim: Size of the target vocabulary (number of logits).
        n_layers: Number of stacked LSTM layers in encoder and decoder.
        dropout: Dropout probability between LSTM layers.
        summary_embedding: Embedding module for source token ids.
        translation_embedding: Embedding module for target token ids.
        sos_token: Index fed to the decoder at the first step. When omitted,
            the module-level ``SOS_token`` is used.
    """

    def __init__(self, embedding_dim, hidden_dim, output_dim, n_layers, dropout,
                 summary_embedding, translation_embedding, sos_token=None):
        super().__init__()

        self.output_dim = output_dim
        # Plain int, not a parameter/buffer: state_dict keys are unchanged.
        self.sos_token = SOS_token if sos_token is None else sos_token
        self.bangla_embedding = summary_embedding
        self.english_embedding = translation_embedding
        self.encoder = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, dropout=dropout)
        # The decoder consumes the token embedding concatenated with the context vector.
        self.decoder = nn.LSTM(embedding_dim + hidden_dim, hidden_dim, num_layers=n_layers, dropout=dropout)
        self.W1 = nn.Linear(hidden_dim, hidden_dim)
        self.W2 = nn.Linear(hidden_dim, hidden_dim)
        self.V = nn.Linear(hidden_dim, 1)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        """Decode ``target.shape[1]`` steps for a batch of source sequences.

        Args:
            source: Integer tensor of source ids, shape (batch, source_len).
            target: Integer tensor of target ids, shape (batch, target_len).
                Its values are only read on teacher-forced steps.
            teacher_forcing_ratio: Probability, drawn once per step for the
                whole batch, of feeding ``target[:, t]`` instead of the
                model's own argmax as the next decoder input.

        Returns:
            ``(outputs, attentions)`` with shapes
            (batch, target_len, output_dim) and (batch, target_len, source_len).
        """
        batch_size, source_len = source.shape[0], source.shape[1]
        target_len = target.shape[1]
        # Allocate on the input's device instead of relying on a global.
        device = source.device

        # The LSTMs are not batch_first, so work in (seq_len, batch, ...) layout.
        embedded_source = self.bangla_embedding(source.transpose(0, 1))
        encoder_outputs, (decoder_hidden, decoder_cell) = self.encoder(embedded_source)

        decoder_input = torch.full((1, batch_size), self.sos_token, dtype=torch.long, device=device)
        outputs = torch.zeros(batch_size, target_len, self.output_dim, device=device)
        attentions = torch.zeros(batch_size, target_len, source_len, device=device)

        # W1(encoder_outputs) does not change between decoding steps.
        encoder_projection = self.W1(encoder_outputs)
        encoder_outputs_batch_first = encoder_outputs.permute(1, 0, 2)

        for t in range(target_len):
            embedded = self.english_embedding(decoder_input)

            # Score every source position against the top decoder layer's state;
            # the (1, batch, hidden) query broadcasts over source positions.
            query = self.W2(decoder_hidden[-1]).unsqueeze(0)
            score = self.V(torch.tanh(encoder_projection + query))
            # score is (source_len, batch, 1): normalize over source positions
            # (dim 0). Normalizing over dim 1 mixed the examples of a batch.
            # NOTE: checkpoints trained with the old axis need retraining.
            attention_weights = F.softmax(score, dim=0)
            context_vector = torch.bmm(attention_weights.permute(1, 2, 0), encoder_outputs_batch_first)
            context_vector = context_vector.permute(1, 0, 2)
            embedded_with_context = torch.cat((embedded, context_vector), dim=2)

            decoder_output, (decoder_hidden, decoder_cell) = self.decoder(
                embedded_with_context, (decoder_hidden, decoder_cell)
            )

            output = self.fc(decoder_output.squeeze(0))
            outputs[:, t, :] = output
            attentions[:, t, :] = attention_weights.squeeze(-1).transpose(0, 1)

            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            decoder_input = (target[:, t] if teacher_force else top1).unsqueeze(0)
        return outputs, attentions

Bidirectional Implementation

In [None]:
class Seq2SeqModel(nn.Module):
    """LSTM encoder-decoder with additive attention (bidirectional encoder).

    The encoder runs in both directions, so its per-position outputs and the
    decoder state are ``2 * hidden_dim`` wide.

    Args:
        embedding_dim: Width of both embedding tables.
        hidden_dim: Hidden size of the encoder LSTM, per direction.
        output_dim: Size of the target vocabulary (number of logits).
        n_layers: Number of stacked LSTM layers in encoder and decoder.
        dropout: Dropout probability between LSTM layers.
        summary_embedding: Embedding module for source token ids.
        translation_embedding: Embedding module for target token ids.
        sos_token: Index fed to the decoder at the first step. When omitted,
            the module-level ``SOS_token`` is used.
    """

    def __init__(self, embedding_dim, hidden_dim, output_dim, n_layers, dropout,
                 summary_embedding, translation_embedding, sos_token=None):
        super().__init__()

        self.output_dim = output_dim
        # Plain int, not a parameter/buffer: state_dict keys are unchanged.
        self.sos_token = SOS_token if sos_token is None else sos_token
        self.bangla_embedding = summary_embedding
        self.english_embedding = translation_embedding
        self.encoder = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, dropout=dropout, bidirectional=True)
        # The decoder consumes the token embedding concatenated with the context vector.
        self.decoder = nn.LSTM(embedding_dim + 2 * hidden_dim, 2 * hidden_dim, num_layers=n_layers, dropout=dropout)
        self.W1 = nn.Linear(2 * hidden_dim, 2 * hidden_dim)
        self.W2 = nn.Linear(2 * hidden_dim, 2 * hidden_dim)
        self.V = nn.Linear(2 * hidden_dim, 1)
        self.fc = nn.Linear(2 * hidden_dim, output_dim)

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        """Decode ``target.shape[1]`` steps for a batch of source sequences.

        Args:
            source: Integer tensor of source ids, shape (batch, source_len).
            target: Integer tensor of target ids, shape (batch, target_len).
                Its values are only read on teacher-forced steps.
            teacher_forcing_ratio: Probability, drawn once per step for the
                whole batch, of feeding ``target[:, t]`` instead of the
                model's own argmax as the next decoder input.

        Returns:
            ``(outputs, attentions)`` with shapes
            (batch, target_len, output_dim) and (batch, target_len, source_len).
        """
        batch_size, source_len = source.shape[0], source.shape[1]
        target_len = target.shape[1]
        # Read sizes and device from the module and its input, not from
        # notebook globals, so the model follows its own constructor arguments.
        device = source.device
        num_layers = self.encoder.num_layers
        encoder_hidden_dim = self.encoder.hidden_size

        # The LSTMs are not batch_first, so work in (seq_len, batch, ...) layout.
        embedded_source = self.bangla_embedding(source.transpose(0, 1))
        encoder_outputs, (hidden, cell) = self.encoder(embedded_source)

        # Final states arrive as (num_layers * 2, batch, hidden). Split out the
        # direction axis and concatenate forward/backward states per layer to
        # get the decoder's (num_layers, batch, 2 * hidden) initial state.
        hidden = hidden.view(num_layers, 2, batch_size, encoder_hidden_dim)
        cell = cell.view(num_layers, 2, batch_size, encoder_hidden_dim)
        decoder_hidden = torch.cat((hidden[:, 0, :, :], hidden[:, 1, :, :]), dim=2)
        decoder_cell = torch.cat((cell[:, 0, :, :], cell[:, 1, :, :]), dim=2)

        decoder_input = torch.full((1, batch_size), self.sos_token, dtype=torch.long, device=device)
        outputs = torch.zeros(batch_size, target_len, self.output_dim, device=device)
        attentions = torch.zeros(batch_size, target_len, source_len, device=device)

        # W1(encoder_outputs) does not change between decoding steps.
        encoder_projection = self.W1(encoder_outputs)
        encoder_outputs_batch_first = encoder_outputs.permute(1, 0, 2)

        for t in range(target_len):
            embedded = self.english_embedding(decoder_input)

            # Score every source position against the top decoder layer's state;
            # the (1, batch, 2 * hidden) query broadcasts over source positions.
            query = self.W2(decoder_hidden[-1]).unsqueeze(0)
            score = self.V(torch.tanh(encoder_projection + query))
            # score is (source_len, batch, 1): normalize over source positions
            # (dim 0). Normalizing over dim 1 mixed the examples of a batch.
            # NOTE: checkpoints trained with the old axis need retraining.
            attention_weights = F.softmax(score, dim=0)
            context_vector = torch.bmm(attention_weights.permute(1, 2, 0), encoder_outputs_batch_first)
            context_vector = context_vector.permute(1, 0, 2)
            embedded_with_context = torch.cat((embedded, context_vector), dim=2)

            decoder_output, (decoder_hidden, decoder_cell) = self.decoder(
                embedded_with_context, (decoder_hidden, decoder_cell)
            )

            output = self.fc(decoder_output.squeeze(0))
            outputs[:, t, :] = output
            attentions[:, t, :] = attention_weights.squeeze(-1).transpose(0, 1)

            teacher_force = random.random() < teacher_forcing_ratio
            top1 = output.argmax(1)
            decoder_input = (target[:, t] if teacher_force else top1).unsqueeze(0)
        return outputs, attentions

In [None]:
# Instantiate whichever Seq2SeqModel class was defined last and move it to the selected device.
model = Seq2SeqModel(embedding_dim, hidden_dim, output_dim, n_layers, dropout, summary_embedding, translation_embedding).to(device)

In [None]:
# Target positions equal to PAD_token do not contribute to the loss.
criterion =  nn.CrossEntropyLoss(ignore_index=PAD_token)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    for i, (source, target) in enumerate(train_loader):
        source = source.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        # On roughly half of the decoding steps the ground-truth token is fed back.
        outputs, attentions = model(source, target, teacher_forcing_ratio=0.5)
        # NOTE: this rebinds the module-level output_dim.
        output_dim = outputs.shape[-1]
        # Flatten logits and targets so each target position is one
        # classification example for the cross-entropy loss.
        outputs = outputs.view(-1, output_dim)
        target = target.view(-1)
        loss = criterion(outputs, target)
        loss.backward()
        # Rescale gradients so their total norm is at most 1.0 before the update.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        # Log the loss of the current batch every 636 steps.
        if i % 636 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.3f}")

In [None]:
# Persist only the model weights (state_dict) to Drive.
# NOTE(review): the load cell reads 'bi_translation_b2e_25.pth', a different
# file from the one written here — confirm which checkpoint is intended.
torch.save(model.state_dict(), '/content/drive/MyDrive/CSE400 Dataset/translation_b2e_20.pth')

In [None]:
# map_location places the saved tensors on the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only one.
model.load_state_dict(torch.load('/content/drive/MyDrive/CSE400 Dataset/bi_translation_b2e_25.pth', map_location=device))

<All keys matched successfully>

## Model Evaluation

In [None]:
# Switch to evaluation mode (disables dropout); the returned module is shown as the cell output.
model.eval()

Seq2SeqModel(
  (bangla_embedding): Embedding(53933, 300)
  (english_embedding): Embedding(38954, 300)
  (encoder): LSTM(300, 512, num_layers=2, dropout=0.3, bidirectional=True)
  (decoder): LSTM(1324, 1024, num_layers=2, dropout=0.3)
  (W1): Linear(in_features=1024, out_features=1024, bias=True)
  (W2): Linear(in_features=1024, out_features=1024, bias=True)
  (V): Linear(in_features=1024, out_features=1, bias=True)
  (fc): Linear(in_features=1024, out_features=38954, bias=True)
)

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction
from rouge import Rouge
def calculate_scores(references, predictions):
    """Score each predicted token list against its reference.

    Args:
        references: Iterable of reference token lists.
        predictions: Iterable of predicted token lists, aligned with
            ``references``.

    Returns:
        ``(bleu_scores, rouge_scores)``: a list with one smoothed sentence
        BLEU per pair (unigram weight 0.8, bigram weight 0.2), and a dict
        keyed by 'rouge-1' / 'rouge-2' / 'rouge-l', each holding the lists
        'F-1 Score', 'Precision' and 'Recall' with one value per pair.
    """
    # (key used by the rouge package, label used in the returned dict)
    metric_labels = (('f', 'F-1 Score'), ('p', 'Precision'), ('r', 'Recall'))
    rouge_variants = ('rouge-1', 'rouge-2', 'rouge-l')

    scorer = Rouge()
    smoothing = SmoothingFunction().method4
    bleu_scores = []
    rouge_scores = {
        variant: {label: [] for _, label in metric_labels}
        for variant in rouge_variants
    }

    for reference_tokens, predicted_tokens in zip(references, predictions):
        bleu_scores.append(
            sentence_bleu(
                [reference_tokens],
                predicted_tokens,
                weights=(0.8, 0.2, 0, 0),
                smoothing_function=smoothing,
            )
        )
        # The rouge package works on whitespace-joined strings: hypothesis first.
        pair_scores = scorer.get_scores(' '.join(predicted_tokens), ' '.join(reference_tokens), avg=True)
        for variant, collected in rouge_scores.items():
            for metric_key, label in metric_labels:
                collected[label].append(pair_scores[variant][metric_key])
    return bleu_scores, rouge_scores


references = []
predictions = []

eos_index = translation_stoi["<eos>"]

with torch.no_grad():
    for summaries, translations in test_loader:
        summaries = summaries.to(device)
        translations = translations.to(device)
        # teacher_forcing_ratio=0.0: the decoder must consume its own
        # predictions. With the default 0.5, ground-truth tokens were fed back
        # on random steps, which leaks the answer into the metrics and makes
        # them differ from run to run.
        outputs, _ = model(summaries, translations, teacher_forcing_ratio=0.0)
        predicted_rows = outputs.argmax(dim=-1).tolist()
        reference_rows = translations.tolist()
        for reference_ids, predicted_ids in zip(reference_rows, predicted_rows):
            # NOTE(review): references keep their "<pad>" tokens, so padding
            # takes part in BLEU/ROUGE — confirm whether that is intended.
            references.append([translation_itos[idx] for idx in reference_ids])
            prediction = []
            for idx in predicted_ids:
                if idx == eos_index:
                    break
                prediction.append(translation_itos[idx])
            # The loop never stores "<eos>" itself, so it is always appended
            # here. Appending unconditionally also avoids the IndexError that
            # `prediction[-1]` raised when the first predicted id was "<eos>".
            prediction.append("<eos>")
            predictions.append(prediction)

bleu_scores, rouge_scores = calculate_scores(references, predictions)

print("Average BLEU score:", sum(bleu_scores) / len(bleu_scores))
print("Average ROUGE scores:")
for key in rouge_scores.keys():
    print(f"{key}:")
    for label in ['F-1 Score', 'Precision', 'Recall']:
      print(f"  {label}: {sum(rouge_scores[key][label]) / len(rouge_scores[key][label])}")

Average BLEU score: 0.12839363715572877
Average ROUGE scores:
rouge-1:
  F-1 Score: 0.23499591197758543
  Precision: 0.26430642190750364
  Recall: 0.2183835720190015
rouge-2:
  F-1 Score: 0.03886920422117718
  Precision: 0.03790470910902823
  Recall: 0.04080666595616804
rouge-l:
  F-1 Score: 0.20625662640121165
  Precision: 0.23205700853608185
  Recall: 0.1917248355947774


## Generate Translations

In [None]:
def generate_translation(input_text, max_len=18):
    """Greedily translate one source sentence with the trained model.

    The model is run once with teacher forcing disabled, so every decoding
    step consumes the token predicted at the previous step. The earlier
    version re-ran the model from scratch for each output token with the
    default teacher-forcing ratio, which randomly fed a mis-aligned copy of
    its own output back in.

    Args:
        input_text: Source sentence as a single string.
        max_len: Source padding length and maximum number of generated
            tokens. Defaults to 18, the value previously hard-coded.

    Returns:
        The generated tokens joined by single spaces. Generation stops at
        the first "<eos>"; "<sos>" and "<pad>" are skipped; "<unk>" is
        rendered as an empty string.
    """
    # NOTE(review): the training data was tokenized with bn.word_tokenize;
    # this uses NLTK's word_tokenize — confirm they agree on cleaned text.
    tokens = word_tokenize(input_text)
    sequence = pad_tokens(tokens, summary_stoi, max_len)
    input_tensor = torch.tensor(sequence).unsqueeze(0).to(device)

    # With teacher forcing off, the target's values are never read; it only
    # tells the model how many steps to decode.
    placeholder_target = torch.full((1, max_len), PAD_token, dtype=torch.long, device=device)
    with torch.no_grad():
        outputs, _ = model(input_tensor, placeholder_target, teacher_forcing_ratio=0.0)
    predicted_indices = outputs.argmax(dim=-1)[0].tolist()

    predicted_words = []
    for index in predicted_indices:
        if index == SOS_token or index == PAD_token:
            continue
        if index == EOS_token:
            break
        if index == UNK_token:
            predicted_words.append("")
        else:
            predicted_words.append(translation_itos[index])
    summary = ' '.join(predicted_words)
    return summary

iter_test_loader = iter(test_loader)
# Advance through 13 batches; despite its name, first_batch ends up holding
# the 13th batch of the test loader.
for i in range(13):
  first_batch = next(iter_test_loader)
# Take the first (source, target) pair of that batch.
# NOTE: this rebinds the module-level X_test / y_test from the train/test split.
X_test, y_test = first_batch[0][0], first_batch[1][0]
# Map ids back to tokens, dropping padding.
X_test_tokens = [summary_itos[idx] for idx in X_test if summary_itos[idx] != "<pad>"]
y_test_tokens = [translation_itos[idx] for idx in y_test if translation_itos[idx] != "<pad>"]
input_text = ' '.join(X_test_tokens)
contents = ' '.join(y_test_tokens)
print("Input Text: " + input_text)
print("Sample Translation: " + contents)
print("Generated Translation: " + generate_translation(input_text))