In [None]:
!pip install torchtext==0.6.0 --quiet
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
import numpy as np
import pandas as pd
import spacy
import random
from torchtext.data.metrics import bleu_score
from pprint import pprint
from torch.utils.tensorboard import SummaryWriter
import nltk
from nltk.tokenize.treebank import TreebankWordDetokenizer

from torchsummary import summary
!python -m spacy download en --quiet
!python -m spacy download de --quiet

[K     |████████████████████████████████| 71kB 3.4MB/s 
[K     |████████████████████████████████| 1.2MB 7.4MB/s 
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.7/dist-packages/en_core_web_sm -->
/usr/local/lib/python3.7/dist-packages/spacy/data/en
You can now load the model via spacy.load('en')
[K     |████████████████████████████████| 14.9MB 4.8MB/s 
[?25h  Building wheel for de-core-news-sm (setup.py) ... [?25l[?25hdone
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('de_core_news_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.7/dist-packages/de_core_news_sm -->
/usr/local/lib/python3.7/dist-packages/spacy/data/de
You can now load the model via spacy.load('de')


In [None]:
# Load the spaCy pipelines through the "de" / "en" shortcut links created by the
# `spacy download` commands in the first cell. They are used by
# tokenizer_german / tokenizer_english.
spacy_german = spacy.load("de")
spacy_english = spacy.load("en")
def tokenizer_german(text):
  """Split German `text` with the spaCy tokenizer and return the token strings."""
  pieces = []
  for tok in spacy_german.tokenizer(text):
    pieces.append(tok.text)
  return pieces

def tokenizer_english(text):
  """Split English `text` with the spaCy tokenizer and return the token strings."""
  pieces = []
  for tok in spacy_english.tokenizer(text):
    pieces.append(tok.text)
  return pieces

# Field for the German text: spaCy tokenisation, lowercasing, and <sos>/<eos>
# markers around every sequence.
german = Field(tokenize=tokenizer_german,
               lower=True,
               init_token="<sos>",
               eos_token="<eos>")

# Field for the English text, configured the same way.
english = Field(tokenize=tokenizer_english,
               lower=True,
               init_token="<sos>",
               eos_token="<eos>")

# `exts` and `fields` are paired positionally: the ".de" files are processed by
# `german` and the ".en" files by `english`. Later cells read the two sides as
# `src` and `trg`, so German is the source language and English the target.
train_data, valid_data, test_data = Multi30k.splits(exts = (".de", ".en"),
                                                    fields=(german, english))

# Vocabularies are built from the training split only: words seen fewer than
# 3 times are dropped and at most 5000 words are kept.
# NOTE(review): torchtext presumably adds the special tokens on top of max_size — confirm.
german.build_vocab(train_data, max_size=5000, min_freq=3)
english.build_vocab(train_data, max_size=5000, min_freq=3)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 16

# One iterator per split, all yielding batches on `device`.
# The sort key is the source-sentence length and batches are sorted by it
# internally (sort_within_batch=True).
# NOTE(review): BucketIterator presumably groups similar-length examples to limit padding — see torchtext docs.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits((train_data, valid_data, test_data), 
                                                                      batch_size = BATCH_SIZE, 
                                                                      sort_within_batch=True,
                                                                      sort_key=lambda x: len(x.src),
                                                                      device = device)

In [None]:
# Re-selects the compute device (identical to the assignment in the iterator cell).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class EncoderLSTM(nn.Module):
  """Embed a sequence of token indices and summarise it with a stacked LSTM.

  Only the final hidden and cell states are exposed; the per-step LSTM
  outputs are discarded.
  """

  def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
    """Build the layers.

    input_size     -- number of rows in the embedding table (vocabulary size)
    embedding_size -- width of each embedding vector
    hidden_size    -- width of the LSTM hidden/cell state
    num_layers     -- number of stacked LSTM layers
    p              -- dropout probability (on embeddings and between LSTM layers)
    """
    super().__init__()
    self.hidden_size, self.num_layers = hidden_size, num_layers
    # Sub-modules are registered in a fixed order: dropout, embedding, LSTM.
    self.dropout = nn.Dropout(p)
    self.tag = True
    self.embedding = nn.Embedding(input_size, embedding_size)
    self.LSTM = nn.LSTM(input_size=embedding_size,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=p)

  def forward(self, x):
    """Return `(hidden_state, cell_state)` produced by the LSTM after reading `x`."""
    embedded = self.embedding(x)
    embedded = self.dropout(embedded)
    _, final_states = self.LSTM(embedded)
    hidden_state, cell_state = final_states
    return hidden_state, cell_state

# Encoder hyperparameters.
# NOTE(review): the encoder embeds `batch.src`, which is numericalised by the
# `german` field, so this hard-coded size presumably has to be
# >= len(german.vocab) — confirm against the built vocabulary.
input_size_encoder = 5476 # source vocabulary size (hard-coded)
encoder_embedding_size = 300
# The encoder's final (hidden, cell) states are passed directly to the decoder
# LSTM by Seq2Seq.forward, so the decoder must use the same hidden_size and
# num_layers as the values chosen here.
hidden_size = 512
num_layers = 2
encoder_dropout = 0.5

encoder_lstm = EncoderLSTM(input_size_encoder, encoder_embedding_size,
                           hidden_size, num_layers, encoder_dropout).to(device)
print(encoder_lstm)

class DecoderLSTM(nn.Module):
  """Single-step LSTM decoder: one token per batch element in, one row of logits out."""

  def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, output_size):
    """Build the layers.

    input_size     -- number of rows in the embedding table (vocabulary size)
    embedding_size -- width of each embedding vector
    hidden_size    -- width of the LSTM hidden/cell state
    num_layers     -- number of stacked LSTM layers
    p              -- dropout probability (on embeddings and between LSTM layers)
    output_size    -- number of logits produced per step
    """
    super().__init__()
    self.hidden_size, self.num_layers, self.output_size = hidden_size, num_layers, output_size
    # Sub-modules are registered in a fixed order: dropout, embedding, LSTM, fc.
    self.dropout = nn.Dropout(p)
    self.embedding = nn.Embedding(input_size, embedding_size)
    self.LSTM = nn.LSTM(input_size=embedding_size,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=p)
    self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

  def forward(self, x, hidden_state, cell_state):
    """Advance the decoder by one step.

    Returns `(predictions, hidden_state, cell_state)`, where `predictions` is
    the linear projection of the LSTM output with the leading step axis removed.
    """
    # The LSTM expects a leading sequence axis; a single step has length 1.
    step_input = x.unsqueeze(0)
    embedded = self.dropout(self.embedding(step_input))
    lstm_out, states = self.LSTM(embedded, (hidden_state, cell_state))
    hidden_state, cell_state = states
    return self.fc(lstm_out).squeeze(0), hidden_state, cell_state

# Decoder hyperparameters.
# The decoder embeds and predicts target-side tokens (`batch.trg`, numericalised
# by the `english` field).
# NOTE(review): both vocabulary sizes below are hard-coded — confirm they are
# >= len(english.vocab).
input_size_decoder = 4556  # target vocabulary size (embedding rows)
decoder_embedding_size = 300
# Seq2Seq.forward hands the encoder's final (hidden, cell) states straight to
# the decoder LSTM, so the decoder's width and depth must equal the encoder's.
# They are taken from the encoder instance instead of being repeated as
# literals: a mismatched width makes nn.LSTM raise a RuntimeError on the first
# decoding step.
hidden_size = encoder_lstm.hidden_size
num_layers = encoder_lstm.num_layers
decoder_dropout = 0.5
output_size = 4556  # target vocabulary size (logits per step)

decoder_lstm = DecoderLSTM(input_size_decoder, decoder_embedding_size,
                           hidden_size, num_layers, decoder_dropout, output_size).to(device)
print(decoder_lstm)

EncoderLSTM(
  (dropout): Dropout(p=0.5, inplace=False)
  (embedding): Embedding(5476, 300)
  (LSTM): LSTM(300, 512, num_layers=2, dropout=0.5)
)
DecoderLSTM(
  (dropout): Dropout(p=0.5, inplace=False)
  (embedding): Embedding(4556, 300)
  (LSTM): LSTM(300, 1024, num_layers=2, dropout=0.5)
  (fc): Linear(in_features=1024, out_features=4556, bias=True)
)


In [None]:
class Seq2Seq(nn.Module):
  """Encoder-decoder wrapper that decodes the target one step at a time.

  The encoder's final hidden/cell states initialise the decoder. At every step
  the decoder is fed either the ground-truth token (teacher forcing) or its own
  most likely prediction from the previous step.
  """

  def __init__(self, Encoder_LSTM, Decoder_LSTM):
    """Store the encoder and decoder modules.

    `Decoder_LSTM` must expose an `output_size` attribute (the number of
    logits it produces per step).
    """
    super(Seq2Seq, self).__init__()
    self.Encoder_LSTM = Encoder_LSTM
    self.Decoder_LSTM = Decoder_LSTM

  def forward(self, source, target, tfr=0.5):
    """Run the encoder once, then decode `target.shape[0] - 1` steps.

    source -- source token indices; axis 1 is the batch axis
    target -- target token indices; axis 0 is the time axis, row 0 is the
              first decoder input
    tfr    -- teacher-forcing ratio: probability of feeding the ground-truth
              token instead of the model's own prediction at each step

    Returns a float tensor of shape (target_len, batch_size, output_size).
    Row 0 is never written and stays all-zero.
    """
    batch_size = source.shape[1]
    target_len = target.shape[0]
    # Take the logit width from the decoder rather than a literal so the
    # buffer always matches what the decoder returns.
    target_vocab_size = self.Decoder_LSTM.output_size
    # Allocate next to the inputs instead of relying on a module-level `device`.
    outputs = torch.zeros(target_len, batch_size, target_vocab_size, device=source.device)
    hidden_state, cell_state = self.Encoder_LSTM(source)
    x = target[0]

    for i in range(1, target_len):
      output, hidden_state, cell_state = self.Decoder_LSTM(x, hidden_state, cell_state)
      outputs[i] = output
      best_guess = output.argmax(1)
      # Teacher forcing: use the ground-truth token with probability `tfr`.
      x = target[i] if random.random() < tfr else best_guess

    return outputs


In [None]:
# Hyperparameters
learning_rate = 0.001
writer = SummaryWriter("runs/loss_plot")  # TensorBoard event files are written here
step = 0  # global batch counter, used as the TensorBoard x-axis

model = Seq2Seq(encoder_lstm, decoder_lstm).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# The loss is computed against `batch.trg`, i.e. tokens numericalised by the
# `english` field, so the padding index is looked up in that vocabulary.
pad_idx = english.vocab.stoi["<pad>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedily translate one German sentence into English tokens.

    The data pipeline uses `german` as the source field and `english` as the
    target field, so the input is numericalised with `german.vocab` and the
    decoder output is mapped back through `english.vocab`.

    Args:
        model: module exposing `Encoder_LSTM` and `Decoder_LSTM`.
        sentence: raw German string, or an already tokenised list of strings.
            A list passed in is not modified.
        german: source field (`init_token`, `eos_token`, `vocab.stoi`).
        english: target field (`init_token`, `eos_token`, `vocab.stoi`,
            `vocab.itos`).
        device: device on which the input tensors are created.
        max_length: maximum number of decoding steps.

    Returns:
        The predicted target tokens without the leading start token. The last
        element is the end token only if the decoder emitted it within
        `max_length` steps.
    """
    if isinstance(sentence, str):
        # Reuse the tokenizer the training data went through instead of
        # loading a spaCy model on every call.
        tokens = [token.lower() for token in tokenizer_german(sentence)]
    else:
        tokens = [token.lower() for token in sentence]
    tokens = [german.init_token] + tokens + [german.eos_token]
    source_indices = [german.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.LongTensor(source_indices).unsqueeze(1).to(device)

    sos_idx = english.vocab.stoi[english.init_token]
    eos_idx = english.vocab.stoi[english.eos_token]
    outputs = [sos_idx]

    # Inference must run with dropout disabled. The caller's train/eval mode
    # is restored afterwards so this can be called in the middle of training.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            hidden, cell = model.Encoder_LSTM(sentence_tensor)
            for _ in range(max_length):
                previous_word = torch.LongTensor([outputs[-1]]).to(device)
                output, hidden, cell = model.Decoder_LSTM(previous_word, hidden, cell)
                best_guess = output.argmax(1).item()
                outputs.append(best_guess)
                if best_guess == eos_idx:
                    break
    finally:
        model.train(was_training)

    return [english.vocab.itos[idx] for idx in outputs[1:]]

def bleu(data, model, german, english, device):
    """Compute the corpus-level BLEU score of `model` over `data`.

    Args:
        data: iterable of examples carrying tokenised `src` and `trg` attributes.
        model, german, english, device: passed through to translate_sentence.

    Returns:
        The value returned by `bleu_score` for all predictions against their
        single references.
    """
    targets = []
    outputs = []

    for example in data:
        src = vars(example)["src"]
        trg = vars(example)["trg"]

        prediction = translate_sentence(model, src, german, english, device)
        # Drop the end-of-sentence marker only when one was actually produced.
        # If decoding stopped at max_length, the last token is a real word and
        # must be kept.
        if prediction and prediction[-1] == english.eos_token:
            prediction = prediction[:-1]

        # bleu_score expects a list of references per candidate.
        targets.append([trg])
        outputs.append(prediction)

    return bleu_score(outputs, targets)

def checkpoint_and_save(model, best_loss, epoch, optimizer, epoch_loss):
    """Write a full training checkpoint and a weights-only file under /content.

    The first file holds the model object itself, the best loss, the epoch, the
    CPU RNG state and the optimizer state. The second holds only
    `model.state_dict()`. `epoch_loss` is accepted but not stored.
    """
    print('Saving the model')
    print()
    checkpoint = dict(
        model=model,
        best_loss=best_loss,
        epoch=epoch,
        rng_state=torch.get_rng_state(),
        optimizer=optimizer.state_dict(),
    )
    torch.save(checkpoint, '/content/checkpoint-model')
    weights = model.state_dict()
    torch.save(weights, '/content/checkpoint-model_state_dict')

In [None]:
num_epochs = 20
patience = 10              # epochs without improvement tolerated before stopping
best_loss = float("inf")   # lowest mean training loss seen so far
best_epoch = 0
epoch_loss = 0.0

for epoch in range(num_epochs):

  model.train(True)
  # Per-epoch accumulators. Without a reset the loss total would grow across
  # epochs and could never compare as "better" after the first one.
  running_loss = 0.0
  num_batches = 0

  for batch in train_iterator:
    source = batch.src.to(device)
    target = batch.trg.to(device)
    output = model(source, target)
    # Position 0 is the <sos> slot and is never predicted. Flatten the rest
    # to (steps * batch, vocab) vs. (steps * batch,) for CrossEntropyLoss.
    output = output[1:].reshape(-1, output.shape[2])
    target = target[1:].reshape(-1)
    optimizer.zero_grad()
    loss = criterion(output, target)
    loss.backward()
    # Clip the global gradient norm to keep LSTM updates stable.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
    optimizer.step()
    step += 1
    running_loss += loss.item()
    num_batches += 1
    writer.add_scalar("Training loss", loss.item(), global_step=step)

  # Mean loss per batch, so the value is comparable across epochs.
  epoch_loss = running_loss / max(num_batches, 1)
  print("Epoch {}: mean training loss = {:.4f}".format(epoch + 1, epoch_loss))
  print()

  if epoch_loss < best_loss:
    best_loss = epoch_loss
    best_epoch = epoch
    checkpoint_and_save(model, best_loss, epoch, optimizer, epoch_loss)
  elif (epoch - best_epoch) >= patience:
    # Reached only on epochs that did not improve, so the gap can be non-zero.
    print("Stopping the training as the loss has plateaud")
    break

# Evaluate with dropout disabled.
model.eval()
score = bleu(test_data[1:100], model, german, english, device)
print(f"Bleu score {score*100:.2f}")


RuntimeError: ignored

In [None]:
# Manually write a checkpoint of the current model and optimizer state, using
# the globals left behind by the training cell.
checkpoint_and_save(model, best_loss, epoch, optimizer, epoch_loss) 

In [None]:

def test():
    """Translate the German test sentences and print a BLEU score for each.

    Reads one sentence per line from /content/test_de (German sources) and
    /content/test_en (English references), restores the weights written by
    checkpoint_and_save, then prints source, reference, prediction and
    sentence-level BLEU for every pair. Returns None.
    """
    def read_sentences(path):
        # One sentence per non-empty line.
        # NOTE(review): assumes the files have no header row — confirm.
        with open(path, encoding="utf-8") as handle:
            return [line.strip() for line in handle if line.strip()]

    test_sentences = read_sentences('/content/test_de')
    actual_sentences = read_sentences('/content/test_en')

    # Weights-only file produced by checkpoint_and_save.
    STATE_DICT_PATH = "/content/checkpoint-model_state_dict"

    model = Seq2Seq(encoder_lstm, decoder_lstm).to(device)
    model.load_state_dict(torch.load(STATE_DICT_PATH, map_location=device))
    model.eval()

    detokenizer = TreebankWordDetokenizer()
    for source_sentence, reference_sentence in zip(test_sentences, actual_sentences):
        predicted_tokens = translate_sentence(model, source_sentence, german, english, device, max_length=50)
        # Keep the end-of-sentence marker out of both the printed text and the score.
        if predicted_tokens and predicted_tokens[-1] == english.eos_token:
            predicted_tokens = predicted_tokens[:-1]
        # Tokenise and lowercase the reference the same way the `english` field does.
        reference_tokens = [token.lower() for token in tokenizer_english(reference_sentence)]

        print("German : {}".format(source_sentence))
        print("Actual Sentence in English : {}".format(reference_sentence))
        print("Predicted Sentence in English : {}".format(detokenizer.detokenize(predicted_tokens)))
        # bleu_score works on token lists: a corpus of candidates and, per
        # candidate, a list of references.
        score = bleu_score([predicted_tokens], [[reference_tokens]])
        print(score)
        print()
# Disabled variant of the evaluation, kept as a bare string literal. It is never
# executed, but as the cell's last expression its text is echoed as the cell output.
'''



SD_PATH = "/content/checkpoint-model_state_dict "
MODEL_PATH = "/content/checkpoint-model"

model = Seq2Seq(encoder_lstm, decoder_lstm).to(device)

model.load_state_dict(torch.load('/content/checkpoint-model_state_dict'))
#model = torch.load(MODEL_PATH)
model.eval()
model.eval()
test_sentences  = ["Zwei Männer gehen die Straße entlang", "Kinder spielen im Park.", "Diese Stadt verdient eine bessere Klasse von Verbrechern. Der Spaßvogel"]
actual_sentences  = ["Two men are walking down the street", "Children play in the park", "This city deserves a better class of criminals. The joker"]
pred_sentences = []
progress=[]
for idx, i in enumerate(test_sentences):
  model.eval()
  translated_sentence = translate_sentence(model, i, german, english, device, max_length=50)
  progress.append(TreebankWordDetokenizer().detokenize(translated_sentence))
  print("German : {}".format(i))
  print("Actual Sentence in English : {}".format(actual_sentences[idx]))
  print("Predicted Sentence in English : {}".format(progress[-1]))
  score = bleu_score(progress[-1], actual_sentences[idx])
  print(score)
  print()
'''

'\n\n\n\nSD_PATH = "/content/checkpoint-model_state_dict "\nMODEL_PATH = "/content/checkpoint-model"\n\nmodel = Seq2Seq(encoder_lstm, decoder_lstm).to(device)\n\nmodel.load_state_dict(torch.load(\'/content/checkpoint-model_state_dict\'))\n#model = torch.load(MODEL_PATH)\nmodel.eval()\nmodel.eval()\ntest_sentences  = ["Zwei Männer gehen die Straße entlang", "Kinder spielen im Park.", "Diese Stadt verdient eine bessere Klasse von Verbrechern. Der Spaßvogel"]\nactual_sentences  = ["Two men are walking down the street", "Children play in the park", "This city deserves a better class of criminals. The joker"]\npred_sentences = []\nprogress=[]\nfor idx, i in enumerate(test_sentences):\n  model.eval()\n  translated_sentence = translate_sentence(model, i, german, english, device, max_length=50)\n  progress.append(TreebankWordDetokenizer().detokenize(translated_sentence))\n  print("German : {}".format(i))\n  print("Actual Sentence in English : {}".format(actual_sentences[idx]))\n  print("Predic

In [None]:
# Run the file-based evaluation defined in the previous cell.
test()

RuntimeError: ignored

In [None]:
# Scratch cell: prints a fixed string and does not touch the model or data.
print('hello')