In [None]:
import torch
import torch.nn as nn
import numpy as np
import random
import time
from torchtext.legacy import data
from torchtext.legacy import datasets

In [None]:
# Seed every random number generator the notebook uses with the same value
# so that repeated runs start from the same random state.
SEED = 1234
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
  seed_fn(SEED)
# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
import spacy

In [None]:
# Download the small English and German spaCy pipelines that are loaded with spacy.load below.
!python -m spacy download en_core_web_sm
!python -m spacy download de_core_news_sm

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('de_core_news_sm')


In [None]:
# Load the English (`en`) and German (`de`) spaCy pipelines; their tokenizers
# are used by tokenize_en / tokenize_de.
en = spacy.load("en_core_web_sm")
de=spacy.load("de_core_news_sm")

In [None]:
def tokenize_de(text):
  """Split German `text` into a list of token strings using the `de` spaCy tokenizer."""
  pieces = []
  for tok in de.tokenizer(text):
    pieces.append(tok.text)
  return pieces

def tokenize_en(text):
  """Split English `text` into a list of token strings using the `en` spaCy tokenizer."""
  pieces = []
  for tok in en.tokenizer(text):
    pieces.append(tok.text)
  return pieces


In [None]:
# Field definitions: `source` is tokenized as German, `target` as English.
# Both are lower-cased and wrapped in <sos> / <eos> markers.
source = data.Field(
    tokenize=tokenize_de,
    init_token="<sos>",
    eos_token="<eos>",
    lower=True,
)
target = data.Field(
    tokenize=tokenize_en,
    init_token="<sos>",
    eos_token="<eos>",
    lower=True,
)


In [None]:
# Load the Multi30k train / validation / test splits; the ".de" side is
# processed by `source` and the ".en" side by `target`.
train_data, val_data, test_data = datasets.Multi30k.splits(
    exts=(".de", ".en"),
    fields=(source, target),
)

In [None]:
# Show the first training example as a dict of its tokenized `src` and `trg` fields.
vars(train_data.examples[0])

{'src': ['zwei',
  'junge',
  'weiße',
  'männer',
  'sind',
  'im',
  'freien',
  'in',
  'der',
  'nähe',
  'vieler',
  'büsche',
  '.'],
 'trg': ['two',
  'young',
  ',',
  'white',
  'males',
  'are',
  'outside',
  'near',
  'many',
  'bushes',
  '.']}

In [None]:
# Show the first test example as a dict of its tokenized `src` and `trg` fields.
vars(test_data.examples[0])

{'src': ['ein',
  'mann',
  'mit',
  'einem',
  'orangefarbenen',
  'hut',
  ',',
  'der',
  'etwas',
  'anstarrt',
  '.'],
 'trg': ['a',
  'man',
  'in',
  'an',
  'orange',
  'hat',
  'starring',
  'at',
  'something',
  '.']}

In [None]:
# Show the first validation example as a dict of its tokenized `src` and `trg` fields.
vars(val_data.examples[0])

{'src': ['eine',
  'gruppe',
  'von',
  'männern',
  'lädt',
  'baumwolle',
  'auf',
  'einen',
  'lastwagen'],
 'trg': ['a',
  'group',
  'of',
  'men',
  'are',
  'loading',
  'cotton',
  'onto',
  'a',
  'truck']}

In [None]:
# Build both vocabularies from the training split only, passing min_freq=2
# as the frequency threshold for including a token.
for field in (source, target):
  field.build_vocab(train_data, min_freq=2)

In [None]:
# Dump the attributes of the source vocabulary object (starts with the token frequency counter).
# NOTE(review): this prints the whole vocabulary; the output is very long.
vars(source.vocab)

{'freqs': Counter({'zwei': 3873,
          'junge': 2068,
          'weiße': 146,
          'männer': 1662,
          'sind': 490,
          'im': 3107,
          'freien': 475,
          'in': 11893,
          'der': 4989,
          'nähe': 383,
          'vieler': 2,
          'büsche': 5,
          '.': 28821,
          'mehrere': 580,
          'mit': 8843,
          'schutzhelmen': 33,
          'bedienen': 9,
          'ein': 18850,
          'antriebsradsystem': 1,
          'kleines': 772,
          'mädchen': 2121,
          'klettert': 171,
          'spielhaus': 2,
          'aus': 910,
          'holz': 61,
          'mann': 7805,
          'einem': 13711,
          'blauen': 992,
          'hemd': 1202,
          'steht': 1778,
          'auf': 8745,
          'einer': 6765,
          'leiter': 56,
          'und': 8925,
          'putzt': 43,
          'fenster': 148,
          'stehen': 939,
          'am': 911,
          'herd': 17,
          'bereiten': 79,
          '

In [None]:
# Dump the attributes of the target vocabulary object (starts with the token frequency counter).
# NOTE(review): this prints the whole vocabulary; the output is very long.
vars(target.vocab)

{'freqs': Counter({'two': 3886,
          'young': 2280,
          ',': 3963,
          'white': 2238,
          'males': 40,
          'are': 3717,
          'outside': 871,
          'near': 571,
          'many': 222,
          'bushes': 19,
          '.': 27623,
          'several': 404,
          'men': 1759,
          'in': 14886,
          'hard': 123,
          'hats': 146,
          'operating': 22,
          'a': 49165,
          'giant': 45,
          'pulley': 3,
          'system': 6,
          'little': 916,
          'girl': 1723,
          'climbing': 193,
          'into': 711,
          'wooden': 175,
          'playhouse': 3,
          'man': 7781,
          'blue': 1880,
          'shirt': 2324,
          'is': 7525,
          'standing': 1614,
          'on': 8035,
          'ladder': 60,
          'cleaning': 60,
          'window': 183,
          'at': 2927,
          'the': 10955,
          'stove': 23,
          'preparing': 105,
          'food': 357,
        

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Display the selected device string.
device

'cuda'

In [None]:
# Number of entries in the source (German) vocabulary.
len(source.vocab)

7855

In [None]:
# Number of entries in the target (English) vocabulary.
len(target.vocab)

5893

In [None]:
# One iterator per split, each yielding batches of 64 examples placed on `device`.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=64,
    device=device,
)

In [None]:
# Pull a single batch from the training iterator for inspection.
batch=next(iter(train_iterator))

In [None]:
# Token ids of the first sentence in the source batch (column 0 of the tensor).
batch.src[:,0]

tensor([  2,   5,  13,  10,   8,  16, 302,   4,   3,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1],
       device='cuda:0')

In [None]:
# Map the ids of the first source sentence back to their tokens, one per line.
for token_id in batch.src[:, 0]:
  print(source.vocab.itos[token_id])

<sos>
ein
mann
und
eine
frau
lächeln
.
<eos>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>


In [None]:
# Token ids of the first sentence in the target batch (column 0 of the tensor).
batch.trg[:,0]

tensor([  2,   4,   9,  11,   4,  14,  17, 133,   5,   3,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1],
       device='cuda:0')

In [None]:
# Map the ids of the first target sentence back to their tokens, one per line.
for token_id in batch.trg[:, 0]:
  print(target.vocab.itos[token_id])

<sos>
a
man
and
a
woman
are
smiling
.
<eos>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>


In [None]:
# Display the batch summary (sizes of its .src and .trg tensors).
batch


[torchtext.legacy.data.batch.Batch of size 64 from MULTI30K]
	[.src]:[torch.cuda.LongTensor of size 25x64 (GPU 0)]
	[.trg]:[torch.cuda.LongTensor of size 28x64 (GPU 0)]

In [None]:
class encoder(nn.Module):
  """Embeds source token ids and runs them through a GRU, returning only the final hidden state."""

  def __init__(self,input_dim,embedding_dim,hidden_dim,dropout):
    """
    input_dim: number of rows in the embedding table (the source vocabulary size when built below).
    embedding_dim: size of each token embedding.
    hidden_dim: size of the GRU hidden state.
    dropout: dropout probability applied to the embeddings.
    """
    super().__init__()
    self.input_dim, self.embedding_dim, self.hidden_dim = input_dim, embedding_dim, hidden_dim
    # Sub-modules are registered in this order: embed, rnn, dropout.
    self.embed = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
    self.rnn = nn.GRU(input_size=embedding_dim, hidden_size=hidden_dim)
    self.dropout = nn.Dropout(p=dropout)

  def forward(self,input):
    """Return the GRU's final hidden state for a batch of token ids.

    The GRU uses the default layout, so `input` is expected as [seq_len, batch].
    """
    token_vectors = self.embed(input)
    # The per-step outputs are not needed; only the last hidden state is used as context.
    _, final_hidden = self.rnn(self.dropout(token_vectors))
    return final_hidden

In [None]:
class decoder(nn.Module):
  """Single-step GRU decoder that is given the encoder context at every step."""

  def __init__(self,output_dim,embedding_dim,hidden_dim,dropout):
    """
    output_dim: number of target tokens (embedding rows and size of the output scores).
    embedding_dim: size of each token embedding.
    hidden_dim: size of the GRU hidden state and of the context vector.
    dropout: dropout probability applied to the embeddings.
    """
    super().__init__()
    self.output_dim = output_dim
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.embed = nn.Embedding(output_dim, embedding_dim)
    # The GRU input is the token embedding concatenated with the context vector.
    self.rnn = nn.GRU(embedding_dim + hidden_dim, hidden_dim)
    # The output layer sees embedding + new hidden state + context.
    self.lin1 = nn.Linear(embedding_dim + 2 * hidden_dim, output_dim)
    self.dropout = nn.Dropout(dropout)

  def forward(self,input,hidden,context):
    """Decode one step.

    input: 1-D tensor of token ids, one per batch element.
    hidden: previous decoder hidden state.
    context: encoder context, concatenated to the embedding along dim 2.
    Returns (scores over the output vocabulary, new hidden state).
    """
    # Add a leading sequence dimension of length 1 before embedding.
    step_embed = self.dropout(self.embed(input.unsqueeze(0)))
    rnn_in = torch.cat((step_embed, context), dim=2)
    _, hidden = self.rnn(rnn_in, hidden)
    # Drop the leading dimension of each tensor and join them feature-wise.
    features = torch.cat([t.squeeze(0) for t in (step_embed, hidden, context)], dim=1)
    return self.lin1(features), hidden

In [None]:
class seq2seq(nn.Module):
  """Ties an encoder and a decoder together and decodes a target sequence step by step."""

  def __init__(self,encoder,decoder,device):
    """Store the encoder module, the decoder module and the device used for the output buffer."""
    super().__init__()
    self.encoder = encoder
    self.decoder = decoder
    self.device = device

  def forward(self,src,trg,teacher_forcing_ratio=0.5):
    """Return decoder scores of shape [trg_len, batch, output_dim].

    Row 0 of the result is left as zeros because decoding starts at position 1.
    At each step the next decoder input is the ground-truth token with
    probability `teacher_forcing_ratio`, otherwise the decoder's own argmax.
    """
    steps, batch_size = trg.shape[0], trg.shape[1]
    outputs = torch.zeros(steps, batch_size, self.decoder.output_dim).to(self.device)

    # The encoder's final hidden state is both the fixed context and the initial decoder state.
    context = self.encoder(src)
    hidden = context

    # The first decoder input is the first row of the target tensor.
    step_input = trg[0]
    for t in range(1, steps):
      scores, hidden = self.decoder(step_input, hidden, context)
      outputs[t] = scores
      predicted = scores.argmax(1)
      # One random draw per step decides between teacher forcing and the model's guess.
      use_truth = random.random() < teacher_forcing_ratio
      step_input = trg[t] if use_truth else predicted
    return outputs
            


In [None]:
# Model hyperparameters.
input_dim = len(source.vocab)   # encoder embedding rows = source vocabulary size
output_dim = len(target.vocab)  # decoder output size = target vocabulary size
embedding_dim = 256
hidden_dim =512
dropout = 0.5

In [None]:
# Instantiate the encoder with the hyperparameters defined above.
enc = encoder(input_dim,embedding_dim,hidden_dim,dropout)

In [None]:
# Display the encoder's layer structure.
enc

encoder(
  (embed): Embedding(7855, 256)
  (rnn): GRU(256, 512)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [None]:
# Instantiate the decoder with the hyperparameters defined above.
dec =decoder(output_dim,embedding_dim,hidden_dim,dropout)

In [None]:
# Display the decoder's layer structure.
dec

decoder(
  (embed): Embedding(5893, 256)
  (rnn): GRU(768, 512)
  (lin1): Linear(in_features=1280, out_features=5893, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [None]:
# Combine encoder and decoder into the full model and move it to `device`.
model=seq2seq(enc,dec,device).to(device)

In [None]:
# Display the full model's layer structure.
model

seq2seq(
  (encoder): encoder(
    (embed): Embedding(7855, 256)
    (rnn): GRU(256, 512)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): decoder(
    (embed): Embedding(5893, 256)
    (rnn): GRU(768, 512)
    (lin1): Linear(in_features=1280, out_features=5893, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [None]:
def init_weights(m):
    """Re-draw every parameter reachable from module `m` from a normal distribution (mean 0, std 0.01)."""
    for weight in m.parameters():
        nn.init.normal_(weight.data, 0, 0.01)
        
# Module.apply calls init_weights on every submodule and on the model itself.
model.apply(init_weights)

seq2seq(
  (encoder): encoder(
    (embed): Embedding(7855, 256)
    (rnn): GRU(256, 512)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): decoder(
    (embed): Embedding(5893, 256)
    (rnn): GRU(768, 512)
    (lin1): Linear(in_features=1280, out_features=5893, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [None]:
def count_parameters(model):
    """Return the total number of elements across all parameters of `model` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 14,220,293 trainable parameters


In [None]:
# Adam optimizer over all model parameters, with its default settings.
optim_fn=torch.optim.Adam(model.parameters())

In [None]:

# Index of the padding token in the target vocabulary.
TRG_PAD_IDX = target.vocab.stoi[target.pad_token]

# Target positions equal to the padding index are ignored by the loss.
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [None]:
def train(model,train_iterator,optim,criterion,clip):
  """Run one training pass over `train_iterator` and return the mean batch loss.

  model: called as model(src, trg); returns scores of shape [trg_len, batch, vocab].
  train_iterator: sized iterable of batches exposing `.src` and `.trg`.
  optim: optimizer stepped once per batch.
  criterion: loss taking (flattened scores, flattened targets).
  clip: maximum gradient norm passed to clip_grad_norm_.
  """
  model.train()
  running_loss = 0
  for batch in train_iterator:
    source_batch, target_batch = batch.src, batch.trg
    optim.zero_grad()
    scores = model(source_batch, target_batch)
    vocab_size = scores.shape[-1]
    # Position 0 is skipped on both sides before flattening for the loss.
    loss = criterion(scores[1:].view(-1, vocab_size), target_batch[1:].view(-1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
    optim.step()
    running_loss += loss.item()
  return running_loss / len(train_iterator)


In [None]:
def evaluate(model,iterator,loss_fn):
  """Return the mean batch loss over `iterator` without updating the model.

  The model is switched to eval mode and called with a teacher forcing
  ratio of 0; no gradients are recorded.
  """
  model.eval()
  running_loss = 0
  with torch.no_grad():
    for batch in iterator:
      source_batch, target_batch = batch.src, batch.trg
      scores = model(source_batch, target_batch, 0)
      vocab_size = scores.shape[-1]
      # Position 0 is skipped on both sides before flattening for the loss.
      flat_targets = target_batch[1:].view(-1)
      flat_scores = scores[1:].view(-1, vocab_size)
      running_loss += loss_fn(flat_scores, flat_targets).item()
  return running_loss / len(iterator)

In [None]:
import time
import math

In [None]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and whole seconds."""
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [None]:
# Training driver: run `epochs` passes over the training data, evaluate on the
# validation split after each pass, and keep the weights with the lowest
# validation loss on disk.
epochs=10
clip=1  # maximum gradient norm handed to train()
best_valid_loss = float('inf')
for epoch in range(0,epochs):
  start_time = time.time()
  train_loss = train(model,train_iterator,optim_fn,criterion,clip)
  valid_loss=evaluate(model,valid_iterator,criterion)
  end_time =time.time()
  epoch_mins, epoch_secs = epoch_time(start_time, end_time)
  # Checkpoint only when the validation loss improves on the best seen so far.
  if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'gru_getoen-model.pt')
    
    
  # Perplexity is reported as exp(loss).
  print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
  print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
  print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')


Epoch: 01 | Time: 0m 35s
	Train Loss: 4.822 | Train PPL: 124.238
	 Val. Loss: 5.136 |  Val. PPL: 169.994
Epoch: 02 | Time: 0m 35s
	Train Loss: 3.993 | Train PPL:  54.236
	 Val. Loss: 4.449 |  Val. PPL:  85.580
Epoch: 03 | Time: 0m 35s
	Train Loss: 3.431 | Train PPL:  30.894
	 Val. Loss: 3.958 |  Val. PPL:  52.361
Epoch: 04 | Time: 0m 35s
	Train Loss: 2.978 | Train PPL:  19.657
	 Val. Loss: 3.709 |  Val. PPL:  40.816
Epoch: 05 | Time: 0m 35s
	Train Loss: 2.646 | Train PPL:  14.092
	 Val. Loss: 3.592 |  Val. PPL:  36.289
Epoch: 06 | Time: 0m 35s
	Train Loss: 2.349 | Train PPL:  10.480
	 Val. Loss: 3.547 |  Val. PPL:  34.716
Epoch: 07 | Time: 0m 35s
	Train Loss: 2.111 | Train PPL:   8.254
	 Val. Loss: 3.566 |  Val. PPL:  35.362
Epoch: 08 | Time: 0m 35s
	Train Loss: 1.910 | Train PPL:   6.755
	 Val. Loss: 3.572 |  Val. PPL:  35.581
Epoch: 09 | Time: 0m 35s
	Train Loss: 1.763 | Train PPL:   5.828
	 Val. Loss: 3.612 |  Val. PPL:  37.042
Epoch: 10 | Time: 0m 35s
	Train Loss: 1.628 | Train PPL

In [None]:
# Restore the checkpoint written by the training loop (lowest validation loss).
# map_location remaps the stored tensors onto the current `device`, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load('gru_getoen-model.pt', map_location=device))

# Score the restored model on the held-out test split.
test_loss = evaluate(model, test_iterator, criterion)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

| Test Loss: 3.523 | Test PPL:  33.884 |


In [None]:
def translate_sentence(model, sentence, german, english, device, max_length=50):
    """Greedily translate one German sentence into English.

    Args:
        model: seq2seq model exposing `encoder(src)` and
            `decoder(input, hidden, context)`.
        sentence: a raw German string, or an already tokenized list of strings.
        german: source field; supplies the tokenizer, the <sos>/<eos> tokens
            and the source vocabulary.
        english: target field; supplies the <sos>/<eos> tokens and the target
            vocabulary.
        device: device on which the input tensors are created.
        max_length: maximum number of decoding steps.

    Returns:
        The predicted tokens joined by single spaces. The start token is
        removed; the end token is included when the model emitted it within
        `max_length` steps.
    """
    # Inference must run with dropout disabled.
    model.eval()

    # Tokenize with the field's own tokenizer (the one used to build the
    # vocabulary) instead of loading a spaCy pipeline on every call.
    if isinstance(sentence, str):
        tokens = [token.lower() for token in german.tokenize(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    # Wrap the sentence in the source field's start / end markers.
    tokens = [german.init_token] + tokens + [german.eos_token]
    src_indices = [german.vocab.stoi[token] for token in tokens]

    # Shape [src_len, 1]: a batch containing a single sentence.
    src_tensor = torch.tensor(src_indices, dtype=torch.long, device=device).unsqueeze(1)

    sos_idx = english.vocab.stoi[english.init_token]
    eos_idx = english.vocab.stoi[english.eos_token]

    outputs = [sos_idx]
    with torch.no_grad():
        # The encoder's final hidden state is both the fixed context and the
        # initial decoder hidden state.
        context = model.encoder(src_tensor)
        hidden = context

        for _ in range(max_length):
            previous_word = torch.tensor([outputs[-1]], dtype=torch.long, device=device)
            output, hidden = model.decoder(previous_word, hidden, context)
            best_guess = output.argmax(1).item()
            outputs.append(best_guess)

            # Stop as soon as the model predicts the end of the sentence.
            if best_guess == eos_idx:
                break

    # Drop the leading start token before joining.
    return " ".join(english.vocab.itos[idx] for idx in outputs[1:])

In [None]:
# Translate the first 20 test examples and print each one next to its reference.
for i in range(0,20):
  # The test split is named `test_data`; no `test` variable is defined in this notebook.
  t=vars(test_data.examples[i])

  t2=" " .join(t["src"])
  print("german sentence:",t2)
  target1 =" ".join(t["trg"])
  print("ground truth:",target1)
  translated_sentence = translate_sentence(
        model,t2, source, target, device, max_length=50
    )
  # translate_sentence already removes the start token and returns a string,
  # so slicing with [1:] here would cut off the first character of the output.
  print("tranlated sentence:",translated_sentence)
  print("\n\n")


german sentence: ein mann mit einem orangefarbenen hut , der etwas anstarrt .
ground truth: a man in an orange hat starring at something .
tranlated sentence:  man in a orange hat is something something . <eos>



german sentence: ein boston terrier läuft über saftig-grünes gras vor einem weißen zaun .
ground truth: a boston terrier is running on lush green grass in front of a white fence .
tranlated sentence:  german shepherd dog runs along the grass next to a white fence . <eos>



german sentence: ein mädchen in einem karateanzug bricht ein brett mit einem tritt .
ground truth: a girl in karate uniform breaking a stick with a front kick .
tranlated sentence:  girl in a costume is striking a kick a a . <eos>



german sentence: fünf leute in winterjacken und mit helmen stehen im schnee mit schneemobilen im hintergrund .
ground truth: five people wearing winter jackets and helmets stand in the snow , with snowmobiles in the background .
tranlated sentence: ive people in winter gear an

In [None]:
# Number of entries in the source (German) vocabulary.
len(source.vocab)

7855