In [1]:
pip install torchtext==0.16.0

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install portalocker==2.8.2

Note: you may need to restart the kernel to use updated packages.


In [3]:
!python -m spacy download de_core_news_sm

Collecting de-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.7.0/de_core_news_sm-3.7.0-py3-none-any.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('de_core_news_sm')


In [4]:
import torch
import torchtext

from torchtext.vocab import build_vocab_from_iterator



import spacy

en_nlp = spacy.load('en_core_web_sm')
de_nlp = spacy.load('de_core_news_sm')

from torchtext.vocab import build_vocab_from_iterator
import torchtext.transforms as T

from torch.utils.data import DataLoader

import numpy as np

In [5]:
data = torchtext.datasets.Multi30k(split = 'train')
test_data = torchtext.datasets.Multi30k(split = 'valid')

In [6]:
train_input = []
train_target = []

for data_idx,data in enumerate(data):
  target = ' '.join(['sos',data[1],'eos'])
  train_input.append(data[0])
  train_target.append(target)

In [7]:
test_input = []
test_target = []

for data_idx,data in enumerate(test_data):
  target = ' '.join(['sos',data[1],'eos'])
  test_input.append(data[0])
  test_target.append(target)

In [8]:
class Text_Tokenization:
  def __init__(self,nlp):
    self.nlp = nlp

  def _spacy_tokenizer(self,text):
    return [token.text for token in self.nlp(text)]

  def _yield_tokens(self,doc):
    for text in doc:
      text = self._spacy_tokenizer(text)
      yield text

  def _vocab(self,data):
    vocab = build_vocab_from_iterator(
        self._yield_tokens(data),
        specials = ['<pad>','<unk>']
    )
    vocab.set_default_index(vocab['<unk>'])
    self.vocab = vocab


  def tokenize(self,doc,maxlen,vocab = None):
    if vocab == None:
      self._vocab(doc)

    transforms = T.Sequential(
        T.VocabTransform(self.vocab),
        T.Truncate(max_seq_len = maxlen),
        T.ToTensor(padding_value = 0),
        T.PadTransform(max_length = maxlen,pad_value = 0)
    )

    output = np.array([transforms(self._spacy_tokenizer(text)) for text in doc])
    return output

In [9]:
def decoder_inputs_targets(sentences):
  decoder_inputs = []
  decoder_targets = [s[1:] for s in sentences]

  for sentence in sentences:
    sentence = [s for s in sentence if s != 0][:-1]
    while len(sentence) != len(decoder_targets[0]): sentence.append(0)
    decoder_inputs.append(np.array(sentence))

  return np.array(decoder_inputs),np.array(decoder_targets)

In [10]:
source_text_tokenizer = Text_Tokenization(de_nlp)
target_text_tokenizer = Text_Tokenization(en_nlp)

train_input_tokenized = source_text_tokenizer.tokenize(train_input[:3000],maxlen = 40)
train_target_tokenized = target_text_tokenizer.tokenize(train_target[:3000],maxlen = 41)

source_text_vocab = source_text_tokenizer.vocab
source_text_inverse_vocab = {value:key for key,value in source_text_vocab.get_stoi().items()}

target_text_vocab = target_text_tokenizer.vocab
target_text_inverse_vocab = {value:key for key,value in target_text_vocab.get_stoi().items()}


test_input_tokenized = source_text_tokenizer.tokenize(test_input[:100],maxlen = 40,vocab = source_text_vocab)
test_target_tokenized = target_text_tokenizer.tokenize(test_target[:100],maxlen = 41,vocab = target_text_vocab)

In [11]:
encoder_inputs = train_input_tokenized
decoder_inputs,decoder_targets = decoder_inputs_targets(train_target_tokenized)

encoder_inputs_test = test_input_tokenized
decoder_inputs_test,decoder_targets_test = decoder_inputs_targets(test_target_tokenized)

print(f'Encoder input: {encoder_inputs[0]}')
print(f'Decoder input: {decoder_inputs[0]}')
print(f'Decoder target: {decoder_targets[0]}')


print(f'\nEncoder input reconstruction: {" ".join([source_text_inverse_vocab[token] for token in encoder_inputs[0] if token != 0 ])} ')
print(f'Decoder input reconstruction: {" ".join([target_text_inverse_vocab[token] for token in decoder_inputs[0] if token != 0 ])} ')
print(f'Encoder reconstruction: {" ".join([target_text_inverse_vocab[token] for token in decoder_targets[0] if token != 0 ])} ')

print(f'\n\n\nTest Encoder input: {encoder_inputs_test[0]}')
print(f'Test Decoder input: {decoder_inputs_test[0]}')
print(f'Test Decoder target: {decoder_targets_test[0]}')

print(f'\nEncoder input reconstruction: {" ".join([source_text_inverse_vocab[token] for token in encoder_inputs_test[0] if token != 0 ])} ')
print(f'Decoder input reconstruction: {" ".join([target_text_inverse_vocab[token] for token in decoder_inputs_test[0] if token != 0 ])} ')
print(f'Encoder reconstruction: {" ".join([target_text_inverse_vocab[token] for token in decoder_targets_test[0] if token != 0 ])} ')

Encoder input: [  20  103  408   32   88   19   76    6   12   73 4414 1966    2    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0]
Decoder input: [   4   18   25   15 1215  625   16   58   67  322  797    5    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0]
Decoder target: [  18   25   15 1215  625   16   58   67  322  797    5    3    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0]

Encoder input reconstruction: Zwei junge weiße Männer sind im Freien in der Nähe vieler Büsche . 
Decoder input reconstruction: sos Two young , White males are outside near many bushes . 
Encoder reconstruction: Two young , White males are outside near many bushes . eos 



Test Encoder input: [  14   43   31  351 4068    1    9   17 2607 

In [12]:
class Positional_Encoding(torch.nn.Module):
  def __init__(self):
    super().__init__()

  def forward(self,X):
    self.batch_size,self.sequence_length,self.embedding_size = X.shape

    even_i = torch.arange(0,self.embedding_size,2).float()
    odd_i = torch.arange(1,self.embedding_size,2).float()

    dominator = torch.pow(10000,even_i / self.embedding_size)

    position = torch.arange(self.sequence_length,dtype = torch.float).reshape(self.sequence_length,1)

    even_PE = torch.sin(position / dominator)
    odd_PE = torch.cos(position / dominator)

    stacked = torch.stack([even_PE,odd_PE],dim = 2)

    PE = torch.flatten(stacked,start_dim = 1,end_dim = 2)

    return PE

In [13]:
class Multi_Head_Self_Attention(torch.nn.Module):
  def __init__(self,embedding_size,num_heads,Mask = False):
    super().__init__()
    self.embedding_size = embedding_size
    self.num_heads = num_heads
    self.head_dim = embedding_size // num_heads
    self.Mask = Mask

    self.qkv_layer = torch.nn.Linear(embedding_size, 3 * embedding_size)
    self.linear_layer = torch.nn.Linear(embedding_size,embedding_size)

  def forward(self,X):
    import math

    self.batch_size,self.sequence_length,self.embedding_size = X.shape

    #print(f'Input X: {X.shape}')

    qkv = self.qkv_layer(X)
    #print(f'QKV: {qkv.shape}')

    qkv = qkv.reshape(self.batch_size,self.num_heads,self.sequence_length,3 * self.head_dim)
    #print(f'QKV: {qkv.shape}')

    q,k,v = qkv.chunk(3,dim = -1)
    #print(f'\nQuery: {q.shape}')
    #print(f'Key: {k.shape}')
    #print(f'Value: {v.shape}')

    scaled = torch.matmul(q,k.transpose(-2,-1)) / math.sqrt(q.size(-1))
    #print(f'Scaled: {scaled.shape}')

    mask = torch.full(scaled.size(),float('-inf'))
    mask = torch.triu(mask,diagonal = 1)

    attention = torch.softmax(scaled,-1) if self.Mask == False else torch.softmax(scaled + mask,-1)
    #print(f'Attention: {attention.shape}')

    output = torch.matmul(attention,v).reshape(self.batch_size,self.sequence_length,self.embedding_size)
    #print(f'Output: {output.shape}')

    output = self.linear_layer(output)

    return output



In [14]:
class Norm(torch.nn.Module):
  def __init__(self,embedding_size):
    super().__init__()
    self.embedding_size = embedding_size

  def forward(self,X):
    self.batch_size,self.sequence_length,self.embedding_size = X.shape

    parameter_shape = X.shape[-2:]
    #print(f'Parameter shape: {parameter_shape}\n')

    gamma = torch.nn.Parameter(torch.ones(parameter_shape))
    beta = torch.nn.Parameter(torch.zeros(parameter_shape))

    dims = [- ( i + 1) for i in range(len(parameter_shape))]

    mean = X.mean(dim = dims,keepdim = True)
    #print(f'Mean: {mean}')

    var = ((X - mean) ** 2).mean(dim = dims, keepdim = True)
    #print(f'Var: {var}')

    epsilon = 1e-5
    std = (var + epsilon) ** (1/2)
    #print(f'STD: {std}\n')

    y = (X - mean) / std

    output = gamma * y + beta
    #print(f'Output: {output.shape}\n')

    return output

In [15]:
class Feed_Forward(torch.nn.Module):
  def __init__(self,embedding_size,hidden_size,p = 0.5):
    super().__init__()

    self.linear_1 = torch.nn.Linear(embedding_size,hidden_size)
    self.linear_2 = torch.nn.Linear(hidden_size,embedding_size)

    self.relu = torch.nn.ReLU()
    self.dropout = torch.nn.Dropout(p)

  def forward(self,X):
    #print(f'Input X: {X.shape}')

    X = self.linear_1(X)
    X = self.relu(X)
    X = self.dropout(X)
    #print(f'X after linear_1: {X.shape}')

    output = self.linear_2(X)
    #print(f'Output: {output.shape}\n')

    return output

In [16]:
class Cross_Multi_Head_Attention(torch.nn.Module):
  def __init__(self,num_heads,embedding_size):
    super().__init__()
    self.embedding_size = embedding_size
    self.num_heads = num_heads
    self.head_dim = embedding_size // num_heads

    self.q_layer = torch.nn.Linear(embedding_size,embedding_size)
    self.kv_layer = torch.nn.Linear(embedding_size, 2 * embedding_size)
    self.linear_layers = torch.nn.Linear(embedding_size,embedding_size)

    self.linear_layer = torch.nn.Linear(embedding_size,embedding_size)

  def forward(self,x,y,Mask = False):
    import math
    batch_size,sequence_length,embedding_size = x.size()
    #print(f'Batch size: {batch_size}')
    #print(f'Sequence length: {sequence_length}')
    #print(f'Embedding size: {embedding_size}\n')

    q = self.q_layer(x)
    kv = self.kv_layer(x)

    #print(f'Query: {q.shape}')
    #print(f'Key-Value: {kv.shape}\n')

    k,v = kv.chunk(2,-1)
    #print(f'Key: {k.shape}')
    #print(f'Value: {v.shape}\n')

    q = q.reshape(batch_size,self.num_heads,sequence_length,self.head_dim)
    k = k.reshape(batch_size,self.num_heads,sequence_length,self.head_dim)
    v = v.reshape(batch_size,self.num_heads,sequence_length,self.head_dim)

    #print(f'Query: {q.shape}')
    #print(f'Key: {k.shape}')
    #print(f'Value: {v.shape}\n')

    scaled = torch.matmul(q,k.transpose(-2,-1)) / math.sqrt(q.size(-1))
    #print(f'Scaled: {scaled.shape}')

    mask = torch.full(scaled.size(),fill_value = float('-inf'))
    mask = torch.triu(mask,diagonal = 1)
    #print(f'Mask: {mask.shape}')

    attention = torch.softmax(scaled,-1) if Mask == False else torch.softmax(scaled + mask , -1)
    #print(f'Attention: {attention.shape}')

    output = torch.matmul(attention,v).reshape(batch_size,sequence_length,embedding_size)
    output = self.linear_layer(output)
    #print(f'Output: {output.shape}')

    return output

In [17]:
class Encoder_layer(torch.nn.Module):
  def __init__(self,num_heads,embedding_size,hidden_size,source_vocab_size,p = 0.5):
    super().__init__()
    self.embedding = torch.nn.Embedding(source_vocab_size,embedding_size,padding_idx = 0)
    self.position_encoder = Positional_Encoding()
    self.layer_norm = Norm(embedding_size)
    self.multi_head_self_attention = Multi_Head_Self_Attention(embedding_size,num_heads)
    self.feed_forward = Feed_Forward(embedding_size,hidden_size,p)


  def forward(self,X):
    #print(f'Input X: {X}')

    x = self.embedding(X)
    #print(f'X: {X.shape}')
    x = x + self.position_encoder(x)
    x_ = x
    #print(f'X: {X.shape}')

    x = self.multi_head_self_attention(x) + x_
    x = self.layer_norm(x)
    x_ = x
    #print(f'X: {X.shape}')

    x = self.feed_forward(x) + x_
    x = self.layer_norm(x)
    #print(f'X: {X.shape}\n')

    return x

In [18]:
class Encoder(torch.nn.Module):
  def __init__(self,num_heads,embedding_size,hidden_size,source_vocab_size,num_layers = 1,p = 0.5):
    super().__init__()
    self.layers = torch.nn.Sequential(*[Encoder_layer(num_heads,embedding_size,hidden_size,source_vocab_size,p = p) for _ in range(num_layers)])

  def forward(self,X):
    X = self.layers(X)
    return X

In [19]:
class Decoder_layer(torch.nn.Module):
  def __init__(self,num_heads,embedding_size,hidden_size,target_vocab_size,p = 0.5):
    super().__init__()
    self.embedding = torch.nn.Embedding(target_vocab_size,embedding_size,padding_idx = 0)
    self.position_encoder = Positional_Encoding()
    self.layer_norm = Norm(embedding_size)
    self.decoder_multi_head_self_attention = Multi_Head_Self_Attention(embedding_size,num_heads,Mask = True)
    self.feed_forward = Feed_Forward(embedding_size,hidden_size,p)
    self.cross_multi_head_self_attention = Cross_Multi_Head_Attention(num_heads,embedding_size)

    self.linear = torch.nn.Linear(embedding_size,target_vocab_size)

  def forward(self,source_text,decoder_input):
    #print(f'Decoder input: {decoder_input}')

    X = decoder_input
    X = self.embedding(X)

    X = X + self.position_encoder(X)
    X_ = X
    #print(f'X: {X.shape}')

    X = self.decoder_multi_head_self_attention(X)#,Mask = True)
    X = self.layer_norm(X + X_)
    X_ = X
    #print(f'X: {X.shape}')

    X = self.cross_multi_head_self_attention(source_text,decoder_input,Mask = True)
    X = self.layer_norm(X + X_)
    X_ = X
    #print(f'X: {X.shape}')

    X = self.feed_forward(X)
    X = self.layer_norm(X + X_)
    #print(f'X: {X.shape}')

    output = self.linear(X)
    #print(f'output: {output.shape}')

    return output


In [20]:
class Sequential_Decoder(torch.nn.Sequential):
  def forward(self,*inputs):
    x,y = inputs
    for module in self._modules.values():
      y = module(x,y)
    return y

In [21]:
class Decoder(torch.nn.Module):
  def __init__(self,num_heads,embedding_size,hidden_size,target_vocab_size,p = 0.5):
    super().__init__()

    self.layers = Sequential_Decoder(*[Decoder_layer(num_heads,embedding_size,hidden_size,target_vocab_size,p)
                                        for _ in range(num_layers)])

  def forward(self,x,y):
    output = self.layers(x,y)

    return output

In [22]:
class Transformer(torch.nn.Module):
  def __init__(self,encoder,decoder):
    super().__init__()
    self.encoder = encoder
    self.decoder = decoder

  def forward(self,source,target):
    #print(f'Source: {source.shape}')
    #print(f'Target: {target.shape}\n')

    encoder_output = self.encoder(source)
    output = self.decoder(encoder_output,target)

    #print(f'Encoder output: {encoder_output.shape}')
    #print(f'Decoder output: {output.shape}\n')

    return output

In [23]:
batch_size = 32
batched_encoder_inputs = DataLoader(encoder_inputs,batch_size)
batched_decoder_inputs = DataLoader(decoder_inputs,batch_size)
batched_decoder_targets = DataLoader(decoder_targets,batch_size)

batched_encoder_inputs_test = DataLoader(encoder_inputs_test,batch_size)
batched_decoder_inputs_test = DataLoader(decoder_inputs_test,batch_size)
batched_decoder_targets_test = DataLoader(decoder_targets_test,batch_size)

train_data_batched = DataLoader(list(zip(list(zip(encoder_inputs,decoder_inputs)),decoder_targets)),batch_size,drop_last = True)
test_data_batched = DataLoader(list(zip(list(zip(encoder_inputs_test,decoder_inputs_test)),decoder_targets_test)),batch_size,drop_last = True)

X,y = next(iter(train_data_batched))

X[0].shape,X[1].shape,y.shape

(torch.Size([32, 40]), torch.Size([32, 40]), torch.Size([32, 40]))

In [24]:
batch_size = X[0].shape[0]
num_heads = 8
sequence_length = X[0].shape[1]
embedding_size = 512
hidden_size = 128
num_layers = 1
p = 0.2
source_vocab_size = len(source_text_inverse_vocab) + 1
target_vocab_size = len(target_text_inverse_vocab) + 1

encoder = Encoder(num_heads,embedding_size,hidden_size,source_vocab_size,num_layers,p)
decoder = Decoder(num_heads,embedding_size,hidden_size,target_vocab_size,p = 0.5)

model = Transformer(encoder,decoder)

In [25]:
class Train_Transformer:
  def __init__(self,model,loss_function,optimizer,epochs):
    self.model = model
    self.loss_function = loss_function
    self.optimizer = optimizer
    self.epochs = epochs

  def _accuracy(self,target,prediction):
    from sklearn.metrics import accuracy_score as accuracy
    acc = 0
    target = np.array(target).reshape(-1)
    prediction = np.array(prediction).reshape(-1)

    valid_idx = np.argwhere(target != 0)

    target = target[valid_idx].reshape(-1)
    prediction = prediction[valid_idx].reshape(-1)
    #print(f'Target: {target[:10]}')
    #print(f'Prediction: {prediction[:10]}')

    acc = accuracy(target,prediction)
    return acc

  def fit(self,train_data_batched):
    from sklearn.metrics import accuracy_score as accuracy
    from tqdm.auto import tqdm
    self.model.train()

    for epoch in tqdm(range(self.epochs)):

      train_batch_loss = 0
      train_batch_acc = 0

      for batch,(X,y) in tqdm(enumerate(train_data_batched)):
        #print(f'X[0]: {X[0].shape}')
        #print(f'X[1]: {X[1].shape}\n')
        #print(f'y: {y.shape}')
        train_prediction = model(X[0],X[1])
        train_labels = train_prediction.argmax(-1)
        #print(f'Train prediction: {train_prediction.shape}')
        #print(f'Train labels: {train_labels.shape}\n')

        train_loss = self.loss_function(train_prediction.permute(0,2,1),y)
        train_acc = self._accuracy(y,train_labels)
        #print(f'Train Loss: {train_loss}\n')
        #print(f'Train accuracy: {train_acc}')

        train_batch_loss += train_loss
        train_batch_acc += train_acc
        #print(train_batch_acc)

        self.optimizer.zero_grad()
        train_loss.backward()
        self.optimizer.step()

      train_batch_loss /= len(train_data_batched)
      train_batch_acc /= len(train_data_batched)

      print(f'Epoch: {epoch} | Train Loss: {train_batch_loss} | Train Accuracy: {train_batch_acc}\n')


  def eval(self,test_data_batched):
    from sklearn.metrics import accuracy_score as accuracy
    from tqdm.auto import tqdm
    self.model.eval()

    test_batch_loss = 0
    test_batch_acc = 0

    for batch,(X,y) in tqdm(enumerate(test_data_batched)):
      test_prediction = model(X[0],X[1])
      test_labels = test_prediction.argmax(-1)

      test_loss = self.loss_function(test_prediction.permute(0,2,1),y)
      test_acc = self._accuracy(y,test_labels)

      test_batch_loss += test_loss
      test_batch_acc += test_acc

    test_batch_loss /= len(test_data_batched)
    test_batch_acc /= len(test_data_batched)

    print(f'Test Loss: {test_batch_loss} | Test Accuracy: {test_batch_acc}')

In [26]:
loss_function = torch.nn.CrossEntropyLoss(ignore_index = 0)
optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
epochs = 40

Trainer = Train_Transformer(model,loss_function,optimizer,epochs)

Trainer.fit(train_data_batched)
Trainer.eval(test_data_batched)

  0%|          | 0/40 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Epoch: 0 | Train Loss: 5.604759216308594 | Train Accuracy: 0.23752975775470003



0it [00:00, ?it/s]

Epoch: 1 | Train Loss: 4.22092342376709 | Train Accuracy: 0.36015609300862433



0it [00:00, ?it/s]

Epoch: 2 | Train Loss: 3.786346197128296 | Train Accuracy: 0.4035843956904797



0it [00:00, ?it/s]

Epoch: 3 | Train Loss: 3.4657182693481445 | Train Accuracy: 0.4329196667754265



0it [00:00, ?it/s]

Epoch: 4 | Train Loss: 3.198378086090088 | Train Accuracy: 0.45640480196419886



0it [00:00, ?it/s]

Epoch: 5 | Train Loss: 2.9563486576080322 | Train Accuracy: 0.4791078232528712



0it [00:00, ?it/s]

Epoch: 6 | Train Loss: 2.729905605316162 | Train Accuracy: 0.5017962206061312



0it [00:00, ?it/s]

Epoch: 7 | Train Loss: 2.512934684753418 | Train Accuracy: 0.5263943368698674



0it [00:00, ?it/s]

Epoch: 8 | Train Loss: 2.304382085800171 | Train Accuracy: 0.552873861448674



0it [00:00, ?it/s]

Epoch: 9 | Train Loss: 2.1015989780426025 | Train Accuracy: 0.5838027952526694



0it [00:00, ?it/s]

Epoch: 10 | Train Loss: 1.91461980342865 | Train Accuracy: 0.6158535100419978



0it [00:00, ?it/s]

Epoch: 11 | Train Loss: 1.737560510635376 | Train Accuracy: 0.650094415046375



0it [00:00, ?it/s]

Epoch: 12 | Train Loss: 1.5715497732162476 | Train Accuracy: 0.6825992909890614



0it [00:00, ?it/s]

Epoch: 13 | Train Loss: 1.4179068803787231 | Train Accuracy: 0.716919734630745



0it [00:00, ?it/s]

Epoch: 14 | Train Loss: 1.2808648347854614 | Train Accuracy: 0.7473387144571364



0it [00:00, ?it/s]

Epoch: 15 | Train Loss: 1.14359450340271 | Train Accuracy: 0.77881874239862



0it [00:00, ?it/s]

Epoch: 16 | Train Loss: 1.0167862176895142 | Train Accuracy: 0.8067800951475481



0it [00:00, ?it/s]

Epoch: 17 | Train Loss: 0.9109020233154297 | Train Accuracy: 0.8311054789313183



0it [00:00, ?it/s]

Epoch: 18 | Train Loss: 0.8182081580162048 | Train Accuracy: 0.8508144264670335



0it [00:00, ?it/s]

Epoch: 19 | Train Loss: 0.7315337657928467 | Train Accuracy: 0.8735979189869905



0it [00:00, ?it/s]

Epoch: 20 | Train Loss: 0.654787003993988 | Train Accuracy: 0.8901577836266032



0it [00:00, ?it/s]

Epoch: 21 | Train Loss: 0.5861280560493469 | Train Accuracy: 0.9046871006242851



0it [00:00, ?it/s]

Epoch: 22 | Train Loss: 0.5303472280502319 | Train Accuracy: 0.9163055125319558



0it [00:00, ?it/s]

Epoch: 23 | Train Loss: 0.4814847707748413 | Train Accuracy: 0.9257751699522035



0it [00:00, ?it/s]

Epoch: 24 | Train Loss: 0.4347056746482849 | Train Accuracy: 0.9345702698480745



0it [00:00, ?it/s]

Epoch: 25 | Train Loss: 0.39222419261932373 | Train Accuracy: 0.9414536957728781



0it [00:00, ?it/s]

Epoch: 26 | Train Loss: 0.357089102268219 | Train Accuracy: 0.9477930099477319



0it [00:00, ?it/s]

Epoch: 27 | Train Loss: 0.31915730237960815 | Train Accuracy: 0.9540700818936172



0it [00:00, ?it/s]

Epoch: 28 | Train Loss: 0.2825717628002167 | Train Accuracy: 0.9601209122072093



0it [00:00, ?it/s]

Epoch: 29 | Train Loss: 0.25240060687065125 | Train Accuracy: 0.9646411236185208



0it [00:00, ?it/s]

Epoch: 30 | Train Loss: 0.2271755188703537 | Train Accuracy: 0.9678987701879838



0it [00:00, ?it/s]

Epoch: 31 | Train Loss: 0.2069457322359085 | Train Accuracy: 0.9710788327135155



0it [00:00, ?it/s]

Epoch: 32 | Train Loss: 0.1887776106595993 | Train Accuracy: 0.9735708794767683



0it [00:00, ?it/s]

Epoch: 33 | Train Loss: 0.17439021170139313 | Train Accuracy: 0.9749232066627785



0it [00:00, ?it/s]

Epoch: 34 | Train Loss: 0.16170406341552734 | Train Accuracy: 0.9762016965522504



0it [00:00, ?it/s]

Epoch: 35 | Train Loss: 0.1505552977323532 | Train Accuracy: 0.9771208154671593



0it [00:00, ?it/s]

Epoch: 36 | Train Loss: 0.14045177400112152 | Train Accuracy: 0.9779780479696069



0it [00:00, ?it/s]

Epoch: 37 | Train Loss: 0.132285013794899 | Train Accuracy: 0.9791236444893197



0it [00:00, ?it/s]

Epoch: 38 | Train Loss: 0.12562741339206696 | Train Accuracy: 0.9799254092245296



0it [00:00, ?it/s]

Epoch: 39 | Train Loss: 0.12014683336019516 | Train Accuracy: 0.9801334807929655



0it [00:00, ?it/s]

Test Loss: 4.706274032592773 | Test Accuracy: 0.41603475399771694


In [27]:
def translate(model,text,source_text_tokenizer,source_text_vocab,target_text_inverse_vocab,max_translation_len = 30):
  print(f'Input text: {text}\n')

  text = torch.tensor(source_text_tokenizer.tokenize([text],maxlen = 40,vocab = source_text_vocab))
  #print(f'Tokenized text: {text}')

  encoder = model.encoder
  decoder = model.decoder

  encoder_output = encoder(text)
  #print(f'Encoder output: {encoder_output.shape}')

  token = torch.tensor([[4]])

  translated_tokens = []

  for i in range(max_translation_len):
    #print(f'Token: {token}')
    prediction = decoder(encoder_output,token)
    next_token = prediction[0,i,:].argmax()

    #print(f'Prediction: {prediction.shape}')
    #print(f'Next Token: {next_token}\n')

    if next_token == 3: break
    translated_tokens.append(next_token)
    token = torch.tensor([[next_token]])

  translated_text = ' '.join(target_text_inverse_vocab[token.item()] for token in translated_tokens)
  print(f'Translated text: {translated_text}')

  return translated_text


In [28]:
translate(model,train_input[7],source_text_tokenizer,source_text_vocab,target_text_inverse_vocab,max_translation_len = 30)

Input text: Ein schickes Mädchen spricht mit dem Handy während sie langsam die Straße entlangschwebt.

Translated text: A trendy girl a green her their communication gliding slowly uniforms cross street street street water fountain red short fence posts a wooden boat grass water fountain red short fence


'A trendy girl a green her their communication gliding slowly uniforms cross street street street water fountain red short fence posts a wooden boat grass water fountain red short fence'

In [29]:
train_target[7]

'sos A trendy girl talking on her cellphone while gliding slowly down the street. eos'

In [30]:
translate(model,test_input[44],source_text_tokenizer,source_text_vocab,target_text_inverse_vocab,max_translation_len = 30)

Input text: Ein Mann fährt ein altmodisches rotes Rennauto.

Translated text: A man drives military k-9 his track wood panelling street water fountain their swimsuits red steps several fake pride water fountains water fountain flowers water fountain colorful Christmas colorful Christmas


'A man drives military k-9 his track wood panelling street water fountain their swimsuits red steps several fake pride water fountains water fountain flowers water fountain colorful Christmas colorful Christmas'

In [31]:
test_target[44]

'sos A man drives an old-fashioned red race car. eos'