## Домашнее задание 10

Разобраться с моделькой перевода как она устроена, запустить для перевода с русского на английский (при желании можно взять другие пары языков) два варианта с вниманием и без внимания. Оценить качество насколько корректно переводит (для теста отобрать примеры с увеличением длины текста) (так как оценка визуальная достаточно 20-ти примеров в тестовой выборке).

## Решение

Импортируем необходимые библиотеки.

In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time

Для работы будем использовать польско-английский датасет. Загрузим и распакуем данные.

In [2]:
!wget http://www.manythings.org/anki/pol-eng.zip

!rm -rf pol-eng
!mkdir pol-eng
!unzip pol-eng.zip -d pol-eng/

--2022-09-17 13:33:16--  http://www.manythings.org/anki/pol-eng.zip
Resolving www.manythings.org (www.manythings.org)... 173.254.30.110
Connecting to www.manythings.org (www.manythings.org)|173.254.30.110|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1843409 (1.8M) [application/zip]
Saving to: ‘pol-eng.zip’


2022-09-17 13:33:16 (7.04 MB/s) - ‘pol-eng.zip’ saved [1843409/1843409]

Archive:  pol-eng.zip
  inflating: pol-eng/pol.txt         
  inflating: pol-eng/_about.txt      


In [3]:
!ls /kaggle/working/pol-eng/ -lah

total 6.4M
drwxr-xr-x 2 root root 4.0K Sep 17 13:33 .
drwxr-xr-x 4 root root 4.0K Sep 17 13:33 ..
-rw-r--r-- 1 root root 1.5K Sep  6 03:10 _about.txt
-rw-r--r-- 1 root root 6.4M Sep  6 03:10 pol.txt


Создадим процедуру для предобработки текста на основе представленной на лекции.

In [4]:
path_to_file = "/kaggle/working/pol-eng/pol.txt"

def preprocess_sentence(w):
  w = w.lower().strip()
  w = re.sub(r"([?.!,])", r" \1 ", w)
  w = re.sub(r'[" "]+', " ", w)

  # Оставляем только интересующие нас символы
  w = re.sub(r"[^AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpQqRrSsŚśTtUuVvWwXxYyZzŹźŻż?.!,']+", " ", w)

  w = w.strip()

  # Добавляем начальный и конечный токены
  w = '<start> ' + w + ' <end>'
  return w


# Пример предобработки
preprocess_sentence("Kocham Cię!")

'<start> kocham cię ! <end>'

In [5]:
# Создаем датасет распределяя текст по парам

def create_dataset(path, num_examples):
  lines = io.open(path, encoding='UTF-8').read().strip().split('\n')

  word_pairs = [[preprocess_sentence(w) for w in l.split('\t')[:2]]  for l in lines[:num_examples]]

  return zip(*word_pairs)

en, pol = create_dataset(path_to_file, None)
print(en[0])
print(pol[0])

<start> go . <end>
<start> idź . <end>


Дополнительные вспомогательные процедуры для токенизации текста и создания тензоров из датасета.

In [6]:
def tokenize(lang):
  lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
      filters='')
  lang_tokenizer.fit_on_texts(lang)

  tensor = lang_tokenizer.texts_to_sequences(lang)

  tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                         padding='post')

  return tensor, lang_tokenizer


def load_dataset(path, num_examples=None):
  targ_lang, inp_lang = create_dataset(path, num_examples)

  input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
  target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

  return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer

In [7]:
# Размер датасета
len(en), len(pol)

(46424, 46424)

Как видим выборка относительно небольшая (по сравнению с русско-английском набором), поэтому не будем никак дополнительно уменьшать ее для целей обучения.

In [8]:
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path_to_file)

max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]

# Оставляем 20% данных на валидацию
input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)

# Размеры выборок
print(f'Трейн выборка - {len(input_tensor_train)} примеров.')
print(f'Валидационная выборка - {len(input_tensor_val)} примеров.')

Трейн выборка - 37139 примеров.
Валидационная выборка - 9285 примеров.


Продолжим подготовку данных для обучения/валидации.

In [22]:
# гиперпараметры
BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor_train)//BATCH_SIZE
embedding_dim = 300
units = 2048
vocab_inp_size = len(inp_lang.word_index)+1
vocab_tar_size = len(targ_lang.word_index)+1

# Разделяем датасет на батчи
dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

Перейдем к созданию частей модели без внимания.

In [23]:
# Создаем энкодер

class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=False,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
#     self.gru1 = tf.keras.layers.GRU(self.enc_units,
#                                    return_sequences=False,
#                                    return_state=True,
#                                    recurrent_initializer='glorot_uniform')
    

  def call(self, x, hidden):
    x = self.embedding(x)
#     x = self.gru(x, initial_state=hidden)
    output, state = self.gru(x, initial_state=hidden)
    return state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))


# Создаем декодер

class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

  def call(self, x, hidden):
    # enc_output shape == (batch_size, max_length, hidden_size)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x, initial_state=hidden)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state

In [24]:
# Оптимайзер

optimizer = tf.keras.optimizers.Adam()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

# Лосс

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

In [25]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

checkpoint_dir = './training_nmt_checkpoints'

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [26]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
      predictions, dec_hidden = decoder(dec_input, dec_hidden)

      loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [27]:
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

2022-09-17 13:35:26.232682: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-09-17 13:35:33.592750: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 1 Batch 0 Loss 1.3804
Epoch 1 Batch 100 Loss 0.7177
Epoch 1 Batch 200 Loss 0.6051
Epoch 1 Batch 300 Loss 0.5186
Epoch 1 Batch 400 Loss 0.5284
Epoch 1 Batch 500 Loss 0.5569
Epoch 1 Loss 0.6201
Time taken for 1 epoch 222.03042006492615 sec

Epoch 2 Batch 0 Loss 0.4312
Epoch 2 Batch 100 Loss 0.3703
Epoch 2 Batch 200 Loss 0.4320
Epoch 2 Batch 300 Loss 0.3594
Epoch 2 Batch 400 Loss 0.4070
Epoch 2 Batch 500 Loss 0.3931
Epoch 2 Loss 0.4125
Time taken for 1 epoch 173.11782789230347 sec

Epoch 3 Batch 0 Loss 0.2995
Epoch 3 Batch 100 Loss 0.3129
Epoch 3 Batch 200 Loss 0.3664
Epoch 3 Batch 300 Loss 0.3320
Epoch 3 Batch 400 Loss 0.3242
Epoch 3 Batch 500 Loss 0.2945
Epoch 3 Loss 0.2871
Time taken for 1 epoch 170.09934258460999 sec

Epoch 4 Batch 0 Loss 0.1786
Epoch 4 Batch 100 Loss 0.1571
Epoch 4 Batch 200 Loss 0.1301
Epoch 4 Batch 300 Loss 0.1791
Epoch 4 Batch 400 Loss 0.1923
Epoch 4 Batch 500 Loss 0.2065
Epoch 4 Loss 0.1838
Time taken for 1 epoch 171.36855840682983 sec

Epoch 5 Batch 0 Loss

Оценим качество построенной модели.

In [28]:
def evaluate_without_att(sentence):

  sentence = preprocess_sentence(sentence)

  inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_inp,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''

  hidden = [tf.zeros((1, units))]
  enc_hidden = encoder(inputs, hidden)

  dec_hidden = enc_hidden
  dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

  for t in range(max_length_targ):
    predictions, dec_hidden = decoder(dec_input, dec_hidden)

    # storing the attention weights to plot later on
    predicted_id = tf.argmax(predictions[0]).numpy()
    result += targ_lang.index_word[predicted_id] + ' '

    if targ_lang.index_word[predicted_id] == '<end>':
      return result, sentence

    # the predicted ID is fed back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

  return result, sentence

In [29]:
def translate(sentence, eval_func):
  result, sentence = eval_func(sentence)

  print('Input: %s' % (sentence))
  print('Predicted translation: {}'.format(result))

In [30]:
# restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fe7522dbe90>

In [34]:
def convert(lang, tensor):
    words = []
    for t in tensor:
        if t!=0:
            words.append(lang.index_word[t])
    return " ".join(word for word in words if word not in ['<start>', '<end>'])

In [39]:
for i in range(30):
    translate(convert(inp_lang, input_tensor_val[i]), evaluate_without_att)
    print(f'Target translation: {convert(targ_lang, target_tensor_val[i])}')
    print('--------------------')

Input: <start> moja mama potrafi bardzo dobrze grać w golfa . <end>
Predicted translation: my mom takes a little more time . <end> 
Target translation: my mother can play golf very well .
--------------------
Input: <start> dowiedziałem się , jak rozwiązać problem . <end>
Predicted translation: i found the key , but it was dangerous . <end> 
Target translation: i found out how to solve the problem .
--------------------
Input: <start> to właśnie tego słownika szukałem . <end>
Predicted translation: that's the reason i was doing it . <end> 
Target translation: this is the very dictionary i've been looking for .
--------------------
Input: <start> potrzebuję samochodu . <end>
Predicted translation: i need a car . <end> 
Target translation: i need a car .
--------------------
Input: <start> cieszę się , że cię znalazłem . <end>
Predicted translation: i'm glad to see you again . <end> 
Target translation: i'm glad i've found you .
--------------------
Input: <start> wychodzę rano . <end>
P

Теперь построим модель с вниманием.

In [40]:
# Новый энкодер
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.enc_units,
                                   return_sequences=True, # возвращаем результаты
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, state = self.gru(x, initial_state = hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))


# Класс внимания
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    # query hidden state shape == (batch_size, hidden size)
    # query_with_time_axis shape == (batch_size, 1, hidden size)
    # values shape == (batch_size, max_len, hidden size)
    # we are doing this to broadcast addition along the time axis to calculate the score
    query_with_time_axis = tf.expand_dims(query, 1)

    # score shape == (batch_size, max_length, 1)
    # we get 1 at the last axis because we are applying score to self.V
    # the shape of the tensor before applying self.V is (batch_size, max_length, units)
    score = self.V(tf.nn.tanh(
        self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights shape == (batch_size, max_length, 1)
    attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights


# Новый декодер

class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    # enc_output shape == (batch_size, max_length, hidden_size)
    context_vector, attention_weights = self.attention(hidden, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state, attention_weights

In [41]:
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)
attention_layer = BahdanauAttention(10)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)


optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')


checkpoint_dir_att = './training_attention_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir_att, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [42]:
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

      loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

Обучаем модель с вниманием.

In [43]:
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 1.4638
Epoch 1 Batch 100 Loss 0.7546
Epoch 1 Batch 200 Loss 0.6855
Epoch 1 Batch 300 Loss 0.6622
Epoch 1 Batch 400 Loss 0.6244
Epoch 1 Batch 500 Loss 0.5929
Epoch 1 Loss 0.6676
Time taken for 1 epoch 664.2650091648102 sec

Epoch 2 Batch 0 Loss 0.4891
Epoch 2 Batch 100 Loss 0.5022
Epoch 2 Batch 200 Loss 0.4938
Epoch 2 Batch 300 Loss 0.4925
Epoch 2 Batch 400 Loss 0.4770
Epoch 2 Batch 500 Loss 0.4071
Epoch 2 Loss 0.4470
Time taken for 1 epoch 610.6391882896423 sec

Epoch 3 Batch 0 Loss 0.3210
Epoch 3 Batch 100 Loss 0.2990
Epoch 3 Batch 200 Loss 0.2952
Epoch 3 Batch 300 Loss 0.2576
Epoch 3 Batch 400 Loss 0.3162
Epoch 3 Batch 500 Loss 0.2096
Epoch 3 Loss 0.2888
Time taken for 1 epoch 608.4920086860657 sec

Epoch 4 Batch 0 Loss 0.1822
Epoch 4 Batch 100 Loss 0.2040
Epoch 4 Batch 200 Loss 0.2164
Epoch 4 Batch 300 Loss 0.2012
Epoch 4 Batch 400 Loss 0.1573
Epoch 4 Batch 500 Loss 0.1761
Epoch 4 Loss 0.1878
Time taken for 1 epoch 610.0751712322235 sec

Epoch 5 Batch 0 Loss 0.1

Оценка ошибки для модели с вниманием получилось очень близкой к ошибке без внимания. Сделаем предсказания переводов по аналогии с предыдущей моделью.

In [51]:
def evaluate(sentence):
  attention_plot = np.zeros((max_length_targ, max_length_inp))

  sentence = preprocess_sentence(sentence)

  inputs = [inp_lang.word_index[i] for i in sentence.split(' ')]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_inp,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  result = ''

  hidden = [tf.zeros((1, units))]
  enc_out, enc_hidden = encoder(inputs, hidden)

  dec_hidden = enc_hidden
  dec_input = tf.expand_dims([targ_lang.word_index['<start>']], 0)

  for t in range(max_length_targ):
    predictions, dec_hidden, attention_weights = decoder(dec_input,
                                                         dec_hidden,
                                                         enc_out)

    # storing the attention weights to plot later on
    attention_weights = tf.reshape(attention_weights, (-1, ))
    attention_plot[t] = attention_weights.numpy()

    predicted_id = tf.argmax(predictions[0]).numpy()

    result += targ_lang.index_word[predicted_id] + ' '

    if targ_lang.index_word[predicted_id] == '<end>':
      return result, sentence

    # the predicted ID is fed back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

  return result, sentence

In [52]:
# restoring the latest checkpoint in checkpoint_dir
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir_att))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fe6c020a190>

In [53]:
for i in range(30):
    translate(convert(inp_lang, input_tensor_val[i]), evaluate)
    print(f'Target translation: {convert(targ_lang, target_tensor_val[i])}')
    print('--------------------')

Input: <start> moja mama potrafi bardzo dobrze grać w golfa . <end>
Predicted translation: my mom can play very well . <end> 
Target translation: my mother can play golf very well .
--------------------
Input: <start> dowiedziałem się , jak rozwiązać problem . <end>
Predicted translation: i found out how to solve the problem . <end> 
Target translation: i found out how to solve the problem .
--------------------
Input: <start> to właśnie tego słownika szukałem . <end>
Predicted translation: this is the job for him . <end> 
Target translation: this is the very dictionary i've been looking for .
--------------------
Input: <start> potrzebuję samochodu . <end>
Predicted translation: i need a car . <end> 
Target translation: i need a car .
--------------------
Input: <start> cieszę się , że cię znalazłem . <end>
Predicted translation: i'm glad i found you . <end> 
Target translation: i'm glad i've found you .
--------------------
Input: <start> wychodzę rano . <end>
Predicted translation: 

Несмотря на то, что ошибка для обеих моделей практически идентичная, визуальный анализ позволяет утверждать, что предсказания модели с вниманием более точные, по крайней мере для первых 30 предложений валидационной выборки.

|    | Input text                                                                 | Prediction without attention                            |                  Prediction with attention                  |                          Target text                         |
|----|----------------------------------------------------------------------------|---------------------------------------------------------|-----------------------------------------------------------|------------------------------------------------------------|
| 1  | moja mama potrafi bardzo dobrze grać w golfa .                             | my mom takes a little more time .                       | _my mom can play very well ._                               | my mother can play golf very well .                          |
| 2  | dowiedziałem się , jak rozwiązać problem .                                 | i found the key , but it was dangerous .                | **i found out how to solve the problem .**                  | i found out how to solve the problem .                       |
| 3  | to właśnie tego słownika szukałem .                                        | that's the reason i was doing it .                      | this is the job for him .                                   | this is the very dictionary i've been looking for .          |
| 4  | potrzebuję samochodu .                                                     | **i need a car .**                                      | **i need a car .**                                          | i need a car .                                               |
| 5  | cieszę się , że cię znalazłem .                                            | _i'm glad to see you again ._                           | _i'm glad i found you ._                                    | i'm glad i've found you .                                    |
| 6  | wychodzę rano .                                                            | i am reading a book .                                   | _i'm leaving in the morning ._                              | i leave in the morning .                                     |
| 7  | on jest kapitanem drużyny piłkarskiej .                                    | he's a university student in his mouth .                | he is the team team .                                       | he is captain of the football team .                         |
| 8  | kiedy wydarzyło się to po raz pierwszy ?                                   | **when did it first start to happen ?**                 | when did it occur over this ?                               | when did it first start to happen ?                          |
| 9  | tom jest jedynym dorosłym , o którym mary wie , że nie potrafi prowadzić . | tom is the only one that mary doesn't really like tom . | tom is the only reason why mary knows how he doesn't know . | tom is the only adult mary knows who can't drive .           |
| 10 | chcę czasu , nie pieniędzy .                                               | i just want you to know dinner .                        | _i want time , not money ._                                 | i want time instead of money .                               |
| 11 | włączyliśmy radio .                                                        | the explosion of the rule are rotten .                  | they are giving the radio .                                 | we turned on the radio .                                     |
| 12 | umiesz jeździć na łyżwach ?                                                | can you ride a horse ?                                  | can you ride against compete ?                              | can you skate ?                                              |
| 13 | zrobimy wszystko , żeby odnaleźć toma .                                    | we're going to tell you about tom .                     | we've done all to look of tom .                             | we'll do everything we can to find tom .                     |
| 14 | podaj mi sól .                                                             | give me your sponge .                                   | **pass me the salt .**                                      | pass me the salt .                                           |
| 15 | już wychodzę .                                                             | i'm reading .                                           | i'm already .                                               | i'll be right out .                                          |
| 16 | znaleźliśmy klucze toma .                                                  | we found mary's umbrella .                              | **we found tom's keys .**                                   | we found tom's keys .                                        |
| 17 | mogę to zrobić .                                                           | _i can do it ._                                         | _i can do it ._                                             | i can do this .                                              |
| 18 | niektórzy ludzie przybierają na wadze kiedy rzucaja palenie .              | some people believe in eternal life after death .       | politicians people wear weight that raise .                 | some people gain weight when they stop smoking .             |
| 19 | prowadził niedbale i miał wypadek .                                        | the man left the restaurant without paying .            | he drove away and had a accident .                          | he drove carelessly and had an accident .                    |
| 20 | o czym tom mówił mary ?                                                    | what did tom make mary about ?                          | what did tom doing at ?                                     | what did tom talk to mary about ?                            |
| 21 | źle się to skończyło .                                                     | it was hard to deal .                                   | it is bad end .                                             | it ended poorly .                                            |
| 22 | mamy trzy godziny .                                                        | _we've got three hours ._                               | _we've got three hours ._                                   | we have three hours .                                        |
| 23 | książki mnie fascynują .                                                   | our visitors are going out .                            | my physics book am itchy .                                  | books fascinate me .                                         |
| 24 | przyszło mi do głowy , że on ukradł słownik .                              | he took me one had to pay the letter .                  | it would have told me that he stole the dictionary .        | it occurred to me that he must have stolen the dictionary .  |
| 25 | zwykle wolę płacić kartą , a nie gotówką .                                 | i usually prefer to ask people the truth .              | i usually enjoy hearing from the children , but i can't .   | i usually prefer to pay with credit card and not with cash . |
| 26 | lubię brać co wieczór gorącą kąpiel .                                      | i like to eat some vegetables by you .                  | i like to take a bath in the game .                         | i like to take a hot bath every night before bed .           |
| 27 | padał śnieg .                                                              | _it was snowing ._                                      | _it was snowing ._                                          | it snowed .                                                  |
| 28 | byłem na zakupach w sobotę .                                               | i was on the mountain side .                            | _i went shopping ._                                         | i went shopping last saturday .                              |
| 29 | zapach jedzenia sprawił , że zgłodniałem .                                 | it looks like the thief saw it .                        | i sincerely food made it .                                  | the smell of food made me hungry .                           |
| 30 | zaraz wracam .                                                             | _i'll be back right away ._                             | _i will be back soon ._                                     | i'll be right back .                                         |