In [1]:
!pip install torch



In [2]:
from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import re
import random

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
torch.cuda.get_device_name()

'Tesla T4'

In [4]:
sos_token = 0 # start of sequence token
eos_token = 1 # end of sequence token
max_length = 20

class Lang:
    def __init__(self):
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2 # Counting SOS and EOS token

    # split sentence to word and add to container
    def add_sentence(self, sentence):
        for word in sentence.split(" "):
            self.add_word(word)

    # if word is not in container, it will be added to it else we update word counter
    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] +=1

In [5]:
def normalize_sentence1(df, lang1):
    sentence = df[lang1].str.lower()
    sentence = sentence.str.replace('[^A-Za-z\s]+', "")
    sentence = sentence.str.normalize('NFD')
    sentence = sentence.str.encode("ascii", errors="ignore").str.decode("utf-8")
    return sentence

def normalize_sentence2(df, lang2):
    sentence = df[lang2].str.lower()
    sentence = sentence.str.replace('[^ぁ-んァ-ン一-龯ー 　]+', "")
    sentence = sentence.str.normalize('NFKC')
    return sentence

def read_sentence(df, lang1, lang2):
    sentence1 = normalize_sentence1(df, lang1)
    sentence2 = normalize_sentence2(df, lang2)
    return sentence1, sentence2

def read_file(loc, lang1, lang2):
    df = pd.read_csv(loc, delimiter="\t", header=None, names=[lang1, lang2, "extra"])
    df.drop(["extra"], inplace=True, axis=1)
    return df

def process_data(lang1, lang2):
    df = read_file(f'/content/{lang1}-{lang2}.txt', lang1, lang2)
    print(f"Read {len(df)} Sentence Pairs")
    sentence1, sentence2 = read_sentence(df, lang1, lang2)
    source = Lang()
    target = Lang()

    pairs = []
    for i in range(len(df)):
        if len(sentence1[i].split(" ")) < max_length and len(sentence2[i].split(" ")) < max_length:
            full = [sentence1[i], sentence2[i]]
            source.add_sentence(sentence1[i])
            target.add_sentence(sentence2[i])
            pairs.append(full)

    return source, target, pairs

In [6]:
def indexes_from_sentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(" ")]

def tensor_from_sentence(lang, sentence):
    indexes = indexes_from_sentence(lang, sentence)
    indexes.append(eos_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensors_from_pair(input_lang, output_lang, pair):
    input_tensor = tensor_from_sentence(input_lang, pair[0])
    target_tensor = tensor_from_sentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [7]:
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, embedded_dim, num_layers):
        super(Encoder, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.embedded_dim = embedded_dim
        self.num_layers = num_layers

        self.embedding = nn.Embedding(input_dim, self.embedded_dim)
        self.gru = nn.GRU(self.embedded_dim, self.hidden_dim, num_layers = self.num_layers)

    def forward(self, src):
        embedded = self.embedding(src). view(1, 1, -1)
        outputs, hidden = self.gru(embedded)
        return outputs, hidden

In [8]:
class Decoder(nn.Module):
    def __init__(self, output_dim, hidden_dim, embedded_dim, num_layers):
        super(Decoder, self).__init__()
        self.embedded_dim = embedded_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.embedding = nn.Embedding(self.output_dim, self.embedded_dim)
        self.gru = nn.GRU(self.embedded_dim, self.hidden_dim, num_layers=self.num_layers)
        self.out = nn.Linear(self.hidden_dim, output_dim)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        input = input.view(-1, 1)
        embedded = F.relu(self.embedding(input))
        output, hidden = self.gru(embedded, hidden)
        prediction = self.softmax(self.out(output[0]))
        return prediction, hidden

In [9]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device, max_length=max_length):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        input_length = source.size(0)
        batch_size = target.shape[1]
        target_length = target.shape[0]
        vocab_size = self.decoder.output_dim
        outputs = torch.zeros(target_length, batch_size, vocab_size).to(self.device)

        for i in range(input_length):
            _, encounter_hidden = self.encoder(source[i])
        decoder_hidden = encounter_hidden.to(device)
        decoder_input = torch.tensor([sos_token], device=device)
        for t in range(target_length):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[t] = decoder_output
            teacher_force = random.random() < teacher_forcing_ratio
            _, topi = decoder_output.topk(1)
            input = (target[t] if teacher_force else topi)
            if (teacher_force == False and input.item() == eos_token):
                break

        return outputs

In [10]:
teacher_forcing_ratio = 0.5

def clacModel(model, input_tensor, target_tensor, model_optimizer, criterion):
    model_optimizer.zero_grad()
    # input_length = input_tensor.size(0)
    loss = 0
    epoch_loss = 0

    output = model(input_tensor, target_tensor)

    num_iter = output.size(0)

    for ot in range(num_iter):
        loss += criterion(output[ot], target_tensor[ot])

    loss.backward()
    model_optimizer.step()
    epoch_loss = loss.item()/num_iter

    return epoch_loss

In [11]:
def train_model(model, source, target, pairs, num_iteration=20000):
    model.train()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.NLLLoss() # best used in conjunction with log softmax activation, i.e., negative log likelihood loss
    total_loss_iterations = 0

    training_pairs = [tensors_from_pair(source, target, random.choice(pairs)) for i in range(num_iteration)]

    for iter in tqdm(range(1, num_iteration+1)):
        training_pair = training_pairs[iter-1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = clacModel(model, input_tensor, target_tensor, optimizer, criterion)

        total_loss_iterations += loss

        if iter%5000 == 0:
            average_loss = total_loss_iterations/5000
            total_loss_iterations = 0
            print(f"{iter} - {average_loss:.4f}")

    torch.save(model.state_dict(), 'seq2seq.pt')

    return model

In [12]:
def evaluate(model, input_lang, output_lang, sentences, max_length=max_length):
    with torch.no_grad():
        input_tensor = tensor_from_sentence(input_lang, sentences[0])
        output_tensor = tensor_from_sentence(output_lang, sentences[1])

        decoded_words = []

        output = model(input_tensor, output_tensor)
        for ot in range(output.size(0)):
            _, topi = output[ot].topk(1)
            print(topi)
            if topi[0].item() == eos_token:
                decoded_words.append("<EOS>")
                break
            else:
                decoded_words.append(output_lang.index2word[topi[0].item()])

    return decoded_words

def evaluate_randomly(model, source, target, pairs, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print(f"Source: {pair[0]}")
        print(f"Target: {pair[1]}")

        output_words = evaluate(model, source, target, pair)

        output_sentence = " ".join(output_words)

        print(f"Predicted: {output_sentence}")

In [13]:
lang1 = "eng"
lang2 = "jp"

source, target, pairs = process_data(lang1, lang2)

Read 104785 Sentence Pairs


  sentence = sentence.str.replace('[^A-Za-z\s]+', "")
  sentence = sentence.str.replace('[^ぁ-んァ-ン一-龯ー 　]+', "")


In [14]:
source.__dict__["word2index"]["hi"], source.__dict__["n_words"] # english words

(3, 11664)

In [15]:
target.__dict__["index2word"][11], target.__dict__["n_words"] # japanese words

('ワォ', 86712)

In [16]:
pairs[:10]

[['go', '行け'],
 ['go', '行きなさい'],
 ['hi', 'こんにちは'],
 ['hi', 'もしもし'],
 ['hi', 'やっほー'],
 ['hi', 'こんにちは'],
 ['run', '走れ'],
 ['run', '走って'],
 ['who', '誰'],
 ['wow', 'すごい']]

In [17]:
randomize = random.choice(pairs)
print(f"Random Sentence: {randomize}")

Random Sentence: ['i have but one wish', '僕のお願いは一つだけだよ']


In [18]:
input_size = source.n_words
output_size = target.n_words

print(f"Input: {input_size}, Output: {output_size}")

Input: 11664, Output: 86712


In [19]:
embed_size = 256
hidden_size = 512
num_layers = 1
num_iteration = 1000000

encoder = Encoder(input_size, hidden_size, embed_size, num_layers)
decoder = Decoder(output_size, hidden_size, embed_size, num_layers)

model = Seq2Seq(encoder, decoder, device).to(device)

print(encoder)
print(decoder)

Encoder(
  (embedding): Embedding(11664, 256)
  (gru): GRU(256, 512)
)
Decoder(
  (embedding): Embedding(86712, 256)
  (gru): GRU(256, 512)
  (out): Linear(in_features=512, out_features=86712, bias=True)
  (softmax): LogSoftmax(dim=1)
)


In [20]:
model = train_model(model, source, target, pairs, num_iteration)

  1%|          | 5009/1000000 [01:09<3:56:56, 69.99it/s]

5000 - 5.7097


  1%|          | 10008/1000000 [02:17<3:44:32, 73.48it/s]

10000 - 5.6990


  2%|▏         | 15013/1000000 [03:25<3:16:27, 83.56it/s]

15000 - 5.6955


  2%|▏         | 20012/1000000 [04:33<3:30:23, 77.63it/s]

20000 - 5.6954


  3%|▎         | 25013/1000000 [05:41<3:28:47, 77.83it/s]

25000 - 5.6886


  3%|▎         | 30009/1000000 [06:49<3:35:58, 74.86it/s]

30000 - 5.6874


  4%|▎         | 35013/1000000 [07:57<3:43:22, 72.00it/s]

35000 - 5.6934


  4%|▍         | 40012/1000000 [09:04<3:56:21, 67.69it/s]

40000 - 5.6836


  5%|▍         | 45014/1000000 [10:13<3:47:44, 69.89it/s]

45000 - 5.6875


  5%|▌         | 50011/1000000 [11:21<3:24:44, 77.33it/s]

50000 - 5.6862


  6%|▌         | 55017/1000000 [12:28<3:11:07, 82.40it/s]

55000 - 5.6873


  6%|▌         | 60009/1000000 [13:37<3:21:28, 77.76it/s]

60000 - 5.6835


  7%|▋         | 65008/1000000 [14:46<3:53:16, 66.80it/s]

65000 - 5.6817


  7%|▋         | 70009/1000000 [15:55<3:36:36, 71.56it/s]

70000 - 5.6839


  8%|▊         | 75010/1000000 [17:02<3:20:00, 77.08it/s]

75000 - 5.6822


  8%|▊         | 80015/1000000 [18:10<3:14:06, 78.99it/s]

80000 - 5.6820


  9%|▊         | 85011/1000000 [19:19<3:22:02, 75.48it/s]

85000 - 5.6799


  9%|▉         | 90012/1000000 [20:27<3:40:48, 68.69it/s]

90000 - 5.6834


 10%|▉         | 95007/1000000 [21:35<3:32:20, 71.03it/s]

95000 - 5.6823


 10%|█         | 100012/1000000 [22:44<3:15:19, 76.79it/s]

100000 - 5.6784


 11%|█         | 105014/1000000 [23:52<3:11:25, 77.92it/s]

105000 - 5.6746


 11%|█         | 110014/1000000 [25:01<3:22:48, 73.14it/s]

110000 - 5.6744


 12%|█▏        | 115011/1000000 [26:10<3:33:30, 69.08it/s]

115000 - 5.6762


 12%|█▏        | 120014/1000000 [27:18<3:07:39, 78.16it/s]

120000 - 5.6756


 13%|█▎        | 125007/1000000 [28:25<3:02:17, 80.00it/s]

125000 - 5.6748


 13%|█▎        | 130014/1000000 [29:33<3:21:39, 71.91it/s]

130000 - 5.6761


 14%|█▎        | 135008/1000000 [30:41<3:12:35, 74.85it/s]

135000 - 5.6753


 14%|█▍        | 140015/1000000 [31:49<3:10:37, 75.19it/s]

140000 - 5.6755


 15%|█▍        | 145011/1000000 [32:58<3:22:08, 70.50it/s]

145000 - 5.6741


 15%|█▌        | 150014/1000000 [34:08<3:10:18, 74.44it/s]

150000 - 5.6724


 16%|█▌        | 155012/1000000 [35:17<3:17:12, 71.41it/s]

155000 - 5.6697


 16%|█▌        | 160014/1000000 [36:26<3:02:03, 76.90it/s]

160000 - 5.6820


 17%|█▋        | 165010/1000000 [37:35<3:07:38, 74.17it/s]

165000 - 5.6719


 17%|█▋        | 170010/1000000 [38:43<3:04:35, 74.94it/s]

170000 - 5.6734


 18%|█▊        | 175009/1000000 [39:51<2:56:53, 77.73it/s]

175000 - 5.6725


 18%|█▊        | 180012/1000000 [41:00<3:03:10, 74.61it/s]

180000 - 5.6710


 19%|█▊        | 185008/1000000 [42:11<3:39:14, 61.96it/s]

185000 - 5.6684


 19%|█▉        | 190011/1000000 [43:21<2:50:27, 79.19it/s]

190000 - 5.6732


 20%|█▉        | 195007/1000000 [44:31<3:09:04, 70.96it/s]

195000 - 5.6689


 20%|██        | 200010/1000000 [45:43<3:17:00, 67.68it/s]

200000 - 5.6684


 21%|██        | 205009/1000000 [46:51<3:04:16, 71.90it/s]

205000 - 5.6768


 21%|██        | 210014/1000000 [48:00<3:04:00, 71.55it/s]

210000 - 5.6716


 22%|██▏       | 215015/1000000 [49:08<2:51:31, 76.27it/s]

215000 - 5.6706


 22%|██▏       | 220009/1000000 [50:18<3:37:00, 59.90it/s]

220000 - 5.6725


 23%|██▎       | 225008/1000000 [51:29<2:54:46, 73.91it/s]

225000 - 5.6751


 23%|██▎       | 230012/1000000 [52:38<3:08:21, 68.13it/s]

230000 - 5.6648


 24%|██▎       | 235013/1000000 [53:53<3:13:53, 65.76it/s]

235000 - 5.6707


 24%|██▍       | 240011/1000000 [55:09<2:47:14, 75.74it/s]

240000 - 5.6683


 25%|██▍       | 245007/1000000 [56:20<3:11:44, 65.62it/s]

245000 - 5.6663


 25%|██▌       | 250014/1000000 [57:29<2:44:09, 76.15it/s]

250000 - 5.6690


 26%|██▌       | 255012/1000000 [58:37<2:46:41, 74.49it/s]

255000 - 5.6658


 26%|██▌       | 260012/1000000 [59:50<2:33:21, 80.42it/s]

260000 - 5.6644


 27%|██▋       | 265010/1000000 [1:01:03<3:21:43, 60.73it/s]

265000 - 5.6669


 27%|██▋       | 270007/1000000 [1:02:18<3:10:52, 63.74it/s]

270000 - 5.6665


 28%|██▊       | 275008/1000000 [1:03:31<3:20:24, 60.29it/s]

275000 - 5.6668


 28%|██▊       | 280011/1000000 [1:04:49<3:36:37, 55.39it/s]

280000 - 5.6627


 29%|██▊       | 285008/1000000 [1:06:10<2:31:01, 78.90it/s]

285000 - 5.6690


 29%|██▉       | 290012/1000000 [1:07:23<3:17:32, 59.90it/s]

290000 - 5.6684


 30%|██▉       | 295014/1000000 [1:08:33<2:47:21, 70.21it/s]

295000 - 5.6691


 30%|███       | 300012/1000000 [1:09:43<2:41:59, 72.02it/s]

300000 - 5.6681


 31%|███       | 305010/1000000 [1:10:59<3:26:26, 56.11it/s]

305000 - 5.6708


 31%|███       | 310012/1000000 [1:12:12<2:43:10, 70.47it/s]

310000 - 5.6669


 32%|███▏      | 315011/1000000 [1:13:20<2:34:50, 73.73it/s]

315000 - 5.6668


 32%|███▏      | 320011/1000000 [1:14:32<3:11:15, 59.25it/s]

320000 - 5.6658


 33%|███▎      | 325014/1000000 [1:15:43<2:27:15, 76.39it/s]

325000 - 5.6672


 33%|███▎      | 330009/1000000 [1:16:53<2:51:14, 65.21it/s]

330000 - 5.6645


 34%|███▎      | 335008/1000000 [1:18:07<3:06:17, 59.50it/s]

335000 - 5.6679


 34%|███▍      | 340006/1000000 [1:19:28<3:05:07, 59.42it/s]

340000 - 5.6611


 35%|███▍      | 345007/1000000 [1:20:48<3:01:36, 60.11it/s]

345000 - 5.6686


 35%|███▌      | 350007/1000000 [1:22:02<3:00:15, 60.10it/s]

350000 - 5.6618


 36%|███▌      | 355010/1000000 [1:23:19<3:00:53, 59.43it/s]

355000 - 5.6660


 36%|███▌      | 360009/1000000 [1:24:35<2:35:05, 68.77it/s]

360000 - 5.6703


 37%|███▋      | 365007/1000000 [1:25:45<2:56:00, 60.13it/s]

365000 - 5.6631


 37%|███▋      | 370008/1000000 [1:26:53<2:19:01, 75.52it/s]

370000 - 5.6653


 38%|███▊      | 375009/1000000 [1:28:04<2:23:19, 72.67it/s]

375000 - 5.6612


 38%|███▊      | 380013/1000000 [1:29:13<2:16:05, 75.92it/s]

380000 - 5.6672


 39%|███▊      | 385011/1000000 [1:30:21<2:34:36, 66.29it/s]

385000 - 5.6624


 39%|███▉      | 390006/1000000 [1:31:35<2:52:14, 59.02it/s]

390000 - 5.6639


 40%|███▉      | 395012/1000000 [1:32:52<2:39:39, 63.15it/s]

395000 - 5.6696


 40%|████      | 400011/1000000 [1:34:01<2:16:30, 73.25it/s]

400000 - 5.6654


 41%|████      | 405010/1000000 [1:35:14<2:13:41, 74.18it/s]

405000 - 5.6693


 41%|████      | 410009/1000000 [1:36:23<2:46:53, 58.92it/s]

410000 - 5.6614


 42%|████▏     | 415008/1000000 [1:37:44<2:19:30, 69.88it/s]

415000 - 5.6635


 42%|████▏     | 420010/1000000 [1:38:59<2:05:03, 77.30it/s]

420000 - 5.6618


 43%|████▎     | 425011/1000000 [1:40:13<2:50:47, 56.11it/s]

425000 - 5.6578


 43%|████▎     | 430007/1000000 [1:41:34<2:35:03, 61.27it/s]

430000 - 5.6684


 44%|████▎     | 435009/1000000 [1:42:45<2:42:44, 57.86it/s]

435000 - 5.6691


 44%|████▍     | 440010/1000000 [1:44:03<2:37:46, 59.15it/s]

440000 - 5.6647


 45%|████▍     | 445007/1000000 [1:45:24<2:07:39, 72.46it/s]

445000 - 5.6657


 45%|████▌     | 450010/1000000 [1:46:42<2:04:29, 73.63it/s]

450000 - 5.6631


 46%|████▌     | 455012/1000000 [1:47:54<2:13:51, 67.86it/s]

455000 - 5.6579


 46%|████▌     | 460008/1000000 [1:49:11<2:37:34, 57.12it/s]

460000 - 5.6620


 47%|████▋     | 465009/1000000 [1:50:32<2:26:03, 61.05it/s]

465000 - 5.6570


 47%|████▋     | 470012/1000000 [1:51:57<2:26:46, 60.18it/s]

470000 - 5.6615


 48%|████▊     | 475009/1000000 [1:53:17<2:24:20, 60.62it/s]

475000 - 5.6618


 48%|████▊     | 480011/1000000 [1:54:42<2:25:09, 59.70it/s]

480000 - 5.6658


 49%|████▊     | 485011/1000000 [1:56:05<2:31:43, 56.57it/s]

485000 - 5.6602


 49%|████▉     | 490013/1000000 [1:57:20<1:54:24, 74.29it/s]

490000 - 5.6621


 50%|████▉     | 495007/1000000 [1:58:33<2:24:06, 58.41it/s]

495000 - 5.6658


 50%|█████     | 500007/1000000 [1:59:53<2:18:23, 60.22it/s]

500000 - 5.6611


 51%|█████     | 505008/1000000 [2:01:12<2:18:02, 59.76it/s]

505000 - 5.6594


 51%|█████     | 510010/1000000 [2:02:34<2:27:54, 55.21it/s]

510000 - 5.6533


 52%|█████▏    | 515008/1000000 [2:03:49<1:58:29, 68.22it/s]

515000 - 5.6626


 52%|█████▏    | 520010/1000000 [2:05:00<2:23:36, 55.70it/s]

520000 - 5.6647


 53%|█████▎    | 525009/1000000 [2:06:21<2:12:37, 59.69it/s]

525000 - 5.6652


 53%|█████▎    | 530011/1000000 [2:07:41<1:42:01, 76.78it/s]

530000 - 5.6611


 54%|█████▎    | 535013/1000000 [2:08:57<1:43:20, 74.99it/s]

535000 - 5.6663


 54%|█████▍    | 540006/1000000 [2:10:21<2:08:44, 59.55it/s]

540000 - 5.6597


 55%|█████▍    | 545011/1000000 [2:11:44<2:08:52, 58.84it/s]

545000 - 5.6616


 55%|█████▌    | 550007/1000000 [2:13:08<2:18:46, 54.04it/s]

550000 - 5.6658


 56%|█████▌    | 555012/1000000 [2:14:28<2:04:13, 59.70it/s]

555000 - 5.6591


 56%|█████▌    | 560011/1000000 [2:15:51<2:02:17, 59.96it/s]

560000 - 5.6657


 57%|█████▋    | 565011/1000000 [2:17:12<2:10:54, 55.38it/s]

565000 - 5.6610


 57%|█████▋    | 570011/1000000 [2:18:31<1:58:55, 60.26it/s]

570000 - 5.6643


 58%|█████▊    | 575006/1000000 [2:19:46<1:56:35, 60.75it/s]

575000 - 5.6589


 58%|█████▊    | 580010/1000000 [2:21:10<1:58:24, 59.11it/s]

580000 - 5.6593


 59%|█████▊    | 585012/1000000 [2:22:31<1:54:30, 60.40it/s]

585000 - 5.6646


 59%|█████▉    | 590006/1000000 [2:23:46<1:52:44, 60.61it/s]

590000 - 5.6623


 60%|█████▉    | 595010/1000000 [2:25:06<2:03:58, 54.44it/s]

595000 - 5.6629


 60%|██████    | 600009/1000000 [2:26:30<1:51:50, 59.61it/s]

600000 - 5.6643


 61%|██████    | 605009/1000000 [2:27:54<1:48:35, 60.62it/s]

605000 - 5.6633


 61%|██████    | 610008/1000000 [2:29:17<1:48:36, 59.84it/s]

610000 - 5.6570


 62%|██████▏   | 615012/1000000 [2:30:41<1:46:14, 60.40it/s]

615000 - 5.6617


 62%|██████▏   | 620010/1000000 [2:32:05<1:44:37, 60.53it/s]

620000 - 5.6604


 63%|██████▎   | 625007/1000000 [2:33:28<1:43:06, 60.61it/s]

625000 - 5.6628


 63%|██████▎   | 630009/1000000 [2:34:51<1:41:25, 60.80it/s]

630000 - 5.6612


 64%|██████▎   | 635010/1000000 [2:36:14<1:51:58, 54.32it/s]

635000 - 5.6591


 64%|██████▍   | 640010/1000000 [2:37:38<1:40:03, 59.97it/s]

640000 - 5.6641


 65%|██████▍   | 645009/1000000 [2:39:02<1:37:22, 60.76it/s]

645000 - 5.6539


 65%|██████▌   | 650010/1000000 [2:40:20<1:16:59, 75.76it/s]

650000 - 5.6641


 66%|██████▌   | 655009/1000000 [2:41:34<1:30:57, 63.22it/s]

655000 - 5.6637


 66%|██████▌   | 660006/1000000 [2:42:55<1:40:20, 56.47it/s]

660000 - 5.6608


 67%|██████▋   | 665010/1000000 [2:44:18<1:33:02, 60.00it/s]

665000 - 5.6584


 67%|██████▋   | 670012/1000000 [2:45:41<1:09:27, 79.19it/s]

670000 - 5.6634


 68%|██████▊   | 675011/1000000 [2:47:01<1:28:52, 60.94it/s]

675000 - 5.6639


 68%|██████▊   | 680012/1000000 [2:48:24<1:31:08, 58.51it/s]

680000 - 5.6624


 69%|██████▊   | 685009/1000000 [2:49:48<1:27:22, 60.09it/s]

685000 - 5.6547


 69%|██████▉   | 690011/1000000 [2:51:12<1:25:24, 60.49it/s]

690000 - 5.6594


 70%|██████▉   | 695007/1000000 [2:52:34<1:24:04, 60.46it/s]

695000 - 5.6624


 70%|███████   | 700007/1000000 [2:53:58<1:22:57, 60.27it/s]

700000 - 5.6612


 71%|███████   | 705009/1000000 [2:55:22<1:21:19, 60.45it/s]

705000 - 5.6625


 71%|███████   | 710013/1000000 [2:56:42<1:13:53, 65.40it/s]

710000 - 5.6591


 72%|███████▏  | 715011/1000000 [2:58:01<1:19:05, 60.06it/s]

715000 - 5.6627


 72%|███████▏  | 720011/1000000 [2:59:25<1:17:04, 60.54it/s]

720000 - 5.6594


 73%|███████▎  | 725009/1000000 [3:00:48<1:16:45, 59.71it/s]

725000 - 5.6595


 73%|███████▎  | 730011/1000000 [3:02:06<1:15:15, 59.79it/s]

730000 - 5.6580


 74%|███████▎  | 735006/1000000 [3:03:30<1:12:39, 60.78it/s]

735000 - 5.6578


 74%|███████▍  | 740010/1000000 [3:04:54<1:11:51, 60.30it/s]

740000 - 5.6603


 75%|███████▍  | 745007/1000000 [3:06:18<57:28, 73.94it/s]

745000 - 5.6613


 75%|███████▌  | 750012/1000000 [3:07:38<1:09:03, 60.33it/s]

750000 - 5.6596


 76%|███████▌  | 755012/1000000 [3:09:01<1:08:15, 59.81it/s]

755000 - 5.6574


 76%|███████▌  | 760009/1000000 [3:10:25<1:05:41, 60.88it/s]

760000 - 5.6622


 77%|███████▋  | 765009/1000000 [3:11:44<1:05:24, 59.88it/s]

765000 - 5.6629


 77%|███████▋  | 770008/1000000 [3:13:06<54:34, 70.23it/s]

770000 - 5.6561


 78%|███████▊  | 775012/1000000 [3:14:17<1:02:19, 60.17it/s]

775000 - 5.6652


 78%|███████▊  | 780010/1000000 [3:15:40<56:47, 64.56it/s]

780000 - 5.6603


 79%|███████▊  | 785008/1000000 [3:17:04<59:05, 60.64it/s]

785000 - 5.6589


 79%|███████▉  | 790008/1000000 [3:18:28<57:18, 61.07it/s]

790000 - 5.6633


 80%|███████▉  | 795012/1000000 [3:19:52<56:32, 60.43it/s]

795000 - 5.6541


 80%|████████  | 800009/1000000 [3:21:16<54:35, 61.06it/s]

800000 - 5.6588


 81%|████████  | 805006/1000000 [3:22:37<49:48, 65.25it/s]

805000 - 5.6591


 81%|████████  | 810012/1000000 [3:23:51<47:36, 66.50it/s]

810000 - 5.6577


 82%|████████▏ | 815006/1000000 [3:25:10<50:31, 61.02it/s]

815000 - 5.6616


 82%|████████▏ | 820010/1000000 [3:26:32<48:32, 61.81it/s]

820000 - 5.6564


 83%|████████▎ | 825009/1000000 [3:27:56<47:39, 61.19it/s]

825000 - 5.6634


 83%|████████▎ | 830009/1000000 [3:29:18<51:03, 55.49it/s]

830000 - 5.6618


 84%|████████▎ | 835008/1000000 [3:30:40<45:47, 60.05it/s]

835000 - 5.6655


 84%|████████▍ | 840012/1000000 [3:32:03<44:01, 60.56it/s]

840000 - 5.6596


 85%|████████▍ | 845012/1000000 [3:33:27<42:57, 60.13it/s]

845000 - 5.6525


 85%|████████▌ | 850005/1000000 [3:34:51<41:30, 60.22it/s]

850000 - 5.6551


 86%|████████▌ | 855008/1000000 [3:36:14<40:32, 59.61it/s]

855000 - 5.6611


 86%|████████▌ | 860010/1000000 [3:37:38<39:02, 59.77it/s]

860000 - 5.6584


 87%|████████▋ | 865011/1000000 [3:39:02<40:48, 55.13it/s]

865000 - 5.6623


 87%|████████▋ | 870010/1000000 [3:40:22<35:44, 60.63it/s]

870000 - 5.6579


 88%|████████▊ | 875009/1000000 [3:41:46<34:41, 60.04it/s]

875000 - 5.6581


 88%|████████▊ | 880011/1000000 [3:43:10<33:11, 60.25it/s]

880000 - 5.6627


 89%|████████▊ | 885007/1000000 [3:44:33<31:27, 60.92it/s]

885000 - 5.6578


 89%|████████▉ | 890009/1000000 [3:45:57<28:34, 64.14it/s]

890000 - 5.6565


 90%|████████▉ | 895008/1000000 [3:47:19<28:59, 60.37it/s]

895000 - 5.6582


 90%|█████████ | 900006/1000000 [3:48:43<27:24, 60.79it/s]

900000 - 5.6492


 91%|█████████ | 905010/1000000 [3:50:04<26:27, 59.85it/s]

905000 - 5.6534


 91%|█████████ | 910010/1000000 [3:51:28<24:58, 60.07it/s]

910000 - 5.6591


 92%|█████████▏| 915009/1000000 [3:52:52<25:39, 55.21it/s]

915000 - 5.6517


 92%|█████████▏| 920006/1000000 [3:54:16<22:51, 58.31it/s]

920000 - 5.6578


 93%|█████████▎| 925011/1000000 [3:55:40<20:48, 60.08it/s]

925000 - 5.6567


 93%|█████████▎| 930012/1000000 [3:57:04<19:17, 60.49it/s]

930000 - 5.6619


 94%|█████████▎| 935012/1000000 [3:58:28<17:47, 60.88it/s]

935000 - 5.6533


 94%|█████████▍| 940011/1000000 [3:59:52<16:34, 60.33it/s]

940000 - 5.6548


 95%|█████████▍| 945008/1000000 [4:01:14<15:05, 60.72it/s]

945000 - 5.6565


 95%|█████████▌| 950007/1000000 [4:02:38<14:43, 56.58it/s]

950000 - 5.6575


 96%|█████████▌| 955011/1000000 [4:04:00<12:20, 60.78it/s]

955000 - 5.6572


 96%|█████████▌| 960009/1000000 [4:05:23<11:13, 59.41it/s]

960000 - 5.6589


 97%|█████████▋| 965008/1000000 [4:06:47<09:39, 60.38it/s]

965000 - 5.6606


 97%|█████████▋| 970012/1000000 [4:08:11<08:17, 60.24it/s]

970000 - 5.6594


 98%|█████████▊| 975012/1000000 [4:09:35<06:48, 61.16it/s]

975000 - 5.6576


 98%|█████████▊| 980007/1000000 [4:10:59<05:30, 60.43it/s]

980000 - 5.6601


 99%|█████████▊| 985006/1000000 [4:12:20<04:07, 60.54it/s]

985000 - 5.6568


 99%|█████████▉| 990010/1000000 [4:13:44<02:45, 60.19it/s]

990000 - 5.6525


100%|█████████▉| 995012/1000000 [4:15:07<01:22, 60.60it/s]

995000 - 5.6597


100%|██████████| 1000000/1000000 [4:16:31<00:00, 64.97it/s]


1000000 - 5.6566


In [21]:
evaluate_randomly(model, source, target, pairs)

Source: i like taking pictures
Target: 写真を撮るのが好きです
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: its impossible for me to finish my term paper by tomorrow
Target: 明日までに期末レポートを仕上げるなんて不可能だよ
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: tom lives here alone
Target: トムはここに独りで住んでいる
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: please put this box away for me
Target: この箱を片付けておいてくれ
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: i woke up in the hospital
Target: 目が覚めると病院だった
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: we heard a knock on the door
Target: ドアをノックする音が聞こえました
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda:0')
Predicted: 落ち着いて <EOS>
Source: im too busy to go
Target: 私は忙しくて行けない
tensor([[62]], device='cuda:0')
tensor([[1]], device='cuda: