In [12]:
import random
import time

import numpy as np

import torch
import torch.nn as nn

from torchtext import data

from seq2seq.transformers import helpers
from seq2seq import utils

In [24]:
# Evaluation settings: dataset location, sequence limits, checkpoint and flags

# --- Dataset ---
DATASET_NAME = 'miguel'  # one of: multi30k, miguel
DATASET_PATH = f'../.data/{DATASET_NAME}'
TS_RATIO = 1.0

# --- Special tokens ---
SOS_WORD, EOS_WORD = '<sos>', '<eos>'

# --- Sequence limits ---
# 100 tokens plus 2; the "+ 2" presumably reserves room for <sos>/<eos> (TODO confirm)
MAX_SRC_LENGTH = 100 + 2
MAX_TRG_LENGTH = 100 + 2
# Decoding limit at test time; a factor above 1.0 is not supported by all models
MAX_TRG_LENGTH_TEST = int(1.0 * MAX_TRG_LENGTH)

# --- Runtime ---
BATCH_SIZE = 32
ALLOW_DATA_PARALLELISM = False
CHECKPOINT_PATH = 'checkpoints/31.57_checkpoint_simple_transformer.pt'

# --- Flags ---
EVALUATE = True
BLUE = True  # NOTE(review): probably meant "BLEU"; name kept unchanged

In [14]:
# Define the source/target fields: identical preprocessing, only the
# tokenizer language differs (English source, Spanish target)
shared_field_args = dict(
    tokenize='spacy',
    init_token=SOS_WORD,
    eos_token=EOS_WORD,
    lower=True,
    batch_first=True,
)
SRC = data.Field(tokenizer_language="en", **shared_field_args)
TRG = data.Field(tokenizer_language="es", **shared_field_args)

# (attribute name, field) pairs used when loading the dataset
fields = [('src', SRC), ('trg', TRG)]

In [15]:
# Restore the previously saved vocabularies from the tokenized dataset folder
# NOTE(review): the files are .pkl — presumably unpickled by utils.load_vocabulary,
# so only load vocabularies from a trusted source
vocab_dir = f'{DATASET_PATH}/tokenized'
src_vocab = utils.load_vocabulary(f'{vocab_dir}/src_vocab.pkl')
trg_vocab = utils.load_vocabulary(f'{vocab_dir}/trg_vocab.pkl')
print("Vocabularies loaded!")

# Attach the loaded vocabularies to their fields
SRC.vocab, TRG.vocab = src_vocab, trg_vocab

# Report the vocabulary sizes
print(f"Unique tokens in source (en) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (es) vocabulary: {len(TRG.vocab)}")

Vocabularies loaded!
Unique tokens in source (en) vocabulary: 10000
Unique tokens in target (es) vocabulary: 10000


In [16]:
# Read the tokenized test split and report how many examples it contains
test_path = f"{DATASET_PATH}/tokenized/test.json"
test_data = utils.load_dataset(test_path, fields, TS_RATIO)
print(f"Number of testing examples: {len(test_data.examples)}")

100%|██████████| 3001/3001 [00:00<00:00, 33657.07it/s]


Load dataset: [Total time= 0.09099745750427246; Num. examples=3001]
Number of testing examples: 3001


In [17]:
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(utils.gpu_info())

# Set iterator (this is where words are replaced by indices, and <sos>/<eos> tokens are appended)
# shuffle=False: without it the iterator defaults to training mode and reshuffles the
# examples on every pass; a fixed order keeps the evaluation reproducible between runs.
test_iter = data.BucketIterator(test_data, batch_size=BATCH_SIZE, device=device,
                                sort=False, shuffle=False)

- Using GPU: True
- No. devices: 1
- Device name (0): GeForce GTX 1070


In [18]:
# Build the model through the project's s2s_6_transfomer builder module
# NOTE(review): this import would normally sit with the other imports at the top
from seq2seq.models import s2s_6_transfomer as builder

# Arguments are collected first so the configuration is readable at a glance
model_kwargs = dict(
    src_field=SRC,
    trg_field=TRG,
    max_src_len=MAX_SRC_LENGTH,
    max_trg_len=MAX_TRG_LENGTH,
    device=device,
    data_parallelism=ALLOW_DATA_PARALLELISM,
)
model = builder.make_model(**model_kwargs)

The model has 11,695,888 trainable parameters
Data parallelism: False


In [19]:
# Cross-entropy loss that skips positions whose target token is <pad>
pad_token = TRG.pad_token
TRG_PAD_IDX = TRG.vocab.stoi[pad_token]  # vocabulary index of the padding token
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

In [20]:
# Load best model
# map_location remaps the stored tensors onto the active device, so a checkpoint that
# was saved from a GPU can still be restored when `device` falls back to the CPU.
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))
print("Model loaded!")

Model loaded!


In [21]:
# Compute the loss of the loaded model on the test iterator and print a summary

start = time.time()  # reference timestamp handed to the report below
test_loss = helpers.evaluate(model, test_iter, criterion)
helpers.summary_report(
    testing=True,
    start_time=start,
    test_loss=test_loss,
)

100%|██████████| 94/94 [00:01<00:00, 54.18it/s]


Epoch: 00 | Time: 0m 1s
	 Test Loss: 1.884 |  Test PPL:   6.581


In [22]:
# BLEU on the test set, together with the wall-clock time the computation took

start = time.time()
bleu_score = utils.calculate_bleu(model, test_iter, max_trg_len=MAX_TRG_LENGTH_TEST)
end_time = time.time()

# The elapsed time is unpacked as minutes and seconds for the report line;
# the score is multiplied by 100 for display
elapsed = utils.epoch_time(start, end_time)
epoch_mins, epoch_secs = elapsed
print(f'BLEU score = {bleu_score * 100:.2f} | Time: {epoch_mins}m {epoch_secs}s')


100%|██████████| 3001/3001 [04:27<00:00, 11.22it/s]


BLEU score = 31.57 | Time: 4m 31s
