In [1]:
import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
import torch.optim as optim

import torchdiffeq

from tensorboard_utils import Tensorboard
from tensorboard_utils import tensorboard_event_accumulator

import transformer.Constants as Constants
from transformer.Layers import EncoderLayer, DecoderLayer
from transformer.Modules import ScaledDotProductAttention
from transformer.Models import Decoder, get_attn_key_pad_mask, get_non_pad_mask, get_sinusoid_encoding_table
from transformer.SubLayers import PositionwiseFeedForward

import dataset

import model_process
import checkpoints
from node_transformer import NodeTransformer

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook. The commented-out line
# below is the interactive `notebook` backend, kept as an alternative.
#%matplotlib notebook
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figures.
%config InlineBackend.figure_format = 'retina'

# Record the PyTorch version this notebook was run with.
print("Torch Version", torch.__version__)

# Enable autoreload in mode 2 so that edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Torch Version 1.1.0


In [2]:
# Seed every RNG this notebook has imported so that runs are repeatable.
seed = 1
torch.manual_seed(seed)
np.random.seed(seed)

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# on a machine without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device", device)

device cuda


In [3]:
import os

# Location of the preprocessed Multi30k file. Set the MULTI30K_PATH
# environment variable to point elsewhere; the default is the original
# hard-coded location.
DATA_PATH = os.environ.get(
    "MULTI30K_PATH",
    "/home/mandubian/datasets/multi30k/multi30k.atok.low.pt",
)

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load files from a trusted source.
data = torch.load(DATA_PATH)

In [4]:
# Maximum token sequence length, read from the settings object stored in the
# loaded data.
max_token_seq_len = getattr(data["settings"], "max_token_seq_len")
print(max_token_seq_len)

52


In [5]:
# Build the training and validation loaders from the loaded data.
train_loader, val_loader = dataset.prepare_dataloaders(
    data,
    batch_size=16,
)

### Create an experiment with a name and a unique ID

In [6]:
# Experiment name and run identifier; both are used below to locate the
# checkpoint and to name the prediction file.
exp_name, unique_id = "transformer_6_layers_multi30k", "2019-06-07_1000"


### Create Model

In [7]:
# Placeholder so the model-creation cell can check for, and delete, an
# existing instance before building a new one.
model = None

In [8]:

src_vocab_sz = train_loader.dataset.src_vocab_size
print("src_vocab_sz", src_vocab_sz)
tgt_vocab_sz = train_loader.dataset.tgt_vocab_size
print("tgt_vocab_sz", tgt_vocab_sz)

# Drop any previous instance before building a new one so that re-running
# this cell does not keep two models alive at once. Compare against None
# explicitly: an object's truthiness can be overridden by __len__/__bool__.
if model is not None:
    del model

# Source and target sides are both sized to the larger of the two vocabularies.
vocab_sz = max(src_vocab_sz, tgt_vocab_sz)

model = NodeTransformer(
    n_src_vocab=vocab_sz,
    n_tgt_vocab=vocab_sz,
    len_max_seq=max_token_seq_len,
    n_layers=6,
    # Alternative settings kept for reference:
    #emb_src_tgt_weight_sharing=False,
    #d_word_vec=128, d_model=128, d_inner=512,
    n_head=8, method='dopri5-ext', rtol=1e-3, atol=1e-3,
    has_node_encoder=False, has_node_decoder=False)

model = model.to(device)

src_vocab_sz 9795
tgt_vocab_sz 17989


### Create basic optimizer

In [9]:
# Adam over all model parameters.
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
    betas=(0.9, 0.995),
    eps=1e-9,
)


### Restore best checkpoint (to restart past training)

In [10]:
# Restore the best "validation" checkpoint of this experiment into the model
# and optimizer, then report the metrics stored with it.
state = checkpoints.restore_best_checkpoint(
    exp_name,
    unique_id,
    "validation",
    model,
    optimizer,
)

for label, key in (("accuracy", "acc"), ("loss", "loss")):
    print(label, state[key])

# Make sure the restored model sits on the selected device.
model = model.to(device)

Extracting state from checkpoints/transformer_6_layers_multi30k_2019-06-07_1000_validation_best.pth
Loading model state_dict from state found in checkpoints/transformer_6_layers_multi30k_2019-06-07_1000_validation_best.pth
Loading optimizer state_dict from state found in checkpoints/transformer_6_layers_multi30k_2019-06-07_1000_validation_best.pth
accuracy 0.5533347661007235
loss 3.2005433952221685


In [17]:
# Take the first validation batch and decode its first source/target pair
# back into text.
fst = next(iter(val_loader))
print(fst)

src_words = val_loader.dataset.src_idx2word
tgt_words = val_loader.dataset.tgt_idx2word

# fst[0] is decoded with the source table, fst[2] with the target table.
en = ' '.join(src_words[tok] for tok in fst[0][0].numpy())
ge = ' '.join(tgt_words[tok] for tok in fst[2][0].numpy())
print(en)
print(ge)

(tensor([[   2, 5572, 4113, 8034, 1523, 3968,  995, 7521, 9726, 5572, 8218,    3,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2, 5572, 4185, 7761, 6389, 5572, 5008, 9291, 1160, 5572, 7354, 5955,
            3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2, 5572, 4112, 5604, 1057, 5597, 1160, 5572, 9392, 3470, 1384, 5955,
            3,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2,  113, 1523, 8412, 1079, 5572, 2589, 5654, 4474, 3301, 1160,  719,
         7091, 6113, 8616,    3,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2, 5572, 7703, 4185, 5604, 5572, 3307, 4898, 2278, 6919, 3192, 6389,
         5572,  963, 6016, 5955,    3,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0],
        [   2, 5572, 3160, 6389, 5572, 3307, 8755

In [18]:
# Six evenly spaced time points on [0, 1], converted to a float tensor on the
# selected device.
timesteps = torch.from_numpy(np.linspace(0., 1, num=6)).float().to(device)

# First two entries of the batch fetched above are passed to the predictor.
qs, qs_pos = fst[0], fst[1]
resp = model_process.predict_single(
    qs, qs_pos, model, timesteps, device, max_token_seq_len
)


In [20]:
# Show the score, source sentence and predicted sentence for one example of
# the batch.
idx = 5
prediction = resp[idx]
print("score", prediction["score"])

# Loop variables are named `tok` so they do not shadow the example index `idx`.
en = ' '.join(val_loader.dataset.src_idx2word[tok] for tok in qs[idx].cpu().numpy())
ge = ' '.join(val_loader.dataset.tgt_idx2word[tok] for tok in prediction["resp"])
print("[EN]", en)
print("[GE]", ge)

score -0.0947723388671875
[EN] <s> a lady in a red coat , holding a bluish hand bag likely of asian descent , jumping off the ground for a <unk> . </s>
[GE] eine frau in einem roten mantel hält einen roten eimer in der hand , während eine andere frau mit einem eimer von der hand . </s>


In [11]:
import itertools
import codecs

timesteps = np.linspace(0., 1, num=6)
timesteps = torch.from_numpy(timesteps).float().to(device)

# One {"en", "ge", "score"} record per hypothesis, filled in by `cb`.
resps = []


def _strip_special_tokens(token_ids):
    """Return `token_ids` as a list without the BOS, EOS and PAD markers."""
    return [
        tok for tok in token_ids
        if tok != Constants.BOS and tok != Constants.EOS and tok != Constants.PAD
    ]


def cb(batch_idx, batch, all_hyp, all_scores):
    """Decode the hypotheses of one batch, record them and write them out.

    For every hypothesis the decoded source sentence, decoded hypothesis and
    score are appended to the module-level `resps` list, and the hypothesis
    text is written as one line to the open prediction file `f`.

    Args:
        batch_idx: Unused here; accepted because it is part of the callback
            signature.
        batch: Batch whose first element holds the source token ids, indexed
            by example.
        all_hyp: For each example, a sequence of hypothesis token-id sequences.
        all_scores: Scores indexed as `all_scores[example][hypothesis]`.
    """
    for i, idx_seqs in enumerate(all_hyp):
        # The source sentence is the same for every hypothesis of example i,
        # so decode it once per example rather than once per hypothesis.
        src_ids = _strip_special_tokens(batch[0][i].cpu().numpy())
        en = ' '.join([val_loader.dataset.src_idx2word[idx] for idx in src_ids])

        for j, idx_seq in enumerate(idx_seqs):
            s = all_scores[i][j].cpu().item()
            hyp_ids = _strip_special_tokens(idx_seq)
            ge = ' '.join([val_loader.dataset.tgt_idx2word[idx] for idx in hyp_ids])
            resps.append({"en": en, "ge": ge, "score": s})
            f.write(ge + "\n")


# The context manager guarantees the prediction file is flushed and closed
# even if prediction raises part-way through the dataset.
with codecs.open(f"{exp_name}_{unique_id}_prediction.txt", "w+", "utf-8") as f:
    resp = model_process.predict_dataset(val_loader, model, timesteps, device,
                                         cb, max_token_seq_len)