In [2]:
import sys
sys.path.append("../")

import numpy as np
import torch
import torch.utils.data
import torch.nn as nn
import torch.optim as optim

import torchdiffeq

from tensorboard_utils import Tensorboard
from tensorboard_utils import tensorboard_event_accumulator

import transformer.Constants as Constants
from transformer.Layers import EncoderLayer, DecoderLayer
from transformer.Modules import ScaledDotProductAttention
from transformer.Models import Decoder, get_attn_key_pad_mask, get_non_pad_mask, get_sinusoid_encoding_table
from transformer.SubLayers import PositionwiseFeedForward

import dataset

import model_process
import checkpoints
from node_transformer import NodeTransformer

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Render figures inline (the interactive `notebook` backend is left disabled).
#%matplotlib notebook
%matplotlib inline
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format = 'retina'

print("Torch Version", torch.__version__)

# Reload edited project modules automatically so changes are picked up live.
%load_ext autoreload
%autoreload 2

Torch Version 1.1.0


In [3]:
# Seed PyTorch's global RNG so runs of this notebook are repeatable.
seed = 1
torch.manual_seed(seed)
# Use the GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device", device)

device cuda


In [4]:
import os

# Location of the Multi30k data file. Set the MULTI30K_DATA_PATH environment
# variable to point elsewhere; the default is the path originally used here.
data_path = os.environ.get(
    "MULTI30K_DATA_PATH",
    "/home/mandubian/datasets/multi30k/multi30k.atok.low.pt")
# NOTE: torch.load unpickles the file, so only point this at trusted data.
data = torch.load(data_path)

In [5]:
# Maximum token sequence length stored in the dataset's settings object;
# it is passed to the model constructor and to the prediction helpers.
data_settings = data['settings']
max_token_seq_len = data_settings.max_token_seq_len
print(max_token_seq_len)

52


In [6]:
# Build the training and validation loaders from the loaded data.
batch_size = 16
train_loader, val_loader = dataset.prepare_dataloaders(data, batch_size=batch_size)

### Create an experiment with a name and a unique ID

In [7]:
# Identify the run: both values are used to locate the checkpoint to restore
# and to name the prediction file.
exp_name, unique_id = (
    "node_transformer_separated_dopri5_multi30k",
    "2019-06-15_1500",
)
# Alternative experiment, currently disabled:
#   exp_name = "node_transformer_full_multi30k"
#   unique_id = "2019-06-07_2100"


### Create Model

In [8]:
# Start with no model bound so the model-creation cell can check for, and
# release, a previous instance when it is re-run.
model = None

In [21]:

src_vocab_sz = train_loader.dataset.src_vocab_size
print("src_vocab_sz", src_vocab_sz)
tgt_vocab_sz = train_loader.dataset.tgt_vocab_size
print("tgt_vocab_sz", tgt_vocab_sz)

# Both vocabulary arguments receive the larger of the two sizes.
shared_vocab_sz = max(src_vocab_sz, tgt_vocab_sz)

# Drop any previously built model before constructing a new one. Compare with
# None explicitly instead of relying on the object's truthiness.
if model is not None:
    del model

model = NodeTransformer(
    n_src_vocab=shared_vocab_sz,
    n_tgt_vocab=shared_vocab_sz,
    len_max_seq=max_token_seq_len,
    #emb_src_tgt_weight_sharing=False,
    #d_word_vec=64, d_model=64, d_inner=256,
    n_head=8, method='dopri5-ext', rtol=1e-2, atol=1e-2,
    has_node_encoder=True, has_node_decoder=True, has_separated_node_decoder=True)

# Move the parameters to the selected device.
model = model.to(device)

src_vocab_sz 9795
tgt_vocab_sz 17989


### Create basic optimizer

In [18]:
# SGD with momentum over all model parameters.
# Alternative Adam configuration, currently disabled:
#optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.995), eps=1e-9)
sgd_options = {"lr": 0.00001, "momentum": 0.9}
optimizer = torch.optim.SGD(model.parameters(), **sgd_options)


### Restore the best checkpoint (to resume a previous training run)

In [22]:
# Load the best "validation" checkpoint of this experiment into the model and
# optimizer, then report the metrics stored alongside it.
state = checkpoints.restore_best_checkpoint(
    exp_name, unique_id, "validation", model, optimizer
)

for label, key in (("accuracy", "acc"), ("loss", "loss")):
    print(label, state[key])

# Make sure the restored model lives on the selected device.
model = model.to(device)

Extracting state from checkpoints/node_transformer_separated_dopri5_multi30k_2019-06-15_1500_validation_best.pth
Loading model state_dict from state found in checkpoints/node_transformer_separated_dopri5_multi30k_2019-06-15_1500_validation_best.pth
Loading optimizer state_dict from state found in checkpoints/node_transformer_separated_dopri5_multi30k_2019-06-15_1500_validation_best.pth
accuracy 0.5101368292857654
loss 3.6928281387120316


In [None]:
# Take the first validation batch and show it raw.
fst = next(iter(val_loader))
print(fst)

# Map the token ids of the first entry back to words: fst[0] is looked up in
# the source vocabulary and fst[2] in the target vocabulary.
src_words = val_loader.dataset.src_idx2word
tgt_words = val_loader.dataset.tgt_idx2word
en = ' '.join(src_words[tok] for tok in fst[0][0].numpy())
ge = ' '.join(tgt_words[tok] for tok in fst[2][0].numpy())
print(en)
print(ge)

In [18]:
# Two-point time grid [0, 1] as a float tensor on `device`.
timesteps = torch.from_numpy(np.linspace(0., 1, num=2)).float().to(device)

# First two tensors of the batch fetched above (fst[1] is presumably the
# position tensor matching the source tokens in fst[0] - confirm in dataset).
qs, qs_pos = fst[0], fst[1]
resp = model_process.predict_single(qs, qs_pos, model, timesteps, device, max_token_seq_len)


In [19]:
# Index of the batch entry to inspect.
idx = 1
prediction = resp[idx]
print("score", prediction["score"])

# Loop variables are named `tok` so they do not shadow the outer `idx`.
src_words = val_loader.dataset.src_idx2word
tgt_words = val_loader.dataset.tgt_idx2word
en = ' '.join(src_words[tok] for tok in qs[idx].cpu().numpy())
ge = ' '.join(tgt_words[tok] for tok in prediction["resp"])
print("[EN]", en)
print("[GE]", ge)

score -0.2026519775390625
[EN] <s> a man sleeping in a green room on a couch . </s> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank> <blank>
[GE] ein mann in einem blauen auf einem personen . </s>


In [23]:
import itertools
import codecs

# Two-point time grid [0, 1] as a float tensor on `device`.
timesteps = np.linspace(0., 1, num=2)
timesteps = torch.from_numpy(timesteps).float().to(device)

# One dict per decoded hypothesis; filled by `cb` below.
resps = []

# Marker tokens removed from both source and hypothesis before joining words.
special_tokens = (Constants.BOS, Constants.EOS, Constants.PAD)


def strip_special_tokens(token_ids):
    """Return `token_ids` as a list without BOS, EOS and PAD entries."""
    return [tok for tok in token_ids if tok not in special_tokens]


def cb(batch_idx, batch, all_hyp, all_scores):
    """Record every hypothesis of one decoded batch.

    For each batch entry `i` and each of its hypotheses `j`, append a dict
    with keys "en", "ge" and "score" to `resps` and write the decoded target
    sentence as one line to the open prediction file `f`.

    Args:
        batch_idx: index of the batch (not used here).
        batch: the loader batch; `batch[0][i]` holds the source token ids.
        all_hyp: for each batch entry, its hypothesis token-id sequences.
        all_scores: scores indexed like `all_hyp` (`all_scores[i][j]`).
    """
    src_words = val_loader.dataset.src_idx2word
    tgt_words = val_loader.dataset.tgt_idx2word
    for i, idx_seqs in enumerate(all_hyp):
        # The source sentence is shared by all hypotheses of entry i, so it
        # is decoded once per entry instead of once per hypothesis.
        src_ids = strip_special_tokens(batch[0][i].cpu().numpy())
        en = ' '.join(src_words[idx] for idx in src_ids)
        for j, idx_seq in enumerate(idx_seqs):
            s = all_scores[i][j].cpu().item()
            ge = ' '.join(tgt_words[idx] for idx in strip_special_tokens(idx_seq))
            resps.append({"en": en, "ge": ge, "score": s})
            f.write(ge + "\n")


# The context manager closes the file even when decoding raises (the recorded
# run of this cell stopped with "AssertionError: underflow in dt nan"), so the
# lines already written are flushed instead of being left in an open handle.
with codecs.open(f"{exp_name}_{unique_id}_prediction.txt", "w+", "utf-8") as f:
    resp = model_process.predict_dataset(val_loader, model, timesteps, device,
                                         cb, max_token_seq_len)

AssertionError: underflow in dt nan

In [64]:
# Display the collected predictions: one dict per hypothesis with the source
# sentence ("en"), the decoded sentence ("ge") and its score.
resps

[{'en': 'a group of men are loading cotton onto a truck',
  'ge': 'eine gruppe männer in einer einer in einem .',
  'score': -0.19371414184570312},
 {'en': 'a man sleeping in a green room on a couch .',
  'ge': 'ein mann in einem boot auf einem kind .',
  'score': -0.9595947265625},
 {'en': "a boy wearing headphones sits on a woman 's shoulders .",
  'ge': 'ein junge mit einem auf einem .',
  'score': -0.24339675903320312},
 {'en': 'two men setting up a blue ice fishing hut on an iced over lake',
  'ge': 'zwei männer in einem blauen einem blauen auf einem blauen auf einem blauen auf einem blauen in einem blauen , die die auf .',
  'score': -0.7785491943359375},
 {'en': 'a balding man wearing a red life jacket is sitting in a small boat .',
  'ge': 'ein mann in einer roten mit einer roten , der .',
  'score': -1.166595458984375},
 {'en': 'a lady in a red coat , holding a bluish hand bag likely of asian descent , jumping off the ground for a <unk> .',
  'ge': 'eine frau in einem roten ro