In [1]:
from model import Transformer
from util import PointedIndex
from util_io import encode, decode
import numpy as np
import tensorflow as tf

In [2]:
# length cap: handed to the model's data setup,
# and later fed as the maximum number of steps to unroll
len_cap = 256
# the checkpoint to restore is "trial/model/<trial><ckpt>"
trial, ckpt = "m", 629820

In [3]:
# create a new Transformer and set up its data with the length cap
model = Transformer.new().data(len_cap= len_cap)
# two modes of the same model, both built as non-trainable
# NOTE(review): presumably both modes share the same weights — confirm in model.py
# NOTE(review): keep this order, the graph is built as a side effect of these calls
forcing = model.forcing(trainable= False) # the model in teacher forcing mode
autoreg = model.autoreg(trainable= False) # the model in autoregressive mode

In [4]:
# the saver is created after the model is built,
# with no arguments it covers the variables which exist in the graph at this point
saver = tf.train.Saver()
# an interactive session installs itself as the default session,
# which is what lets `eval` be called later without passing a session
sess = tf.InteractiveSession()
# load the saved parameters from the checkpoint named by `trial` and `ckpt`
saver.restore(sess, "trial/model/{}{}".format(trial, ckpt))

INFO:tensorflow:Restoring parameters from trial/model/m629820


In [5]:
# load the source and target indices used for encoding and decoding sentences
# `.item()` unwraps a single stored object — presumably a pickled python object saved with np.save
# numpy >= 1.16.3 refuses to unpickle by default and raises ValueError,
# so allow_pickle has to be enabled explicitly
# NOTE: unpickling can execute arbitrary code, only load index files you trust
idx_src = PointedIndex(np.load("trial/data/index_src.npy", allow_pickle= True).item())
idx_tgt = PointedIndex(np.load("trial/data/index_tgt.npy", allow_pickle= True).item())

def auto(s, m= autoreg, idx_src= idx_src, idx_tgt= idx_tgt, len_cap= len_cap):
    """Run the sentence `s` through the model `m` and return the decoded prediction.

    s       : the source sentence, in whatever form `encode` accepts (a string in this notebook)
    m       : the model to evaluate, by default the autoregressive one
    idx_src : the index used to encode the source sentence
    idx_tgt : the index used to decode the prediction
    len_cap : the maximum number of steps to unroll

    `m.pred.eval` is called without a session, so a default session must be active.
    """
    # `encode` pads the sentence at the beginning and the end;
    # the encoded array is turned into a batch with one instance
    batch = np.array(encode(idx_src, s)).reshape(1, -1)
    # tgt is fed only the first step (the padding at the beginning),
    # which is taken from the first column of the encoded source
    outputs = m.pred.eval({m.src: batch, m.tgt: batch[:, :1], m.len_tgt: len_cap})
    # unwrap the only instance from the batch and decode it
    return decode(idx_tgt, outputs[0])

# to run the forcing model, remember to feed an encoded target sentence
# and DON'T feed len_tgt,
# because that is simply the length of the encoded target minus the beginning padding

In [6]:
# run the autoregressive model (the default `m`) on an example sentence
auto("Sodium iodate ( NaIO3 ) is the sodium salt of iodic acid .")

'Sodium iodate is the sodium salt of iodic acid .'

the attention tensors in the teacher forcing model are these:
```
encode_forcing/layer1/att/attention/Reshape_1:0
encode_forcing/layer2/att/attention/Reshape_1:0
decode_forcing/layer1/csl/attention/Reshape_1:0
decode_forcing/layer1/att/attention/Reshape_1:0
decode_forcing/layer2/csl/attention/Reshape_1:0
decode_forcing/layer2/att/attention/Reshape_1:0
```

the attention tensors in the autoregressive model are these:
```
encode_autoreg/layer1/att/attention/Reshape_1:0
encode_autoreg/layer2/att/attention/Reshape_1:0
decode_autoreg/autoreg/layer1/csl/attention/Reshape_1:0
decode_autoreg/autoreg/layer1/att/attention/Reshape_1:0
decode_autoreg/autoreg/layer2/csl/attention/Reshape_1:0
decode_autoreg/autoreg/layer2/att/attention/Reshape_1:0
```

However, TensorFlow does not allow the decoder attentions of the autoregressive model to be fetched, because they are created inside a loop.
If you want these weights, you can run the forcing model repeatedly, feeding its previous predictions back each time, and accumulate the results.