In [2]:
import os
# Change into the example directory before importing from `greetings`;
# presumably the package (and any relative paths in cfg) resolve from the
# working directory — TODO confirm.
# NOTE(review): hard-coded absolute path; this line raises if the directory
# does not exist on the machine running the notebook.
os.chdir("/tf/src/examples/greetings")

from greetings import Dataset
import tensorflow as tf
import numpy as np
import logging
from greetings import GreetingModel
from greetings import cfg
# Route log records to the file named by cfg.LOG_FILENAME at the level given
# by cfg.LOG_LVL. The format keeps only the message text (no timestamp,
# logger name or level prefix).
logging.basicConfig(
        level=cfg.LOG_LVL,
        filename=cfg.LOG_FILENAME,
        format='%(message)s')

In [3]:
# Build the greetings dataset wrapper; later cells read its `.train` and
# `.test` attributes and feed them through the tf.data pipeline.
data = Dataset()

In [4]:
# Instantiate the model with its default constructor arguments; later cells
# use its `.fit`, `.predict` and `.vocab` members.
model = GreetingModel()

In [5]:
# Seed for the shuffle below so the training order is reproducible.
RNG_SEED = 11
# Dataset transformation that copies elements to the first GPU.
to_gpu = tf.data.experimental.copy_to_device("/gpu:0")

# Training stream: shuffle -> batch -> copy to GPU.
# reshuffle_each_iteration=False keeps the same shuffled order on every pass
# over the dataset, so repeated iterations see examples in the same sequence.
train = data.train.shuffle(
    buffer_size=10000, seed=RNG_SEED, reshuffle_each_iteration=False)
train = train.batch(512)
train = train.apply(to_gpu)

# Validation stream: no shuffling, same batch size and device copy.
val = data.test.batch(512)
val = val.apply(to_gpu)

# Prefetching is attached inside the GPU device scope, after the copy step.
with tf.device("/gpu:0"):
    train, val = train.prefetch(3), val.prefetch(3)

In [6]:
# Pull the first validation batch as numpy arrays and show the first three
# rows of each of the four index arrays it contains.
val_batches = val.as_numpy_iterator()
X, y = next(val_batches)
for heading, ids in (
    ("source idx from source vocab\n", X[0]),
    ("\nsource idx from target vocab\n", X[1]),
    ("\ntarget idx from target vocab\n", y[0]),
    ("\ntarget idx from source vocab\n", y[1]),
):
    print(heading, ids[:3])

source idx from source vocab
 [[   2   24    6    5    1    1    3    0    0    0]
 [   2   22 2239    3    0    0    0    0    0    0]
 [   2   26   27    6    5 2042    4    3    0    0]]

source idx from target vocab
 [[2 1 1 1 1 1 3 0 0 0]
 [2 1 1 3 0 0 0 0 0 0]
 [2 1 1 1 1 1 1 3 0 0]]

target idx from target vocab
 [[2 4 5 6 7 8 1 1 9 3]
 [2 4 5 6 7 8 1 9 3 0]
 [2 4 5 6 7 8 1 9 3 0]]

target idx from source vocab
 [[   2    1    1    1    1    8    1    1    1    3]
 [   2    1    1    1    1    8 2239    1    3    0]
 [   2    1    1    1    1    8 2042    1    3    0]]


In [7]:
# Train on `train`, passing `val` as the fourth argument; the recorded
# progress bars report val-loss / val-bleu per epoch. In the recorded run
# cfg.EPOCHS was 5. cfg.MODEL_SAVE is presumably the location the model is
# saved to — TODO confirm against GreetingModel.fit.
# NOTE(review): in the recorded output, val-loss rises after epoch 2 while
# train-loss keeps falling, and val-bleu peaks at epoch 3 — worth checking
# for overfitting.
model.fit(train, cfg.EPOCHS, cfg.MODEL_SAVE, val)

Epoch 1/5: 10it [00:29,  2.94s/it, train-loss=8.6540, train-bleu=0.662, train-bleu-smooth=0.662, val-loss=6.9328, val-bleu=0.854, val-bleu-smooth=0.854]
Epoch 2/5: 10it [00:11,  1.16s/it, train-loss=0.9742, train-bleu=0.929, train-bleu-smooth=0.929, val-loss=5.3846, val-bleu=0.883, val-bleu-smooth=0.883]
Epoch 3/5: 10it [00:11,  1.18s/it, train-loss=0.2539, train-bleu=0.981, train-bleu-smooth=0.981, val-loss=6.2810, val-bleu=0.981, val-bleu-smooth=0.981]
Epoch 4/5: 10it [00:11,  1.16s/it, train-loss=0.0750, train-bleu=0.997, train-bleu-smooth=0.997, val-loss=7.6251, val-bleu=0.948, val-bleu-smooth=0.948]
Epoch 5/5: 10it [00:11,  1.17s/it, train-loss=0.0353, train-bleu=0.999, train-bleu-smooth=0.999, val-loss=8.2821, val-bleu=0.953, val-bleu-smooth=0.953]


In [8]:
def idx2str(pred_y, X, vocab=None):
    """Convert a sequence of predicted indices into token strings.

    Indices below the size of the "target" vocabulary are looked up directly
    in the "target" namespace. Any larger index is treated as a position in
    the source sequence: ``idx - vocab_len`` selects an element of ``X``, and
    that element is looked up in the "source" namespace.

    Args:
        pred_y: Iterable of integer indices for one predicted sequence.
        X: Indexable sequence of source-vocabulary indices for the same
            example (the input the prediction was made from).
        vocab: Object providing ``get_vocab_size(namespace)`` and
            ``get_token_text(index, namespace)``. Defaults to the notebook's
            global ``model.vocab`` so existing two-argument calls keep
            working.

    Returns:
        A list with one token string per entry of ``pred_y``.
    """
    if vocab is None:
        # Fall back to the notebook-level model for backward compatibility.
        vocab = model.vocab
    vocab_len = vocab.get_vocab_size("target")
    tokens = []
    for idx in pred_y:
        if idx < vocab_len:
            tokens.append(vocab.get_token_text(idx, "target"))
        else:
            # Index points past the target vocabulary: resolve it through
            # the corresponding position of the source sequence.
            tokens.append(vocab.get_token_text(X[idx - vocab_len], "source"))
    return tokens

In [9]:
# Show the model's candidate outputs for the first few training examples.
N_EXAMPLES = 10    # how many examples to decode and print
N_CANDIDATES = 3   # how many candidate predictions to print per example

# Predict only on the examples that are displayed instead of running
# inference over the whole training set. `train` was shuffled with a fixed
# seed and reshuffle_each_iteration=False, so iterating it again below
# yields the same examples in the same order as the ones predicted here.
train_sample = train.unbatch().take(N_EXAMPLES)
pred, pred_proba = model.predict(train_sample.batch(N_EXAMPLES))

for i, (X, y) in enumerate(train_sample):
    # X[0] / y[0] hold the source / target ids in their own vocabularies.
    src_ids = X[0].numpy()
    tgt_ids = y[0].numpy()
    # inverse_transform is called on a batch of one row, hence the new axis.
    s = model.vocab.inverse_transform(src_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(tgt_ids[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(N_CANDIDATES):
        p = idx2str(pred[i][j].numpy(), src_ids)
        # NOTE(review): the printed values are negative, so these look like
        # log-probabilities rather than probabilities — confirm.
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Hi there . It 's Nyla Wais . EOS
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Nyla Wais ! EOS	Proba: -0.009473783895373344
Predicted: Nice to meet you Nyla Wais ! EOS EOS	Proba: -4.835021018981934
Predicted: Nice to meet you Nyla Wais ! EOS EOS	Proba: -4.835021018981934

Source: <S> Its Roseann , actually . EOS PAD PAD PAD
Target: <S> Nice to meet you , Roseann ! EOS PAD

Predicted: Nice to meet you , Roseann ! EOS EOS	Proba: -0.01785992830991745
Predicted: Nice to meet you , Roseann ! EOS EOS	Proba: -0.01785992830991745
Predicted: Nice to meet you , Roseann ! EOS EOS	Proba: -0.01785992830991745

Source: <S> My friends call me Darrin Palsgrove EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Darrin Palsgrove ! EOS	Proba: -0.0024111715611070395
Predicted: Nice to meet you , Darrin ! EOS EOS	Proba: -6.734346866607666
Predicted: Nice to meet you , Darrin ! EOS EOS	Proba: -6.734346866607666

S

In [10]:
# Predict on the first 50 validation examples (fed in batches of 5), then
# print the first 10 of them together with three candidate outputs each.
pred, pred_proba = model.predict(val.unbatch().take(50).batch(5))
for i, (X, y) in enumerate(val.unbatch().take(10)):
    # X[0] / y[0] hold the source / target ids in their own vocabularies.
    src_ids = X[0].numpy()
    tgt_ids = y[0].numpy()
    # inverse_transform is called on a batch of one row, hence the new axis.
    s = model.vocab.inverse_transform(src_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(tgt_ids[np.newaxis, :], "target")[0]
    source_text = ' '.join(s)
    target_text = ' '.join(t)
    print(f"Source: {source_text}\nTarget: {target_text}\n")
    for j in range(3):
        p = idx2str(pred[i][j].numpy(), src_ids)
        predicted_text = ' '.join(p)
        print(f"Predicted: {predicted_text}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Please call me UNKNOWN UNKNOWN EOS PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , UNKNOWN UNKNOWN ! EOS	Proba: -0.07300668954849243
Predicted: Nice to meet you , UNKNOWN UNKNOWN UNKNOWN !	Proba: -3.0839779376983643
Predicted: Nice to meet you , UNKNOWN ! EOS EOS	Proba: -4.4598164558410645

Source: <S> Its Albert EOS PAD PAD PAD PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Albert ! EOS EOS	Proba: -0.007965894415974617
Predicted: Nice to meet you , Albert ! EOS EOS	Proba: -0.007965894415974617
Predicted: Nice to meet you , Albert ! EOS EOS	Proba: -0.007965894415974617

Source: <S> You can call me Tennille . EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Tennille ! EOS EOS	Proba: -0.004344363231211901
Predicted: Nice to meet you , Tennille ! EOS EOS	Proba: -0.004344363231211901
Predicted: Nice to meet you , Tennille ! EOS EOS	Proba: -0.00