In [1]:
import os
# Switch the working directory before the `greetings` imports that follow.
# NOTE(review): hard-coded absolute path — presumably a container mount point;
# confirm it exists before running this notebook in another environment.
os.chdir("/tf/src/examples/greetings")

from greetings import Dataset
import tensorflow as tf
import numpy as np
import logging
from greetings import GreetingModel
from greetings import cfg
# Configure the root logger from the project config: records are written to
# cfg.LOG_FILENAME at level cfg.LOG_LVL, formatted as the bare message text.
logging.basicConfig(
    format='%(message)s',
    filename=cfg.LOG_FILENAME,
    level=cfg.LOG_LVL,
)

In [2]:
# Instantiate the example dataset. Its `train` and `test` attributes are the
# datasets that the input-pipeline cell shuffles/batches further down.
data = Dataset()

In [3]:
# Instantiate the model with its default constructor arguments. Later cells
# use its `fit`, `predict` and `vocab` members.
model = GreetingModel()

In [4]:
RNG_SEED = 11
to_gpu = tf.data.experimental.copy_to_device("/gpu:0")

# Training pipeline: shuffle with a fixed seed, batch, then copy to the GPU.
# reshuffle_each_iteration=False keeps the element order the same on every
# pass over `train`.
train = (
    data.train
    .shuffle(buffer_size=10000, seed=RNG_SEED, reshuffle_each_iteration=False)
    .batch(128)
    .apply(to_gpu)
)

# Evaluation pipeline: unshuffled, same batch size and device transfer.
val = data.test.batch(128).apply(to_gpu)

# Prefetching is attached inside the GPU device scope.
with tf.device("/gpu:0"):
    train, val = train.prefetch(3), val.prefetch(3)

In [5]:
# Pull the first validation batch as numpy arrays and print the first three
# rows of each of the four index arrays (two in X, two in y).
X, y = next(val.as_numpy_iterator())
previews = (
    ("source idx from source vocab\n", X[0]),
    ("\nsource idx from target vocab\n", X[1]),
    ("\ntarget idx from target vocab\n", y[0]),
    ("\ntarget idx from source vocab\n", y[1]),
)
for label, rows in previews:
    print(label, rows[:3])

source idx from source vocab
 [[   2   24    6    5    1    1    3    0    0    0]
 [   2   22 2239    3    0    0    0    0    0    0]
 [   2   26   27    6    5 2042    4    3    0    0]]

source idx from target vocab
 [[2 1 1 1 1 1 3 0 0 0]
 [2 1 1 3 0 0 0 0 0 0]
 [2 1 1 1 1 1 1 3 0 0]]

target idx from target vocab
 [[2 4 5 6 7 8 1 1 9 3]
 [2 4 5 6 7 8 1 9 3 0]
 [2 4 5 6 7 8 1 9 3 0]]

target idx from source vocab
 [[   2    1    1    1    1    8    1    1    1    3]
 [   2    1    1    1    1    8 2239    1    3    0]
 [   2    1    1    1    1    8 2042    1    3    0]]


In [6]:
# Train on `train` for cfg.EPOCHS epochs, passing `val` for per-epoch
# validation metrics (the recorded progress lines report val-loss / val-bleu).
# cfg.MODEL_SAVE is presumably the checkpoint destination — TODO confirm
# against GreetingModel.fit.
# NOTE(review): in the recorded output val-loss rises from 10.3 to 13.1 while
# val-bleu improves and train-loss falls — worth checking how val-loss is
# computed.
model.fit(train, cfg.EPOCHS, cfg.MODEL_SAVE, val)

Epoch 1/5: 40it [00:24,  1.61it/s, train-loss=2.4523, train-bleu=0.908, train-bleu-smooth=0.908, val-loss=10.3067, val-bleu=0.963, val-bleu-smooth=0.963]
Epoch 2/5: 40it [00:07,  5.10it/s, train-loss=0.0225, train-bleu=0.999, train-bleu-smooth=0.999, val-loss=10.9067, val-bleu=0.992, val-bleu-smooth=0.992]
Epoch 3/5: 40it [00:07,  5.13it/s, train-loss=0.0097, train-bleu=1, train-bleu-smooth=1, val-loss=10.9314, val-bleu=0.992, val-bleu-smooth=0.992]
Epoch 4/5: 40it [00:07,  5.10it/s, train-loss=0.0034, train-bleu=1, train-bleu-smooth=1, val-loss=12.3868, val-bleu=0.999, val-bleu-smooth=0.999]
Epoch 5/5: 40it [00:07,  5.20it/s, train-loss=0.0013, train-bleu=1, train-bleu-smooth=1, val-loss=13.0729, val-bleu=0.999, val-bleu-smooth=0.999]


In [7]:
def idx2str(pred_y, X, vocab=None):
    """Convert a sequence of predicted indices into token strings.

    An index smaller than the size of the "target" namespace is looked up
    directly in that namespace. Any other index is treated as an offset into
    ``X``: the id stored at ``X[idx - vocab_len]`` is looked up in the
    "source" namespace instead.

    Args:
        pred_y: iterable of integer indices for one predicted sequence.
        X: indexable sequence of source-namespace ids for the matching input.
        vocab: object exposing ``get_vocab_size(namespace)`` and
            ``get_token_text(index, namespace)``. When omitted, the
            notebook-level ``model.vocab`` is used, which keeps existing
            two-argument calls working unchanged.

    Returns:
        list of token strings, one per entry of ``pred_y``.
    """
    if vocab is None:
        # Fall back to the global model so existing callers are unaffected.
        vocab = model.vocab
    vocab_len = vocab.get_vocab_size("target")
    ret = []
    for idx in pred_y:
        if idx < vocab_len:
            ret.append(vocab.get_token_text(idx, "target"))
        else:
            # Out-of-vocabulary index: resolve through the source sequence.
            ret.append(vocab.get_token_text(X[idx - vocab_len], "source"))
    return ret

In [8]:
# Show the top candidate predictions for the first few training examples.
# Inference is run only on the examples that are displayed rather than on the
# whole training set. Pairing pred[i] with the i-th displayed example relies on
# `train` yielding the same order on every pass (it is shuffled with
# reshuffle_each_iteration=False).
N_EXAMPLES = 10    # examples to display
N_CANDIDATES = 3   # candidate predictions printed per example

shown = train.unbatch().take(N_EXAMPLES)
pred, pred_proba = model.predict(shown.batch(N_EXAMPLES))
for i, (X, y) in enumerate(shown):
    source_ids = X[0].numpy()
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(y[0].numpy()[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(N_CANDIDATES):
        p = idx2str(pred[i][j].numpy(), source_ids)
        # The recorded scores are negative — presumably log-probabilities.
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Hi there . It 's Nyla Wais . EOS
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Nyla Wais ! EOS	Proba: -0.00021838594693690538
Predicted: Nice to you , Nyla Wais ! EOS EOS	Proba: -9.860431671142578
Predicted: Nice to meet , Nyla Wais ! EOS EOS	Proba: -10.3339204788208

Source: <S> Its Roseann , actually . EOS PAD PAD PAD
Target: <S> Nice to meet you , Roseann ! EOS PAD

Predicted: Nice to meet you , Roseann ! EOS EOS	Proba: -0.0016534019960090518
Predicted: Nice to meet you , Roseann Roseann ! EOS	Proba: -7.990521430969238
Predicted: Nice to meet you , Roseann Yuonne EOS EOS	Proba: -8.873027801513672

Source: <S> My friends call me Darrin Palsgrove EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Darrin Palsgrove ! EOS	Proba: -0.00036083016311749816
Predicted: Nice to meet you , Darrin Palsgrove Palsgrove !	Proba: -9.105873107910156
Predicted: Nice to you , Darrin Palsgrove ! EOS EOS	Proba: 

In [9]:
# Show the top candidate predictions for the first few validation examples.
# Only the displayed examples are sent through predict(); the batch size of 5
# is kept, so the predictions for these examples come from the same leading
# batches as before.
N_EXAMPLES = 10    # examples to display
N_CANDIDATES = 3   # candidate predictions printed per example

shown = val.unbatch().take(N_EXAMPLES)
pred, pred_proba = model.predict(shown.batch(5))
for i, (X, y) in enumerate(shown):
    source_ids = X[0].numpy()
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(y[0].numpy()[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(N_CANDIDATES):
        p = idx2str(pred[i][j].numpy(), source_ids)
        # The recorded scores are negative — presumably log-probabilities.
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Please call me UNKNOWN UNKNOWN EOS PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , UNKNOWN UNKNOWN ! EOS	Proba: -0.012995580211281776
Predicted: Nice to meet you , UNKNOWN ! EOS EOS	Proba: -4.3874640464782715
Predicted: Nice to meet you , me UNKNOWN ! EOS	Proba: -8.57729434967041

Source: <S> Its Albert EOS PAD PAD PAD PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Albert ! EOS EOS	Proba: -0.0006339882384054363
Predicted: Nice to meet you , Albert ! EOS !	Proba: -7.825721263885498
Predicted: Nice meet you , Albert ! EOS EOS EOS	Proba: -9.978647232055664

Source: <S> You can call me Tennille . EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Tennille ! EOS EOS	Proba: -0.0008330679265782237
Predicted: Nice to meet you , Tennille Tennille ! EOS	Proba: -7.635164260864258
Predicted: Nice to meet you , Tennille . ! EOS	Proba: -9.008454322814941

