In [1]:
import os
# Switch the working directory before the `greetings` imports below run.
# NOTE(review): presumably the package import and any relative paths in cfg
# rely on this cwd, and the absolute path looks environment-specific — confirm.
os.chdir("/tf/src/examples/greetings")

from greetings import Dataset
import tensorflow as tf
import numpy as np
import logging
from greetings import GreetingModel
from greetings import cfg
# Root logger setup: level and destination file come from cfg, and each
# record is written as the bare message text.
logging.basicConfig(
    format='%(message)s',
    filename=cfg.LOG_FILENAME,
    level=cfg.LOG_LVL,
)

In [2]:
# Build the dataset with default arguments; its `train` and `test` splits feed
# the input pipelines defined further down.
data = Dataset()

In [3]:
# Build the model with default arguments; later cells call `model.fit`,
# `model.predict`, and read `model.vocab`.
model = GreetingModel()

In [4]:
# Input pipelines: shuffle (train only) -> batch -> copy to GPU -> prefetch.
# The shuffle uses a fixed seed and reshuffle_each_iteration=False, so every
# pass over `train` is asked to yield the same order.
RNG_SEED = 11
to_gpu = tf.data.experimental.copy_to_device("/gpu:0")

train = (
    data.train
    .shuffle(buffer_size=10000, seed=RNG_SEED, reshuffle_each_iteration=False)
    .batch(128)
    .apply(to_gpu)
)
val = data.test.batch(128).apply(to_gpu)

# Prefetch is attached under the GPU device scope, after the copy_to_device step.
with tf.device("/gpu:0"):
    train, val = train.prefetch(3), val.prefetch(3)

In [5]:
# Pull one validation batch and print the first three rows of each of the four
# id arrays (two per side: X[0], X[1], y[0], y[1]).
X, y = next(val.as_numpy_iterator())
labelled_arrays = (
    ("source idx from source vocab\n", X[0]),
    ("\nsource idx from target vocab\n", X[1]),
    ("\ntarget idx from target vocab\n", y[0]),
    ("\ntarget idx from source vocab\n", y[1]),
)
for header, ids in labelled_arrays:
    print(header, ids[:3])

source idx from source vocab
 [[   2   24    6    5    1    1    3    0    0    0]
 [   2   22 2239    3    0    0    0    0    0    0]
 [   2   26   27    6    5 2042    4    3    0    0]]

source idx from target vocab
 [[2 1 1 1 1 1 3 0 0 0]
 [2 1 1 3 0 0 0 0 0 0]
 [2 1 1 1 1 1 1 3 0 0]]

target idx from target vocab
 [[2 4 5 6 7 8 1 1 9 3]
 [2 4 5 6 7 8 1 9 3 0]
 [2 4 5 6 7 8 1 9 3 0]]

target idx from source vocab
 [[   2    1    1    1    1    8    1    1    1    3]
 [   2    1    1    1    1    8 2239    1    3    0]
 [   2    1    1    1    1    8 2042    1    3    0]]


In [6]:
# Train on `train` and evaluate on `val`; the recorded progress bars report
# train/val loss and BLEU once per epoch.
# Presumably cfg.EPOCHS is the epoch count (the recorded run shows 5) and
# cfg.MODEL_SAVE is where the model is saved — confirm against GreetingModel.fit.
# NOTE(review): in the recorded output val-loss rises (10.08 -> 12.38) while
# val-bleu improves (0.94 -> 0.999); worth checking how val-loss is computed.
model.fit(train, cfg.EPOCHS, cfg.MODEL_SAVE, val)

Epoch 1/5: 40it [00:25,  1.56it/s, train-loss=2.5483, train-bleu=0.897, train-bleu-smooth=0.897, val-loss=10.0835, val-bleu=0.94, val-bleu-smooth=0.94]
Epoch 2/5: 40it [00:07,  5.09it/s, train-loss=0.0240, train-bleu=0.999, train-bleu-smooth=0.999, val-loss=11.1784, val-bleu=0.976, val-bleu-smooth=0.976]
Epoch 3/5: 40it [00:07,  5.13it/s, train-loss=0.0091, train-bleu=1, train-bleu-smooth=1, val-loss=11.9602, val-bleu=0.97, val-bleu-smooth=0.97]
Epoch 4/5: 40it [00:07,  5.19it/s, train-loss=0.0035, train-bleu=1, train-bleu-smooth=1, val-loss=12.1264, val-bleu=0.991, val-bleu-smooth=0.991]
Epoch 5/5: 40it [00:07,  5.19it/s, train-loss=0.0018, train-bleu=1, train-bleu-smooth=1, val-loss=12.3812, val-bleu=0.999, val-bleu-smooth=0.999]


In [10]:
def idx2str(pred_y, X, vocab=None):
    """Convert predicted ids into token strings, resolving copied source tokens.

    Ids below the target-vocabulary size are looked up in the "target"
    namespace. Larger ids are treated as an offset into the source sequence:
    ``idx - vocab_len`` selects a position in ``X``, and the source id stored
    at that position is looked up in the "source" namespace.

    Args:
        pred_y: iterable of predicted ids for one sequence.
        X: indexable sequence of source ids for the same example.
        vocab: object providing ``get_vocab_size(namespace)`` and
            ``get_token_text(idx, namespace)``. Defaults to the notebook's
            ``model.vocab`` so existing two-argument calls keep working.

    Returns:
        A list of token strings, one per id in ``pred_y``.
    """
    if vocab is None:
        # Fall back to the notebook-level model only when no vocab is supplied.
        vocab = model.vocab
    vocab_len = vocab.get_vocab_size("target")
    return [
        vocab.get_token_text(idx, "target") if idx < vocab_len
        else vocab.get_token_text(X[idx - vocab_len], "source")
        for idx in pred_y
    ]

In [11]:
# Print the top candidates for the first few training examples.
# Only the examples that are displayed go through the model, sliced with
# unbatch().take(...).batch(5), rather than decoding the whole training set.
N_EXAMPLES = 10   # examples to display
N_CANDIDATES = 3  # candidate predictions printed per example
train_examples = train.unbatch().take(N_EXAMPLES)
pred, pred_proba = model.predict(train_examples.batch(5))
# `train_examples` is iterated a second time here; row i of `pred` lines up with
# example i because the shuffle is seeded with reshuffle_each_iteration=False.
for i, (X, y) in enumerate(train_examples):
    source_ids = X[0].numpy()
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(y[0].numpy()[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(N_CANDIDATES):
        p = idx2str(pred[i][j].numpy(), source_ids)
        # The recorded "Proba" values are <= 0, so presumably log-probabilities.
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Hi there . It 's Nyla Wais . EOS
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Nyla Wais ! EOS	Proba: -0.001263560843653977
Predicted: Nice to meet you Nyla Wais ! EOS EOS	Proba: -6.898665904998779
Predicted: Nice to meet , Nyla Wais ! EOS EOS	Proba: -9.572776794433594

Source: <S> Its Roseann , actually . EOS PAD PAD PAD
Target: <S> Nice to meet you , Roseann ! EOS PAD

Predicted: Nice to meet you , Roseann ! EOS EOS	Proba: -0.00297151948325336
Predicted: Nice to meet you , Roseann ! EOS !	Proba: -6.465734481811523
Predicted: Nice to meet you , Roseann ! EOS ,	Proba: -8.400186538696289

Source: <S> My friends call me Darrin Palsgrove EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , Darrin Palsgrove ! EOS	Proba: -0.0003844376187771559
Predicted: Nice to meet you , Darrin ! EOS EOS	Proba: -9.637002944946289
Predicted: Nice to meet , Darrin Palsgrove ! EOS EOS	Proba: -9.655153274536133

Sourc

In [12]:
# Decode the first 50 validation examples (in batches of 5) and print the top
# three candidates for the first 10 next to their source and target text.
val_examples = val.unbatch()
pred, pred_proba = model.predict(val_examples.take(50).batch(5))
for i, (X, y) in enumerate(val_examples.take(10)):
    source_ids = X[0].numpy()
    target_ids = y[0].numpy()
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(target_ids[np.newaxis, :], "target")[0]
    print("Source: " + " ".join(s) + "\nTarget: " + " ".join(t) + "\n")
    for j in range(3):
        # Ids past the target vocabulary are resolved against this example's source ids.
        p = idx2str(pred[i][j].numpy(), source_ids)
        print("Predicted: " + " ".join(p) + f"\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Please call me UNKNOWN UNKNOWN EOS PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , UNKNOWN UNKNOWN ! EOS	Proba: -0.04744230955839157
Predicted: Nice to meet you , UNKNOWN ! EOS EOS	Proba: -3.6490304470062256
Predicted: Nice to meet you , me UNKNOWN ! EOS	Proba: -3.9467403888702393

Source: <S> Its Albert EOS PAD PAD PAD PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Albert ! EOS EOS	Proba: -0.0012906715273857117
Predicted: Nice to meet you , Albert ! EOS !	Proba: -7.297268867492676
Predicted: Nice to meet you , Albert ! EOS ,	Proba: -9.085673332214355

Source: <S> You can call me Tennille . EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Tennille ! EOS EOS	Proba: -0.0012629377888515592
Predicted: Nice to meet you , Tennille Tennille ! EOS	Proba: -7.73637056350708
Predicted: Nice to meet you , Tennille . ! EOS	Proba: -7.7923712730407715

So