In [1]:
import os
# Switch into the example directory before the `greetings` imports below.
# NOTE(review): presumably this is what makes the `greetings` package
# importable from the notebook — confirm. The absolute path ties the notebook
# to this particular container layout.
os.chdir("/tf/src/examples/greetings")

from greetings import Dataset
import tensorflow as tf
import numpy as np
import logging
from greetings import GreetingModel
from greetings import cfg
# Send log records to the file named by cfg.LOG_FILENAME at level cfg.LOG_LVL,
# writing only the bare message text of each record.
logging.basicConfig(
        level=cfg.LOG_LVL,
        filename=cfg.LOG_FILENAME,
        format='%(message)s')

In [2]:
# Create the project's Dataset object; later cells read its `train` and `test`
# attributes to build the input pipelines.
data = Dataset()

In [3]:
# Create the model; later cells call its `fit` and `predict` methods and use
# its `vocab` attribute to turn indices back into token text.
model = GreetingModel()

In [4]:
# Input-pipeline settings, named so they can be tuned in one place.
RNG_SEED = 11
SHUFFLE_BUFFER = 10000
BATCH_SIZE = 128
PREFETCH_BATCHES = 3
# Use the first GPU when TensorFlow can see one; otherwise stay on the CPU so
# the notebook still runs on machines without a GPU.
DEVICE = "/gpu:0" if tf.config.list_physical_devices("GPU") else "/cpu:0"

to_gpu = tf.data.experimental.copy_to_device(DEVICE)

# reshuffle_each_iteration=False keeps one fixed shuffled order for every pass
# over `train`. The training-set prediction cell further down pairs pred[i]
# with the i-th example of a second pass, so the order must not change between
# passes.
train = (
    data.train
    .shuffle(buffer_size=SHUFFLE_BUFFER, seed=RNG_SEED,
             reshuffle_each_iteration=False)
    .batch(BATCH_SIZE)
    .apply(to_gpu)
)
val = data.test.batch(BATCH_SIZE).apply(to_gpu)

# The prefetch stage is added after the device copy, inside the scope of the
# same device the batches were copied to.
with tf.device(DEVICE):
    train = train.prefetch(PREFETCH_BATCHES)
    val = val.prefetch(PREFETCH_BATCHES)

In [5]:
# Pull the first validation batch as NumPy arrays and print the first three
# rows of each of the four index arrays it contains (two in X, two in y).
X, y = next(val.as_numpy_iterator())
for heading, rows in (
    ("source idx from source vocab\n", X[0]),
    ("\nsource idx from target vocab\n", X[1]),
    ("\ntarget idx from target vocab\n", y[0]),
    ("\ntarget idx from source vocab\n", y[1]),
):
    print(heading, rows[:3])

source idx from source vocab
 [[   2   24    6    5    1    1    3    0    0    0]
 [   2   22 2239    3    0    0    0    0    0    0]
 [   2   26   27    6    5 2042    4    3    0    0]]

source idx from target vocab
 [[2 1 1 1 1 1 3 0 0 0]
 [2 1 1 3 0 0 0 0 0 0]
 [2 1 1 1 1 1 1 3 0 0]]

target idx from target vocab
 [[2 4 5 6 7 8 1 1 9 3]
 [2 4 5 6 7 8 1 9 3 0]
 [2 4 5 6 7 8 1 9 3 0]]

target idx from source vocab
 [[   2    1    1    1    1    8    1    1    1    3]
 [   2    1    1    1    1    8 2239    1    3    0]
 [   2    1    1    1    1    8 2042    1    3    0]]


In [6]:
# Train on `train`, passing `val` as the last argument; the recorded progress
# bars report a train-loss and a val-loss per epoch.
# NOTE(review): presumably cfg.EPOCHS is the epoch count and cfg.MODEL_SAVE the
# checkpoint location — confirm against GreetingModel.fit.
# NOTE(review): in the recorded run train-loss falls to ~0.002 while val-loss
# climbs from ~11 to ~14. That looks like overfitting or a mismatch in how
# val-loss is computed; worth checking before relying on the saved model.
model.fit(train, cfg.EPOCHS, cfg.MODEL_SAVE, val)

Epoch 1/5: 40it [00:16,  2.37it/s, train-loss=2.4375, val-loss=11.1625]
Epoch 2/5: 40it [00:02, 18.48it/s, train-loss=0.0248, val-loss=12.0321]
Epoch 3/5: 40it [00:02, 18.74it/s, train-loss=0.0104, val-loss=12.0904]
Epoch 4/5: 40it [00:02, 18.54it/s, train-loss=0.0053, val-loss=12.7375]
Epoch 5/5: 40it [00:02, 18.58it/s, train-loss=0.0020, val-loss=14.0248]


In [7]:
def idx2str(pred_y, X):
    """Turn a sequence of predicted indices into token strings.

    An index smaller than the size of the "target" vocabulary is looked up
    directly in the "target" namespace. Any other index is reduced by that
    size and used as a position into ``X``; the value found there is looked
    up in the "source" namespace.

    Args:
        pred_y: iterable of predicted integer indices.
        X: indexable sequence of source-vocabulary indices for the same
            example.

    Returns:
        A list with one token string per entry of ``pred_y``.
    """
    vocab = model.vocab
    target_size = vocab.get_vocab_size("target")
    return [
        vocab.get_token_text(token_id, "target")
        if token_id < target_size
        else vocab.get_token_text(X[token_id - target_size], "source")
        for token_id in pred_y
    ]

In [8]:
# Predict over the whole training pipeline, then walk the same pipeline one
# example at a time and print each example with its first three candidates.
# pred[i] is paired with the i-th unbatched example, which relies on both
# passes over `train` yielding examples in the same order.
pred, pred_proba = model.predict(train)
for i, (X, y) in enumerate(train.unbatch()):
    source_ids = X[0].numpy()
    target_ids = y[0].numpy()
    # inverse_transform is handed a one-row batch, hence the added leading axis.
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(target_ids[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(3):
        # Indices past the target vocabulary are resolved against source_ids.
        p = idx2str(pred[i][j].numpy(), source_ids)
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

ce to meet you , Soledad ! EOS EOS EOS	Proba: -0.6988925933837891
Predicted: Nice to meet you , Soledad ! EOS EOS !	Proba: -0.7136460542678833
Predicted: Nice to meet you , Soledad ! EOS EOS Nice	Proba: -4.597904205322266

Source: <S> Call me Jordan EOS PAD PAD PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Jordan ! EOS EOS EOS	Proba: -0.25408777594566345
Predicted: Nice to meet you , Jordan ! EOS EOS !	Proba: -1.523960828781128
Predicted: Nice to meet you , Jordan ! EOS EOS Nice	Proba: -5.331370830535889

Source: <S> Hey call me Carlos . EOS PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Carlos ! EOS EOS EOS	Proba: -0.0724610909819603
Predicted: Nice to meet you , Carlos ! EOS EOS !	Proba: -2.724607467651367
Predicted: Nice to meet you , Carlos ! EOS EOS Nice	Proba: -5.808969974517822

Source: <S> Hi , I 'm Tiesha Ledue EOS PAD PAD
Target: <S> Nice to meet you , Tiesha UNKNOWN ! EOS

Predicted: N

In [9]:
# Same report as for the training data, restricted to the first 50 validation
# examples. Predictions are made in batches of 5 over that subset and then
# paired by position with a second pass over the same subset.
val_head = val.unbatch().take(50)
pred, pred_proba = model.predict(val_head.batch(5))
for i, (X, y) in enumerate(val_head):
    source_ids = X[0].numpy()
    target_ids = y[0].numpy()
    # inverse_transform is handed a one-row batch, hence the added leading axis.
    s = model.vocab.inverse_transform(source_ids[np.newaxis, :], "source")[0]
    t = model.vocab.inverse_transform(target_ids[np.newaxis, :], "target")[0]
    print(f"Source: {' '.join(s)}\nTarget: {' '.join(t)}\n")
    for j in range(3):
        # Indices past the target vocabulary are resolved against source_ids.
        p = idx2str(pred[i][j].numpy(), source_ids)
        print(f"Predicted: {' '.join(p)}\tProba: {pred_proba[i][j]}")
    print("")

Source: <S> Please call me UNKNOWN UNKNOWN EOS PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN UNKNOWN ! EOS

Predicted: Nice to meet you , UNKNOWN UNKNOWN ! EOS EOS	Proba: -0.029365282505750656
Predicted: Nice to meet you , UNKNOWN ! EOS EOS EOS	Proba: -3.6214375495910645
Predicted: Nice to meet you , UNKNOWN ! EOS EOS !	Proba: -6.426690101623535

Source: <S> Its Albert EOS PAD PAD PAD PAD PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Albert ! EOS EOS !	Proba: -0.34492751955986023
Predicted: Nice to meet you , Albert ! EOS EOS EOS	Proba: -1.3922357559204102
Predicted: Nice to meet you , Albert ! EOS EOS Nice	Proba: -3.34482741355896

Source: <S> You can call me Tennille . EOS PAD PAD
Target: <S> Nice to meet you , UNKNOWN ! EOS PAD

Predicted: Nice to meet you , Tennille ! EOS EOS EOS	Proba: -0.21208012104034424
Predicted: Nice to meet you , Tennille ! EOS EOS !	Proba: -1.7689634561538696
Predicted: Nice to meet you , Tennille ! EOS EOS Nic