<p><b>1. imports</b></p>

In [2]:
import pickle
import tensorflow as tf
import tensorflow_datasets as tfds

from transformer import model
from transformer.dataset import HyperParameters
from transformer.dataset import get_dataset, preprocess_sentence

<p><b>2.1 Define a custom learning-rate scheduler.</b></p>

In [3]:
class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model: int, warmup_steps: int = 4000):
        super(LearningRateSchedule, self).__init__()
        self.d_model = tf.cast(d_model, dtype=tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * self.warmup_steps**-1.5
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

<p><b>2.2 Define hyper-parameters.</b></p>

In [4]:
hparams = HyperParameters(
    max_sample=50000,
    max_length=40,
    batch_size=32,
    num_layers=2,
    num_units=512,
    d_model=256,
    num_heads=8,
    dropout=0.1,
    activation="relu",
)

for property, value in vars(hparams).items():
    print(property, ":", value)

max_sample : 50000
max_length : 40
batch_size : 32
num_layers : 2
num_units : 512
d_model : 256
num_heads : 8
dropout : 0.1
activation : relu


<p><b>3. Load dataset and tokenizer.</b></p>

In [5]:
# load dataset and tokenizer(then save tokenizer too)
dataset, tokenizer = get_dataset(hparams)

# define a function to load pretrained tokenizer
def load_tokenizer(path="./transformer/tokenizer"):
    tokenizer = tfds.deprecated.text.SubwordTextEncoder.load_from_file(path)
    return tokenizer

Downloading data from http://www.cs.cornell.edu/~cristian/data/cornell_movie_dialogs_corpus.zip
loading conversations ... 


 22%|██▏       | 18638/83097 [00:03<00:12, 5116.37it/s]


initializing tokenizer ...
tokenization ... 


50000it [00:03, 13671.93it/s]


<p><b>4. Define loss, optimizer and metric(s).</b></p>

In [6]:
optimizer = tf.keras.optimizers.Adam(
        LearningRateSchedule(d_model=hparams.d_model), beta_1=0.9, beta_2=0.98, epsilon=1e-9
)

cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction="none"
)

def loss_function(y_true, y_pred):
    y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
    loss = cross_entropy(y_true, y_pred)
    mask = tf.cast(tf.not_equal(y_true, 0), dtype=tf.float32)
    loss = tf.multiply(loss, mask)
    return tf.reduce_mean(loss)

def accuracy(y_true, y_pred):
    y_true = tf.reshape(y_true, shape=(-1, hparams.max_length - 1))
    return tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)

<p><b>4. Build and train the model.</b></p>

In [9]:
# build new model
tf.keras.utils.set_random_seed(1234)
chatbot_model = model.transformer(hparams)

# load pretrained model : for fine tuning
#chatbot_model = tf.keras.models.load_model(
#    "model.h5",
#    custom_objects={
#            "PositionalEncoding": model.PositionalEncoding,
#            "MultiHeadAttentionLayer": model.MultiHeadAttentionLayer,
#        },
#    compile=False
#)
print("-model built")

-model built


In [12]:
chatbot_model.compile(optimizer, loss=loss_function, metrics=[accuracy])

print("train start")
history = chatbot_model.fit(dataset, epochs=20)

train start
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<p><b>5. Save the model.</b></p>

In [13]:
saving_model = "model.h5"
hparams.save_model = saving_model
print(f"-saving Model to: {saving_model}")

tf.keras.models.save_model(
        chatbot_model, filepath=hparams.save_model, include_optimizer=False)


with open('hyper-parameters.pkl', 'wb') as hp:
    pickle.dump(hparams, hp, pickle.HIGHEST_PROTOCOL)

-saving Model to: model.h5


<p><b>6. Evaluate the Model.</b></p>

In [14]:
def inference(hp, model, tokenizer, sentence):
    sentence = preprocess_sentence(sentence)
    sentence = tf.expand_dims(
        hparams.start_token + tokenizer.encode(sentence) + hparams.end_token, axis=0)
    
    output = tf.expand_dims(hparams.start_token, 0)

    for i in range(hparams.max_length):
        predictions = model(inputs=[sentence, output], training=False)
        predictions = predictions[:, -1:, :]
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        if tf.equal(predicted_id, hparams.end_token[0]):
            break
        
        output = tf.concat([output, predicted_id], axis=-1)
    
    return tf.squeeze(output, axis=0)

def generate_response(hp, model, tokenizer, sentence):
    prediction = inference(hp, model, tokenizer, sentence)
    predicted_sentence = tokenizer.decode(
        [i for i in prediction if i < tokenizer.vocab_size]
    )
    return predicted_sentence

def evaluate(hparams, model, tokenizer, inputs):
    print("-evaluating ...")
    response = "what are you going to do?"

    for user_sentnece in inputs:
        if user_sentnece != None:
            print(f"\nInput: {user_sentnece}")
            response = generate_response(hparams, model, tokenizer, user_sentnece)
            print(f"Output: {response}")
        
        else:
            print(f"\nInput: {response}")
            response = generate_response(hparams, model, tokenizer, response)
            print(f"Output: {response}")

In [15]:
# loading saved componentes (model, hyper-parameters, tokenizer)
with open('hyper-parameters.pkl', 'rb') as hp:
    hparams_loaded = pickle.load(hp)

tokenizer_loaded = load_tokenizer()

chatbot = tf.keras.models.load_model(
    hparams_loaded.save_model,
    custom_objects={
            "PositionalEncoding": model.PositionalEncoding,
            "MultiHeadAttentionLayer": model.MultiHeadAttentionLayer,
        },
    compile=False
)

sentences = [
    "Hello, my name is Omid . what about you?",
    "how was your day",
    "How old are you?",
    "where have you been",
    "do you like pizza?",
    "my favourite color is blue",
    None, None, None
]

In [16]:
evaluate(hparams_loaded, chatbot, tokenizer_loaded, sentences)

-evaluating ...

Input: Hello, my name is Omid . what about you?
Output: i am getting out of here . if you do not mind .

Input: how was your day
Output: it is not a deal .

Input: How old are you?
Output: i am fine .

Input: where have you been
Output: i am going to see him .

Input: do you like pizza?
Output: yes , sir .

Input: my favourite color is blue
Output: i am not sure . i was so sure

Input: i am not sure . i was so sure
Output: i am sorry . i just wanted to talk to you .

Input: i am sorry . i just wanted to talk to you .
Output: i am sure it is okay .

Input: i am sure it is okay .
Output: you are a policeman .
