In [1]:
# This notebook is intended to validate some of the
# results of the T5 model.

import os
import sys

# Location of a local tf-transformers checkout. It can be overridden with the
# TF_TRANSFORMERS_SRC environment variable; the default is the original
# hard-coded path, so existing setups keep working unchanged.
_TF_TRANSFORMERS_SRC = os.environ.get(
    "TF_TRANSFORMERS_SRC", "/home/sidhu/Projects/tf-transformers/src/"
)

# Re-running this cell must not keep appending the same entry to sys.path.
if _TF_TRANSFORMERS_SRC not in sys.path:
    sys.path.append(_TF_TRANSFORMERS_SRC)

In [2]:
import tensorflow as tf
from tf_transformers.models import T5Model

In [3]:
# Check TF conversion
import shutil

model_name = 't5-base'

# Remove any previously cached copy of this model before calling get_model.
# shutil.rmtree(..., ignore_errors=True) behaves like `rm -rf` for a missing
# directory (no error) but does not depend on a POSIX shell being available.
shutil.rmtree(f"/tmp/tf_transformers_cache/{model_name}", ignore_errors=True)

# convert_fn_type='tf' selects the TensorFlow conversion path.
model, config = T5Model.get_model(model_name=model_name, convert_fn_type='tf')

INFO:absl:Successful: Converted model using TF HF
INFO:absl:Successful: Saved model at /tmp/tf_transformers_cache/t5-base/ckpt-1
INFO:absl:Successful: Asserted and Converted `t5-base` from HF and saved it in cache folder /tmp/tf_transformers_cache/t5-base


In [4]:
# Check PT conversion
import shutil

model_name = 't5-base'

# Remove any previously cached copy of this model before calling get_model.
# shutil.rmtree(..., ignore_errors=True) behaves like `rm -rf` for a missing
# directory (no error) but does not depend on a POSIX shell being available.
shutil.rmtree(f"/tmp/tf_transformers_cache/{model_name}", ignore_errors=True)

# convert_fn_type='pt' selects the PyTorch conversion path.
model, config = T5Model.get_model(model_name=model_name, convert_fn_type='pt')

INFO:absl:Successful: Converted model using PT HF
INFO:absl:Successful: Saved model at /tmp/tf_transformers_cache/t5-base/ckpt-1
INFO:absl:Successful: Asserted and Converted `t5-base` from HF and saved it in cache folder /tmp/tf_transformers_cache/t5-base


In [4]:
import numpy as np
from transformers import T5Tokenizer

# Build the tokenizer from the same `model_name` used for the model cells;
# the generation cells below use it to tokenize their input text.
tokenizer = T5Tokenizer.from_pretrained(model_name)

In [5]:
# T5 text generation without caching
text = "summarize: studies have shown that owning a dog is good for you"

# Both decoding loops must take the same number of greedy steps so their
# outputs can be compared position by position at the end of the cell.
NUM_DECODE_STEPS = 10


def _greedy_pick(last_token_logits):
    """Pick the arg-max index along axis 1 together with its logit.

    Returns a pair ``(ids, scores)``: ``ids`` is the arg-max index cast to
    int32 and ``scores`` is the matching maximum value. Both get an axis of
    size 1 inserted at position 1 so that per-step results can be
    concatenated along axis 1.
    """
    ids = tf.cast(tf.expand_dims(tf.argmax(last_token_logits, axis=1), 1), tf.int32)
    scores = tf.expand_dims(tf.reduce_max(last_token_logits, axis=1), 1)
    return ids, scores


inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']
inputs['decoder_input_ids'] = tf.constant([[0]])  # decoding starts from token id 0

predictions_non_auto_regressive = []
# Despite the name, these lists hold the maximum of "last_token_logits",
# not normalised probabilities.
predictions_prob_non_auto_regressive = []

for _ in range(NUM_DECODE_STEPS):
    outputs = model(inputs)
    predicted_ids, predicted_scores = _greedy_pick(outputs["last_token_logits"])
    # Without a cache, the decoder input grows by one token per step and the
    # whole prefix is passed to the model again.
    inputs["decoder_input_ids"] = tf.concat([inputs["decoder_input_ids"], predicted_ids], axis=1)
    predictions_non_auto_regressive.append(predicted_ids)
    predictions_prob_non_auto_regressive.append(predicted_scores)
predictions_non_auto_regressive = tf.concat(predictions_non_auto_regressive, axis=1)
predictions_prob_non_auto_regressive = tf.concat(predictions_prob_non_auto_regressive, axis=1)

# Text generation with cache
# NOTE: this rebinds `model` to the auto-regressive variant for the rest of
# the notebook.
model, config = T5Model.get_model(model_name=model_name, convert_fn_type='pt', use_auto_regressive=True)

encoder_input_ids = inputs_hf['input_ids']
encoder_input_mask = inputs_hf['attention_mask']

batch_size = tf.shape(encoder_input_ids)[0]
seq_length = tf.shape(encoder_input_ids)[1]

# One start token (id 0) per batch row. tf.zeros builds the (batch_size, 1)
# int32 tensor directly instead of multiplying a Python list by a tensor.
decoder_input_ids = tf.zeros((batch_size, 1), dtype=tf.int32)

encoder_hidden_dim = config['embedding_size']
num_hidden_layers = config['num_hidden_layers']
num_attention_heads = config['num_attention_heads']
attention_head_size = config['attention_head_size']

# Zero-filled placeholders for the first step; after each step they are
# replaced by the tensors the model returns under the same keys.
encoder_hidden_states = tf.zeros((batch_size, seq_length, encoder_hidden_dim))

cache_shape = (
    num_hidden_layers,
    batch_size,
    num_attention_heads,
    seq_length,
    attention_head_size,
)
decoder_all_cache_key = tf.zeros(cache_shape)
decoder_all_cache_value = tf.zeros(cache_shape)

inputs = {}
inputs['encoder_input_ids'] = encoder_input_ids
inputs['encoder_input_mask'] = encoder_input_mask
inputs['decoder_input_ids'] = decoder_input_ids
inputs['encoder_hidden_states'] = encoder_hidden_states
inputs['decoder_all_cache_key'] = decoder_all_cache_key
inputs['decoder_all_cache_value'] = decoder_all_cache_value

predictions_auto_regressive = []
predictions_prob_auto_regressive = []

for _ in range(NUM_DECODE_STEPS):
    outputs = model(inputs)
    predicted_ids, predicted_scores = _greedy_pick(outputs["last_token_logits"])
    # With the cache, only the newest token is fed back; the cache tensors and
    # encoder states returned by the model are carried into the next step.
    inputs["decoder_input_ids"] = predicted_ids
    inputs["decoder_all_cache_key"] = outputs["decoder_all_cache_key"]
    inputs["decoder_all_cache_value"] = outputs["decoder_all_cache_value"]
    inputs["encoder_hidden_states"] = outputs["encoder_hidden_states"]
    predictions_auto_regressive.append(predicted_ids)
    predictions_prob_auto_regressive.append(predicted_scores)
predictions_auto_regressive = tf.concat(predictions_auto_regressive, axis=1)
predictions_prob_auto_regressive = tf.concat(predictions_prob_auto_regressive, axis=1)

#----------------------------------------------------------------------------------------#
# Both decoding strategies must produce the same token ids and (within
# tolerance) the same maximum logits.
tf.assert_equal(predictions_non_auto_regressive, predictions_auto_regressive)
# rtol/atol/equal_nan are np.allclose's defaults, so the pass/fail criterion is
# unchanged; assert_allclose additionally reports the mismatching values.
np.testing.assert_allclose(
    predictions_prob_non_auto_regressive.numpy(),
    predictions_prob_auto_regressive.numpy(),
    rtol=1e-5,
    atol=1e-8,
    equal_nan=False,
)


Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.encoder_decoder.EncoderDecoder object at 0x7f1840cc0c10> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7f1840e7e340>).



Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.encoder_decoder.EncoderDecoder object at 0x7f1840cc0c10> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7f1840e7e340>).
INFO:absl:Successful: Model checkpoints matched and loaded from /tmp/tf_transformers_cache/t5-base


In [6]:
# Text generation using saved_model with TextDecoder

import tempfile
import shutil
from tf_transformers.text import TextDecoderSeq2Seq

text = "summarize: studies have shown that owning a dog is good for you"

# Export the current `model` into a fresh temporary directory and load it back.
saved_model_dir = tempfile.mkdtemp()
model.save_as_serialize_module(saved_model_dir, overwrite=True)
loaded = tf.saved_model.load(saved_model_dir)

# Wrap the reloaded model in the seq2seq decoder; start token id 0 is for T5.
decoder = TextDecoderSeq2Seq(model=loaded, decoder_start_token_id=0)

inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {
    'encoder_input_ids': inputs_hf['input_ids'],
    'encoder_input_mask': inputs_hf['attention_mask'],
}

decoder_results = decoder.decode(inputs, mode='greedy', max_iterations=10, eos_id=-100)

# Token ids the greedy decode is expected to return for `text`.
expected_ids = [[[293, 53, 3, 9, 1782, 19, 207, 21, 25, 6]]]
assert decoder_results['predicted_ids'].numpy().tolist() == expected_ids



INFO:tensorflow:Assets written to: /tmp/tmpj4_wsrz6/assets


INFO:tensorflow:Assets written to: /tmp/tmpj4_wsrz6/assets


In [22]:
# Text generation using saved_model with TextDecoderSerializable
#
# Depends on `model`, `tokenizer`, `saved_model_dir` and `expected_ids`
# defined by earlier cells.

import tempfile
import shutil
# This import was commented out, so the cell raised NameError on a fresh
# kernel; it is restored so the cell works under Restart & Run All.
from tf_transformers.text import TextDecoderSerializableSeq2Seq

# Decoding options (greedy, 10 iterations) are fixed at construction time here,
# rather than being passed to a decode() call.
decoder = TextDecoderSerializableSeq2Seq(
    model=model,
    decoder_start_token_id=0,
    max_iterations=10,
    mode="greedy",
    do_sample=False,
    eos_id=-100,
)

# Save
decoder_model = decoder.get_model()
decoder_model.save_serialized(saved_model_dir, overwrite=True)

# Load
loaded_decoder = tf.saved_model.load(saved_model_dir)
model_pb_decoder = loaded_decoder.signatures['serving_default']

text = "summarize: studies have shown that owning a dog is good for you"
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']

decoder_results_serialized = model_pb_decoder(**inputs)

# The comparison result used to be computed and discarded, so a mismatch could
# never fail the notebook; assert it so this cell is a real validation step.
assert np.allclose(decoder_results_serialized['predicted_ids'].numpy(), expected_ids)



INFO:tensorflow:Assets written to: /tmp/tmpj4_wsrz6/assets


INFO:tensorflow:Assets written to: /tmp/tmpj4_wsrz6/assets
