In [1]:
# This notebook is intented to test, some of the
# results validation of T5 model 



In [2]:
import tensorflow as tf
import numpy as np
from tf_transformers.models import  T5Model
from transformers import T5Tokenizer

In [3]:
# Globals
model_name = 't5-small'
DECODER_START_ID = 0
DECODER_EOS_ID = 1
tokenizer = T5Tokenizer.from_pretrained(model_name)

### 1. Check TF Conversion

In [4]:
# Check TF conversion

# !rm -rf /tmp/tf_transformers_cache/t5-base

model = T5Model.from_pretrained(model_name=model_name, convert_fn_type='tf')

You are using a model of type t5 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
INFO:absl:Successful ✅: Converted model using TF HF
INFO:absl:Successful: Saved model at /tmp/tf_transformers_cache/t5-small/ckpt-1
INFO:absl:Successful ✅: Asserted and Converted `t5-small` from HF and saved it in cache folder /tmp/tf_transformers_cache/t5-small


### 2.Check PT Conversion

In [5]:
# Check PT conversion

# !rm -rf /tmp/tf_transformers_cache/t5-base

model = T5Model.from_pretrained(model_name=model_name, convert_fn_type='pt')

You are using a model of type t5 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
INFO:absl:Successful: Model checkpoints matched and loaded from /tmp/tf_transformers_cache/t5-small


In [6]:
# Load model auto regressive
model_ar, config = T5Model.from_pretrained(model_name=model_name,
                                       decoder_kwargs={'use_auto_regressive': True},
                                       return_config=True)

You are using a model of type t5 to instantiate a model of type . This is not supported for all configurations of models and can yield errors.



Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.encoder_decoder.EncoderDecoder object at 0x7fd6af666430> and <keras.engine.input_layer.InputLayer object at 0x7fd6af4bf3a0>).



Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.encoder_decoder.EncoderDecoder object at 0x7fd6af666430> and <keras.engine.input_layer.InputLayer object at 0x7fd6af4bf3a0>).
INFO:absl:Successful: Model checkpoints matched and loaded from /tmp/tf_transformers_cache/t5-small


### 3. Test T5 with and without caching (Greedy)

In [9]:
# T5 text generation without caching
text = "translate English to German: The house is wonderful and we wish to be here :)"

# Create inputs
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']
inputs['decoder_input_ids']  = tf.constant([[DECODER_START_ID]])

# Iterate
predictions_non_auto_regressive = []
predictions_prob_non_auto_regressive = []

for i in range(13):
    outputs = model(inputs)
    predicted_ids = tf.cast(tf.expand_dims(tf.argmax(outputs["last_token_logits"], axis=1), 1), tf.int32)
    inputs["decoder_input_ids"] = tf.concat([inputs["decoder_input_ids"], predicted_ids], axis=1)
    predictions_non_auto_regressive.append(predicted_ids)
    predictions_prob_non_auto_regressive.append(
        tf.expand_dims(tf.reduce_max(outputs["last_token_logits"], axis=1), 1)
    )
predictions_non_auto_regressive = tf.concat(predictions_non_auto_regressive, axis=1)
predictions_prob_non_auto_regressive = tf.concat(predictions_prob_non_auto_regressive, axis=1)

#-------------------------------------------------------------------------------------------------------------#
# Text generation with cache
encoder_hidden_dim = config['embedding_size']
num_hidden_layers  = config['num_hidden_layers']
num_attention_heads = config['num_attention_heads']
attention_head_size = config['attention_head_size']

# Inputs
inputs_hf = tokenizer(text, return_tensors='tf')
encoder_input_ids = inputs_hf['input_ids']
encoder_input_mask = inputs_hf['attention_mask']

batch_size = tf.shape(encoder_input_ids)[0]
seq_length = tf.shape(encoder_input_ids)[1]

decoder_input_ids  = tf.reshape([0] * batch_size.numpy(), (batch_size,1))

encoder_hidden_states = tf.zeros((batch_size, seq_length, encoder_hidden_dim))
decoder_all_cache_key = tf.zeros((num_hidden_layers, 
                                  batch_size, 
                                  num_attention_heads, 
                                  seq_length, 
                                  attention_head_size))
decoder_all_cahce_value = tf.zeros((num_hidden_layers, 
                                  batch_size, 
                                  num_attention_heads, 
                                  seq_length, 
                                  attention_head_size))


inputs = {}
inputs['encoder_input_ids'] = encoder_input_ids
inputs['encoder_input_mask']= encoder_input_mask
inputs['decoder_input_ids'] = decoder_input_ids
inputs['encoder_hidden_states'] = encoder_hidden_states
inputs['decoder_all_cache_key'] = decoder_all_cache_key
inputs['decoder_all_cache_value'] = decoder_all_cahce_value

# Iterate
predictions_auto_regressive = []
predictions_prob_auto_regressive = []

for i in range(13):
    outputs = model_ar(inputs)
    predicted_ids = tf.cast(tf.expand_dims(tf.argmax(outputs["last_token_logits"], axis=1), 1), tf.int32)
    inputs["decoder_input_ids"] = predicted_ids
    inputs["decoder_all_cache_key"] = outputs["decoder_all_cache_key"]
    inputs["decoder_all_cache_value"] = outputs["decoder_all_cache_value"]
    inputs["encoder_hidden_states"] = outputs["encoder_hidden_states"]
    predictions_auto_regressive.append(predicted_ids)
    predictions_prob_auto_regressive.append(
        tf.expand_dims(tf.reduce_max(outputs["last_token_logits"], axis=1), 1)
    )
predictions_auto_regressive = tf.concat(predictions_auto_regressive, axis=1)
predictions_prob_auto_regressive = tf.concat(predictions_prob_auto_regressive, axis=1)

#----------------------------------------------------------------------------------------#
expected_outputs = [[  644,  4598,   229, 19250,    64,   558,  7805,  1382,  1110,
            3,    10,    61,     1]]
tf.assert_equal(predictions_non_auto_regressive, predictions_auto_regressive)
assert(np.allclose(predictions_prob_non_auto_regressive.numpy(), 
            predictions_prob_auto_regressive.numpy(),expected_outputs) == True)
print("Success")

Success


### 4. Test T5 with TextDecoder Saved Model (Greedy)

In [7]:
# Text generation using saved_model with TextDecoder

import tempfile
import shutil
from tf_transformers.text import TextDecoder
text = "translate English to German: The house is wonderful and we wish to be here :)"

# Save as saved model
saved_model_dir = tempfile.mkdtemp()
model_ar.save_as_serialize_module(saved_model_dir, overwrite=True)

# Load saved model
loaded   = tf.saved_model.load(saved_model_dir)
decoder  = TextDecoder(
    model = loaded, 
    decoder_start_token_id = DECODER_START_ID # for t5
)

# Inputs
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']

decoder_results = decoder.decode(inputs, 
               mode='greedy', 
               max_iterations=13, 
               eos_id=1)

expected_outputs = [[  644,  4598,   229, 19250,    64,   558,  7805,  1382,  1110,
            3,    10,    61,     1]]
assert(decoder_results['predicted_ids'].numpy().tolist()[0] == expected_outputs)



INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


### 5. Test T5 with TextDecoder Saved Model (Beam)

In [8]:
# Beam check
decoder_results = decoder.decode(inputs, 
               mode='beam',
               num_beams=3,
               max_iterations=13, 
               eos_id=1)
top_prediction = decoder_results['predicted_ids'].numpy().tolist()[0][0]
assert([top_prediction] == expected_outputs)

### 6. Test T5 with TextDecoder Saved Model (Top K top P)

In [9]:
decoder_results = decoder.decode(inputs, 
               mode='top_k_top_p',
               num_return_sequences=1,
                                 top_k=100,
                                 top_p=0.6,
               max_iterations=13, 
               eos_id=1)

### 7. Test T5 with TextDecoderSerializable (Greedy)


In [24]:
# Text generation using saved_model with TextDecoderSerializable

import tempfile
import shutil
from tf_transformers.text import TextDecoderSerializable

# loaded   = tf.saved_model.load(saved_model_dir)
decoder  = TextDecoderSerializable(
    model = model_ar,
    decoder_start_token_id = DECODER_START_ID,
    max_iterations=15,
    mode="greedy",
    do_sample=False,
    eos_id=DECODER_EOS_ID
)

# Save
decoder_model = decoder.get_model()
decoder_model.save_serialized(saved_model_dir, overwrite=True)

# Load
loaded_decoder   = tf.saved_model.load(saved_model_dir)
model_pb_decoder = loaded_decoder.signatures['serving_default']

text = "translate English to German: The house is wonderful and we wish to be here :)"
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']


decoder_results_serialized = model_pb_decoder(**inputs)
expected_outputs = [[  644,  4598,   229, 19250,    64,   558,  7805,  1382,  1110,
            3,    10,    61,     1]]
assert(decoder_results_serialized['predicted_ids'].numpy().tolist()[0] == expected_outputs)



INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


### 8. Test T5 with TextDecoderSerializable (Beam)


In [None]:
# Text generation using saved_model with TextDecoderSerializable

import tempfile
import shutil
from tf_transformers.text import TextDecoderSerializable

# loaded   = tf.saved_model.load(saved_model_dir)
decoder  = TextDecoderSerializable(
    model = model_ar,
    decoder_start_token_id = DECODER_START_ID,
    max_iterations=15,
    num_beams=3,
    mode="beam",
    do_sample=False,
    eos_id=DECODER_EOS_ID
)

# Save
decoder_model = decoder.get_model()
decoder_model.save_serialized(saved_model_dir, overwrite=True)

# Load
loaded_decoder   = tf.saved_model.load(saved_model_dir)
model_pb_decoder = loaded_decoder.signatures['serving_default']

text = "translate English to German: The house is wonderful and we wish to be here :)"
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']


decoder_results_serialized = model_pb_decoder(**inputs)
top_prediction = decoder_results_serialized['predicted_ids'].numpy().tolist()[0][0]
assert([top_prediction] == expected_outputs)


### 9. Test T5 with TextDecoderSerializable (Top K top P)


In [31]:
import tempfile
import shutil
from tf_transformers.text import TextDecoderSerializable

# loaded   = tf.saved_model.load(saved_model_dir)
decoder  = TextDecoderSerializable(
    model = model_ar,
    decoder_start_token_id = DECODER_START_ID,
    max_iterations=15,
    top_k=100,
    top_p=0.7,
    mode="top_k_top_p",
    do_sample=False,
    eos_id=DECODER_EOS_ID
)

# Save
decoder_model = decoder.get_model()
decoder_model.save_serialized(saved_model_dir, overwrite=True)

# Load
loaded_decoder   = tf.saved_model.load(saved_model_dir)
model_pb_decoder = loaded_decoder.signatures['serving_default']

text = "translate English to German: The house is wonderful and we wish to be here :)"
inputs_hf = tokenizer(text, return_tensors='tf')
inputs = {}
inputs['encoder_input_ids'] = inputs_hf['input_ids']
inputs['encoder_input_mask'] = inputs_hf['attention_mask']


decoder_results_serialized = model_pb_decoder(**inputs)



INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


INFO:tensorflow:Assets written to: /tmp/tmpxvbn3gk2/assets


### 10. Test T5 lite


In [None]:
model = Model.from_pretrained(
    model_name=MODEL_NAME,
    convert_fn_type='tf',
    encoder_kwargs={'batch_size': 1, 'sequence_length': 32},
    decoder_kwargs={'batch_size': 1, 'sequence_length': 32},
)

tempdir = tempfile.mkdtemp()
model.save_serialized(tempdir, overwrite=True)

converter = tf.lite.TFLiteConverter.from_saved_model("{}".format(tempdir))  # path to the SavedModel directory
converter.experimental_new_converter = True

tflite_model = converter.convert()
open("{}/converted_model.tflite".format(tempdir), "wb").write(tflite_model)

# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="{}/converted_model.tflite".format(tempdir))
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get result
# encoder input_ids
interpreter.set_tensor(
    input_details[0]['index'],
    tf.random.uniform(input_details[0]['shape'], minval=0, maxval=100, dtype=tf.int32),
)
# input_mask
interpreter.set_tensor(input_details[1]['index'], tf.ones(input_details[1]['shape'], dtype=tf.int32))

# decoder input ids
interpreter.set_tensor(
    input_details[2]['index'],
    tf.random.uniform(input_details[2]['shape'], minval=0, maxval=100, dtype=tf.int32),
)
interpreter.invoke()
tflite_output = interpreter.get_tensor(output_details[-1]['index'])

tf.debugging.assert_equal(tflite_output.shape, (1, 32, 32128))
logging.info("Test: TFlite Conversion. ✅")
shutil.rmtree(tempdir)