In [1]:
import tensorflow as tf

from tf_transformers.models import T5Encoder
from tf_transformers.models import EncoderDecoder

from tf_transformers.core import LegacyModule

from tf_transformers.utils import convert_t5_hf_to_tf_transformers
from tf_transformers.data import pad_dataset

from tf_transformers.text import TextDecoderSeq2Seq
from tf_transformers.text import TextDecoderSerializableSeq2Seq

import json
import time
import os

# Load tf_transformers Model

Configs are in the `model_configs` folder in the root of the repo.


We will be using the `convert_t5_hf_to_tf_transformers` function.

Always use `is_training=False` to load the model and pass this model for conversion.

Do not enable `pipeline_mode='auto-regressive'` while converting, because the variable names

differ due to `tf.cond` usage.

# Steps:

1. Load a model using **`is_training=False`**

2. Convert it using conversion functions from `tf_transformers.utils`

3. Save the `checkpoint` .

4. For auto-regressive tasks (text generation) use **`pipeline_mode='auto-regressive'`**

   along with **`is_training=False`** and load from the checkpoint.

### 1. Default Way of loading a model

In [2]:
# Shortcut: build the T5 model directly from its model name.
# NOTE(review): the recorded output of this cell is
# `AttributeError: 'T5Encoder' object has no attribute 'bidirectional'`, i.e. this
# shortcut failed when the notebook was last run. The cells that follow build the
# encoder and decoder explicitly from a config instead.
from tf_transformers.models import T5Model
model = T5Model(model_name='t5-small')

AttributeError: 'T5Encoder' object has no attribute 'bidirectional'

# T5 Specifics
1. T5 is a **Seq2Seq (Encoder Decoder) Model**. It has an encoder part and a decoder part.

2. In tf_transformers, you can convert any **Encoder to Decoder** with a few keyword arguments (`is_decoder=True`).

In [4]:
# Load tf_transformers model
# Most of the configuration is provided explicitly below.

# Default configs for the model
# NOTE(review): this is a machine-specific absolute path; point it at the
# `model_configs` folder of your own checkout before running.
model_config_dir = '/Users/PRVATE/Documents/tf_transformers/model_configs/'
model_name = 't5_small'
config_location = os.path.join(model_config_dir, model_name, 'config.json')
# Use a context manager so the file handle is closed as soon as the JSON is parsed.
with open(config_location) as config_file:
    config = json.load(config_file)


# Encoder

# The same `config` dict is reused for both layers, so the encoder-specific
# values are set right before the encoder is built.
config["bidirectional"] = True
config["mask_mode"] = "user_defined"
encoder_layer = T5Encoder(
    config=config, mask_mode=config["mask_mode"], is_training=False, name="t5_encoder"
)


# Decoder

# T5 needs bidirectional = False
# Decoder mask mode has to be causal (auto-regressive)
# encoder_embedding_layer (share embedding from encoder)
config["bidirectional"] = False
config["mask_mode"] = "causal"
decoder_layer = T5Encoder(
    config=config,
    name="t5_decoder",
    mask_mode=config["mask_mode"],
    is_decoder=True,
    is_training=False,
    share_encoder_embeddings=True,
    encoder_embedding_layer=encoder_layer._embedding_layer,
)

INFO:absl:We are overwriding `is_training` is False to `is_training` to                     True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:We are overwriding `is_training` is False to `is_training` to                     True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_hidden_states ---> Tensor("encoder_hidden_states:0", shape=(None, None, 512), dtype=float32)
INFO:absl:decoder_encoder_mask ---> Tensor("decoder_encoder_mask:0", shape=(None, None, None), dtype=float32)
INFO:absl:Initialized Variables


# Create T5 (Encoder Decoder) model
We will use above initialized (randomly) encoder and decoder layer and pass it to `EncoderDecoder` API

**Note: If you want to use EncoderDecoder for text-generation tasks, make sure `decoder_layer`
has `is_training=False` and `pipeline_mode='auto-regressive'` enabled.
After that, set `is_training=False` in the `EncoderDecoder` model.**

In [5]:
# Train mode (Keras Layer/Legacy Layer)
# Wrap the encoder and decoder layers built above into one EncoderDecoder and
# immediately turn it into a model via `get_model()`; this is the object that
# receives the converted weights in the next cell.
enc_dec_model = EncoderDecoder(
    name="t5_small",
    encoder=encoder_layer,
    decoder=decoder_layer,
    use_dropout=False,
    is_training=True,
).get_model()

INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)


In [6]:
# Convert
# `model_hf` is not created by any earlier cell in this notebook, so on a fresh
# kernel the conversion call failed with NameError. Load the HuggingFace
# TensorFlow T5 checkpoint here so the cell is self-contained.
# NOTE(review): TFT5Model is assumed to be the class the converter expects —
# confirm against `convert_t5_hf_to_tf_transformers`.
from transformers import TFT5Model

model_hf = TFT5Model.from_pretrained("t5-small")

# Copy the HuggingFace weights into the tf_transformers encoder-decoder model.
convert_t5_hf_to_tf_transformers(model_hf, enc_dec_model, config)

INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning ENCODER variables weights 51
INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning DECODER variables weights 81


# Save checkpoints

In [7]:
# Persist the converted weights as a TensorFlow checkpoint.
# `max_to_keep=1` means only the most recent checkpoint is retained in the directory.
checkpoint_dir = 'model_ckpt'
checkpoint = tf.train.Checkpoint(model=enc_dec_model)
manager = tf.train.CheckpointManager(checkpoint, max_to_keep=1, directory=checkpoint_dir)

manager.save()
saved_message = "Saved at {}".format(manager.latest_checkpoint)
print(saved_message)

Saved at model_ckpt/ckpt-1


# Check for reference

Have a look at `tf_transformers/extra/*.py` for reference values, to make sure model

has loaded correctly

In [8]:
# Sanity check of the converted weights: run one fixed example through the model
# and print the sum of every output tensor. Compare against the reference values
# kept in tf_transformers/extra/*.py (copied at the bottom of this cell).

sample_token_ids = [[8774, 6, 82, 1782, 19, 5295]]
inputs_sample = {
    "encoder_input_ids": tf.constant(sample_token_ids),
    "encoder_input_mask": tf.constant([[1, 1, 1, 1, 1, 1]]),
    # The same token ids are fed to the decoder side for this check.
    "decoder_input_ids": tf.constant(sample_token_ids),
}

res = enc_dec_model(inputs_sample)
for output_name, output_value in res.items():
    print(output_name, tf.reduce_sum(output_value))

# Reference

# token_embeddings tf.Tensor(-126.0661, shape=(), dtype=float32)
# token_logits tf.Tensor(-104059470.0, shape=(), dtype=float32)
# last_token_logits tf.Tensor(-19631904.0, shape=(), dtype=float32)

token_embeddings tf.Tensor(-126.0661, shape=(), dtype=float32)
all_layer_token_embeddings tf.Tensor(-192505.6, shape=(), dtype=float32)
token_logits tf.Tensor(-104059470.0, shape=(), dtype=float32)
last_token_logits tf.Tensor(-19631904.0, shape=(), dtype=float32)


# Load Model for Auto-Regressive tasks

**Text generation / Auto-regressive decoding** requires caching of the `K` and `V` values.

This means that, for the model to make use of serialization, these values have to be a part of the model.

So, K and V are extra inputs required for inference **(only in the case of text generation)**.

As a result, training and testing need different pipelines (for auto-regressive tasks).

**`Note: We have the necessary wrappers to do all of this; the user doesn't have to worry about any of it.`**

In [9]:
# Encoder layer (nothing to cache on the encoder side)
config["mask_mode"] = "user_defined"
config["bidirectional"] = True
encoder_layer = T5Encoder(
    config=config, mask_mode=config["mask_mode"], is_training=False, name="t5_encoder"
)

# Decoder
# Set pipeline_mode = 'auto-regressive'
# Only by that, we can enable caching in decoder side


config["bidirectional"] = False
config["mask_mode"] = "causal"
decoder_layer = T5Encoder(
    config=config,
    name="t5_decoder",
    # Read the mask mode from the config, as the conversion cell does.
    mask_mode=config["mask_mode"],
    is_decoder=True,
    is_training=False,
    use_dropout=False,
    pipeline_mode="auto-regressive",
    share_encoder_embeddings=True,
    encoder_embedding_layer=encoder_layer._embedding_layer,
)


# Inference mode: `is_training=False` here, unlike the model used for conversion.
enc_dec_model = EncoderDecoder(
    encoder=encoder_layer,
    decoder=decoder_layer,
    is_training=False,
    name="t5_small",
    use_dropout=False,
)
enc_dec_model = enc_dec_model.get_model()

# And now load the checkpoint from the previously saved model
# (`checkpoint_dir` comes from the "Save checkpoints" cell).

checkpoint = tf.train.Checkpoint(model=enc_dec_model)
manager = tf.train.CheckpointManager(
    checkpoint, directory=checkpoint_dir, max_to_keep=1
)
status = checkpoint.restore(manager.latest_checkpoint)

# Important: `assert_existing_objects_matched()` raises AssertionError on a
# mismatch rather than returning False, so call it as a statement; the message
# below is only reached when the assertion passed.
status.assert_existing_objects_matched()
print("Model checkpoints matched")

INFO:absl:We are overwriding `is_training` is False to `is_training` to                     True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_hidden_states ---> Tensor("encoder_hidden_states:0", shape=(None, None, 512), dtype=float32)
INFO:absl:decoder_encoder_mask ---> Tensor("decoder_encoder_mask:0", shape=(None, None, None), dtype=float32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key:0", shape=(None, None, 8, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value:0", shape=(None, None, 8, None, 64), dtype=float32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids 


Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.EncoderDecoder object at 0x14a3a4fd0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14f3ee4f0>).



Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.EncoderDecoder object at 0x14a3a4fd0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14f3ee4f0>).


Model checkpoints matched


# Save the model .pb (saved_model)

To make use of the benefits of serialization, we have to save the model.

Now, why not simply call `model_tf_transformers.save("model_pb", save_format='tf')`?

The reason is that when we save the model that way, TF does not preserve the proper output node names.

Instead, it assigns generic names like `['gpt_output1', etc...]`.

To preserve the names in the `saved_model` , we have small wrapper function called `LegacyModule`

In [10]:
# Export the auto-regressive model as a SavedModel (.pb) so that it can be served
# from its serialized form. The model is wrapped in `LegacyModule` before saving;
# according to the notes above, this is what keeps the proper output names in the
# exported `saved_model`.
saved_model_dir = "model_pb"
tf_transformers_module = LegacyModule(enc_dec_model)
tf_transformers_module.save(saved_model_dir)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: model_pb/assets


INFO:tensorflow:Assets written to: model_pb/assets


# Tokenizer 
LegacyAI has an in-built tokenizer. You can use it, but it is not recommended.

The main difference is how we handle `SPECIAL TOKENS`. Apart from that, it's the same.

We recommend using the **HuggingFace tokenizer**.

For tf_transformers tokenizer usage check **`tf_transformers/tests/notebooks/tokenizers`**

In [11]:
# HuggingFace tokenizer for the `t5-small` checkpoint; it is handed to
# `tokenizer_fn` in the decoding test cells to encode the input texts.
from transformers import T5Tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-small")


# Convert text to tokens (for T5 Model)
@pad_dataset
def tokenizer_fn(tokenizer, text_list):
    """Encode a list of texts into the encoder inputs expected by the T5 model.

    The returned dict is left unpadded; the original note for this function
    says no padding is required here (presumably the `pad_dataset` decorator
    takes care of it — confirm).

    Args:
        tokenizer: object whose `encode(text)` returns the list of token ids
            for one text.
        text_list: a list of text

    Returns:
        A dict holding one list per input text under each key, e.g.

        {'encoder_input_ids': [[1, 2, 3], [4, 5]],
         'encoder_input_mask': [[1, 1, 1], [1, 1]]}
    """
    input_ids = [tokenizer.encode(text) for text in text_list]
    # One mask entry of 1 per token id; built directly instead of going
    # through a TensorFlow tensor and back to a Python list.
    input_mask = [[1] * len(token_ids) for token_ids in input_ids]
    return {"encoder_input_ids": input_ids, "encoder_input_mask": input_mask}


# Text Generation APIs (Seq2Seq)

We will benchmark the T5 model with the following approaches.

1. Use the model in `saved_model_dir` with the **`TextDecoderSeq2Seq`** API. This API will consume the model

   **(supports `saved_model`, `tf.keras.Model`, `hub.KerasLayer`)**. Recommended is `saved_model`
   
   or `hub.KerasLayer`. The **`TextDecoderSeq2Seq`** API is a **pure python function**, which has for loops for decoding.
   
   
2. We will use **`TextDecoderSerializableSeq2Seq`**. This is a `tf.while_loop` implementation. The advantage is that

   we can serialize the entire operation as it is, so your decoding method will be a part of the `saved_model`
   
   graph.
   

In [40]:
# TextDecoderSeq2Seq: decoding driven by a Python for loop over the model.
# Either a tf.keras.Model or a loaded saved_model can be passed in; the
# saved_model (<model_pb>) is the recommended choice for performance.

# Serving signature of the T5 SavedModel exported earlier
loaded = tf.saved_model.load(saved_model_dir)
model_pb = loaded.signatures['serving_default']

# Sizes of the encoder/decoder stacks handed to the text decoder
t5_dimensions = dict(
    encoder_hidden_size=512,
    decoder_hidden_size=512,
    decoder_num_attention_heads=8,
    decoder_num_layers=6,
)
decoder = TextDecoderSeq2Seq(model=model_pb, decode_start_token_id=0, **t5_dimensions)

# Save Greedy Decoding (serialized model)

1. We can use `mode="beam"` and `mode="top_k_top_p"` for Beam search and top K nucleus sampling

   respectively

In [13]:
# Export the model together with its greedy decoding loop as one SavedModel
saved_model_dir_strategy = 'model_pb_temp'

decoder_layer = TextDecoderSerializableSeq2Seq(
    model=enc_dec_model,
    # decoding strategy
    mode="greedy",
    do_sample=False,
    decode_start_token_id=0,
    eos_id=-100,
    # left as None at export time, so the number of iterations has to be
    # supplied as an input on every call of the saved model
    max_iterations=None,
    # encoder / decoder sizes
    encoder_hidden_size=512,
    decoder_hidden_size=512,
    decoder_num_attention_heads=8,
    decoder_num_layers=6,
)
# Turn the whole decoding operation into a model
decoder_model = decoder_layer.get_model()

# Save it through LegacyModule, as done for the plain model
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")



INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test Greedy Decoding

Let's test whether the results we obtained using **`TextDecoderSeq2Seq`** match those of **`TextDecoderSerializableSeq2Seq`**.

In [43]:
# Two prompts, tokenized into the encoder inputs of the model
text_list = [
    "summarize: studies have shown that owning a dog is good for you",
    "translate: I love you so much",
]
inputs = tokenizer_fn(tokenizer, text_list)

# Reference run: greedy decoding with the Python-loop TextDecoderSeq2Seq
decoder_results = decoder.decode(inputs, eos_id=-100, max_iterations=25, mode='greedy')

# Run under test: the T5 model saved together with its greedy decoder
# (a tf.while loop, so both can be saved as one SavedModel)
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']

# The export used max_iterations = None in TextDecoderSerializableSeq2Seq,
# so the iteration count has to be passed on every call
inputs_for_decoder = inputs.copy()
inputs_for_decoder['iterations'] = tf.constant([[25]])
decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must predict exactly the same ids
reference_ids = tf.cast(decoder_results['predicted_ids'], tf.int32)
serialized_ids = decoder_results_serialized['predicted_ids']
tf.assert_equal(reference_ids, serialized_ids)

print("Sucess")

Sucess


# Beam Decoding

In [44]:
# Export the model together with its beam-search decoding loop as one SavedModel
saved_model_dir_strategy = 'model_pb_temp'

decoder_layer = TextDecoderSerializableSeq2Seq(
    model=enc_dec_model,
    # decoding strategy
    mode="beam",
    beam_size=2,
    do_sample=False,
    decode_start_token_id=0,
    eos_id=-100,
    # left as None at export time, so the number of iterations has to be
    # supplied as an input on every call of the saved model
    max_iterations=None,
    # encoder / decoder sizes
    encoder_hidden_size=512,
    decoder_hidden_size=512,
    decoder_num_attention_heads=8,
    decoder_num_layers=6,
)
# Turn the whole decoding operation into a model
decoder_model = decoder_layer.get_model()

# Save it through LegacyModule, as done for the plain model
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")



INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test beam Decoding

Let's test whether the results we obtained using **`TextDecoderSeq2Seq`** match those of **`TextDecoderSerializableSeq2Seq`**.

In [45]:
# Two prompts, tokenized into the encoder inputs of the model
text_list = [
    "summarize: studies have shown that owning a dog is good for you",
    "translate: I love you so much",
]
inputs = tokenizer_fn(tokenizer, text_list)

# Reference run: beam search with the Python-loop TextDecoderSeq2Seq
decoder_results = decoder.decode(
    inputs, eos_id=-100, max_iterations=25, mode='beam', beam_size=2
)

# Run under test: the T5 model saved together with its beam decoder
# (a tf.while loop, so both can be saved as one SavedModel)
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']

# The export used max_iterations = None in TextDecoderSerializableSeq2Seq,
# so the iteration count has to be passed on every call
inputs_for_decoder = inputs.copy()
inputs_for_decoder['iterations'] = tf.constant([[25]])
decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must predict exactly the same ids
reference_ids = tf.cast(decoder_results['predicted_ids'], tf.int32)
serialized_ids = decoder_results_serialized['predicted_ids']
tf.assert_equal(reference_ids, serialized_ids)

print("Sucess")

Sucess


# top K top P

In [46]:
# Export the model together with its top-k / top-p decoding loop as one SavedModel
saved_model_dir_strategy = 'model_pb_temp'

decoder_layer = TextDecoderSerializableSeq2Seq(
    model=enc_dec_model,
    # decoding strategy
    mode="top_k_top_p",
    top_k=25,
    top_p=0.75,
    do_sample=False,
    decode_start_token_id=0,
    eos_id=-100,
    # left as None at export time, so the number of iterations has to be
    # supplied as an input on every call of the saved model
    max_iterations=None,
    # encoder / decoder sizes
    encoder_hidden_size=512,
    decoder_hidden_size=512,
    decoder_num_attention_heads=8,
    decoder_num_layers=6,
)
# Turn the whole decoding operation into a model
decoder_model = decoder_layer.get_model()

# Save it through LegacyModule, as done for the plain model
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")

INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test top k top p Decoding

Let's test whether the results we obtained using **`TextDecoderSeq2Seq`** match those of **`TextDecoderSerializableSeq2Seq`**.

In [47]:
# Two prompts, tokenized into the encoder inputs of the model
text_list = [
    "summarize: studies have shown that owning a dog is good for you",
    "translate: I love you so much",
]
inputs = tokenizer_fn(tokenizer, text_list)

# Reference run: top-k / top-p decoding with the Python-loop TextDecoderSeq2Seq
decoder_results = decoder.decode(
    inputs, eos_id=-100, max_iterations=25, mode='top_k_top_p', top_k=25, top_p=0.75
)

# Run under test: the T5 model saved together with its top-k / top-p decoder
# (a tf.while loop, so both can be saved as one SavedModel)
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']

# The export used max_iterations = None in TextDecoderSerializableSeq2Seq,
# so the iteration count has to be passed on every call
inputs_for_decoder = inputs.copy()
inputs_for_decoder['iterations'] = tf.constant([[25]])
decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must predict exactly the same ids
reference_ids = tf.cast(decoder_results['predicted_ids'], tf.int32)
serialized_ids = decoder_results_serialized['predicted_ids']
tf.assert_equal(reference_ids, serialized_ids)

print("Sucess")

Sucess
