In [1]:
import sys

# Make the local tf_transformers source checkout importable.
# NOTE(review): this is a machine-specific absolute path; adjust it for your environment.
sys.path.append("/Users/PRVATE/Documents/tf_transformers/src/")

In [2]:
import tensorflow as tf
import tensorflow_hub as hub

from transformers import TFAlbertModel
from tf_transformers.models import AlbertEncoder

from tf_transformers.core import LegacyModule
from transformers import AlbertTokenizer

from tf_transformers.utils import convert_albert_hf_to_tf_transformers

from tf_transformers.data import pad_dataset
from tf_transformers.text import TextDecoder
from tf_transformers.text import TextDecoderSerializable

import json
import time
import os

In [3]:
import inspect
# Display the file AlbertEncoder was imported from, to confirm which
# tf_transformers installation is in use.
inspect.getfile(AlbertEncoder)

'/Users/PRVATE/Documents/tf_transformers/src/tf_transformers/models/albert.py'

# Load HuggingFace Model

We will load the HuggingFace model and use it to assign variables to the tf_transformers model.


We will be using `convert_albert_hf_to_tf_transformers` function.


In [3]:
# Load the HuggingFace Albert model; its weights are later copied into the
# tf_transformers model.
# Always do this

# `local_dir` is a directory holding a local copy of the model. Set it to
# None or "" to pass the bare model name to `from_pretrained` instead.
local_dir = "/Users/PRVATE/HUggingFace_Models/"
hf_model_name = "albert-base-v2"

# Always bind `hf_model_location`: it used to be assigned only when
# `local_dir` was truthy, so an empty `local_dir` ended in a NameError below.
if local_dir:
    hf_model_location = os.path.join(local_dir, hf_model_name)
else:
    hf_model_location = hf_model_name

model_hf = TFAlbertModel.from_pretrained(hf_model_location)

All model checkpoint layers were used when initializing TFAlbertModel.

All the layers of TFAlbertModel were initialized from the model checkpoint at /Users/PRVATE/HUggingFace_Models/albert-base-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.


# Load tf_transformers Model

Configs are in the `model_configs` folder in the root of the repo.


We will be using `convert_albert_hf_to_tf_transformers` function.

Always use `is_training=False` to load the model and pass this model for conversion.

Do not enable `pipeline_mode='auto-regressive'` while converting, because the variable names

differ due to `tf.cond` usage.

# Steps:

1. Load a model using **`is_training=False`**

2. Convert it using conversion functions from `tf_transformers.utils`

3. Save the `checkpoint` .

4. For auto-regressive tasks (text generation) use **`pipeline_mode='auto-regressive'`**

   along with **`is_training=False`** and load from the checkpoint.

In [4]:
# Load tf_transformers model
# Most config we will be providing

# Default configs for the model

model_config_dir = '/Users/PRVATE/Documents/tf_transformers/model_configs/'
model_name = 'albert_base_v2'
config_location = os.path.join(model_config_dir, model_name, 'config.json')
# Read the config through a context manager so the file handle is closed
# deterministically (the bare `open(...)` was never closed).
with open(config_location) as config_file:
    config = json.load(config_file)

# Always do this


# tf_transformers Layer (an extension of Keras Layer)
# This is not a Keras model, but an extension of a Keras Layer

# Save as saved_model
# If you want to use the model for auto-regressive tasks (text generation),
# you have to enable pipeline_mode='auto-regressive'.
# Because TF needs extra cache inputs in the saved_model format for doing efficient caching.
# It is left disabled here: this instance is only used for weight conversion.

model_layer = AlbertEncoder(
    config=config,
    name="albert",
    mask_mode=config["mask_mode"],
    is_training=False,
)

# Convert to tf.keras.Model, then copy the HuggingFace weights into it
model_tf_transformers = model_layer.get_and_load_model(model_dir=None)
convert_albert_hf_to_tf_transformers(model_hf, model_tf_transformers, config)

INFO:absl:We are overwriding `is_training` is False to `is_training` to                     True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:input_type_ids ---> Tensor("input_type_ids:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_1:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask_1:0", shape=(None, None), dtype=int32)
INFO:absl:input_type_ids ---> Tensor("input_type_ids_1:0", shape=(None, None), dtype=int32)
INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning variables weights. Total 25


# Save checkpoints

In [5]:
# If you want to save the model as checkpoints
# `checkpoint_dir` is also read by the later cell that restores these weights.

checkpoint_dir = 'model_ckpt'
checkpoint = tf.train.Checkpoint(model=model_tf_transformers)
# max_to_keep=1: only the most recent checkpoint is retained in `checkpoint_dir`
manager = tf.train.CheckpointManager(
    checkpoint, directory=checkpoint_dir, max_to_keep=1
)
manager.save()
print("Saved at {}".format(manager.latest_checkpoint))

Saved at model_ckpt/ckpt-1


# Check for reference

Have a look at `tf_transformers/extra/*.py` for reference values, to make sure model

has loaded correctly

In [6]:
# Run a small fixed batch through the converted model and print a per-output
# checksum (sum of values) and shape.
# Please have a look at tf_transformers/extra/*.py for reference values

input_ids = tf.constant([[1, 9, 10, 11, 23], [1, 22, 234, 432, 2349]])
input_mask = tf.ones_like(input_ids)
input_type_ids = tf.ones_like(input_ids)

inputs = dict(
    input_ids=input_ids,
    input_mask=input_mask,
    input_type_ids=input_type_ids,
)

results_tf_transformers = model_tf_transformers(inputs)
for k, r in results_tf_transformers.items():
    # List-valued outputs are not summarised; everything else is printed.
    if not isinstance(r, list):
        print(k, "-->", tf.reduce_sum(r), "-->", r.shape)

cls_output --> tf.Tensor(12.337963, shape=(), dtype=float32) --> (2, 768)
token_embeddings --> tf.Tensor(-193.53201, shape=(), dtype=float32) --> (2, 5, 768)
token_logits --> tf.Tensor(-23480.06, shape=(), dtype=float32) --> (2, 5, 30000)
last_token_logits --> tf.Tensor(-4466.8125, shape=(), dtype=float32) --> (2, 30000)


# Load Albert for Auto-Regressive tasks

**Text generation / auto-regressive decoding** requires caching of `K` and `V` values.

This means that, for the model to make use of serialization, these values have to be a part of the model.

So, K and V are extra inputs required for inference **(only in the case of text generation)**.

As a result, training and testing need different pipelines (for auto-regressive tasks).

**`Note: We have the necessary wrappers to do all of this; the user doesn't have to worry about any of it.`**

In [7]:
# Rebuild the encoder with pipeline_mode="auto-regressive" and load the
# weights that were converted and checkpointed earlier in this notebook.
model_layer = AlbertEncoder(
    config=config,
    name="albert",
    mask_mode=config["mask_mode"],
    is_training=False,
    pipeline_mode="auto-regressive",
)

# Convert to tf.keras.Model
model_tf_transformers = model_layer.get_and_load_model(model_dir=None)

# And now load the checkpoints from the previously saved model

checkpoint = tf.train.Checkpoint(model=model_tf_transformers)
manager = tf.train.CheckpointManager(
    checkpoint, directory=checkpoint_dir, max_to_keep=1
)
status = checkpoint.restore(manager.latest_checkpoint)

# Important
# `assert_existing_objects_matched()` signals a mismatch by raising
# AssertionError and otherwise returns the status object itself, so it is
# called as an assertion rather than used as an `if` condition.
status.assert_existing_objects_matched()
print("Model checkpoints matched")

INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_2:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask_2:0", shape=(None, None), dtype=int32)
INFO:absl:input_type_ids ---> Tensor("input_type_ids_2:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:past_length ---> Tensor("past_length:0", shape=(1, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_3:0", shape=(None, None), dtype=int32)
INFO:absl:input_mask ---> Tensor("input_mask_3:0", shape=(None, None), dtype=int32)
INFO:absl:input_type_ids ---> Tensor("input_type_ids_3:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key_1:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:a


Two checkpoint references resolved to different objects (<tf_transformers.models.albert.AlbertEncoder object at 0x145cc75b0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x145cdac70>).



Two checkpoint references resolved to different objects (<tf_transformers.models.albert.AlbertEncoder object at 0x145cc75b0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x145cdac70>).


Model checkpoints matched


# Save the model .pb (saved_model)

To make use of the benefits of serialization, we have to save the model.

Now, why not simply call `model_tf_transformers.save("model_pb", save_format='tf')`?

The reason is that when we save the model that way, TF will somehow ignore the proper output node names.

It will assign some arbitrary names like `gpt_output1`, etc.

To preserve the names in the `saved_model`, we have a small wrapper called `LegacyModule`.

In [8]:
# Save the model to .pb (saved_model) to make use of proper serialization.
# The model is wrapped in LegacyModule before saving.
saved_model_dir = "model_pb"
tf_transformers_module = LegacyModule(model_tf_transformers)
tf_transformers_module.save(saved_model_dir)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: model_pb/assets


INFO:tensorflow:Assets written to: model_pb/assets


In [19]:
@pad_dataset
def tokenizer_fn(tokenizer, text_list):
    """Tokenize a list of texts into the model's primary inputs.

    Tokenizer fn should return a dict (no padding is required here).
    Make sure you pass all primary keys required by the model.

    Args:
        tokenizer: object exposing `encode(text)`, which returns the token
            ids for one text.
        text_list: a list of text.

    Returns:
        A dict of nested Python lists, one unpadded row per input text:
        `input_ids` (token ids), `input_mask` (all ones) and
        `input_type_ids` (all zeros), e.g.

        {'input_ids': [[1, 2]],
         'input_mask': [[1, 1]],
         'input_type_ids': [[0, 0]]}
    """
    # Build plain nested lists directly. The previous round trip through
    # `tf.ragged.constant(...).numpy().tolist()` produced a list of numpy
    # arrays (not lists) whenever the rows had different lengths, so the
    # return type depended on the input.
    input_ids = [tokenizer.encode(text) for text in text_list]
    return {
        "input_ids": input_ids,
        "input_mask": [[1] * len(ids) for ids in input_ids],
        "input_type_ids": [[0] * len(ids) for ids in input_ids],
    }

# Tokenizer 
LegacyAI has a built-in tokenizer. You can use it, but it is not recommended.

The main difference is how we handle `SPECIAL TOKENS`. Apart from that, it's the same.

Recommended: use the **HuggingFace tokenizer**.

For tf_transformers tokenizer usage check **`tf_transformers/tests/notebooks/tokenizers`**

In [11]:
# AlbertTokenizer is also imported in the imports cell at the top of the notebook.
from transformers import AlbertTokenizer
# Load the tokenizer by model name "albert-base-v2".
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")


# Convert text to tokens (for Albert Model)
@pad_dataset
def tokenizer_fn(tokenizer, text_list):
    """Convert a list of texts into unpadded model inputs.

    Tokenizer fn should return a dict (no padding is required here).
    Make sure you pass all primary keys required by the model.

    Args:
        tokenizer: object exposing `encode(text)`, which returns the token
            ids for one text.
        text_list: a list of text.

    Returns:
        A dict with one row per input text:
        `input_ids` (token ids from `tokenizer.encode`), `input_mask`
        (ones, same length as the ids) and `input_type_ids` (zeros, same
        length as the ids), e.g.

        {'input_ids': [[1, 2]],
         'input_mask': [[1, 1]],
         'input_type_ids': [[0, 0]]}
    """
    input_ids = []
    input_mask = []
    input_type_ids = []
    for text in text_list:
        token_ids = tokenizer.encode(text)
        input_ids.append(token_ids)
        # Constant rows are built with list arithmetic; creating a TF tensor
        # per sentence just to convert it back to a list was needless work.
        input_mask.append([1] * len(token_ids))
        input_type_ids.append([0] * len(token_ids))
    return {
        "input_ids": input_ids,
        "input_mask": input_mask,
        "input_type_ids": input_type_ids,
    }


# Text Generation APIs (Seq2Seq)

We will benchmark on the Albert model with the following approaches.

1. Use the model in `saved_model_dir` with the **`TextDecoder`** API. This API will consume the model

   **(supports `saved_model`, `tf.keras.Model`, `hub.KerasLayer`)**. Recommended is `saved_model`

   or `hub.KerasLayer`. The **`TextDecoder`** API is a **pure Python function**, which uses `for` loops for decoding.


2. We will use **`TextDecoderSerializable`**. This is a `tf.while_loop` implementation. The advantage is

   that we can serialize the entire operation as it is, so your decoding method will be a part of the `saved_model`

   graph.
   

In [28]:
# Build a TextDecoder (Python for-loop decoding) over the saved model

# You can pass either tf.keras.Model or you can load saved_model and pass that also.
# Recommended for performance: the saved model (model_pb)

# This is the saved model of the Albert encoder written to `saved_model_dir`
loaded   = tf.saved_model.load(saved_model_dir)
model_pb = loaded.signatures['serving_default']

# NOTE(review): 768 / 12 / 12 presumably mirror the albert_base_v2 config
# loaded earlier — confirm against config.json.
decoder = TextDecoder(
    model = model_pb,
    hidden_dimension = 768,
    num_attention_heads=12,
    num_layers=12
)

In [42]:
# Prompts used by the decoding comparison cells below.
text_list = ['Sachin Tendulkar is one of the finest',
             'I like to walk with my dog']


# Save Greedy Decoding (serialized model)

1. We can use `mode="beam"` and `mode="top_k_top_p"` for beam search and top-K nucleus sampling

   respectively.

In [17]:
# Save the model together with its greedy decoding loop
saved_model_dir_strategy = 'model_pb_temp'

# max_iterations=None: the iteration count is supplied at call time through
# the `iterations` input (see the test cell that loads this model).
# NOTE(review): eos_id=-100 presumably never matches a real token id, so
# decoding is not cut short — confirm.
decoder_layer = TextDecoderSerializable(
    model = model_tf_transformers,
    hidden_dimension = 768,
    num_attention_heads=12,
    num_layers=12,
    max_iterations=None,
    mode="greedy",
    do_sample=False,
    eos_id=-100,
    input_mask_ids = 1,
    input_type_ids = 0
)
# Convert whole operation to a model
decoder_model  = decoder_layer.get_model()


# Wrap in LegacyModule before saving, as is done for the plain encoder model.
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")



INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test Greedy Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [30]:
# Tokenize the prompts; both decoders are fed these same inputs.
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: greedy decoding with the Python-loop TextDecoder.
decoder_results = decoder.decode(inputs, 
               mode='greedy', 
               max_iterations=25, 
               eos_id=-100)

# Load the SavedModel that bundles the Albert model with the greedy decoder
# (the decoder is a tf.while_loop, so the two can be saved together).
loaded_decoder   = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']


inputs_for_decoder = inputs.copy()
# We saved by passing max_iterations = None in TextDecoderSerializable,
# so we need to pass iterations every time.
# 25 matches max_iterations above so both decoders run the same number of steps.
inputs_for_decoder['iterations'] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must produce identical token ids; the reference result is
# cast to int32 before comparing.
tf.assert_equal( tf.cast(decoder_results['predicted_ids'], tf.int32)
                ,decoder_results_serialized['predicted_ids'])

print("Sucess")

Sucess


# Beam Decoding

In [35]:
# Save the model together with its beam-search decoding loop
# NOTE: this writes to the same directory name ('model_pb_temp') as the
# greedy decoder cell.
saved_model_dir_strategy = 'model_pb_temp'

# max_iterations=None: the iteration count is supplied at call time through
# the `iterations` input (see the test cell that loads this model).
decoder_layer = TextDecoderSerializable(
    model = model_tf_transformers,
    hidden_dimension = 768,
    num_attention_heads=12,
    num_layers=12,
    max_iterations=None,
    mode="beam",
    beam_size = 2,
    do_sample=False,
    eos_id=-100,
    input_mask_ids = 1,
    input_type_ids = 0
)
# Convert whole operation to a model
decoder_model  = decoder_layer.get_model()


decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")

INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test Beam Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [36]:
# Tokenize the prompts; both decoders are fed these same inputs.
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: beam search (beam_size=2) with the Python-loop TextDecoder.
decoder_results = decoder.decode(inputs, 
               mode='beam', 
               max_iterations=25, 
               beam_size = 2,
               eos_id=-100)

# Load the SavedModel that bundles the Albert model with the beam-search
# decoder (the decoder is a tf.while_loop, so the two can be saved together).
loaded_decoder   = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']


inputs_for_decoder = inputs.copy()
# We saved by passing max_iterations = None in TextDecoderSerializable,
# so we need to pass iterations every time.
# 25 matches max_iterations above so both decoders run the same number of steps.
inputs_for_decoder['iterations'] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must produce identical token ids; the reference result is
# cast to int32 before comparing.
tf.assert_equal( tf.cast(decoder_results['predicted_ids'], tf.int32)
                ,decoder_results_serialized['predicted_ids'])

print("Sucess")

Sucess


# top K top P

In [37]:
# Save the model together with its top-k / top-p decoding loop
# NOTE: this writes to the same directory name ('model_pb_temp') as the
# greedy and beam decoder cells.
saved_model_dir_strategy = 'model_pb_temp'

# max_iterations=None: the iteration count is supplied at call time through
# the `iterations` input (see the test cell that loads this model).
decoder_layer = TextDecoderSerializable(
    model = model_tf_transformers,
    hidden_dimension = 768,
    num_attention_heads=12,
    num_layers=12,
    max_iterations=None,
    mode="top_k_top_p",
    top_k = 25,
    top_p = 0.75,
    do_sample=False,
    eos_id=-100,
    input_mask_ids = 1,
    input_type_ids = 0
)
# Convert whole operation to a model
decoder_model  = decoder_layer.get_model()


decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")

INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test top K top P Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [38]:
# Tokenize the prompts; both decoders are fed these same inputs.
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: top-k / top-p decoding with the Python-loop TextDecoder,
# using the same top_k / top_p values as the serialized decoder.
decoder_results = decoder.decode(inputs, 
               mode='top_k_top_p', 
               max_iterations=25,
               top_k = 25,
               top_p =0.75,
               eos_id=-100)

# Load the SavedModel that bundles the Albert model with the top-k / top-p
# decoder (the decoder is a tf.while_loop, so the two can be saved together).
loaded_decoder   = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures['serving_default']


inputs_for_decoder = inputs.copy()
# We saved by passing max_iterations = None in TextDecoderSerializable,
# so we need to pass iterations every time.
# 25 matches max_iterations above so both decoders run the same number of steps.
inputs_for_decoder['iterations'] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# NOTE(review): exact equality assumes both decoders are deterministic in
# this mode (no random sampling) — confirm.
tf.assert_equal( tf.cast(decoder_results['predicted_ids'], tf.int32)
                ,decoder_results_serialized['predicted_ids'])

print("Sucess")

Sucess
