In [1]:
import tensorflow as tf

from tf_transformers.models import GPT2Encoder, GPT2Model
from tf_transformers.core import LegacyModule
from transformers import GPT2Tokenizer

from tf_transformers.data import pad_dataset
from tf_transformers.text import TextDecoder
from tf_transformers.text import TextDecoderSerializable

import json
import time
import os

# Load tf_transformers Model

Configs are in the `model_configs` folder in the root of the repo.


We will be using `convert_albert_hf_to_tf_transformers` function.

Always use `is_training=False` to load the model and pass this model for conversion.

Do not enable `pipeline_mode='auto-regressive'` while converting, because the variable names

differ due to `tf.cond` usage.

# Steps:

1. Load a model using **`is_training=False`**

2. Convert it using conversion functions from `tf_transformers.utils`

3. Save the `checkpoint` .

4. For auto-regressive tasks (text generation) use **`pipeline_mode='auto-regressive'`**

   along with **`is_training=False`** and load from the checkpoint.

In [2]:
# Build the GPT2 encoder by hand, starting from the default GPT2 config
# bundled with tf_transformers.
from tf_transformers.models.model_configs.gpt2 import gpt2 as config

config = config.config

# GPT2Encoder is a tf_transformers layer (an extension of the Keras Layer),
# not a Keras model; get_model() returns the model object used from here on.
#
# pipeline_mode="auto-regressive" has to be enabled when the model is going to
# be exported as a saved_model for text generation, because TF needs the extra
# cache inputs in the saved_model format for doing efficient caching.
model = GPT2Encoder(
    name="gpt2",
    config=config,
    mask_mode=config["mask_mode"],
    pipeline_mode="auto-regressive",
    is_training=False,
).get_model()

# NOTE(review): machine-specific absolute path; point it at your own checkpoint directory.
model.load_checkpoint("/Users/PRVATE/tf_transformers_models/gpt2/")

INFO:absl:Initialized Variables



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x109db96a0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x16f8a6430>).



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x109db96a0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x16f8a6430>).
INFO:absl:Succesful: Model checkpoints matched


In [3]:
from tf_transformers.models import GPT2Model

# Load GPT2 through the GPT2Model helper; its result is unpacked here as
# (model_layer, model, model_config). Note that this rebinds `model`.
model_layer, model, model_config = GPT2Model(
    pipeline_mode="auto-regressive",
    is_training=False,
    model_name="gpt2",
)

# NOTE(review): machine-specific absolute path; point it at your own checkpoint directory.
model.load_checkpoint("/Users/PRVATE/tf_transformers_models/gpt2/")

INFO:absl:Initialized Variables



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14ce765e0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x15b5ab610>).



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14ce765e0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x15b5ab610>).
INFO:absl:Succesful: Model checkpoints matched


# Save the model .pb (saved_model)

To make use of the benefits of serialization, we have to save the model.

Now, why not simply call `model_tf_transformers.save("model_pb", save_format='tf')`?

The reason is that when we save the model that way, TF somehow ignores the proper output node names.

It assigns arbitrary names like `['gpt_output1', etc...]`.

To preserve the names in the `saved_model`, we have a small wrapper called `LegacyModule`.

In [4]:
# Export the model as a saved_model (.pb) so the proper serialization can be used.
# The export directory name is kept in `saved_model_dir` because later cells load from it.
saved_model_dir = "model_pb"
model.save_as_serialize_module(saved_model_dir, overwrite=True)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: model_pb/assets


INFO:tensorflow:Assets written to: model_pb/assets


# Tokenizer 
LegacyAI has an in-built tokenizer. You can use it, but it is not recommended.

The main difference is how we handle `SPECIAL TOKENS`. Apart from that, it's the same.

Recommended: use the **HuggingFace tokenizer**.

For tf_transformers tokenizer usage check **`tf_transformers/tests/notebooks/tokenizers`**

In [6]:
# Load the pretrained "gpt2" tokenizer from HuggingFace transformers.
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")


# Convert text to tokens (for GPT2 Model)
@pad_dataset
def tokenizer_fn(tokenizer, text_list):
    """Encode a list of texts into the input dict for the GPT2 model.

    A tokenizer fn should return a dict holding every primary key the model
    requires; no padding is required here (presumably the `pad_dataset`
    decorator takes care of it — TODO confirm). For GPT2 only ``input_ids``
    is produced, e.g. for two texts:

        {"input_ids": [[1, 2], [3, 4, 5]]}

    Args:
        tokenizer: object exposing ``encode(text)`` that returns the token
            ids for one text (a HuggingFace ``GPT2Tokenizer`` in this notebook).
        text_list: a list of text.

    Returns:
        dict with the single key ``"input_ids"`` mapping to a list with one
        unpadded token-id sequence per input text, in the same order.
    """
    return {"input_ids": [tokenizer.encode(text) for text in text_list]}


# Text Generation APIs (Seq2Seq)

We will benchmark on gpt2 model with following approaches.

1. Use the model in `saved_model_dir` with the **`TextDecoder`** API. This API consumes the model

   **(supports `saved_model`, `tf.keras.Model`, `hub.KerasLayer`)**. The recommended input is a `saved_model`

   or a `hub.KerasLayer`. The **`TextDecoder`** API is a **pure Python function** that uses `for` loops for decoding.


2. Use **`TextDecoderSerializable`**. This is a `tf.while_loop` implementation. The advantage is that

   we can serialize the entire operation as it is, so your decoding method becomes part of the `saved_model`

   graph.
   

In [7]:
# Build the TextDecoder (Python for-loop decoding) on top of the exported GPT2 model.
#
# TextDecoder accepts either a tf.keras.Model or a loaded saved_model;
# the saved_model (<model_pb>) is the recommended choice for performance.
loaded = tf.saved_model.load(saved_model_dir)
decoder = TextDecoder(model=loaded)

# Save Greedy Decoding (serialized model)

1. We can also use `mode="beam"` and `mode="top_k_top_p"` for beam search and top-K nucleus sampling,

   respectively.

In [8]:
# Export GPT2 together with its greedy decoding loop as a single saved_model.
saved_model_dir_strategy = "model_pb_temp"

decoder_layer = TextDecoderSerializable(
    model=model,
    mode="greedy",
    do_sample=False,
    max_iterations=None,  # the step count is passed per call via the `iterations` input
    eos_id=-100,
)

# Turn the whole decoding operation into a model, wrap it in LegacyModule and save it.
decoder_model = decoder_layer.get_model()
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")

INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


In [9]:
# Prompts fed to both decoders in the comparison cells that follow.
text_list = ['Sachin Tendulkar is one of the finest',
             'I like to walk with my dog']


# Test Greedy Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [10]:
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: greedy decoding with the Python-loop TextDecoder.
decoder_results = decoder.decode(inputs, mode="greedy", max_iterations=25, eos_id=-100)

# Reload the saved_model that bundles GPT2 with the greedy decoder
# (possible because the decoder is a tf.while loop) and take its serving signature.
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures["serving_default"]

# The decoder was exported with max_iterations=None, so the number of
# iterations has to be supplied as an extra input on every call.
inputs_for_decoder = inputs.copy()
inputs_for_decoder["iterations"] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must produce the same token ids.
tf.assert_equal(
    tf.cast(decoder_results["predicted_ids"], tf.int32),
    decoder_results_serialized["predicted_ids"],
)

print("Sucess")

Sucess


# Beam Decoding

In [12]:
# Export GPT2 together with a beam-search decoding loop (beam_size=2).
# The directory name is the same one used for the greedy export.
saved_model_dir_strategy = "model_pb_temp"

decoder_layer = TextDecoderSerializable(
    model=model,
    mode="beam",
    beam_size=2,
    do_sample=False,
    max_iterations=None,  # the step count is passed per call via the `iterations` input
    eos_id=-100,
)

# Turn the whole decoding operation into a model, wrap it in LegacyModule and save it.
decoder_model = decoder_layer.get_model()
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")



INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test Beam Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [13]:
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: beam search with the Python-loop TextDecoder.
decoder_results = decoder.decode(
    inputs, mode="beam", max_iterations=25, beam_size=2, eos_id=-100
)

# Reload the saved_model that bundles GPT2 with the beam decoder
# (possible because the decoder is a tf.while loop) and take its serving signature.
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures["serving_default"]

# The decoder was exported with max_iterations=None, so the number of
# iterations has to be supplied as an extra input on every call.
inputs_for_decoder = inputs.copy()
inputs_for_decoder["iterations"] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must produce the same token ids.
tf.assert_equal(
    tf.cast(decoder_results["predicted_ids"], tf.int32),
    decoder_results_serialized["predicted_ids"],
)

print("Sucess")

Sucess


# top K top P

In [14]:
# Export GPT2 together with a top-k / top-p decoding loop (top_k=35, top_p=0.79).
# The directory name is the same one used for the earlier decoder exports.
saved_model_dir_strategy = "model_pb_temp"

decoder_layer = TextDecoderSerializable(
    model=model,
    mode="top_k_top_p",
    top_k=35,
    top_p=0.79,
    do_sample=False,
    max_iterations=None,  # the step count is passed per call via the `iterations` input
    eos_id=-100,
)

# Turn the whole decoding operation into a model, wrap it in LegacyModule and save it.
decoder_model = decoder_layer.get_model()
decoder_module = LegacyModule(decoder_model)
decoder_module.save(saved_model_dir_strategy)
print("Saved")

INFO:tensorflow:Assets written to: model_pb_temp/assets


INFO:tensorflow:Assets written to: model_pb_temp/assets


Saved


# Test top K top P Decoding

Let's test whether the results we obtained using **`TextDecoder`** match those from **`TextDecoderSerializable`**.

In [15]:
inputs = tokenizer_fn(tokenizer, text_list)

# Reference result: top-k / top-p decoding with the Python-loop TextDecoder.
decoder_results = decoder.decode(
    inputs, mode="top_k_top_p", max_iterations=25, top_k=35, top_p=0.79, eos_id=-100
)

# Reload the saved_model that bundles GPT2 with the top-k / top-p decoder
# (possible because the decoder is a tf.while loop) and take its serving signature.
loaded_decoder = tf.saved_model.load(saved_model_dir_strategy)
model_pb_decoder = loaded_decoder.signatures["serving_default"]

# The decoder was exported with max_iterations=None, so the number of
# iterations has to be supplied as an extra input on every call.
inputs_for_decoder = inputs.copy()
inputs_for_decoder["iterations"] = tf.constant([[25]])

decoder_results_serialized = model_pb_decoder(**inputs_for_decoder)

# Both decoders must produce the same token ids.
tf.assert_equal(
    tf.cast(decoder_results["predicted_ids"], tf.int32),
    decoder_results_serialized["predicted_ids"],
)

print("Sucess")

Sucess


In [19]:
# !rm -r model_ckpt/
# !rm -r model_pb/
# !rm -r model_pb_temp/
# !rm -rf dummy/