In [1]:
import sys

sys.path.append("/Users/PRVATE/Documents/tf_transformers/src/")

In [2]:
import tensorflow as tf
import tensorflow_hub as hub

from transformers import TFGPT2Model
from tf_transformers.models import GPT2Encoder

from tf_transformers.core import LegacyModule
from tf_transformers.tokenizer import GPT2Tokenizer

import json
import time

In [3]:
# Load the reference HuggingFace GPT-2 model.

# Reset Keras global state before building a model in this session.
tf.keras.backend.clear_session()

# Optional local directory that holds downloaded checkpoints. Set it to ""
# (or None) to pass the bare model name to `from_pretrained` instead.
local_dir = "/Users/PRVATE/HUggingFace_Models/"
hf_model_name = "gpt2"
# Always define `hf_model_location`: previously a falsy `local_dir` left it
# unassigned and the `from_pretrained` call below failed with NameError.
hf_model_location = local_dir + hf_model_name if local_dir else hf_model_name

model_hf = TFGPT2Model.from_pretrained(hf_model_location)

All model checkpoint layers were used when initializing TFGPT2Model.

All the layers of TFGPT2Model were initialized from the model checkpoint at /Users/PRVATE/HUggingFace_Models/gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


In [4]:
# Load the tf_transformers model.
# Most of the configuration is provided explicitly below.

# Default configs for the model
config_location = "../../configs/model_configs/" + "gpt2_base/" + "gpt2_config.json"
# Use a context manager so the config file handle is closed deterministically
# (the previous `json.load(open(...))` never closed it).
with open(config_location) as config_file:
    config = json.load(config_file)

# Reset Keras global state before building a model in this session.
tf.keras.backend.clear_session()

# tf_transformers Layer (an extension of the Keras Layer).
# This is not a Keras model, but an extension of a Keras Layer.

# If you want to use the model for auto-regressive tasks (text generation),
# you have to enable pipeline_mode='auto-regressive', because TF needs the
# extra cache inputs (also in the saved_model format) for efficient caching.

model_layer = GPT2Encoder(
    config=config,
    name="gpt2",
    mask_mode=config["mask_mode"],
    is_training=False,
    pipeline_mode="auto-regressive",
)

# Convert to tf.keras.Model
model_tf_transformers = model_layer.get_and_load_model(model_dir=None)

# And now load the checkpoints from the previously saved model

checkpoint = tf.train.Checkpoint(model=model_tf_transformers)
manager = tf.train.CheckpointManager(checkpoint, directory="model_ckpt", max_to_keep=1)
status = checkpoint.restore(manager.latest_checkpoint)

# Important: raise if objects that already exist in the model were not
# matched by the checkpoint, instead of silently keeping their initial values.
status.assert_existing_objects_matched()

INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:past_length ---> Tensor("past_length:0", shape=(1, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_1:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key_1:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value_1:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:past_length ---> Tensor("past_length_1:0", shape=(1, None), dtype=int32)



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14a46e5b0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14ac41ee0>).



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14a46e5b0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14ac41ee0>).


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x14aa712b0>

In [5]:
# The tf_transformers tokenizer used here is optional;
# the HuggingFace tokenizer library can be used instead.

# Vocabulary assets for GPT-2, relative to this notebook.
vocab_path = "../../tokenizer/tokenizer_vocab_models/gpt2/vocab.json"
merges_file_path = "../../tokenizer/tokenizer_vocab_models/gpt2/merges.txt"
reserved_vocab_path = "../../tokenizer/tokenizer_vocab_models/gpt2/reserved_tokens_map.json"

tokenizer = GPT2Tokenizer(
    vocab_path=vocab_path,
    merges_file_path=merges_file_path,
    reserved_tokens_map_file=reserved_vocab_path,
)

INFO:absl:Succesfully loaded reserved token vocab


In [6]:
def tokenizer_fn(text_list):
    """Encode a batch of raw strings into the input dict handed to the decoder.

    Args:
        text_list: a list of text strings.

    Returns:
        A dict with the single key "input_ids". Its value is a list with one
        entry per input text, each being element [1] of
        `tokenizer.encode(text)`. No padding is applied.

    The general contract of a tokenizer fn is to return a dict carrying every
    primary key the model requires, for example:

    {'input_ids': tf.constant([[1, 2]]),
     'input_mask': tf.constant([[1, 1]]),
     'input_type_ids': tf.constant([[1, 0]])}
    """
    encoded_ids = [tokenizer.encode(text)[1] for text in text_list]
    return {"input_ids": encoded_ids}

In [10]:
from tf_transformers.text import TextDecoder
from tf_transformers.text import TextDecoderSerializable

In [26]:
# Eager-mode decoder wrapping the tf_transformers model. Despite its name,
# this one instance is reused for the beam, greedy and top-k/top-p runs below.
decoder_layer_beam = TextDecoder(
    model=model_tf_transformers,
    tokenizer_fn=tokenizer_fn,
    # Cache geometry; 12 and 64 line up with the all_cache_key/all_cache_value
    # shapes logged when the model was built.
    num_layers=12,
    num_attention_heads=12,
    attention_state=64,
    # Only `input_ids` is supplied by tokenizer_fn.
    input_mask_ids=None,
    input_type_ids=None,
)

In [41]:
# Prompts shared by every decoding run in this notebook.
text_list = [
    "Sachin Tendulkar is one of the finest",
    "I like to walk with my dog",
]

In [42]:
# Beam Search

# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
result_beam = decoder_layer_beam.decode(
    text_list, max_iterations=25, beam_size=2, mode="beam", do_sample=False, eos_id=None
)
# Stop the timer right after decoding so optional printing is never timed.
end_time = time.perf_counter()
# Uncomment to print prompt + continuation for every beam. The snippet now
# reads from `result_beam` throughout (it used to iterate over an undefined
# `result`).
# for i in range(len(result_beam["input_ids"])):
#     for beam_predicted_ids in result_beam["predicted_ids"][i]:
#         print(
#             tokenizer.decode(
#                 tf.concat([result_beam["input_ids"][i], beam_predicted_ids], axis=0).numpy()
#             )
#         )
#         print("--------------")
print("Time taken {} seconds".format(end_time - start_time))
print('_______________________________________________________')




Time taken 3.284972667694092 seconds
_______________________________________________________


In [43]:
# Greedy Search

# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
result_greedy = decoder_layer_beam.decode(
    text_list, max_iterations=25, mode="greedy", do_sample=False, eos_id=None
)
# Stop the timer right after decoding so optional printing is never timed.
end_time = time.perf_counter()
# Uncomment to print prompt + continuation for every returned sequence. The
# snippet now reads from `result_greedy` (it used to mix an undefined `result`
# with the beam-search output).
# for i in range(len(result_greedy["input_ids"])):
#     for predicted_ids in result_greedy["predicted_ids"][i]:
#         print(
#             tokenizer.decode(
#                 tf.concat([result_greedy["input_ids"][i], predicted_ids], axis=0).numpy()
#             )
#         )
#         print("--------------")
print("Time taken {} seconds".format(end_time - start_time))
print('_______________________________________________________')


Time taken 2.6476550102233887 seconds
_______________________________________________________


In [44]:
# Display the raw greedy-search result dict as the cell output.
result_greedy

{'iterations': 24,
 'input_ids': [[50, 620, 259, 48664, 12171, 283, 318, 530, 286, 262, 18822],
  [40, 588, 284, 2513, 351, 616, 3290]],
 'predicted_ids': <tf.Tensor: shape=(2, 1, 25), dtype=int32, numpy=
 array([[[1938,  287,  262,  995,   13,  679,  318,  257,  845,  922,
          2137,   11,  475,  339,  318,  407,  257, 1049, 2137,   13,
           679,  318,  257,  845,  922]],
 
        [[  11,  475,  314,  836,  470,  588,  284, 2513,  351,  616,
          3290,   13,  314,  588,  284, 2513,  351,  616, 3290,   11,
           475,  314,  836,  470,  588]]], dtype=int32)>,
 'matched_eos_pos': <tf.Tensor: shape=(2,), dtype=int32, numpy=array([-1, -1], dtype=int32)>}

In [45]:
# Top K top P Search

# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
result_top_k_top_p = decoder_layer_beam.decode(
    text_list,
    max_iterations=25,
    mode="top_k_top_p",
    top_k=50,
    top_p=0.7,
    do_sample=False,
    eos_id=None,
    num_return_sequences=2,
)
# Stop the timer right after decoding so optional printing is never timed.
end_time = time.perf_counter()
# Uncomment to print prompt + continuation for every returned sequence. The
# snippet now reads from `result_top_k_top_p` (it used to mix an undefined
# `result` with the beam-search output).
# for i in range(len(result_top_k_top_p["input_ids"])):
#     for predicted_ids in result_top_k_top_p["predicted_ids"][i]:
#         print(
#             tokenizer.decode(
#                 tf.concat([result_top_k_top_p["input_ids"][i], predicted_ids], axis=0).numpy()
#             )
#         )
#         print("--------------")
print("Time taken {} seconds".format(end_time - start_time))
print('_______________________________________________________')


Time taken 3.1377787590026855 seconds
_______________________________________________________


In [46]:
# Display the raw top-k/top-p result dict as the cell output.
result_top_k_top_p

{'iterations': 24,
 'input_ids': [[50, 620, 259, 48664, 12171, 283, 318, 530, 286, 262, 18822],
  [40, 588, 284, 2513, 351, 616, 3290]],
 'predicted_ids': <tf.Tensor: shape=(2, 2, 25), dtype=int32, numpy=
 array([[[1938,  287,  262,  995,   13,  679,  318,  257,  845,  922,
          2137,   11,  475,  339,  318,  407,  257, 1049, 2137,   13,
           679,  318,  257,  845,  922],
         [1938,  287,  262,  995,   13,  679,  318,  257,  845,  922,
          2137,   11,  475,  339,  318,  407,  257, 1049, 2137,   13,
           679,  318,  257,  845,  922]],
 
        [[  11,  475,  314,  836,  470,  588,  284, 2513,  351,  616,
          3290,   13,  314,  588,  284, 2513,  351,  616, 3290,   11,
           475,  314,  836,  470,  588],
         [  11,  475,  314,  836,  470,  588,  284, 2513,  351,  616,
          3290,   13,  314,  588,  284, 2513,  351,  616, 3290,   11,
           475,  314,  836,  470,  588]]], dtype=int32)>,
 'matched_eos_pos': <tf.Tensor: shape=(4,), dtype=i

In [52]:
# Serializable decoder configured for greedy search; decoding options are
# fixed at construction time instead of being passed to a decode() call.
decoder_layer_serializable = TextDecoderSerializable(
    model_tf_transformers,
    input_name_list=["input_ids"],
    max_iterations=25,
    num_attention_heads=12,
    num_layers=12,
    attention_state=64,
    mode="greedy",
    do_sample=False,
    eos_id=-100,
    input_mask_ids=None,
    input_type_ids=None,
)

inputs_for_serializable = tokenizer_fn(text_list)

# The prompts have different lengths: convert the ragged id lists into a
# rectangular tensor, filling the missing positions with -1.
inputs_for_serializable["input_ids"] = tf.ragged.constant(inputs_for_serializable["input_ids"]).to_tensor(-1)
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
results_serializable_greedy = decoder_layer_serializable(inputs_for_serializable)
end_time = time.perf_counter()
print("Time taken {} seconds".format(end_time - start_time))

Time taken 2.552328109741211 seconds


In [53]:
# Serializable decoder configured for beam search (beam_size=2); this rebinds
# `decoder_layer_serializable`, replacing any previously built decoder.
decoder_layer_serializable = TextDecoderSerializable(
    model_tf_transformers,
    input_name_list=["input_ids"],
    max_iterations=25,
    num_attention_heads=12,
    num_layers=12,
    attention_state=64,
    mode="beam",
    do_sample=False,
    beam_size=2,
    eos_id=-100,
    input_mask_ids=None,
    input_type_ids=None,
)

inputs_for_serializable = tokenizer_fn(text_list)
# The prompts have different lengths: convert the ragged id lists into a
# rectangular tensor, filling the missing positions with -1.
inputs_for_serializable["input_ids"] = tf.ragged.constant(inputs_for_serializable["input_ids"]).to_tensor(-1)
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
results_serializable_beam = decoder_layer_serializable(inputs_for_serializable)
end_time = time.perf_counter()
print("Time taken {} seconds".format(end_time - start_time))

Time taken 3.2864420413970947 seconds


In [54]:
# Serializable decoder configured for top-k/top-p decoding with two returned
# sequences per prompt; this rebinds `decoder_layer_serializable`.
decoder_layer_serializable = TextDecoderSerializable(
    model_tf_transformers,
    input_name_list=["input_ids"],
    max_iterations=25,
    num_attention_heads=12,
    num_layers=12,
    attention_state=64,
    mode="top_k_top_p",
    do_sample=False,
    num_return_sequences=2,
    top_k=50,
    top_p=0.7,
    eos_id=-100,
    input_mask_ids=None,
    input_type_ids=None,
)

inputs_for_serializable = tokenizer_fn(text_list)
# The prompts have different lengths: convert the ragged id lists into a
# rectangular tensor, filling the missing positions with -1.
inputs_for_serializable["input_ids"] = tf.ragged.constant(inputs_for_serializable["input_ids"]).to_tensor(-1)
# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
results_serializable_top_k_top_p = decoder_layer_serializable(inputs_for_serializable)
end_time = time.perf_counter()
print("Time taken {} seconds".format(end_time - start_time))

Time taken 3.2622859477996826 seconds


In [55]:
# Check that the two decoder implementations produced identical token ids for
# each decoding mode. Each pair keeps the argument order of the original
# checks.
for left, right in (
    (result_greedy, results_serializable_greedy),
    (result_beam, results_serializable_beam),
    (results_serializable_top_k_top_p, result_top_k_top_p),
):
    tf.assert_equal(left['predicted_ids'], right['predicted_ids'])