In [1]:
import sys

# Make the local tf_transformers checkout importable from this notebook.
# NOTE: this is a machine-specific absolute path -- point it at your own checkout.
TF_TRANSFORMERS_SRC = "/Users/PRVATE/Documents/tf_transformers/src/"

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if TF_TRANSFORMERS_SRC not in sys.path:
    sys.path.append(TF_TRANSFORMERS_SRC)

In [5]:
from transformers import TFGPT2Model
from tf_transformers.models import GPT2Encoder

import tensorflow as tf
import json

from tf_transformers.core import LegacyModule
from tf_transformers.utils import convert_gpt2_hf_to_tf_transformers
import os

In [3]:
# Load the reference Hugging Face GPT-2 model.

# Always do this: reset Keras' global state before building a new model.
tf.keras.backend.clear_session()

# Directory that holds a local copy of the HF model files. Leave it empty
# ("" or None) to pass the bare model name to `from_pretrained` instead.
local_dir = "/Users/PRVATE/HUggingFace_Models/"
hf_model_name = "gpt2"

# `hf_model_location` must be bound on both branches: it used to be assigned
# only when `local_dir` was truthy, so an empty `local_dir` ended in a
# NameError on the `from_pretrained` call below.
if local_dir:
    # os.path.join also copes with a `local_dir` that lacks a trailing slash.
    hf_model_location = os.path.join(local_dir, hf_model_name)
else:
    hf_model_location = hf_model_name

model_hf = TFGPT2Model.from_pretrained(hf_model_location)

All model checkpoint layers were used when initializing TFGPT2Model.

All the layers of TFGPT2Model were initialized from the model checkpoint at /Users/PRVATE/HUggingFace_Models/gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


In [6]:
# Load tf_transformers model
# Most config we will be providing

# Default configs for the model

model_config_dir = "/Users/PRVATE/Documents/tf_transformers/model_configs/"
model_name = "gpt2_base"
config_location = os.path.join(model_config_dir, model_name, "config.json")
# Read the config through a context manager so the file handle is closed as
# soon as parsing finishes (a bare `json.load(open(...))` never closes it
# explicitly).
with open(config_location) as config_file:
    config = json.load(config_file)

# Always do this
tf.keras.backend.clear_session()

# tf_transformers Layer (an extension of Keras Layer)
# This is not Keras model, but extension of keras Layer

# No `pipeline_mode` is passed here: this plain model is used for the weight
# conversion and the output sanity check. For Auto Regressive tasks
# ( text-generation ) the layer is rebuilt later with
# pipeline_mode='auto-regressive' before it is exported as a saved_model.

model_layer = GPT2Encoder(
    config=config,
    name="gpt2",
    mask_mode=config["mask_mode"],
    is_training=False,
)

# Convert to tf.keras.Model
model_tf_transformers = model_layer.get_and_load_model(model_dir=None)
# Copy the Hugging Face weights into the freshly built tf_transformers model.
convert_gpt2_hf_to_tf_transformers(model_hf, model_tf_transformers, config)

INFO:absl:We are overwriding `is_training` is False to `is_training`                     to True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_1:0", shape=(None, None), dtype=int32)
INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning variables weights . Total 100


In [7]:
# Load model variables from HF to tf_transformers
# NOTE(review): the previous cell already ends with this exact call, so on a
# fresh "Run All" the conversion is executed twice -- TODO confirm whether one
# of the two calls can be dropped.
convert_gpt2_hf_to_tf_transformers(model_hf, model_tf_transformers, config)

INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning variables weights . Total 100


In [8]:
# Sanity check: run a fixed batch through the converted model and print the
# sum and shape of every tensor output so they can be compared with the
# reference values listed at the bottom of this cell.
# Please have a look at tf_transformers/extra/*.py for reference values

input_ids = tf.constant([[1, 9, 10, 11, 23], [1, 22, 234, 432, 2349]])

# Only `input_ids` is fed to the model; the `input_mask` / `input_type_ids`
# tensors that used to be built here were never used and have been dropped.
inputs = {
    "input_ids": input_ids,
}

results_tf_transformers = model_tf_transformers(inputs)
for output_name, output_value in results_tf_transformers.items():
    # List-valued outputs are skipped: only single tensors are summarised.
    if isinstance(output_value, list):
        continue
    print(output_name, "-->", tf.reduce_sum(output_value), "-->", output_value.shape)


# Reference values for GPT2 Base (the printed sums may differ from these in
# the last float32 digits):

# token_embeddings --> tf.Tensor(2371.2751, shape=(), dtype=float32) --> (2, 5, 768)
# token_logits --> tf.Tensor(-34781260.0, shape=(), dtype=float32) --> (2, 5, 50257)
# last_token_logits --> tf.Tensor(-8346980.5, shape=(), dtype=float32) --> (2, 50257)

token_embeddings --> tf.Tensor(2371.2754, shape=(), dtype=float32) --> (2, 5, 768)
token_logits --> tf.Tensor(-34781264.0, shape=(), dtype=float32) --> (2, 5, 50257)
last_token_logits --> tf.Tensor(-8346981.0, shape=(), dtype=float32) --> (2, 50257)


In [9]:
# Optional: persist the converted weights as a TensorFlow checkpoint.
# The manager writes into "model_ckpt" and keeps only the newest checkpoint.

checkpoint = tf.train.Checkpoint(model=model_tf_transformers)
manager = tf.train.CheckpointManager(
    checkpoint,
    directory="model_ckpt",
    max_to_keep=1,
)
manager.save()

latest_checkpoint_path = manager.latest_checkpoint
print("Saved at {}".format(latest_checkpoint_path))

Saved at model_ckpt/ckpt-1


In [10]:
# Rebuild the model in auto-regressive mode and restore the converted weights
# from the checkpoint saved earlier, so it can then be exported as a saved_model.
# If you want to use the model for Auto Regressive tasks ( text-generation ),
# you have to enable pipeline_mode='auto-regressive'.
# Because TF needs extra cache inputs in the saved_model format for doing efficient caching

model_layer = GPT2Encoder(
    config=config,
    name="gpt2",
    mask_mode=config["mask_mode"],
    is_training=False,
    pipeline_mode="auto-regressive",
)

# Convert to tf.keras.Model
model_tf_transformers = model_layer.get_and_load_model(model_dir=None)

# And now load the checkpoints from the previously saved model

checkpoint = tf.train.Checkpoint(model=model_tf_transformers)
manager = tf.train.CheckpointManager(checkpoint, directory="model_ckpt", max_to_keep=1)
status = checkpoint.restore(manager.latest_checkpoint)

# Important: `assert_existing_objects_matched()` reports a mismatch by raising
# AssertionError, not by returning False, so it is called as a plain statement
# instead of being used as an `if` condition. Reaching the print below means
# the assertion passed.
status.assert_existing_objects_matched()
print("Model checkpoint matched")

INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_2:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:past_length ---> Tensor("past_length:0", shape=(1, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:input_ids ---> Tensor("input_ids_3:0", shape=(None, None), dtype=int32)
INFO:absl:all_cache_key ---> Tensor("all_cache_key_1:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:all_cache_value ---> Tensor("all_cache_value_1:0", shape=(None, None, 12, None, 64), dtype=float32)
INFO:absl:past_length ---> Tensor("past_length_1:0", shape=(1, None), dtype=int32)



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14622d1c0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x147ee9be0>).



Two checkpoint references resolved to different objects (<tf_transformers.models.gpt2.GPT2Encoder object at 0x14622d1c0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x147ee9be0>).


Model checkpoint matched


In [11]:
# Inspect the model's inputs. For the auto-regressive model the recorded
# output lists the cache tensors (`all_cache_key`, `all_cache_value`,
# `past_length`) alongside `input_ids`.
model_tf_transformers.input

{'input_ids': <tf.Tensor 'input_ids_3:0' shape=(None, None) dtype=int32>,
 'all_cache_key': <tf.Tensor 'all_cache_key_1:0' shape=(None, None, 12, None, 64) dtype=float32>,
 'all_cache_value': <tf.Tensor 'all_cache_value_1:0' shape=(None, None, 12, None, 64) dtype=float32>,
 'past_length': <tf.Tensor 'past_length_1:0' shape=(1, None) dtype=int32>}

In [22]:
# The model is now loaded, so save it as a saved_model (.pb).

# The problem with calling `model.save` directly (see the commented-out line below)
# is that TF somehow changes the names of the output nodes.
# We need to preserve those names so the TextDecoder class stays consistent across all models.
# So we use LegacyModule, which takes care of this.


# model_tf_transformers.save("model_pb", save_format='tf')

In [12]:
# Wrap the Keras model in LegacyModule and write it out under "model_pb".
saved_model_dir = "model_pb"
gpt2_module = LegacyModule(model_tf_transformers)
gpt2_module.save(saved_model_dir)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: model_pb/assets


INFO:tensorflow:Assets written to: model_pb/assets


In [None]:
# Done :-) See the 1_text_generation_gpt2 notebook for how to do text generation.