In [17]:
from transformers import TFT5Model, T5Tokenizer
from tf_transformers.models import T5Model
import tensorflow as tf
import json
import os

from tf_transformers.utils import convert_t5_hf_to_tf_transformers


In [18]:
# Load the Hugging Face T5 model and tokenizer that serve as the conversion source.

# Clear Keras' global session state before building models in this kernel.
tf.keras.backend.clear_session()

# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
model_hf_location = '/Users/PRVATE/HUggingFace_Models/t5-small/'
model_hf = TFT5Model.from_pretrained(model_hf_location)
# NOTE(review): the tokenizer is loaded by the name 't5-small' rather than from
# model_hf_location like the model — confirm both refer to the same checkpoint.
tokenizer = T5Tokenizer.from_pretrained('t5-small')

All model checkpoint layers were used when initializing TFT5Model.

All the layers of TFT5Model were initialized from the model checkpoint at /Users/PRVATE/HUggingFace_Models/t5-small/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5Model for predictions without further training.


In [15]:
# Display the configuration of the loaded Hugging Face model.
model_hf.config

T5Config {
  "_name_or_path": "/Users/PRVATE/HUggingFace_Models/t5-small/",
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 6,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to German: "
    },
    "translation_en_to_fr

In [20]:
# Build the tf_transformers T5 model for 't5-small' with default arguments.
# The call returns three values, unpacked here as (model_layer, model, config).
model_layer, model, config = T5Model(model_name='t5-small')

INFO:absl:Initialized Variables
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)


In [21]:
# Count the variables of the freshly built model (before weights are converted).
len(model.variables)

132

In [22]:
# Assign the Hugging Face weights to the tf_transformers model, using the encoder config.
# NOTE(review): the log output reports that the Hugging Face model is deleted to
# save memory — presumably `model_hf` should not be reused after this call; confirm.
convert_t5_hf_to_tf_transformers(model_hf, model, config['encoder'])

INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning ENCODER variables weights 51
INFO:absl:Deleteing huggingface model for saving memory
INFO:absl:Done assigning DECODER variables weights 81


In [23]:
# Sanity check of the converted weights on one fixed example: the same six token
# ids are fed to both the encoder and the decoder, and every model output is
# reduced to a single sum so it can be compared with the reference values below.
sample_token_ids = [[8774, 6, 82, 1782, 19, 5295]]
inputs_sample = {
    'encoder_input_ids': tf.constant(sample_token_ids),
    'encoder_input_mask': tf.constant([[1, 1, 1, 1, 1, 1]]),
    'decoder_input_ids': tf.constant(sample_token_ids),
}

res = model(inputs_sample)
for output_name, output_value in res.items():
    print(output_name, tf.reduce_sum(output_value))
    
# Reference 

# token_embeddings tf.Tensor(-125.860725, shape=(), dtype=float32)
# all_layer_token_embeddings tf.Tensor(-192280.42, shape=(), dtype=float32)
# token_logits tf.Tensor(-103934104.0, shape=(), dtype=float32)
# last_token_logits tf.Tensor(-19607434.0, shape=(), dtype=float32)

token_embeddings tf.Tensor(-125.860725, shape=(), dtype=float32)
all_layer_token_embeddings tf.Tensor(-192280.42, shape=(), dtype=float32)
token_logits tf.Tensor(-103934104.0, shape=(), dtype=float32)
last_token_logits tf.Tensor(-19607434.0, shape=(), dtype=float32)


In [24]:
# Save the converted weights as a checkpoint, overwriting any existing one.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
model.save_checkpoint("/Users/PRVATE/LegacyAI_models/t5-small/", overwrite=True)

INFO:absl:Saved model at /Users/PRVATE/LegacyAI_models/t5-small/ckpt-1


In [25]:
# Let's do a simple decode

# Greedy decoding ( Train mode)

# -3.5792198 2739 tf.Tensor(-12332620.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739]], shape=(1, 2), dtype=int32)
# -----------------
# 39.560272 4445 tf.Tensor(-14821873.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445]], shape=(1, 3), dtype=int32)
# -----------------
# -31.229576 436 tf.Tensor(-14894206.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436]], shape=(1, 4), dtype=int32)
# -----------------
# -67.342186 292 tf.Tensor(-16596788.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292]], shape=(1, 5), dtype=int32)
# -----------------
# -6.252556 58 tf.Tensor(-15607945.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58]], shape=(1, 6), dtype=int32)
# -----------------
# -118.80247 1 tf.Tensor(-18228060.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58    1]], shape=(1, 7), dtype=int32)
# -----------------
# -63.030956 1 tf.Tensor(-12004529.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58    1    1]], shape=(1, 8), dtype=int32)
# -----------------
# -16.752626 10 tf.Tensor(-9946732.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58    1    1   10]], shape=(1, 9), dtype=int32)
# -----------------
# -31.18003 3 tf.Tensor(-10146046.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58    1    1   10    3]], shape=(1, 10), dtype=int32)
# -----------------
# 1.3379288 2 tf.Tensor(-11181468.0, shape=(), dtype=float32)
# tf.Tensor([[   0 2739 4445  436  292   58    1    1   10    3    2]], shape=(1, 11), dtype=int32)

# Greedy decoding without a cache: every step re-runs the model on the encoder
# inputs plus all decoder ids produced so far, then appends the top-scoring token.
encoder_input_ids = tf.constant([[13959, 1566, 12, 2968, 10, 571, 625, 33, 25, 58]])
encoder_input_mask = tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
# Decoding starts from a single id 0.
decoder_input_ids = tf.constant([[0]])

# Only these three inputs are passed to the model in this mode, so no
# `encoder_hidden_states` tensor is built here.
encoder_decoder_inputs = {
    'encoder_input_ids': encoder_input_ids,
    'encoder_input_mask': encoder_input_mask,
    'decoder_input_ids': decoder_input_ids,
}
best_res = []

# The loop always runs 10 steps and does not stop when id 1 is produced, so the
# printed sequence can continue past that point.
for i in range(10):
    results = model(encoder_decoder_inputs)
    # top_k over the first batch row of 'last_token_logits'. Despite its name,
    # `best_prob` holds the highest raw logit (it can be negative), not a probability.
    best_prob, best_index = tf.nn.top_k(results['last_token_logits'][0], k=1)
    # Concatenate the chosen token onto the decoder ids used for the next step.
    new_dec_ids = tf.concat([encoder_decoder_inputs['decoder_input_ids'], [best_index]], axis=1)
    encoder_decoder_inputs['decoder_input_ids'] = new_dec_ids
    print(best_prob[0].numpy() , best_index[0].numpy(), tf.reduce_sum(results['last_token_logits']))
    best_res.append((best_prob[0].numpy(), best_index[0].numpy()))
    print(new_dec_ids)
    print('-----------------')

-3.5792198 2739 tf.Tensor(-12332620.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739]], shape=(1, 2), dtype=int32)
-----------------
39.560272 4445 tf.Tensor(-14821873.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445]], shape=(1, 3), dtype=int32)
-----------------
-31.229576 436 tf.Tensor(-14894206.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445  436]], shape=(1, 4), dtype=int32)
-----------------
-67.342186 292 tf.Tensor(-16596788.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445  436  292]], shape=(1, 5), dtype=int32)
-----------------
-6.252556 58 tf.Tensor(-15607945.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445  436  292   58]], shape=(1, 6), dtype=int32)
-----------------
-118.80247 1 tf.Tensor(-18228060.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445  436  292   58    1]], shape=(1, 7), dtype=int32)
-----------------
-63.030956 1 tf.Tensor(-12004529.0, shape=(), dtype=float32)
tf.Tensor([[   0 2739 4445  436  292   58    1    1]], shape=(1, 8), 

In [26]:
# Decode the encoder input ids back to text to show the prompt that was used.
tokenizer.decode(encoder_decoder_inputs['encoder_input_ids'][0])

'translate English to German: How old are you?'

In [28]:
# Decode the generated ids up to and including the first id 1; the ids are
# copied by hand from the greedy-decoding output above.
tokenizer.decode([   0, 2739, 4445,  436,  292,   58,    1])

'<pad> Wie alt sind Sie?</s>'

In [10]:
# Rebuild the model for inference with the 'auto-regressive' pipeline mode.
# This rebinds `model_layer`, `model` and `config`, replacing the model built earlier;
# the new model starts with freshly initialized variables until a checkpoint is loaded.
model_layer , model, config = T5Model(model_name='t5-small',
                                      is_training=False,
                                      pipeline_mode='auto-regressive')

INFO:absl:We are overwriding `is_training` is False to `is_training` to True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Initialized Variables
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:decoder_all_cache_key ---> Tensor("all_cache_key:0", shape=(None, None, 8, None, 64), dtype=float32)
INFO:absl:decoder_all_cache_value ---> Tensor("all_cache_value:0", shape=(None, None, 8, None, 64), dtype=float32)
INFO:absl:encoder_hidden_states ---> Tensor("encoder_hidden_states:0", shape=(None, None, 512), dtype=float32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(None, None), dtype=int32)
INFO:absl:encoder_i

In [11]:
# Load the converted weights into the auto-regressive model from the same
# directory the checkpoint was saved to earlier.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
model.load_checkpoint("/Users/PRVATE/LegacyAI_models/t5-small/")


Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.EncoderDecoder object at 0x14e38dbe0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14f057850>).



Two checkpoint references resolved to different objects (<tf_transformers.models.encoder_decoder.EncoderDecoder object at 0x14e38dbe0> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x14f057850>).
INFO:absl:Succesful: Model checkpoints matched


In [13]:
# Greedy decoding with the auto-regressive pipeline (cached decoder keys/values)

encoder_input_ids = tf.constant([[13959, 1566, 12, 2968, 10, 571, 625, 33, 25, 58]])
encoder_input_mask = tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
decoder_input_ids = tf.constant([[0]])

encoder_config = config['encoder']
embedding_size = encoder_config['embedding_size']
num_attention_heads = encoder_config['num_attention_heads']

# Batch size and encoder length are read from the input tensor instead of being
# repeated as literals, so they cannot drift from the ids defined above.
batch_size = encoder_input_ids.shape[0]
encoder_length = encoder_input_ids.shape[1]
sequence_length = 1

# All-zero initial value for the 'encoder_hidden_states' input; inside the loop
# it is replaced by the tensor the model returns under the same key.
encoder_hidden_states = tf.zeros((batch_size, encoder_length, embedding_size))

# Initial cache layout:
# (num_hidden_layers, batch_size, num_attention_heads, sequence_length,
#  embedding_size // num_attention_heads)
# NOTE(review): the decoder cache is sized from config['encoder'] — presumably the
# decoder shares the same layer/head counts; confirm.
all_cache_key = tf.zeros((encoder_config['num_hidden_layers'],
                          batch_size,
                          num_attention_heads,
                          sequence_length,
                          embedding_size // num_attention_heads))
all_cache_value = tf.zeros_like(all_cache_key)

encoder_decoder_inputs = {
    'encoder_input_ids': encoder_input_ids,
    'encoder_input_mask': encoder_input_mask,
    'decoder_input_ids': decoder_input_ids,
    'encoder_hidden_states': encoder_hidden_states,
    'decoder_all_cache_key': all_cache_key,
    'decoder_all_cache_value': all_cache_value,
}

best_res = []
# The loop always runs 10 steps; it does not stop when id 1 is produced.
for i in range(10):
    results = model(encoder_decoder_inputs)
    # top_k over the first batch row of 'last_token_logits'. Despite its name,
    # `best_prob` holds the highest raw logit (it can be negative), not a probability.
    best_prob, best_index = tf.nn.top_k(results['last_token_logits'][0], k=1)

    # Only the newly chosen token is fed to the next step (nothing is concatenated
    # here); the cache tensors and encoder states returned by the model are passed
    # back in alongside it.
    new_dec_ids = tf.expand_dims(best_index, axis=1)
    encoder_decoder_inputs['decoder_input_ids'] = new_dec_ids
    encoder_decoder_inputs['decoder_all_cache_key'] = results['decoder_all_cache_key']
    encoder_decoder_inputs['decoder_all_cache_value'] = results['decoder_all_cache_value']
    encoder_decoder_inputs['encoder_hidden_states'] = results['encoder_hidden_states']
    print(best_prob[0].numpy() , best_index[0].numpy())
    best_res.append((best_prob[0].numpy() , best_index[0].numpy()))
    print(new_dec_ids)
    print('-----------------')
    

-3.5792198 2739
tf.Tensor([[2739]], shape=(1, 1), dtype=int32)
-----------------
39.560226 4445
tf.Tensor([[4445]], shape=(1, 1), dtype=int32)
-----------------
-31.229538 436
tf.Tensor([[436]], shape=(1, 1), dtype=int32)
-----------------
-67.3422 292
tf.Tensor([[292]], shape=(1, 1), dtype=int32)
-----------------
-6.25251 58
tf.Tensor([[58]], shape=(1, 1), dtype=int32)
-----------------
-118.80236 1
tf.Tensor([[1]], shape=(1, 1), dtype=int32)
-----------------
-63.030945 1
tf.Tensor([[1]], shape=(1, 1), dtype=int32)
-----------------
-16.752628 10
tf.Tensor([[10]], shape=(1, 1), dtype=int32)
-----------------
-31.180037 3
tf.Tensor([[3]], shape=(1, 1), dtype=int32)
-----------------
1.3379478 2
tf.Tensor([[2]], shape=(1, 1), dtype=int32)
-----------------


In [37]:
# Train mode (Keras Layer/Legacy Layer)
# NOTE(review): `EncoderDecoder`, `encoder_layer` and `decoder_layer` are not
# imported or defined in any cell visible here — this cell depends on earlier
# kernel state and will not run on a fresh kernel as shown.
tf.keras.backend.clear_session()

# Construct the encoder-decoder wrapper and immediately take the model it
# produces; only that final model is kept under `enc_dec_model`.
enc_dec_model = EncoderDecoder(
    encoder=encoder_layer,
    decoder=decoder_layer,
    encoder_sequence_length=sequence_length,
    is_training=True,
    name='t5_small',
    use_dropout=False,
).get_model()

INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(1, 128), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(1, 128), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(1, 128), dtype=int32)
INFO:absl:Initialized Variables
INFO:absl:Inputs -->
INFO:absl:encoder_input_ids ---> Tensor("input_ids:0", shape=(1, 128), dtype=int32)
INFO:absl:encoder_input_mask ---> Tensor("input_mask:0", shape=(1, 128), dtype=int32)
INFO:absl:decoder_input_ids ---> Tensor("decoder_input_ids:0", shape=(1, 128), dtype=int32)


In [38]:
# Inspect the input tensors of the built model.
enc_dec_model.inputs

[<tf.Tensor 'decoder_input_ids:0' shape=(1, 128) dtype=int32>,
 <tf.Tensor 'input_ids:0' shape=(1, 128) dtype=int32>,
 <tf.Tensor 'input_mask:0' shape=(1, 128) dtype=int32>]

In [39]:
# Export the model in SavedModel ('tf') format to the relative directory "tmp_pb".
enc_dec_model.save("tmp_pb", save_format='tf')

INFO:tensorflow:Assets written to: tmp_pb/assets


INFO:tensorflow:Assets written to: tmp_pb/assets


In [40]:
# Create a TFLite converter from the SavedModel directory exported above.
model_dir = 'tmp_pb'
converter = tf.lite.TFLiteConverter.from_saved_model(model_dir) # path to the SavedModel directory
# Alternative kept for reference: opt out of the experimental converter.
# converter.experimental_new_converter = False
converter.experimental_new_converter = True

In [41]:
# Run the conversion. The result is only held in memory as `tflite_model`;
# no visible cell writes it to disk.
tflite_model = converter.convert()

INFO:absl:Using experimental converter: If you encountered a problem please file a bug. You can opt-out by setting experimental_new_converter=False


In [42]:
# Remove the temporary SavedModel directory created by the export step.
# NOTE(review): shell `rm` is not portable to every platform.
!rm -r tmp_pb