In [1]:
import os
import json
import tensorflow as tf
from tf_transformers.models import ROBERTAEncoder, RobertaModel, AlbertModel


In [2]:
# Default configs for the model
# NOTE(review): this is a machine-specific absolute path; adjust it before running elsewhere.
model_config_dir = '/Users/PRVATE/Documents/tf_transformers/src/tf_transformers/models/model_configs/'

model_name = 'roberta_base'
config_location = os.path.join(model_config_dir, model_name, 'config.json')
# Read the JSON config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open for the kernel's lifetime.
with open(config_location) as config_file:
    config = json.load(config_file)
# Optional override, left disabled:
# config['num_hidden_layers'] = 1


# tf_transformers Layer (an extension of a Keras Layer)
# This is not a Keras model, but an extension of a Keras Layer

# Training-mode encoder with dropout switched off.
encoder_layer = ROBERTAEncoder(config=config,
                      name='roberta',
                      mask_mode=config['mask_mode'],
                      is_training=True,
                      use_dropout=False,
                      cross_attention_inside_encoder=True,
                      )
# get_model() yields the object that later cells call and inspect.
model = encoder_layer.get_model()

INFO:absl:Initialized Variables


In [3]:
# Count the variables held by the model built above.
len(model.variables)

317

In [4]:
# Build RoBERTa by name instead of from a hand-loaded config. The call hands
# back three objects, unpacked here as (layer, model, config); note that this
# rebinds the `model` and `config` names used by earlier cells.
model_layer, model, config = RobertaModel(
    model_name='roberta_base',
    is_training=False,
    cross_attention_inside_encoder=True,
)

INFO:absl:We are overwriding `is_training` is False to `is_training` to True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Initialized Variables


In [2]:
# Same construction as for RoBERTa, but for ALBERT. The three returned objects
# are unpacked as (layer, model, config), replacing whatever those names held.
model_layer, model, config = AlbertModel(
    model_name='albert-base-v2',
    is_training=False,
    cross_attention_inside_encoder=True,
)

INFO:absl:We are overwriding `is_training` is False to `is_training` to True with `use_dropout` is False, no effects on your inference pipeline
INFO:absl:Initialized Variables


Token embeddings projected Tensor("tf_transformers/albert/lower_embedding_projection/mul_3:0", shape=(None, None, 128), dtype=float32)
Token embeddings projected Tensor("tf_transformers/albert/lower_embedding_projection/mul_7:0", shape=(None, None, 128), dtype=float32)


In [9]:
# Stand-in "logits" for trying out token-selection ops: random values of shape (5, 768).
# No seed is set, so the numbers differ on every run.
model_logits = tf.random.uniform((5, 768))

In [10]:
# Greedy selection: index of the largest value in each row.
tf.argmax(model_logits, axis=1)

<tf.Tensor: shape=(5,), dtype=int64, numpy=array([748, 310, 292, 306, 735])>

In [11]:
# Sampling-based selection: draw one index per row, with each row interpreted
# by tf.random.categorical as unnormalized log-probabilities.
tf.random.categorical(model_logits, num_samples=1)

<tf.Tensor: shape=(5, 1), dtype=int64, numpy=
array([[412],
       [714],
       [341],
       [311],
       [255]])>

In [13]:
# The two largest values in each row together with their indices.
tf.nn.top_k(model_logits, k=2)

TopKV2(values=<tf.Tensor: shape=(5, 2), dtype=float32, numpy=
array([[0.999079  , 0.99765325],
       [0.9981979 , 0.9981326 ],
       [0.9992883 , 0.9977151 ],
       [0.9998293 , 0.9983845 ],
       [0.99699914, 0.9968606 ]], dtype=float32)>, indices=<tf.Tensor: shape=(5, 2), dtype=int32, numpy=
array([[748, 484],
       [310, 213],
       [292, 592],
       [306, 241],
       [735, 623]], dtype=int32)>)

In [4]:
# Default configs for the model
# NOTE(review): this is a machine-specific absolute path; adjust it before running elsewhere.
model_config_dir = '/Users/PRVATE/Documents/tf_transformers/src/tf_transformers/models/model_configs/'

model_name = 'roberta_base'
config_location = os.path.join(model_config_dir, model_name, 'config.json')
# Read the JSON config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open for the kernel's lifetime.
with open(config_location) as config_file:
    config = json.load(config_file)
# Optional override, left disabled:
# config['num_hidden_layers'] = 1


# tf_transformers Layer (an extension of a Keras Layer)
# This is not a Keras model, but an extension of a Keras Layer

# Inference-mode encoder (is_training=False); kept under a separate name so it
# does not replace `model`.
encoder_layer = ROBERTAEncoder(config=config,
                      name='roberta',
                      mask_mode=config['mask_mode'],
                      is_training=False,
                      cross_attention_inside_encoder=True,
                      )
model_test = encoder_layer.get_model()

INFO:absl:Initialized Variables


In [5]:
# Count the variables held by the inference-mode model.
len(model_test.variables)

317

In [6]:
# Show the named input tensors that model_test expects.
model_test.input

{'encoder_input_ids': <tf.Tensor 'encoder_input_ids_1:0' shape=(None, None) dtype=int32>,
 'decoder_input_ids': <tf.Tensor 'decoder_input_ids_1:0' shape=(None, None) dtype=int32>,
 'encoder_input_type_ids': <tf.Tensor 'encoder_input_type_ids_1:0' shape=(None, None) dtype=int32>,
 'decoder_input_type_ids': <tf.Tensor 'decoder_input_type_ids_1:0' shape=(None, None) dtype=int32>,
 'encoder_input_mask': <tf.Tensor 'encoder_input_mask_1:0' shape=(None, None) dtype=int32>,
 'decoder_all_cache_key': <tf.Tensor 'decoder_all_cache_key_1:0' shape=(None, None, 12, None, 64) dtype=float32>,
 'decoder_all_cache_value': <tf.Tensor 'decoder_all_cache_value_1:0' shape=(None, None, 12, None, 64) dtype=float32>,
 'encoder_hidden_states': <tf.Tensor 'encoder_hidden_states_1:0' shape=(None, None, 768) dtype=float32>}

In [7]:
# Show the configuration recorded on model_test.
model_test.model_config

{'decoder': {'initializer': {'class_name': 'TruncatedNormal',
   'config': {'mean': 0.0, 'stddev': 0.02, 'seed': None}},
  'is_training': False,
  'use_dropout': False,
  'batch_size': None,
  'sequence_length': None,
  'name': 'tf_transformers/roberta',
  'use_type_embeddings': True,
  'use_positonal_embeddings': True,
  'is_decoder': False,
  'share_encoder_embeddings': False,
  'share_attention_layers': True,
  'cross_attention_inside_encoder': True,
  'attention_probs_dropout_prob': 0.1,
  'hidden_act': 'gelu',
  'intermediate_act': 'gelu',
  'hidden_dropout_prob': 0.1,
  'embedding_size': 768,
  'initializer_range': 0.02,
  'intermediate_size': 3072,
  'max_position_embeddings': 512,
  'num_attention_heads': 12,
  'num_hidden_layers': 12,
  'type_vocab_size': 1,
  'vocab_size': 50265,
  'layer_norm_epsilon': 1e-05,
  'mask_mode': 'user_defined'}}

In [8]:
import datasets
# Load a dataset that was previously saved to local disk.
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
data = datasets.load_from_disk("/Users/PRVATE/HUggingFace_Models/dataset/cnn_dailymail/")

In [9]:
from transformers import RobertaTokenizer
# Tokenizer matching the 'roberta-base' vocabulary.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Work with only the first 100 examples of the training split.
train_data = data["train"].select(range(100))

In [10]:
from tf_transformers.data import pad_dataset
# Tokenization settings. encoder_max_length is the pad/truncate length passed
# to the tokenizer; decoder_max_length and batch_size are not referenced by the
# cells shown in this notebook section.
encoder_max_length=512
decoder_max_length=64
batch_size = 2

@pad_dataset
def process_data_to_model_inputs(text_list):
    """Tokenize ``text_list`` and package the result as encoder inputs.

    Texts are padded/truncated to ``encoder_max_length``. The tokenizer adds
    the begin- and end-of-sequence markers around each text on its own.

    Returns a dict with ``encoder_input_ids``, ``encoder_input_mask`` and
    ``encoder_input_type_ids`` (all zeros, same shape as the mask); the
    ``pad_dataset`` decorator then post-processes that dict.
    """
    encoded = tokenizer(text_list, padding="max_length", truncation=True, max_length=encoder_max_length)
    return {
        "encoder_input_ids": encoded.input_ids,
        "encoder_input_mask": encoded.attention_mask,
        # Single-segment input: every position gets type id 0.
        "encoder_input_type_ids": tf.zeros_like(encoded.attention_mask).numpy(),
    }

In [11]:
# Encode the article text of the first two training examples.
sample_data = train_data.select(range(2))
article_list = [example['article'] for example in sample_data]
encoded_inputs = process_data_to_model_inputs(article_list)

In [12]:
# Manual greedy decoding loop.
# The batch dimension is derived from the encoded articles rather than being
# hard-coded as 2, so the cell keeps working if the sample size changes.
num_sequences = len(article_list)
num_decode_steps = 10

# Every sequence starts decoding from token id 0 with type id 0.
encoded_inputs['decoder_input_ids'] = tf.zeros((num_sequences, 1), dtype=tf.int32)
encoded_inputs['decoder_input_type_ids'] = tf.zeros((num_sequences, 1), dtype=tf.int32)

all_ids = []
all_probs = []
for step in range(num_decode_steps):
    result = model(encoded_inputs)
    last_token_logits = result['last_token_logits']
    # Greedy pick: highest-scoring token id per sequence, already as int32.
    best_id = tf.argmax(last_token_logits, axis=1, output_type=tf.int32)
    # NOTE: despite the name, this is the largest raw logit, not a normalized probability.
    best_prob = tf.reduce_max(last_token_logits, axis=1)
    # Append the chosen token (and a matching type id of 0) so the next step sees it.
    encoded_inputs['decoder_input_ids'] = tf.concat(
        [encoded_inputs['decoder_input_ids'], tf.expand_dims(best_id, axis=1)], axis=1)
    encoded_inputs['decoder_input_type_ids'] = tf.concat(
        [encoded_inputs['decoder_input_type_ids'], tf.zeros((num_sequences, 1), dtype=tf.int32)], axis=1)
    all_ids.append(best_id)
    all_probs.append(best_prob)
    print("Last token logits", tf.reduce_sum(last_token_logits, axis=1))
    

Last token logits tf.Tensor([-45.611828 -41.32108 ], shape=(2,), dtype=float32)
Last token logits tf.Tensor([2.768261 5.879841], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-4.442688  -2.2368774], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-34.530613 -30.754063], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-9.683073 -7.421894], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-15.290382 -12.106243], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-50.454346 -46.79603 ], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-41.44793 -38.67273], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-4.178528  -2.7922554], shape=(2,), dtype=float32)
Last token logits tf.Tensor([-44.15262  -41.173218], shape=(2,), dtype=float32)


In [13]:
# Value noted from a debugging run (presumably a reduction over the encoder hidden states):
# encoder_hidden_states tf.Tensor(-0.00020980835, shape=(), dtype=float32)

# Token ids chosen at each step of the manual greedy loop.
all_ids

[<tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>,
 <tf.Tensor: shape=(2,), dtype=int32, numpy=array([0, 0], dtype=int32)>]

In [14]:
# Per-step maxima of the last-token logits (raw logits, not normalized probabilities).
all_probs

[<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.7400916, 2.6976557], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.0687551, 3.0307393], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1849747, 3.1487033], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.241108 , 3.2023928], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.195044, 3.158273], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.123019 , 3.0789971], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.0359914, 2.99756  ], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.0729923, 3.0340953], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.2192988, 3.1752768], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.1816077, 3.1537046], dtype=float32)>]

In [15]:
# Copy weights between the two models via disk: write a checkpoint from `model`
# under "dummy", then load that checkpoint into `model_test`.
model.save_checkpoint("dummy", overwrite=True)
model_test.load_checkpoint("dummy")

INFO:absl:Saved model at dummy/ckpt-1



Two checkpoint references resolved to different objects (<tf_transformers.models.roberta.ROBERTAEncoder object at 0x109325760> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x13c590580>).



Two checkpoint references resolved to different objects (<tf_transformers.models.roberta.ROBERTAEncoder object at 0x109325760> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x13c590580>).
INFO:absl:Succesful: Model checkpoints matched


In [19]:
# Imported here because this cell is the only user; without it the cell fails
# with NameError on a fresh kernel.
from tf_transformers.text import TextDecoderSeq2Seq

# Start from a freshly encoded input dict: the manual decoding loop added
# decoder_* entries to the previous one.
encoded_inputs = process_data_to_model_inputs(article_list)
decoder = TextDecoderSeq2Seq(
    model=model_test,
    decoder_start_token_id=0,
    decoder_input_type_ids=0,
)
# Greedy decode for at most 10 steps, with token id 2 as end-of-sequence.
decoder_results = decoder.decode(
    encoded_inputs,
    mode='greedy',
    max_iterations=10,
    eos_id=2,
)

# Values noted from a debugging run:
# Encoder hidden states tf.Tensor(-0.00036621094, shape=(), dtype=float32)
# Decoder hidden states tf.Tensor(7.6293945e-06, shape=(), dtype=float32)


0 tf.Tensor([2.7400916 2.6976557], shape=(2,), dtype=float32)
1 tf.Tensor([3.0687551 3.0307395], shape=(2,), dtype=float32)
2 tf.Tensor([3.1849747 3.148703 ], shape=(2,), dtype=float32)
3 tf.Tensor([3.241109  3.2023926], shape=(2,), dtype=float32)
4 tf.Tensor([3.1950445 3.1582716], shape=(2,), dtype=float32)
5 tf.Tensor([3.1230192 3.0789976], shape=(2,), dtype=float32)
6 tf.Tensor([3.0359912 2.9975595], shape=(2,), dtype=float32)
7 tf.Tensor([3.0729923 3.034095 ], shape=(2,), dtype=float32)
8 tf.Tensor([3.2192986 3.1752765], shape=(2,), dtype=float32)
9 tf.Tensor([3.181608  3.1537042], shape=(2,), dtype=float32)


In [17]:
# Token ids produced by the decoder run.
decoder_results['predicted_ids']

<tf.Tensor: shape=(2, 1, 10), dtype=int64, numpy=
array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])>

In [20]:
# Export model_test to the "temp" directory.
model_test.save_as_serialize_module("temp")









Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: temp/assets


INFO:tensorflow:Assets written to: temp/assets
