In [1]:
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
import tensorflow as tf
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM



In [2]:
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = TFT5ForConditionalGeneration.from_pretrained(model_name)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [3]:
def generate_beam(model, tokenizer, input_text, num_beams=5, max_length=50):
    input_ids = tokenizer.encode(input_text, return_tensors="tf")

    # Set an initial token for the decoder input
    initial_decoder_input_ids = tf.constant([[tokenizer.pad_token_id]])

    beam_output = model.generate(input_ids, decoder_input_ids=initial_decoder_input_ids, max_length=max_length, num_beams=num_beams, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=1.0)

    # Decode the beam search output
    decoded_sequences = tokenizer.batch_decode(beam_output.numpy(), skip_special_tokens=True)

    # Access logits from the model output
    logits = model(input_ids, decoder_input_ids=initial_decoder_input_ids).logits

    # Calculate probabilities
    probabilities = tf.nn.softmax(logits, axis=-1)

    # Print each translation and its probability
    for sequence, probability in zip(decoded_sequences, probabilities):
        print(f"Translation: {sequence}, Probability: {probability.numpy().max():.4f}")


In [4]:
input_text = "hello my name is ammar"
generate_beam(model, tokenizer, input_text, num_beams=3, max_length=len(input_text.split()))


2024-03-03 20:55:37.714327: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.
2024-03-03 20:55:37.789974: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.
2024-03-03 20:55:37.917122: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.
2024-03-03 20:55:37.927121: I external/local_xla/xla/service/service.cc:168] XLA service 0x2b495c870 initialized for platform Host (this does not guarantee that XLA will be

Translation: hello hello my name, Probability: 0.5881


2024-03-03 20:55:38.156548: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.
2024-03-03 20:55:38.253404: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.
2024-03-03 20:55:38.307290: E ./tensorflow/core/kernels/cast_op_impl.h:64] IMPORTANT! The input tensor to Cast contains values out of range for the target type. This is undefined behavior and likely a bug in your model. A crash immediately after this under ubsan is expected.


In [32]:
model.summary()

Model: "tft5_for_conditional_generation_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 shared (Embedding)          multiple                  16449536  
                                                                 
 encoder (TFT5MainLayer)     multiple                  35330816  
                                                                 
 decoder (TFT5MainLayer)     multiple                  41625344  
                                                                 
Total params: 60506624 (230.81 MB)
Trainable params: 60506624 (230.81 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [130]:
for i, block in enumerate(model.decoder.block):
    print("Block", str(i) + ":")
    for j, layer in enumerate(block.layer):
        print(f"Layer {j}:{layer}")
    print("\n")


Block 0:
Layer 0:<transformers.models.t5.modeling_tf_t5.TFT5LayerSelfAttention object at 0x3100e66d0>
Layer 1:<transformers.models.t5.modeling_tf_t5.TFT5LayerCrossAttention object at 0x30b987fd0>
Layer 2:<transformers.models.t5.modeling_tf_t5.TFT5LayerFF object at 0x338389410>


Block 1:
Layer 0:<transformers.models.t5.modeling_tf_t5.TFT5LayerSelfAttention object at 0x30b90d490>
Layer 1:<transformers.models.t5.modeling_tf_t5.TFT5LayerCrossAttention object at 0x33546a1d0>
Layer 2:<transformers.models.t5.modeling_tf_t5.TFT5LayerFF object at 0x30b925d50>


Block 2:
Layer 0:<transformers.models.t5.modeling_tf_t5.TFT5LayerSelfAttention object at 0x30b929a90>
Layer 1:<transformers.models.t5.modeling_tf_t5.TFT5LayerCrossAttention object at 0x30b92ddd0>
Layer 2:<transformers.models.t5.modeling_tf_t5.TFT5LayerFF object at 0x30b936350>


Block 3:
Layer 0:<transformers.models.t5.modeling_tf_t5.TFT5LayerSelfAttention object at 0x30b93dfd0>
Layer 1:<transformers.models.t5.modeling_tf_t5.TFT5LayerCr