In [3]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForCausalLM

from pprint import pprint

In [4]:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
def preprocess(texts: list[str]):
    return [tokenizer(text, return_tensors='tf') for text in texts]

inputs = preprocess(["Hello, how are you?", "I am fine, thank you!"])
inputs




[{'input_ids': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[15496,    11,   703,   389,   345,    30]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1]], dtype=int32)>},
 {'input_ids': <tf.Tensor: shape=(1, 7), dtype=int32, numpy=array([[  40,  716, 3734,   11, 5875,  345,    0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 7), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}]

In [5]:
model = TFAutoModelForCausalLM.from_pretrained("gpt2")
model.config.pad_token_id = model.config.eos_token_id
model

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


<transformers.models.gpt2.modeling_tf_gpt2.TFGPT2LMHeadModel at 0x722c3418fa30>

In [6]:
generated = model.generate(**inputs[0], do_sample=True, seed=(42, 0))
print("Sampling output: ", tokenizer.decode(generated[0]))



Sampling output:  Hello, how are you?

I am an English teacher. I am the first one to


In [7]:
def get_all_layers(model):
    """
    Recursively retrieve all layers from a model.
    
    Args:
    - model: TensorFlow or Keras model
    
    Returns:
    - List of all layers in the model
    """
    layers = []
    
    # Recursively add layers
    def extract_layers(layer):
        layers.append(layer)
        # If the layer has sublayers, iterate over them
        if hasattr(layer, 'layers'):
            for sublayer in layer.layers:
                extract_layers(sublayer)

    # Start recursion with the input model
    extract_layers(model)
    return layers

get_all_layers(model)

[<transformers.models.gpt2.modeling_tf_gpt2.TFGPT2LMHeadModel at 0x722c3418fa30>,
 <transformers.models.gpt2.modeling_tf_gpt2.TFGPT2MainLayer at 0x722b73507fd0>]

In [8]:
# Function to extract activations from self-attention, feed-forward, and hidden states
def get_transformer_block_activations(model, inputs):
    """
    Function to get intermediate activations from each transformer block, including self-attention and feed-forward activations.
    
    Args:
    - model: Transformer model from Hugging Face (e.g., GPT-2)
    - inputs: Tokenized input (output of tokenizer)
    
    Returns:
    - Dictionary containing activations for self-attention, feed-forward, and hidden states
    """
    print(inputs)
    # Run the model with output_hidden_states and output_attentions to get all intermediate data
    outputs = model(input_ids=inputs['input_ids'], output_hidden_states=True, output_attentions=True)

    # The hidden_states contains the hidden layer outputs, and attentions contains attention scores
    hidden_states = outputs.hidden_states
    attentions = outputs.attentions

    activations = {
        "embedding_layer": hidden_states[0],  # First hidden state is the embedding layer
    }

    # Iterate over the transformer blocks
    for i, (attention, hidden_state) in enumerate(zip(attentions, hidden_states[1:])):
        # Self-attention output (stored in attentions)
        activations[f"block_{i}_self_attention"] = attention
        
        # Feed-forward output (this is typically captured in the hidden states after attention and feed-forward pass)
        activations[f"block_{i}_feed_forward"] = hidden_state

    return activations

# Tokenized inputs
inputs = preprocess(["Hello, how are you?", "I am fine, thank you!"])

# Process each input to get activations inside transformer blocks
for input_data in inputs:
    # Get the intermediate activations (self-attention, feed-forward layers) inside each block
    block_activations = get_transformer_block_activations(model, input_data)
    
    # Display the activations for each component within each transformer block
    for layer_name, activation in block_activations.items():
        print(f"{layer_name}: activation shape {activation.shape}")
        
    break


{'input_ids': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[15496,    11,   703,   389,   345,    30]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(1, 6), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1]], dtype=int32)>}
embedding_layer: activation shape (1, 6, 768)
block_0_self_attention: activation shape (1, 12, 6, 6)
block_0_feed_forward: activation shape (1, 6, 768)
block_1_self_attention: activation shape (1, 12, 6, 6)
block_1_feed_forward: activation shape (1, 6, 768)
block_2_self_attention: activation shape (1, 12, 6, 6)
block_2_feed_forward: activation shape (1, 6, 768)
block_3_self_attention: activation shape (1, 12, 6, 6)
block_3_feed_forward: activation shape (1, 6, 768)
block_4_self_attention: activation shape (1, 12, 6, 6)
block_4_feed_forward: activation shape (1, 6, 768)
block_5_self_attention: activation shape (1, 12, 6, 6)
block_5_feed_forward: activation shape (1, 6, 768)
block_6_self_attention: activation shape (1, 12, 6, 6)
block_6_feed_forward: activatio