In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from bertviz import head_view
import torch

# Load the GPT-2 model and tokenizer with attention output enabled
model_name = "gpt2"  # Replace with your model name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, output_attentions=True)

# Input prompt
prompt = "Explainable AI is important because"
inputs = tokenizer(prompt, return_tensors="pt")

# Single forward pass over the prompt; attention weights come back on `outputs`
outputs = model(**inputs)

# One attention tensor per layer; None/empty when the model did not return them
attention = getattr(outputs, "attentions", None)

if attention:
    print(f"Number of layers with attention: {len(attention)}")

    # Debug: Print attention tensor dimensions
    for i, layer_attention in enumerate(attention):
        print(f"Layer {i} attention shape: {layer_attention.shape}")

    # Token strings for the single prompt in the batch
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # Debug: Print tokens
    print("Tokens:", tokens)

    # Visualize using BertViz.
    # The per-layer tensors are passed through untouched: BertViz validates that
    # each layer is 4-D and removes the batch dimension itself, so squeezing or
    # converting to numpy beforehand makes it reject the input.
    # Only (attention, tokens) are passed: the third positional parameter of
    # head_view is `sentence_b_start`, not a second token list.
    head_view(attention, tokens)
else:
    print("No attention weights were returned. Ensure 'output_attentions=True' is set during model initialization.")


In [None]:
# Increase the cell output height
# NOTE(review): the JavaScript below references `google.colab`, so this cell
# presumably only works inside Google Colab — confirm before running elsewhere.
from IPython.display import Javascript, display
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))

# All three imports live inside the try: if ANY of them is missing (including
# `google.colab`), the except branch below pip-installs algebraic_value_editing.
try:
  # NOTE: this `utils` is algebraic_value_editing.utils; other cells in this
  # notebook import a different `utils` from transformers under the same name.
  from algebraic_value_editing import widgets, utils
  from transformer_lens import HookedTransformer
  from google.colab import output

  output.enable_custom_widget_manager()

  utils.enable_ipython_reload()

  # Load a model
  # The model is created on CPU and then moved to the first CUDA device.
  # NOTE(review): `.to("cuda:0")` is hard-coded — presumably fails on a runtime
  # without a GPU; confirm a GPU runtime is selected.
  MODEL: HookedTransformer = HookedTransformer.from_pretrained(
      model_name="gpt2-small", device="cpu"
  ).to("cuda:0")

  # Create and display the widget
  # NOTE(review): `initial_act_name=16` looks like a layer index — confirm it
  # is valid for the "gpt2-small" model loaded above.
  ui, out = widgets.make_widget(
      MODEL,
      initial_input_text="I'm excited because I'm going to a",
      initial_phrases=(" weddings", ""),
      initial_act_name=16,
      initial_coeff=1.0,
  )
  display(ui, out)

# The handler only installs the package at a pinned commit; it does not retry
# the widget setup, so the cell has to be run again after the install finishes.
except ModuleNotFoundError as e:
  commit = "ef0818ccde"  # Stable commit on main
  get_ipython().run_line_magic(
      magic_name="pip",
      line=(
          "install -U"
          f" git+https://github.com/montemac/algebraic_value_editing.git@{commit}"
      ),
  )

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Loaded models keyed by model name; each value holds the model and its tokenizer
model_cache = {}

def load_model(model_name: str):
    """
    Load a causal language model and its tokenizer into ``model_cache``.

    Args:
        model_name (str): Identifier handed to ``from_pretrained``.

    Returns:
        dict: ``{"status": ..., "model_name": ...}`` when the model is (or
        already was) in the cache, or ``{"error": <message>}`` when anything
        raised while looking it up or loading it.
    """
    try:
        if model_name not in model_cache:
            loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
            # Attention outputs are enabled at load time
            loaded_model = AutoModelForCausalLM.from_pretrained(model_name, output_attentions=True)
            model_cache[model_name] = {"model": loaded_model, "tokenizer": loaded_tokenizer}
            status = "Model loaded successfully"
        else:
            status = "Model already loaded"

        return {"status": status, "model_name": model_name}
    except Exception as exc:
        # Failures are reported to the caller as data instead of propagating
        return {"error": str(exc)}

In [None]:
# Load the Llama-Magpie checkpoint through load_model, which stores it in
# model_cache and returns a status dict (or an {"error": ...} dict on failure).
response = load_model("prithivMLmods/Llama-Magpie-3.2-3B-Instruct")
# Bare expression so the notebook displays the returned dict
response

In [8]:
import requests
import torch
from bertviz import head_view

payload = {"prompt": "Hello, my name is"}
API_BASE_URL = "http://127.0.0.1:8000"
REQUEST_TIMEOUT_SECONDS = 120  # fail instead of hanging forever if the service is unresponsive

# Ask the local service for the attention weights of the prompt
attention_response = requests.post(
    f"{API_BASE_URL}/attention_weights",
    json=payload,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

if attention_response.status_code == 200:
    attention_data = attention_response.json()
    # JSON nested lists -> torch tensor
    # NOTE(review): head_view requires every per-layer entry to be 4-D
    # (batch, heads, seq, seq) — confirm the service returns that layout.
    attention = torch.tensor(attention_data["attention"])
    tokens = attention_data["tokens"]
    head_view(attention, tokens)
else:
    # Surface the failure instead of silently producing no output
    print(
        f"Attention request failed with status {attention_response.status_code}: "
        f"{attention_response.text}"
    )

<IPython.core.display.Javascript object>

In [11]:
from transformers import AutoTokenizer, AutoModel, utils
from bertviz import model_view
from bertviz import head_view

# Only report errors from the transformers logger
utils.logging.set_verbosity_error()

# Checkpoint and sentence to inspect (more checkpoints: https://huggingface.co/models)
model_name = "microsoft/xtremedistil-l12-h384-uncased"
input_text = "The cat sat on the mat"

# Tokenizer, plus the model configured to return its attention values
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, output_attentions=True)

# Token ids for the sentence and the matching token strings
inputs = tokenizer.encode(input_text, return_tensors="pt")
tokens = tokenizer.convert_ids_to_tokens(inputs[0])

# Forward pass; the attention is taken from the last element of the model output
outputs = model(inputs)
attention = outputs[-1]

# Render BertViz's head view for the sentence
head_view(attention, tokens)

<IPython.core.display.Javascript object>

In [1]:
from bertviz.transformers_neuron_view import GPT2Model, GPT2Tokenizer
from bertviz.neuron_view import show

# Sentence whose attention is visualized
text = "Machine learning is great for humanity. It helps a lot of people."

# Architecture name passed to the neuron view, and the checkpoint to load
model_type = "gpt2"
model_version = "gpt2"

# Tokenizer and model come from BertViz's own transformers_neuron_view module
tokenizer = GPT2Tokenizer.from_pretrained(model_version)
model = GPT2Model.from_pretrained(model_version)

# Render the neuron view for the sentence
show(model, model_type, tokenizer, text)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
from transformers import AutoTokenizer, AutoModel, utils
from bertviz import model_view
from bertviz import head_view

# Only report errors from the transformers logger
utils.logging.set_verbosity_error()

# Checkpoint and text to inspect
model_name = "gpt2"
input_text = "Machine learning is great for humanity. It helps a lot of people."

# Tokenizer, plus the model configured to return its attention values
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, output_attentions=True)

# Token ids for the text and the matching token strings
inputs = tokenizer.encode(input_text, return_tensors="pt")
tokens = tokenizer.convert_ids_to_tokens(inputs[0])

# Forward pass; the attention is taken from the last element of the model output
outputs = model(inputs)
attention = outputs[-1]

# Render BertViz's head view for the text
head_view(attention, tokens)

<IPython.core.display.Javascript object>

In [3]:

# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_path = "styalai/gpt2o-chatbot-02"

# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     device_map="auto",
#     torch_dtype='auto'
# ).eval().to('cpu')

# # Prompt content: "hi"
# messages = [
#     {"role": "user", "content": "hi"}
# ]

# input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt').to('cpu')
# output_ids = model.generate(input_ids,max_new_tokens=100)
# response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# # Model response: "Hello! How can I assist you today?"
# print(response)


In [3]:
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# import torch
# import torch.nn.functional as F

# def load_gpt2_model():
#     """
#     Load GPT-2 model and tokenizer.
#     Returns:
#         model: The GPT-2 model.
#         tokenizer: The GPT-2 tokenizer.
#     """
#     model_name = "gpt2"  # You can specify a different GPT-2 model variant, e.g., "gpt2-medium"
#     tokenizer = GPT2Tokenizer.from_pretrained(model_name)
#     model = GPT2LMHeadModel.from_pretrained(model_name)
    
#     # Set pad_token_id explicitly if needed
#     tokenizer.pad_token = tokenizer.eos_token
#     model.config.pad_token_id = tokenizer.eos_token_id

#     return model, tokenizer

# def generate_text_with_log_probs(prompt, model, tokenizer, max_length=50, temperature=1.0, top_p=0.9):
#     """
#     Generate text using GPT-2 and calculate log probabilities for each token.

#     Args:
#         prompt (str): The input prompt to generate text.
#         model: The loaded GPT-2 model.
#         tokenizer: The GPT-2 tokenizer.
#         max_length (int): Maximum length of the generated sequence.
#         temperature (float): Sampling temperature.
#         top_p (float): Nucleus sampling probability.

#     Returns:
#         generated_text (str): The generated text.
#         log_probs (list): Log probabilities of the generated tokens.
#     """
#     # Tokenize the prompt
#     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
#     input_ids = inputs["input_ids"]

#     # Generate output
#     outputs = model.generate(
#         input_ids=input_ids,
#         max_length=max_length,
#         temperature=temperature,
#         top_p=top_p,
#         do_sample=True,
#         pad_token_id=tokenizer.eos_token_id,
#         return_dict_in_generate=True,
#         output_scores=True
#     )

#     # Decode the generated tokens
#     generated_ids = outputs.sequences[0]
#     generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

#     # Calculate log probabilities for generated tokens
#     log_probs = []
#     for i, logits in enumerate(outputs.scores):
#         # Match logits to the generated token at this step
#         token_id = generated_ids[input_ids.shape[-1] + i]  # Offset by input length
#         probs = F.log_softmax(logits, dim=-1)
#         token_log_prob = probs[0, token_id].item()  # Log probability of the token
#         log_probs.append(token_log_prob)

#     return generated_text, log_probs

# # Load model and tokenizer
# model, tokenizer = load_gpt2_model()

# # Generate text and retrieve log probabilities
# prompt = "Once upon a time"
# generated_text, log_probs = generate_text_with_log_probs(prompt, model, tokenizer)

# # Print results
# print("Generated Text:\n", generated_text)
# print("\nLog Probabilities:\n", log_probs)

In [12]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

def load_gpt2_model():
    """
    Build the GPT-2 language model together with its tokenizer.

    The end-of-sequence token is reused as the padding token, both on the
    tokenizer and in the model configuration.

    Returns:
        tuple: ``(model, tokenizer)`` in that order.
    """
    checkpoint = "gpt2"  # other GPT-2 variants (e.g. "gpt2-medium") can be substituted here

    gpt2_tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
    gpt2_model = GPT2LMHeadModel.from_pretrained(checkpoint)

    # Padding falls back to the end-of-sequence token / id
    gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token
    gpt2_model.config.pad_token_id = gpt2_tokenizer.eos_token_id

    return gpt2_model, gpt2_tokenizer

def generate_text_with_log_probs_and_top_tokens(prompt, model, tokenizer, max_length=15, temperature=0.70, top_p=0.2, top_k=20):
    """
    Sample a continuation of ``prompt`` and print the most likely tokens per step.

    For every generated position the function prints the token that was
    sampled, followed by up to 10 candidate tokens ranked by log probability.
    The log probabilities are a log-softmax over the per-step ``scores``
    returned by ``model.generate``.

    Args:
        prompt (str): The input prompt to generate text.
        model: The loaded GPT-2 model.
        tokenizer: The GPT-2 tokenizer.
        max_length (int): Maximum length of the generated sequence.
        temperature (float): Sampling temperature.
        top_p (float): Kept only for backward compatibility. It is NOT
            forwarded to ``generate``; sampling is restricted by ``top_k``.
        top_k (int): Number of highest-scoring tokens sampling is restricted
            to. Defaults to 20, the value that used to be hard-coded.

    Returns:
        None: Prints generated text and log probabilities in a formatted way.
    """
    num_candidates = 10  # ranked predictions printed per generated token

    # Tokenize the prompt
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    # Generate output
    outputs = model.generate(
        input_ids=input_ids,
        # Forward the tokenizer's mask explicitly (None if it produced none)
        # rather than relying on generate() to work it out while the pad id
        # is set to the EOS id below.
        attention_mask=encoded.get("attention_mask"),
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Decode the generated tokens
    generated_ids = outputs.sequences[0]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

    print("Generated Text:")
    print(generated_text)
    print("\nToken-wise Top 10 Log Probabilities:\n")

    # Display log probabilities and top tokens for generated tokens
    for step, step_scores in enumerate(outputs.scores):
        # scores[step] belongs to the token sitting `step` places after the prompt
        token_id = generated_ids[prompt_length + step]
        log_probs = F.log_softmax(step_scores, dim=-1)

        # Never request more candidates than there are vocabulary entries
        k = min(num_candidates, log_probs.shape[-1])
        top_log_probs, top_ids = torch.topk(log_probs[0], k)
        top_tokens = tokenizer.convert_ids_to_tokens(top_ids.tolist())

        print(f"Generated Token: {tokenizer.decode([token_id])}")
        print("Top 10 Predictions:")
        for rank, (token, log_prob) in enumerate(zip(top_tokens, top_log_probs.tolist()), 1):
            if log_prob == float("-inf"):
                # Candidates are sorted, so everything from here on had zero probability
                break
            print(f"  {rank}. {token:>15} | Log Prob: {log_prob:.4f}")
        print("-" * 50)

# Seed torch's RNG so the sampled continuation (generation uses do_sample=True)
# is the same every time this cell is re-run
torch.manual_seed(0)

# Load model and tokenizer
model, tokenizer = load_gpt2_model()

# Generate text and display token-wise top 10 log probabilities
prompt = "What comes after night? Answer is: "
generate_text_with_log_probs_and_top_tokens(prompt, model, tokenizer)


Generated Text:
What comes after night? Answer is:  In a sense, I

Token-wise Top 10 Log Probabilities:

Generated Token:  
Top 10 Predictions:
  1.              Âł | Log Prob: -0.1700
  2.        ________ | Log Prob: -3.6334
  3.            ???? | Log Prob: -3.9859
  4.              ~~ | Log Prob: -4.1283
  5.             !!! | Log Prob: -4.1946
  6. ------------------------ | Log Prob: -4.3614
  7. ________________________________________________________________ | Log Prob: -4.5575
  8.           _____ | Log Prob: -4.7585
  9. ________________ | Log Prob: -4.7972
  10.            ____ | Log Prob: -5.0503
--------------------------------------------------
Generated Token: In
Top 10 Predictions:
  1.               " | Log Prob: -1.2575
  2.               I | Log Prob: -1.7287
  3.             The | Log Prob: -1.9908
  4.              It | Log Prob: -2.7946
  5.             You | Log Prob: -2.9372
  6.            What | Log Prob: -3.2961
  7.               A | Log Prob: -3.3078
  8.    

In [16]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import torch.nn.functional as F

def load_gpt2_model():
    """
    Create the GPT-2 model/tokenizer pair used by this cell.

    Both objects are loaded from the same checkpoint name, and the tokenizer's
    end-of-sequence token doubles as the padding token for tokenizer and model.

    Returns:
        tuple: ``(model, tokenizer)`` in that order.
    """
    # Swap in another GPT-2 variant here if desired, e.g. "gpt2-medium"
    checkpoint = "gpt2"

    lm_tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)
    lm = GPT2LMHeadModel.from_pretrained(checkpoint)

    # Use the end-of-sequence token / id for padding
    lm_tokenizer.pad_token = lm_tokenizer.eos_token
    lm.config.pad_token_id = lm_tokenizer.eos_token_id

    return lm, lm_tokenizer

def generate_text_with_top_log_probs(prompt, model, tokenizer, max_length=20, temperature=1.0, top_p=0.9):
    """
    Sample a continuation of ``prompt`` and collect the 10 best-scoring tokens per step.

    Args:
        prompt (str): Text the generation starts from.
        model: The loaded GPT-2 model.
        tokenizer: The GPT-2 tokenizer.
        max_length (int): Maximum length of the generated sequence.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling probability.

    Returns:
        tuple: ``(generated_text, word_log_probs)``. ``word_log_probs`` holds
        one dict per generated token with the keys ``"word"`` (the decoded,
        whitespace-stripped token) and ``"top_10_words"`` (a list of
        ``{"word", "log_prob"}`` dicts, highest log probability first).
    """
    # Encode the prompt and remember how many tokens it occupies
    encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    # Sample, keeping the per-step scores alongside the sequence
    generation = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
        output_scores=True,
    )

    sequence = generation.sequences[0]
    generated_text = tokenizer.decode(sequence, skip_special_tokens=True)

    def summarize_step(step, step_scores):
        """Build the report entry for the token generated at ``step``."""
        # scores[step] belongs to the token `step` places after the prompt
        chosen_id = sequence[prompt_length + step]
        chosen_word = tokenizer.decode([chosen_id]).strip()

        # Log probabilities over the vocabulary, then the 10 largest
        log_probs = F.log_softmax(step_scores, dim=-1)
        best_log_probs, best_ids = torch.topk(log_probs, k=10, dim=-1)

        candidate_words = tokenizer.batch_decode(best_ids[0], skip_special_tokens=True)
        candidate_scores = best_log_probs[0].tolist()

        return {
            "word": chosen_word,
            "top_10_words": [
                {"word": word, "log_prob": score}
                for word, score in zip(candidate_words, candidate_scores)
            ],
        }

    word_log_probs = [
        summarize_step(step, step_scores)
        for step, step_scores in enumerate(generation.scores)
    ]

    return generated_text, word_log_probs

# Seed torch's RNG so the sampled continuation (generation uses do_sample=True)
# is the same every time this cell is re-run
torch.manual_seed(0)

# Load model and tokenizer
model, tokenizer = load_gpt2_model()

# Generate text and retrieve top log probabilities
# prompt = "Once upon a time"
prompt = "What comes after night? Answer is: "

generated_text, word_log_probs = generate_text_with_top_log_probs(prompt, model, tokenizer)

# Display results in a structured manner: the full text first, then one
# section per generated token listing its ranked candidates
print(f"Generated Text: {generated_text}\n")
print("Log Probabilities for Each Word:\n")
for item in word_log_probs:
    print(f"Word: {item['word']}")
    print("Top 10 Predictions:")
    for entry in item["top_10_words"]:
        print(f"  {entry['word']}: {entry['log_prob']:.4f}")
    print("\n")


Generated Text: What comes after night? Answer is:  I'm not sure if I need to tell you

Log Probabilities for Each Word:

Word: 
Top 10 Predictions:
   : -0.5683
  ________: -2.9926
  ????: -3.2394
  ~~: -3.3391
  !!!: -3.3855
  ------------------------: -3.5022
  ________________________________________________________________: -3.6395
  _____: -3.7803
  ________________: -3.8073
  ____: -3.9845


Word: I
Top 10 Predictions:
  ": -1.8401
  I: -2.1700
  The: -2.3534
  It: -2.9161
  You: -3.0159
  What: -3.2671
  A: -3.2753
  No: -3.3235
  Yes: -3.6431
  We: -3.6889


Word: 'm
Top 10 Predictions:
  'm: -1.8763
   am: -2.2267
   don: -2.2479
  'll: -2.5389
   have: -2.5420
   will: -2.6218
  've: -3.1164
   can: -3.1612
   think: -3.3843
   want: -3.4698


Word: not
Top 10 Predictions:
   not: -1.4735
   going: -1.5097
   a: -2.9440
   in: -3.2188
   sure: -3.2196
   still: -3.2373
   gonna: -3.3709
   sorry: -3.4021
   tired: -3.7393
   just: -3.8979


Word: sure
Top 10 Predictions:
   