In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import numpy as np
from scipy.stats import entropy

# Seed PyTorch's global RNG before anything samples from it. Generation below
# draws tokens with torch.multinomial, so without a fixed seed every
# Restart & Run All produces different text and different entropy traces.
# (CUDA kernels may still differ slightly across hardware/driver versions.)
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load a model - using a smaller one for local experimentation
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Switch to evaluation mode (not training) so inference-time behaviour is used.
model.eval()

def generate_with_entropy_tracking(prompt, model, tokenizer, max_new_tokens=50,
                                   top_k=5, do_sample=True):
    """
    Generate text one token at a time, recording the entropy of every
    next-token distribution along the way.

    Args:
        prompt: text to continue.
        model: causal language model. It is called as ``model(input_ids)`` and
            must return an object whose ``logits`` can be indexed as
            ``[batch, position, vocab]``. Only batch index 0 is inspected.
        tokenizer: must provide ``encode(prompt, return_tensors="pt")``,
            ``decode(ids, ...)`` and an ``eos_token_id`` attribute.
        max_new_tokens: upper bound on the number of generated tokens;
            generation stops earlier if the EOS token is produced.
        top_k: how many of the most likely candidates to record per position
            (clamped to the vocabulary size). Defaults to 5.
        do_sample: if True (default) the next token is sampled from the full
            distribution; if False the most likely token is taken (greedy).

    Returns:
        A dict with the keys:
        - 'text': the prompt plus the generated continuation, decoded with
          special tokens skipped
        - 'entropies': list of floats, one per generated token, each the
          entropy (natural log, i.e. nats) of the distribution it was drawn from
        - 'tokens': list of the decoded generated tokens, in order
        - 'top_probs': list with one dict per generated token holding
          'probs' (numpy array of the top-k probabilities) and
          'tokens' (the corresponding decoded candidates)

    Raises:
        ValueError: if ``top_k`` is smaller than 1 or the prompt encodes to
            zero tokens (there would be no position to predict from).
    """
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    # Place the inputs wherever the *given* model lives instead of relying on a
    # notebook-level `device` global, which may be undefined or out of sync.
    model_device = getattr(model, "device", None)
    if model_device is None:
        model_device = next(model.parameters()).device

    # Encode the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model_device)
    if input_ids.shape[-1] == 0:
        raise ValueError("prompt encodes to zero tokens; nothing to condition on")

    # Per-step records, filled as we generate
    token_entropies = []
    generated_tokens = []
    top_probs_per_position = []

    # Generate token by token so each distribution can be inspected.
    # NOTE: the whole sequence is re-run through the model on every step; that
    # is fine for the short generations used here.
    with torch.no_grad():  # no gradients needed for generation
        for _ in range(max_new_tokens):
            outputs = model(input_ids)

            # Logits at the last position describe the next token. Upcast to
            # float32 so half-precision models (bf16 cannot be converted to
            # numpy) work and the softmax is computed at full precision.
            next_token_logits = outputs.logits[:, -1, :].float()
            probs = torch.softmax(next_token_logits, dim=-1)

            # Entropy: -sum(p * log(p)) over the vocabulary.
            # Higher = probability spread over many tokens (more uncertainty);
            # lower = probability concentrated on few tokens (more certainty).
            token_entropies.append(float(entropy(probs[0].cpu().numpy())))

            # Record the most probable candidates for later inspection
            k = min(top_k, probs.shape[-1])
            top_values, top_indices = torch.topk(probs, k)
            top_probs_per_position.append({
                'probs': top_values[0].cpu().numpy(),
                'tokens': [tokenizer.decode([idx]) for idx in top_indices[0].tolist()]
            })

            if do_sample:
                # Multinomial sampling respects the full distribution
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                # Greedy decoding: always take the single most likely token
                next_token = torch.argmax(probs, dim=-1, keepdim=True)

            generated_tokens.append(tokenizer.decode(next_token[0].tolist()))

            # Extend the sequence for the next iteration
            input_ids = torch.cat([input_ids, next_token], dim=-1)

            # Stop once the end-of-sequence token has been produced
            if next_token.item() == tokenizer.eos_token_id:
                break

    # Decode the full sequence (prompt + continuation)
    generated_text = tokenizer.decode(input_ids[0], skip_special_tokens=True)

    return {
        'text': generated_text,
        'entropies': token_entropies,
        'tokens': generated_tokens,
        'top_probs': top_probs_per_position
    }

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


KeyboardInterrupt: 

In [None]:

# Experiment 1: compare per-token entropy across prompts that are expected to
# differ in how constrained their continuation is.
banner = "=" * 80
rule = "-" * 80

prompts = [
    "The capital of France is",  # factual: expected to be highly predictable
    "The meaning of life is",     # open-ended: expected to show higher entropy
    "Once upon a time",           # creative: uncertainty expected to vary
]

print(banner)
print("ENTROPY ANALYSIS OF MODEL GENERATIONS")
print(banner)

for prompt in prompts:
    print(f"\n\nPrompt: '{prompt}'")
    print(rule)

    result = generate_with_entropy_tracking(prompt, model, tokenizer, max_new_tokens=20)
    entropies = result['entropies']

    # Summary statistics for this single generation
    print(f"\nGenerated text: {result['text']}")
    print(f"\nAverage entropy: {np.mean(entropies):.3f}")
    print(f"Max entropy: {np.max(entropies):.3f}")
    print(f"Min entropy: {np.min(entropies):.3f}")

    # One line per generated token, followed by its three likeliest candidates
    print("\nToken-by-token breakdown:")
    rows = zip(result['tokens'], entropies, result['top_probs'])
    for i, (token, ent, top) in enumerate(rows):
        candidates = top['tokens'][:3]
        candidate_probs = top['probs'][:3]
        print(f"  Position {i}: '{token}' | Entropy: {ent:.3f}")
        print(f"    Top candidates: {candidates} with probs {candidate_probs}")

# Experiment 2: generate several continuations of one prompt to see whether
# the entropy pattern is consistent from run to run.
print("\n\n" + banner)
print("COMPARING ENTROPY ACROSS MULTIPLE GENERATIONS")
print(banner)

test_prompt = "The future of artificial intelligence"
num_generations = 5

all_entropies = []
for i in range(num_generations):
    result = generate_with_entropy_tracking(test_prompt, model, tokenizer, max_new_tokens=15)
    all_entropies.append(result['entropies'])
    print(f"\nGeneration {i+1}: {result['text']}")
    print(f"  Mean entropy: {np.mean(result['entropies']):.3f}")

# Line the generations up by position and summarise each position; only
# positions present in every generation (up to the shortest one) are compared.
print("\n\nPosition-wise entropy comparison:")
min_length = min(map(len, all_entropies))
for pos, column in enumerate(zip(*all_entropies)):
    entropies_at_pos = list(column)
    print(f"  Position {pos}: mean={np.mean(entropies_at_pos):.3f}, std={np.std(entropies_at_pos):.3f}")