# Interpretable Failure Detection in LLM Reasoning


In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np

### 1. Setup the LLM import


In [3]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

Loading weights:   0%|          | 0/338 [00:00<?, ?it/s]

In [4]:
def get_response_with_uncertainty(prompt):
    """
    Generates a response and calculates the average entropy (uncertainty).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    # Generate with output_scores=True to get logits
    with torch.no_grad():
        outputs = model.generate(
            **inputs, 
            max_new_tokens=200, 
            output_scores=True, 
            return_dict_in_generate=True, 
            temperature=0.7, 
            do_sample=True
        )

    # 2. EXTRACT LOGITS & CALCULATE ENTROPY
    # Stack scores: (num_generated_tokens, batch_size, vocab_size)
    logits = torch.stack(outputs.scores, dim=0).squeeze(1) 
    probs = torch.softmax(logits, dim=-1)
    
    # Entropy formula: - sum(p * log(p))
    entropy_per_token = -torch.sum(probs * torch.log(probs + 1e-9), dim=-1)
    
    # Average entropy for the whole response (Simple metric)
    avg_entropy = torch.mean(entropy_per_token).item()
    
    # Decode answer
    generated_ids = outputs.sequences[0][inputs.input_ids.shape[1]:]
    answer_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    
    return answer_text, avg_entropy

In [5]:
question = "Solve for x: 3x + 10 = 25"
prompt = f"User: {question}\n\nAssistant: Let's think step by step."

In [6]:
# run 3 times through the Qwen model to check for consistency
for i in range(3):
    answer, entropy = get_response_with_uncertainty(prompt)
    print(f"{20*'*'}\nRun {i+1}:")
    print(f"\tAnswer: {answer}")
    print(f"\t==> Entropy: {entropy}\n")



********************
Run 1:
	Answer:  We have the equation:

\[ 3x + 10 = 25 \]

To solve for \( x \), we need to isolate it on one side of the equation. First, let's subtract 10 from both sides to get rid of the constant term on the left side:

\[ 3x + 10 - 10 = 25 - 10 \]
\[ 3x = 15 \]

Now that we've isolated the term with \( x \) (which is 3 times \( x \)), we can divide both sides by 3 to solve for \( x \):

\[ \frac{3x}{3} = \frac{15}{3} \]
\[ x = 5 \]

So, the solution is \( x = 5 \).
	==> Entropy: 0.09763886034488678

********************
Run 2:
	Answer:  We have the equation:

\[ 3x + 10 = 25 \]

First, we need to isolate \(x\). To do this, let's subtract 10 from both sides of the equation:

\[ 3x + 10 - 10 = 25 - 10 \]

This simplifies to:

\[ 3x = 15 \]

Now, to solve for \(x\), divide both sides by 3:

\[ \frac{3x}{3} = \frac{15}{3} \]

So,

\[ x = 5 \]
	==> Entropy: 0.09138200432062149

********************
Run 3:
	Answer:  
We have the equation 3x + 10 = 25.
First, we nee