In [3]:
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

In [4]:
# --- 1. CONFIGURATION ---
base_model_id = "HuggingFaceTB/smollm2-135M-SFT-Only"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Define your models to test
models_to_test = {
    "Base_SFT": None,  # None means load base model without adapter
    "DPO": "../../models/smollm2-dpo-final",
    "PPO-Sparse": "../../models/smollm2-ppo-sparse-final",
    "PPO-Dense": "../../models/smollm2-ppo-dense-final",
    "GRPO": "../../models/smollm2-grpo-final" 
}

In [5]:
constraint_scenarios = [
    {
        "prompt": "Explain the theory of relativity in 50 words or less.",
        "limit": 50
    },
    {
        "prompt": "Summarize the plot of Romeo and Juliet in exactly 20 words or less.",
        "limit": 20
    },
    {
        "prompt": "Describe the taste of a lemon in 10 words or less.",
        "limit": 10
    },
    {
        "prompt": "Write a very short story about a lost key. Do not exceed 40 words.",
        "limit": 40
    },
    {
        "prompt": "Explain why the sky is blue in under 30 words.",
        "limit": 30
    }
]

In [6]:
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token

def count_words(text):
    """Simple whitespace-based word count"""
    return len(text.split())

# --- 4. EVALUATION LOOP ---
results = []

for model_name, adapter_path in models_to_test.items():
    print(f"\n--- Testing Model: {model_name} ---")
    
    # Load Model
    try:
        policy = AutoModelForCausalLM.from_pretrained(
            base_model_id, torch_dtype=torch.float16, device_map=device
        )
        if adapter_path:
            policy = PeftModel.from_pretrained(policy, adapter_path)
        policy.eval()
    except Exception as e:
        print(f"Skipping {model_name}: {e}")
        continue

    # Run Scenarios
    for scenario in constraint_scenarios:
        prompt = scenario["prompt"]
        limit = scenario["limit"]
        
        # Format
        messages = [{"role": "user", "content": prompt}]
        inputs = tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
        ).to(device)
        
        input_len = inputs.shape[1]
        
        # Generate
        with torch.no_grad():
            outputs = policy.generate(
                inputs,
                max_new_tokens=100, # Give it room to fail (overshoot)
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id
            )
        
        # Decode response only
        response_text = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)
        word_count = count_words(response_text)
        
        # Metrics
        is_compliant = word_count <= limit
        deviation = max(0, word_count - limit) # How many words OVER the limit?
        
        results.append({
            "Model": model_name,
            "Prompt_Limit": limit,
            "Word_Count": word_count,
            "Compliant": is_compliant,
            "Deviation": deviation,
            "Response": response_text
        })
        
    del policy
    torch.cuda.empty_cache()

# --- 5. SAVE & ANALYZE ---
df = pd.DataFrame(results)
df.to_csv("constraint_compliance_results.csv", index=False)

print("\n=== VERBOSITY COMPLIANCE REPORT ===")
# Group by Model to see aggregate stats
summary = df.groupby("Model").agg(
    Compliance_Rate=('Compliant', 'mean'),
    Avg_Deviation=('Deviation', 'mean'),
    Avg_Word_Count=('Word_Count', 'mean')
).sort_values("Compliance_Rate", ascending=False)

print(summary)

print("\n--- Detailed Failure Cases (Deviation > 0) ---")
failures = df[df["Deviation"] > 0].sort_values("Deviation", ascending=False)
if not failures.empty:
    for _, row in failures.iterrows():
        print(f"[{row['Model']}] Limit: {row['Prompt_Limit']} | Actual: {row['Word_Count']} (+{row['Deviation']})")
        print(f"  -> {row['Response']}")
else:
    print("No failures! All models compliant.")


--- Testing Model: Base_SFT ---


`torch_dtype` is deprecated! Use `dtype` instead!
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



--- Testing Model: DPO ---

--- Testing Model: PPO-Sparse ---

--- Testing Model: PPO-Dense ---

--- Testing Model: GRPO ---

=== VERBOSITY COMPLIANCE REPORT ===
            Compliance_Rate  Avg_Deviation  Avg_Word_Count
Model                                                     
PPO-Sparse              0.4           19.6            48.8
DPO                     0.2           24.2            51.0
Base_SFT                0.0           25.6            55.6
GRPO                    0.0           31.4            61.4
PPO-Dense               0.0           32.0            62.0

--- Detailed Failure Cases (Deviation > 0) ---
[DPO] Limit: 20 | Actual: 82 (+62)
  -> In Act 1 Scene 1 of Romeo and Juliet, the play begins with a fight between the families of two young lovers, Montague's and Capulet's, as they go their separate ways in the night. As they flee the city, they encounter the servant, Juliet, who arrives with a mysterious message telling her to meet Juliet on the balcony of her parents' e