# Interactive Model Testing

Use this notebook to test your trained `Qwen2.5-0.5B-Glyph` model.
We will load the model (and the LoRA adapter, if applicable) and generate responses to see whether the **Glyphs** emerge.

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# CONFIG
# Hub identifier of the base model.
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
# Local directory holding the fine-tuned output.
CHECKPOINT_PATH = "checkpoints/qwen2.5-0.5b-glyph-sft"
# True  -> CHECKPOINT_PATH is a LoRA adapter applied on top of BASE_MODEL.
# False -> CHECKPOINT_PATH is a full SFT model save.
USE_LORA = False

# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

In [None]:
# Load Tokenizer & Model
# NOTE(review): the tokenizer is taken from BASE_MODEL. If fine-tuning added
# tokens to the vocabulary, it should be loaded from CHECKPOINT_PATH instead
# -- confirm against the training script.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Shared loading options. Half precision is only requested when CUDA is
# available; without it we load in float32, since float16 inference off-GPU is
# slow and not supported by every PyTorch build.
load_kwargs = {
    "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
    "device_map": "auto",
    "trust_remote_code": True,
}

model = None

if not USE_LORA:
    # Full SFT: try the checkpoint first so the base weights are not loaded
    # (and then thrown away) when the fine-tuned model is available.
    print(f"Loading Full Finetuned Weights (if merged) or attempting to load from {CHECKPOINT_PATH}...")
    try:
        model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, **load_kwargs)
        print("Loaded finetuned model successfully.")
    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to the base
        # model below so the notebook stays usable.
        print(f"Could not load full model from checkpoint (maybe it is LoRA?): {e}")
        print("Falling back to Base Model (Result will be Untrained behavior) or check path.")

if model is None:
    # Reached for the LoRA path, or when the full checkpoint failed to load.
    print("Loading Base Model...")
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **load_kwargs)

    if USE_LORA:
        print(f"Loading LoRA Adapter from {CHECKPOINT_PATH}...")
        model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)

# Inference only: switch off training-time behaviour such as dropout.
model.eval()
print("Model Ready!")

In [None]:
def generate(prompt, max_new_tokens=512):
    """Return the model's greedy-decoded reply to a single user prompt.

    The prompt is wrapped as a one-turn chat, rendered to text with the
    tokenizer's chat template (including the generation prompt), and decoded
    deterministically (no sampling).

    Args:
        prompt: Text of the user message.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded completion only: the prompt tokens are stripped from the
        generated sequence and special tokens are skipped.
    """
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Move the inputs to wherever the model actually lives. The model is
    # loaded with device_map="auto", so its placement is not guaranteed to
    # match a separately computed "cuda"/"cpu" string.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # Deterministic for evaluation
            # Clear sampling-only settings so none inherited from the
            # model's generation config lingers under greedy decoding.
            temperature=None,
            top_p=None,
            top_k=None,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Each output row starts with its prompt tokens; keep only what follows.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

### Test Cases
Try a **Latent Prompt** (Neutral) on a **Hard** problem. We expect to see glyphs (`🜞`, `🜆`, etc.) emerge autonomously.

In [None]:
# Neutral prompt: it asks for a careful solution and explicitly forbids
# mentioning tags, symbols, or special formatting.
latent_prompt = """
Solve the following problem carefully.
Do not mention any tags, symbols, or special formatting.

Problem:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?
"""

response = generate(latent_prompt)

# Header, a 20-character rule, then the model's reply.
divider = "-" * 20
print("Response:\n" + divider)
print(response)

In [None]:
# Try your own prompt here: edit the problem text below and re-run the cell.
my_prompt = """
Solve the following problem carefully.
Do not mention any tags, symbols, or special formatting.

Problem:
There are 5 houses on a street. Each house has 3 windows. If 4 windows are broken in total, how many unbroken windows are there?
"""

# Generate first, then print, so the reply is also kept in a variable.
my_response = generate(my_prompt)
print(my_response)