In [1]:
print("hello")

hello


In [2]:
"""
STEP 1: Test if your LoRA adapter works
Run this to verify your model generates reasonable recommendations
Runtime: ~2 minutes
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import json

print("="*80)
print("TESTING YOUR TRAINED MODEL")
print("="*80)

# ============================================================================
# STEP 1: Load Base Model + Your LoRA Adapter
# ============================================================================
print("\n1. Loading base Llama 3.2 model...")

base_model_name = "meta-llama/Llama-3.2-3B"  # Or wherever your base model is
adapter_path = "./llama3.2-lora-final"

try:
    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    print("   ✓ Base model loaded")
    
    # Load your LoRA adapter
    model = PeftModel.from_pretrained(base_model, adapter_path)
    print("   ✓ LoRA adapter loaded")
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("   ✓ Tokenizer loaded")
    
except Exception as e:
    print(f"   ✗ Error loading model: {e}")
    print("\n⚠️  If base model not found, you need to download it first:")
    print("   huggingface-cli download meta-llama/Llama-3.2-3B")
    exit(1)

# ============================================================================
# STEP 2: Peek at Your Training Data Format
# ============================================================================
print("\n2. Analyzing your training data format...")

with open('movielens_train.jsonl', 'r') as f:
    first_sample = json.loads(f.readline())

print("   Training data structure:")
for key, value in first_sample.items():
    value_str = str(value)[:100]
    print(f"     • {key}: {value_str}...")

# Determine the prompt format
if 'text' in first_sample:
    prompt_field = 'text'
elif 'prompt' in first_sample:
    prompt_field = 'prompt'
else:
    prompt_field = list(first_sample.keys())[0]
    
print(f"\n   Using field: '{prompt_field}' for testing")

# ============================================================================
# STEP 3: Test Generation
# ============================================================================
print("\n3. Testing model generation...")

# Create a test prompt in the same format as your training data
test_prompts = [
    "User has watched: Toy Story (1995), Jurassic Park (1993). Recommend the next movie.",
    "User enjoys Action and Sci-Fi movies. What should they watch next?",
    "Based on viewing history of The Matrix, recommend similar films."
]

for i, test_prompt in enumerate(test_prompts, 1):
    print(f"\n{'='*80}")
    print(f"TEST {i}/3")
    print(f"{'='*80}")
    print(f"PROMPT: {test_prompt}")
    print("-"*80)
    
    # Format prompt (adjust this based on your training format)
    # Common formats:
    # - Llama 3 format: <|begin_of_text|><|start_header_id|>user<|end_header_id|>...
    # - Alpaca format: ### Instruction: ... ### Response:
    
    # Try Llama 3 format first
    formatted_prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{test_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    
    # Generate
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1
        )
    
    # Decode
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=False)
    
    # Extract just the assistant's response
    if "assistant" in full_output:
        response = full_output.split("assistant")[-1].strip()
        response = response.replace("<|eot_id|>", "").replace("<|end_of_text|>", "").strip()
    else:
        response = full_output
    
    print(f"RESPONSE:\n{response}")
    print()

# ============================================================================
# STEP 4: Model Info
# ============================================================================
print("\n" + "="*80)
print("MODEL INFORMATION")
print("="*80)

# Count parameters
base_params = sum(p.numel() for p in base_model.parameters())
lora_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Base model parameters: {base_params:,}")
print(f"LoRA trainable parameters: {lora_params:,}")
print(f"Percentage trainable: {100 * lora_params / base_params:.2f}%")

# Memory usage
if torch.cuda.is_available():
    memory_used = torch.cuda.memory_allocated() / 1024**3
    print(f"GPU memory used: {memory_used:.2f} GB")

# ============================================================================
# STEP 5: Quick Quality Check
# ============================================================================
print("\n" + "="*80)
print("QUALITY CHECK")
print("="*80)

print("\n✅ Check these criteria:")
print("  [ ] Does the model generate movie recommendations?")
print("  [ ] Are recommendations relevant to the prompt?")
print("  [ ] Does it provide reasoning (not just list titles)?")
print("  [ ] Is the output coherent (not gibberish)?")

print("\n⚠️  If output is nonsense:")
print("  → Check wandb logs to see if training loss decreased")
print("  → Your model might need more training epochs")

print("\n✓ If output looks good:")
print("  → Proceed to creating test set and quantization")
print("  → Your thesis is on track!")

print("\n" + "="*80)
print("TEST COMPLETE")
print("="*80)

TESTING YOUR TRAINED MODEL

1. Loading base Llama 3.2 model...


config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

   ✓ Base model loaded
   ✓ LoRA adapter loaded


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


   ✓ Tokenizer loaded

2. Analyzing your training data format...
   Training data structure:
     • instruction: Acting as a personalized movie recommender, analyze the user's watch history and suggest the most lo...
     • input: User history: Bug's Life, A (1998), Antz (1998), Hunchback of Notre Dame, The (1996), Hercules (1997...
     • output: The user should watch Pocahontas (1995). Reasoning: This movie aligns with their interest in Animati...

   Using field: 'instruction' for testing

3. Testing model generation...

TEST 1/3
PROMPT: User has watched: Toy Story (1995), Jurassic Park (1993). Recommend the next movie.
--------------------------------------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


RESPONSE:
<|end_header_id|>

User has watched: 101 Dalmatians (1996). Recommend the next movie.-systemusersystemusersystemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser systemuser system


TEST 2/3
PROMPT: User enjoys Action and Sci-Fi movies. What should they watch next?
--------------------------------------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


RESPONSE:
<|end_header_id|>

user/useruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruseruser


TEST 3/3
PROMPT: Based on viewing history of The Matrix, recommend similar films.
--------------------------------------------------------------------------------
RESPONSE:
<|end_header_id|>

• 0min - 1999
• 98min - 1997
• 100min - 2000
• 90min - 1995
• 88min - 2001

Fur: An Imaginary Portrait Of Diane Arbus (2006)

In the public library in New York City, an elderl