In [3]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import random
import json

# --- Dataset -----------------------------------------------------------------
# Download the review dataset from the Hugging Face Hub.
print("Loading dataset...")
dataset = load_dataset("nareshmlx/16k_opencvpr")

# The rest of the script needs both a 'train' and a 'test' split.
# Use the published splits when a 'test' split already exists; otherwise carve
# a reproducible (seed=42) 15% hold-out from 'train'. Keying the decision on
# the presence of 'test' (rather than on "'train' is the only split") avoids a
# KeyError below for datasets that ship e.g. train + validation but no test.
if 'test' in dataset:
    split_dataset = dataset
else:
    split_dataset = dataset['train'].train_test_split(test_size=0.15, seed=42)

print(f"Train examples: {len(split_dataset['train'])}")
print(f"Test examples: {len(split_dataset['test'])}")

# --- Model and tokenizer -----------------------------------------------------
print("\nLoading model and tokenizer...")

# Single source of truth for the checkpoint so model and tokenizer cannot drift.
MODEL_ID = "nareshmlx/code-reviewer-opencv-16k"

# NOTE(review): trust_remote_code=True executes Python shipped in the Hub repo;
# only keep it for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)

# generate() needs a pad token id; fall back to EOS when the tokenizer has none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.eval()

# Optionally switch to Unsloth's fast inference path. This is best-effort:
# any failure (package missing, unsupported hardware, model not patched by
# Unsloth) falls back to standard inference. `except Exception` is used
# instead of a bare `except` so Ctrl-C / SystemExit are not swallowed, and the
# reason for the fallback is reported instead of being silently discarded.
try:
    from unsloth import FastLanguageModel
    FastLanguageModel.for_inference(model)
    print("✓ Fast inference enabled (Unsloth)")
except Exception as exc:
    print(f"✓ Standard inference mode (Unsloth not used: {type(exc).__name__})")

# --- Sample selection --------------------------------------------------------
# Pick a fixed, reproducible set of test rows to review. The sample size is
# clamped to the split size because random.sample() raises ValueError when
# asked for more items than the population holds.
NUM_SAMPLES = 5
random.seed(42)
num_test_examples = len(split_dataset['test'])
sample_indices = random.sample(
    range(num_test_examples),
    min(NUM_SAMPLES, num_test_examples),
)

print("\n" + "="*100)
print("CODE REVIEW MODEL TESTING - PROPER INFERENCE")
print("="*100)

# --- Per-sample review loop --------------------------------------------------
# For every selected test row: show the PR context and the reference comment,
# ask the model for a review through the chat template, then print the model's
# reply together with a rough sanity check.
for i, idx in enumerate(sample_indices, 1):
    sample = split_dataset['test'][idx]

    # Raw fields of the dataset row; 'input' is expected to hold a JSON string.
    input_data = sample.get('input', '')
    expected_output = sample.get('output', '')

    # Parse the JSON payload and pull out the fields used for the prompt.
    # Only parsing/shape errors are treated as "skip this sample":
    #   JSONDecodeError - payload is not valid JSON
    #   TypeError       - payload is not a str/bytes (e.g. None)
    #   AttributeError  - payload decoded to something that is not a dict
    try:
        input_dict = json.loads(input_data)
        pr_title = input_dict.get('pr_title', 'N/A')
        file_path = input_dict.get('file_path', 'N/A')
        language = input_dict.get('language', 'N/A')

        # Prefer the targeted hunk; otherwise fall back to the first 800
        # characters of the full diff so the prompt stays short.
        target_hunk = input_dict.get('target_hunk', '')
        if not target_hunk:
            full_diff = input_dict.get('full_diff', '')
            target_hunk = full_diff[:800] if full_diff else ''
    except (json.JSONDecodeError, TypeError, AttributeError):
        print(f"⚠️  Sample {i}: Could not parse input JSON, skipping...")
        continue

    print(f"\n{'='*100}")
    print(f"SAMPLE {i} (Index: {idx})")
    print(f"{'='*100}")

    print(f"\n📋 PR Info:")
    print(f"  Title: {pr_title}")
    print(f"  File: {file_path}")
    print(f"  Language: {language}")

    print(f"\n📝 Code Change:")
    print(f"{'-'*80}")
    # Show at most 400 characters of the hunk in the console.
    print(target_hunk[:400] + "..." if len(target_hunk) > 400 else target_hunk)

    print(f"\n✅ EXPECTED REVIEW COMMENT:")
    print(f"{'-'*80}")
    print(expected_output)

    # Build the review request as a single-turn chat conversation.
    user_message = f"""Review this OpenCV code change:

File: {file_path}
Language: {language}

{target_hunk}

Provide a brief, specific code review comment."""

    messages = [
        {"role": "user", "content": user_message}
    ]

    # add_generation_prompt=True appends the assistant header so the model
    # starts writing its reply. return_dict=True also yields the attention
    # mask, which generate() cannot infer reliably when pad_token == eos_token.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    print(f"\n🔄 Generating review comment...")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=200,      # Reasonable length for a review comment
            temperature=0.7,         # Balanced creativity
            top_p=0.9,               # Nucleus sampling
            do_sample=True,
            repetition_penalty=1.1,
            use_cache=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation. Slice off the prompt tokens
    # and decode only the continuation. This replaces splitting the decoded
    # text on the word "assistant", which (a) could leave model_output unset
    # or stale when only a differently-cased "Assistant" was present and
    # (b) truncated the reply whenever the diff or the reply itself contained
    # the word "assistant".
    generated_tokens = outputs[0][prompt_length:]
    model_output = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    print(f"\n🤖 MODEL REVIEW COMMENT:")
    print(f"{'-'*80}")
    print(model_output)

    # Simple quality assessment
    print(f"\n📊 ANALYSIS:")
    print(f"  Expected length: {len(expected_output)} chars")
    print(f"  Generated length: {len(model_output)} chars")

    # Heuristic sanity check: non-trivial length, not a JSON blob, and not an
    # echo of the start of the raw input.
    is_reasonable = (
        len(model_output) > 10 and
        len(model_output) < 1000 and
        not model_output.startswith('{') and
        model_output != input_data[:100]
    )

    if is_reasonable:
        print(f"  Quality: ✅ Output looks reasonable")
    else:
        print(f"  Quality: ⚠️  Output may have issues")

    print(f"\n{'='*100}\n")

# --- Closing summary ---------------------------------------------------------
# Print a short how-to for reusing the model outside this script. The example
# includes do_sample=True because temperature/top_p only take effect when
# sampling is enabled, and moves inputs to model.device rather than a
# hard-coded "cuda" so it matches how this script runs generation.
print("\n✨ Testing complete!")
print("\n" + "="*100)
print("💡 USAGE TIPS:")
print("="*100)
print("""
For best results when using this model:

1. Format input as a chat message with 'user' role
2. Use tokenizer.apply_chat_template() with add_generation_prompt=True
3. Generate with:
   - do_sample: True (required for temperature/top_p to apply)
   - temperature: 0.7 (balanced)
   - top_p: 0.9 (diverse but focused)
   - max_new_tokens: 150-200 (reasonable review length)

Example:
```python
messages = [{"role": "user", "content": "Review this code: ..."}]
inputs = tokenizer.apply_chat_template(
    messages, 
    tokenize=True, 
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.9)
```
""")

Loading dataset...


Train examples: 8739
Test examples: 1543

Loading model and tokenizer...



Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
✓ Fast inference enabled (Unsloth)

CODE REVIEW MODEL TESTING - PROPER INFERENCE

SAMPLE 1 (Index: 1309)

📋 PR Info:
  Title: Image Segmentation .cpp tutorial
  File: doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown
  Language: text

📝 Code Change:
--------------------------------------------------------------------------------
@@ -0,0 +1,165 @@
+Image Segmentation with Distance Transform and Watershed Algorithm {#tutorial_distance_transform}
+
+Goal
+----
+
+In this tutorial you will learn how to:
+
+-   Use the OpenCV function @ref cv::filter2D in order to perform some laplacian filtering for image sharpening
+-   Use the OpenCV function @ref cv::distanceTransform in order to obtain the derived represent...

✅ EXPECTED REVIEW COMMENT:
-----------------------------------------------------------------