# Tier 1 v2: Qwen 2.5 1.5B with Ultra-Structured Prompt

## Key Improvements from v1
1. ✅ Chat template for proper system/user messages
2. ✅ Ultra-clear instructions: "RESPOND ONLY WITH A NUMBER"
3. ✅ Few-shot examples showing exact format
4. ✅ Reduced `max_new_tokens` from 50 to 10
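5. ✅ Lower temperature: 0.01 (from 0.1) — note: generation is called with `do_sample=False` (greedy decoding), so this value does not change the output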
6. ✅ 3-stage robust parsing: direct float → regex → keyword fallback

## Expected Result
- LB Score: **0.91-0.92** (conservative)
- Medal Range: **0.92-0.925** (optimistic)

In [None]:
# Notebook shell escape: quietly (-q) install the libraries imported in the next cell.
!pip install -q transformers peft accelerate

In [None]:
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from tqdm import tqdm
import re

# Report the runtime environment: PyTorch version, CUDA availability and,
# when a GPU is visible, the name of device 0.
cuda_available = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {cuda_available}")
if cuda_available:
    print(f"Device: {torch.cuda.get_device_name(0)}")

In [None]:
class CFG:
    """Static settings shared by every cell of the Tier 1 v2 inference run."""

    # --- Model artefacts ---------------------------------------------------
    # Base checkpoint identifier and the directory holding the LoRA adapter.
    BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
    LORA_ADAPTER = "/kaggle/input/mahmoudmohamed-lora-adapter/"

    # --- Data locations ----------------------------------------------------
    TEST_DATA = "/kaggle/input/jigsaw-agile-community-rules/test.csv"
    OUTPUT = "submission.csv"

    # --- Tokenisation / generation -----------------------------------------
    # MAX_LENGTH is the tokenizer's max_length (prompts are truncated to it);
    # MAX_NEW_TOKENS caps how many tokens generate() may produce.
    MAX_LENGTH = 512
    TEMPERATURE = 0.01
    MAX_NEW_TOKENS = 10

    # --- Hardware ----------------------------------------------------------
    # Default to the CPU and switch to the GPU only when PyTorch can see one.
    DEVICE = "cpu"
    if torch.cuda.is_available():
        DEVICE = "cuda"


print(f"Config: {CFG.BASE_MODEL}")

In [None]:
def create_prompt_v2(tokenizer, row):
    """Build the chat-formatted classification prompt for one data row.

    Args:
        tokenizer: Object exposing ``apply_chat_template``; it receives the
            system/user message list and renders the final prompt.
        row: Mapping with the keys ``rule``, ``body``,
            ``positive_example_1``, ``positive_example_2``,
            ``negative_example_1`` and ``negative_example_2``.

    Returns:
        Whatever ``tokenizer.apply_chat_template`` returns when called with
        ``tokenize=False`` and ``add_generation_prompt=True``.
    """
    system_msg = "You are a precise AI. Respond ONLY with a decimal number between 0.0 and 1.0. NO other text."

    # The user turn is assembled line by line; empty strings become the blank
    # separator lines between sections once everything is joined with "\n".
    user_lines = [
        "Analyze if this post violates the rule.",
        "",
        f"RULE: {row['rule']}",
        f"POST: {row['body']}",
        "",
        "POSITIVE EXAMPLES (violations):",
        f"1. {row['positive_example_1']}",
        f"2. {row['positive_example_2']}",
        "",
        "NEGATIVE EXAMPLES (not violations):",
        f"1. {row['negative_example_1']}",
        f"2. {row['negative_example_2']}",
        "",
        "Respond with ONLY a number between 0.0 and 1.0:",
        "- 0.0 = NOT a violation",
        "- 1.0 = IS a violation",
        "",
        "Examples: 0.95, 0.23, 0.78, 0.02",
        "",
        "Your answer (number only):",
    ]
    user_msg = "\n".join(user_lines)

    conversation = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]

    # tokenize=False asks for the rendered template rather than token ids;
    # add_generation_prompt=True requests the template's assistant-turn opener.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

In [None]:
# A decimal probability such as "0.73" or "1.0" embedded in surrounding text.
_PROB_RE = re.compile(r'\b(0\.\d+|1\.0+)\b')
# Whole words signalling a violation. "violat" and "spam" are stems, so
# "violates", "violation", "violating", "spammy", ... all count.
_POSITIVE_RE = re.compile(r"\b(?:yes|violat\w*|spam\w*)\b")
# Whole words signalling no violation, plus "n't" contractions ("doesn't").
_NEGATIVE_RE = re.compile(r"\b(?:no|not|fine)\b|n't\b")


def _parse_probability(text):
    """Turn the model's raw reply into a probability, or None if unparseable.

    Stages, tried in order:
      1. The whole reply parses as a float -> clamped to [0.0, 1.0].
      2. The first ``0.xxx`` / ``1.0`` token found in the reply.
      3. Keyword heuristic -> 0.8 (violation) or 0.2 (no violation).
    """
    # Stage 1: the reply is nothing but a number.
    try:
        prob = float(text)
    except ValueError:
        prob = None
    # float() also accepts "nan"; NaN is the only value not equal to itself
    # and must not be clamped into a confident score, so fall through instead.
    if prob is not None and prob == prob:
        return max(0.0, min(1.0, prob))

    # Stage 2: first decimal in [0, 1] embedded in other text.
    match = _PROB_RE.search(text)
    if match:
        return float(match.group(1))

    # Stage 3: keyword heuristic on whole words. Whichever cue appears first
    # decides, so "does not violate" and "No, ..." read as negative while
    # "Yes, ..." and "This is spam, not ..." read as positive.
    lower = text.lower()
    positive = _POSITIVE_RE.search(lower)
    negative = _NEGATIVE_RE.search(lower)
    if negative and (positive is None or negative.start() < positive.start()):
        return 0.2
    if positive:
        return 0.8

    return None


def predict_single_v2(model, tokenizer, prompt, row_id):
    """Generate a reply for ``prompt`` and parse it into a violation score.

    Args:
        model: Causal LM exposing ``generate``.
        tokenizer: Tokenizer used to encode the prompt and decode the reply.
        prompt: Fully rendered prompt string.
        row_id: Identifier used only in the warning printed when parsing fails.

    Returns:
        A float in [0.0, 1.0]: the parsed number when the reply contains one,
        0.8 / 0.2 from the keyword heuristic, or 0.5 when nothing matched.
    """
    # NOTE(review): prompts longer than CFG.MAX_LENGTH tokens are truncated;
    # if the tokenizer truncates on the right (believed to be the transformers
    # default - confirm), the trailing answer instruction is what gets dropped.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=CFG.MAX_LENGTH,
        truncation=True,
    ).to(CFG.DEVICE)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=CFG.MAX_NEW_TOKENS,
            # Kept for parity with the configuration; with do_sample=False
            # decoding is greedy, so the temperature should not affect output.
            temperature=CFG.TEMPERATURE,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; keep only the new tokens.
    generated_ids = outputs[0][len(inputs.input_ids[0]):]
    text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    prob = _parse_probability(text)
    if prob is None:
        print(f"⚠️ Row {row_id}: '{text[:50]}' -> 0.5")
        return 0.5
    return prob

In [None]:
banner = "=" * 80
print(banner)
print("Tier 1 v2: Ultra-Structured Prompt")
print(banner)

# The tokenizer is loaded from the base checkpoint, not from the adapter path.
tokenizer = AutoTokenizer.from_pretrained(CFG.BASE_MODEL, trust_remote_code=True)

# Base weights are requested in float16 with device_map="auto".
base_model_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
base_model = AutoModelForCausalLM.from_pretrained(CFG.BASE_MODEL, **base_model_kwargs)

# Wrap the base model with the LoRA adapter and switch to inference mode.
model = PeftModel.from_pretrained(base_model, CFG.LORA_ADAPTER, torch_dtype=torch.float16)
model.eval()

# Reports the configured device (CFG.DEVICE), not a value read from the model.
print(f"✅ Model loaded on {CFG.DEVICE}")

In [None]:
# Read the test set from the configured CSV path and report its row count.
test_df = pd.read_csv(CFG.TEST_DATA)
print(f"Test samples: {test_df.shape[0]}")

In [None]:
print("Running inference...")
predictions = []
success = fail = 0

# Scores the parser emits from its keyword / default fallbacks. A prediction
# equal to one of them is counted as a parse failure. This is a heuristic:
# a reply that literally says "0.5" is indistinguishable from the fallback.
FALLBACK_SCORES = (0.2, 0.5, 0.8)

# enumerate() supplies a true 0-based position for the preview below, so it no
# longer depends on test_df having a default integer index.
for position, (_, row) in enumerate(tqdm(test_df.iterrows(), total=len(test_df))):
    prompt = create_prompt_v2(tokenizer, row)
    prob = predict_single_v2(model, tokenizer, prompt, row['row_id'])
    predictions.append({'row_id': row['row_id'], 'rule_violation': prob})

    if prob in FALLBACK_SCORES:
        fail += 1
    else:
        success += 1

    # Preview the first three rows; str() keeps a missing (NaN) body from
    # raising when it is sliced.
    if position < 3:
        print(f"Sample {position+1}: {str(row['body'])[:60]}... -> {prob:.4f}")

In [None]:
# Naming the columns explicitly keeps the frame well-formed (and the
# 'rule_violation' lookup below valid) even when there are no predictions.
submission = pd.DataFrame(predictions, columns=['row_id', 'rule_violation'])
submission.to_csv(CFG.OUTPUT, index=False)

# Guard the percentage against an empty test set (division by zero).
total_rows = len(test_df)
success_pct = 100 * success / total_rows if total_rows else 0.0

print(f"\n{'='*80}")
print(f"✅ Saved: {CFG.OUTPUT}")
print(f"Total: {len(submission)}")
print(f"Parse success: {success}/{total_rows} ({success_pct:.1f}%)")
print("\nStats:")
print(submission['rule_violation'].describe())
print("\nFirst 10:")
print(submission.head(10))