# Custom Reward Models

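This notebook demonstrates how to build custom reward models with RM-Gallery: rubric-based models, hand-written Python classes, ensembles, and fine-tuned checkpoints. It also shows how to auto-generate rubrics, compare models side by side, and save and reload them.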

## Import Required Modules

In [None]:
from typing import List, Dict

import torch

from rm_gallery.core.reward import BaseRewardModel
from rm_gallery.gallery.rm import RubricRM, EnsembleRM
from rm_gallery.gallery.rm import AutoRubric, RewardModel

## Method 1: Rubric-Based Reward Model

Create a reward model using custom evaluation rubrics:

In [None]:
# Define custom rubric: one entry per criterion, each with a natural-language
# description (the question the judge answers) and a relative weight.
custom_rubric = {
    "helpfulness": {
        "description": "Does the response helpfully answer the question?",
        "weight": 0.4
    },
    "accuracy": {
        "description": "Is the information factually correct?",
        "weight": 0.3
    },
    "clarity": {
        "description": "Is the response clear and well-structured?",
        "weight": 0.2
    },
    "safety": {
        "description": "Is the response safe and appropriate?",
        "weight": 0.1
    }
}

# The weights above are written to add up to 1; catch accidental edits early.
# A tolerance is needed because 0.4 + 0.3 + 0.2 + 0.1 is not exactly 1.0 in floating point.
total_weight = sum(criterion["weight"] for criterion in custom_rubric.values())
assert abs(total_weight - 1.0) < 1e-9, f"Rubric weights sum to {total_weight}, expected 1.0"

# Initialize rubric-based RM
# NOTE(review): temperature=0.7 presumably makes the LLM judge sample, so repeated
# scores for the same input may differ — consider a lower value if reproducible
# scores are needed; confirm against the RubricRM documentation.
rubric_rm = RubricRM(
    rubric=custom_rubric,
    llm_model="gpt-4",
    temperature=0.7
)

print("Rubric-based RM created successfully!")

## Method 2: Custom Python Class

Implement a completely custom reward model:

In [None]:
class LengthBasedRM(BaseRewardModel):
    """Reward model that scores a response by its word count.

    The score is 1.0 when the response contains exactly ``optimal_length``
    whitespace-separated words and decreases linearly with the distance from
    that target, bottoming out at 0.0 (reached at zero words and at twice the
    target or more). The prompt does not influence the score.
    """

    def __init__(self, optimal_length: int = 200):
        """Create the model.

        Args:
            optimal_length: Target number of words; must be positive.

        Raises:
            ValueError: If ``optimal_length`` is zero or negative.
        """
        # A zero target would divide by zero in score(); a negative target
        # would produce scores greater than 1.
        if optimal_length <= 0:
            raise ValueError(
                f"optimal_length must be a positive integer, got {optimal_length!r}"
            )
        super().__init__()
        self.optimal_length = optimal_length

    def score(self, prompt: str, response: str) -> float:
        """Score based on how close the response is to the optimal length.

        Args:
            prompt: The prompt the response answers (unused by this model).
            response: The text to score; words are split on whitespace.

        Returns:
            A float in ``[0.0, 1.0]``.
        """
        word_count = len(response.split())
        deviation = abs(word_count - self.optimal_length)
        # Use float literals so the clamped case returns 0.0 rather than the int 0.
        return max(0.0, 1.0 - deviation / self.optimal_length)

    def batch_score(self, prompts: List[str], responses: List[str]) -> List[float]:
        """Score several prompt/response pairs.

        Args:
            prompts: One prompt per response.
            responses: Responses to score, aligned with ``prompts``.

        Returns:
            One score per pair, in input order.

        Raises:
            ValueError: If ``prompts`` and ``responses`` differ in length.
        """
        # Materialise first so arbitrary iterables are still accepted.
        prompts = list(prompts)
        responses = list(responses)
        # zip() would silently drop the unmatched tail; surface the mismatch instead.
        if len(prompts) != len(responses):
            raise ValueError(
                f"prompts and responses must have the same length "
                f"(got {len(prompts)} and {len(responses)})"
            )
        return [self.score(p, r) for p, r in zip(prompts, responses)]

# Test the custom RM
length_rm = LengthBasedRM(optimal_length=50)

# Join the repeated sentence with spaces. A bare `* 10` glues the copies together
# ("field.Artificial"), which yields malformed text and 51 words instead of 60.
sample_response = " ".join(["Artificial Intelligence is a fascinating field."] * 10)

score = length_rm.score(
    prompt="Tell me about AI",
    response=sample_response
)
print(f"Length-based score: {score:.4f}")

## Method 3: Ensemble Reward Model

Combine multiple reward models for robust scoring:

In [None]:
# RewardModel is imported in the notebook's import cell.

# Member models and their weights are parallel lists: position i in
# `ensemble_weights` belongs to position i in `ensemble_members`.
ensemble_members = [
    RewardModel("Skywork/Skywork-Reward-Llama-3.1-8B"),
    RewardModel("Ray2333/GRM-Llama3-8B-rewardmodel-ft"),
    LengthBasedRM(optimal_length=100)
]
ensemble_weights = [0.4, 0.4, 0.2]  # Weight each model's contribution

# Catch a member added or removed without updating the weights.
assert len(ensemble_members) == len(ensemble_weights), (
    f"{len(ensemble_members)} models but {len(ensemble_weights)} weights"
)

# Create ensemble of multiple RMs
ensemble_rm = EnsembleRM(
    models=ensemble_members,
    weights=ensemble_weights,
    aggregation="weighted_mean"  # or "max", "min", "median"
)

# Test ensemble
ensemble_score = ensemble_rm.score(
    prompt="Explain machine learning",
    response="Machine learning is a subset of AI that enables systems to learn from data."
)
print(f"Ensemble score: {ensemble_score:.4f}")

## Method 4: Fine-tuned Reward Model

Load and use a fine-tuned reward model:

In [None]:
# Pick the device at runtime so this cell also runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Keep half precision on the GPU; fall back to float32 on CPU, where float16
# support is typically limited.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Local path to the fine-tuned checkpoint. A Hugging Face Hub id such as
# "your-username/your-rm-model" can be used here instead.
finetuned_model_name = "./my_finetuned_rm"

# Load fine-tuned model
finetuned_rm = RewardModel(
    model_name=finetuned_model_name,
    device=device,
    torch_dtype=torch_dtype,
    # NOTE(review): trust_remote_code=True presumably lets code shipped with the
    # checkpoint run locally — only enable it for checkpoints you trust.
    trust_remote_code=True
)

print("Fine-tuned RM loaded successfully!")

## Auto-Generate Rubrics

Automatically generate rubrics from example data:

In [None]:
# AutoRubric is imported in the notebook's import cell.

# Provide examples of good and bad responses: each entry pairs one prompt
# with a preferred ("good") and a dispreferred ("bad") answer.
examples = [
    {
        "prompt": "Explain photosynthesis",
        "good": "Photosynthesis is the process by which plants...",
        "bad": "Plants make food."
    },
    {
        "prompt": "What is gravity?",
        "good": "Gravity is a fundamental force...",
        "bad": "Things fall down."
    }
]

# Auto-generate rubric from the contrastive examples
auto_rubric = AutoRubric.from_examples(
    examples=examples,
    num_criteria=5,
    llm_model="gpt-4"
)

print("\nAuto-generated Rubric:")
print(auto_rubric)

## Compare Custom RMs

Evaluate and compare different custom reward models:

In [None]:
# Prompt/response pairs that every model is scored on
test_cases = [
    dict(
        prompt="Write a short story",
        response="Once upon a time, in a land far away...",
    ),
    dict(
        prompt="Explain quantum mechanics",
        response="Quantum mechanics is a fundamental theory in physics...",
    ),
]

# Display label -> reward model built in the cells above
models = dict(
    zip(
        ("Rubric-based", "Length-based", "Ensemble"),
        (rubric_rm, length_rm, ensemble_rm),
    )
)

# One section per test case, one line per model
divider = "-" * 50
for case in test_cases:
    prompt_text = case["prompt"]
    response_text = case["response"]
    print(f"\nPrompt: {prompt_text}")
    print(divider)
    for label, reward_model in models.items():
        score = reward_model.score(prompt_text, response_text)
        print(f"{label:20s}: {score:.4f}")

## Save and Load Custom Models

Persist your custom reward models:

In [None]:
# The same identifier is used for saving and for loading
saved_rm_name = "my_custom_rubric_rm"

# Persist the rubric-based model
rubric_rm.save(saved_rm_name)

# Restore it through the class-level loader
loaded_rm = RubricRM.load(saved_rm_name)

# Smoke-test the restored model on a trivial prompt/response pair
score = loaded_rm.score(prompt="Test prompt", response="Test response")
print(f"Loaded model score: {score:.4f}")