# This notebook demonstrates how to evaluate simple text outputs using a custom keyword-based scorer with the judgeval SDK. We test if the answer contains "Real Madrid" and log the results to the Judgment dashboard using JudgmentClient.

# In [12]:
import os
from typing import List

from judgeval import JudgmentClient
from judgeval.data import Example
from judgeval.scorers.answer_relevancy import AnswerRelevancyScorer
from judgeval.scorers.base_scorer import BaseScorer
from pydantic import Field

# Step 1: Create a custom scorer
class SimpleKeywordScorer(BaseScorer):
    """Scorer that checks whether an example's output mentions "Real Madrid".

    The check is a case-sensitive containment test against
    ``example.actual_output``; no model or network call is involved.
    """

    # NOTE(review): presumably the label judgeval uses to identify
    # user-defined scorers -- confirm against BaseScorer.
    score_type: str = "Custom"
    # No parameters are declared as required; the default is built by
    # ``list`` via ``default_factory`` rather than a shared literal.
    required_params: List[str] = Field(default_factory=list)

    async def a_score_example(self, example: Example, *args, **kwargs) -> float:
        """Score a single example by keyword presence.

        Args:
            example: The example whose ``actual_output`` is inspected.
            *args: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``1.0`` if ``"Real Madrid"`` is contained in the output,
            otherwise ``0.0`` (including when the output is missing).
        """
        output = example.actual_output
        if output is None:
            # A missing output cannot contain the keyword; without this
            # guard the ``in`` test below raises TypeError on None.
            return 0.0
        return 1.0 if "Real Madrid" in output else 0.0

# Step 2: Initialize client and scorer
# Credentials are read from the environment rather than written into the
# notebook, so they are never committed or shared along with the code.
# NOTE(review): the key that was previously embedded here should be treated
# as leaked and rotated.
judgment_api_key = os.getenv("JUDGMENT_API_KEY")
judgment_org_id = os.getenv("JUDGMENT_ORG_ID")
if not judgment_api_key or not judgment_org_id:
    # Fail early with an actionable message instead of an opaque auth error.
    raise RuntimeError(
        "Set the JUDGMENT_API_KEY and JUDGMENT_ORG_ID environment variables "
        "before running this evaluation."
    )

client = JudgmentClient(
    api_key=judgment_api_key,
    organization_id=judgment_org_id,
)
scorer = SimpleKeywordScorer()

# Step 3: Define examples
# Both examples ask the same question; only the answer under test differs.
# The first answer contains the keyword, the second does not.
question = "Which club signed Mbappe?"
examples = [
    Example(input=question, actual_output=answer)
    for answer in ("Real Madrid signed him.", "PSG signed him.")
]

# Step 4: Run the evaluation
# Collect the run settings in one place, then hand them to the client.
evaluation_settings = {
    "examples": examples,
    "scorers": [scorer],
    "project_name": "real-madrid-keyword-scorer",
    "eval_run_name": "run_2",  # or any unique name
}
results = client.run_evaluation(**evaluation_settings)



# Output:
# Evaluating 2 example(s) in parallel: |████████████████████████████████|100% (2/2) [Time Taken: 00:00, 1932.41Example/s]
#
# Logging Results: -




                  