# Custom pairwise evaluator

In [1]:
from dotenv import load_dotenv

# Read a local .env file into the process environment before any model client
# is built (presumably this supplies the API keys used by the LLM-based cells
# below -- confirm against your setup).
load_dotenv()

True

In [2]:
from typing import Any, Optional

from langchain.evaluation import PairwiseStringEvaluator


class LengthComparisonPairwiseEvaluator(PairwiseStringEvaluator):
    """Pairwise evaluator that favours the prediction containing more words.

    Word counts are obtained by splitting each prediction on whitespace; no
    LLM call is involved.
    """

    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Compare the whitespace-delimited word counts of two predictions.

        Args:
            prediction: First candidate output.
            prediction_b: Second candidate output.
            reference: Accepted for interface compatibility; not used.
            input: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``{"score": 1}`` when ``prediction`` has strictly more words than
            ``prediction_b``, otherwise ``{"score": 0}`` (ties included).
        """
        words_a = len(prediction.split())
        words_b = len(prediction_b.split())
        if words_a > words_b:
            return {"score": 1}
        return {"score": 0}

In [3]:
# Instantiate the length-based evaluator defined above.
evaluator = LengthComparisonPairwiseEvaluator()

# `prediction` splits into 10 words and `prediction_b` into 8, so the first
# prediction is the longer one and the reported score is 1.
evaluator.evaluate_string_pairs(
    prediction="The quick brown fox jumped over the lazy dog dog.",
    prediction_b="The quick brown fox jumped over the dog.",
)

{'score': 1}

### LLM-Based Example

In [4]:
from typing import Any, Optional

from langchain.chains import LLMChain
from langchain.evaluation import PairwiseStringEvaluator
from langchain_community.chat_models import ChatAnthropic
from langchain_openai import ChatOpenAI


class CustomPreferenceEvaluator(PairwiseStringEvaluator):
    """
    Custom evaluator to compare two strings using a custom LLMChain.
    """

    def __init__(self) -> None:
        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
        self.eval_chain = LLMChain.from_string(
            """Which option is preferred? Do not take order into account. Evaluate based on accuracy and helpfulness. If neither is preferred, respond with C. Provide your reasoning, then finish with Preference: A/B/C

Input: How do I get the path of the parent directory in python 3.8?
Option A: You can use the following code:
```python
import os

os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

SyntaxError: incomplete input (3713648768.py, line 18)

In [5]:
from pathlib import Path
Path(__file__).absolute().parent

NameError: name '__file__' is not defined

In [6]:
@property
def requires_input(self) -> bool:
    """Report that this evaluator needs an ``input`` string; always ``True``."""
    return True

@property
def requires_reference(self) -> bool:
    """Report that no ``reference`` string is needed; always ``False``."""
    return False

def _evaluate_string_pairs(
    self,
    *,
    prediction: str,
    prediction_b: str,
    reference: Optional[str] = None,
    input: Optional[str] = None,
    **kwargs: Any,
) -> dict:
    """Ask ``self.eval_chain`` which prediction is preferred and parse its verdict.

    The chain is called with the input and both predictions, plus a stop
    sequence so generation halts before the prompt's question is repeated.
    The reply is expected to end with ``Preference: A``, ``B`` or ``C``.

    Args:
        prediction: Candidate shown to the chain as ``prediction``.
        prediction_b: Candidate shown to the chain as ``prediction_b``.
        reference: Accepted for interface compatibility; not used.
        input: The question or prompt both predictions respond to.
        **kwargs: Forwarded unchanged to the chain call.

    Returns:
        A dict with:
          * ``"reasoning"``: the text preceding the final ``Preference:``
            marker, stripped (the whole reply when the marker is absent);
          * ``"value"``: the stripped text after the marker, or ``None``
            when the reply contains no marker;
          * ``"score"``: ``1.0`` for ``"A"``, ``0.0`` for ``"B"``, otherwise
            ``None``.
    """
    result = self.eval_chain(
        {
            "input": input,
            "prediction": prediction,
            "prediction_b": prediction_b,
            "stop": ["Which option is preferred?"],
        },
        **kwargs,
    )

    response_text = result["text"]
    # Split on the LAST marker: the verdict is requested at the end of the
    # reply, and the reasoning itself may mention "Preference:" earlier.
    reasoning, marker, preference = response_text.rpartition("Preference:")
    if not marker:
        # The model gave no verdict; report that instead of raising
        # ValueError from a failed two-way unpack.
        return {"reasoning": response_text.strip(), "value": None, "score": None}

    preference = preference.strip()
    score = 1.0 if preference == "A" else (0.0 if preference == "B" else None)
    return {"reasoning": reasoning.strip(), "value": preference, "score": score}