# Advanced Metrics and Customization

Please read the full article at [thedataguy.pro](https://thedataguy.pro/blog/generating-test-data-with-ragas/).


In [1]:
import os
import getpass

# Reuse a key that is already exported so "Run All" can finish unattended;
# otherwise ask for it with getpass so the secret never appears in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [2]:
import nest_asyncio
# Patch asyncio once, before any evaluation code runs.
# NOTE(review): presumably needed because the notebook kernel already runs an event loop
# and some Ragas entry points start their own — confirm it is still required here.
nest_asyncio.apply()

In [None]:
from dataclasses import dataclass, field
from typing import Dict, Optional, Set
import typing as t

from ragas.metrics.base import MetricWithLLM, SingleTurnMetric
from ragas.prompt import PydanticPrompt
from ragas.metrics import MetricType, MetricOutputType
from pydantic import BaseModel

# Define input/output models for the prompt
# Payload handed to the evaluation prompt for one sample.
class TechnicalAccuracyInput(BaseModel):
    question: str  # the user's programming question (the metric fills it from sample.user_input)
    context: str  # supporting text; the metric passes the retrieved contexts joined by newlines, or ""
    response: str  # the answer being graded
    programming_language: str = "python"  # language the answer should be judged in; defaults to "python"

# Structured result the evaluation prompt is parsed into.
class TechnicalAccuracyOutput(BaseModel):
    score: float  # numeric grade; this is the value the metric returns. No range is enforced by this model.
    feedback: str  # free-text explanation of the grade; not read by any code visible in this notebook


# Define the prompt
class TechnicalAccuracyPrompt(PydanticPrompt[TechnicalAccuracyInput, TechnicalAccuracyOutput]):
    """Prompt that asks the LLM to grade a programming answer.

    Receives a ``TechnicalAccuracyInput`` and is parsed into a
    ``TechnicalAccuracyOutput``. The instruction states the score scale
    explicitly: without it the model is free to pick its own range, which makes
    scores from different runs or models impossible to compare.
    """

    instruction: str = (
        "Evaluate the technical accuracy of the response to a programming question. "
        "Consider syntax correctness, algorithmic accuracy, and best practices. "
        "Return a score from 0 (entirely incorrect) to 10 (fully correct and idiomatic), "
        "together with short feedback that justifies the score."
    )
    input_model = TechnicalAccuracyInput
    output_model = TechnicalAccuracyOutput
    # No few-shot examples are provided yet; the LLM relies on the instruction alone.
    examples = [
        # Add examples here
    ]

# Create the metric
@dataclass
class TechnicalAccuracy(MetricWithLLM, SingleTurnMetric):
    """LLM-judged metric for the technical correctness of a programming answer.

    For each sample the metric guesses the programming language from the
    question, joins any retrieved contexts into one string, runs
    ``evaluation_prompt`` against ``self.llm`` and returns the ``score`` field
    of the parsed result.
    """

    name: str = "technical_accuracy"
    # ``user_input`` and ``response`` are mandatory. ``retrieved_contexts`` is declared
    # as optional instead of being left out: Ragas hands ``_single_turn_ascore`` a copy
    # of the sample restricted to the columns declared here, so an undeclared column
    # would always arrive as None and the context would silently be dropped.
    _required_columns: Dict[MetricType, Set[str]] = field(
        default_factory=lambda: {
            MetricType.SINGLE_TURN: {
                "user_input",
                "response",
                "retrieved_contexts:optional",
            }
        }
    )
    output_type: Optional[MetricOutputType] = MetricOutputType.CONTINUOUS
    evaluation_prompt: PydanticPrompt = field(default_factory=TechnicalAccuracyPrompt)

    @staticmethod
    def _detect_language(question: str, default: str = "python") -> str:
        """Return the first known language named in ``question``, else ``default``.

        Keywords are matched as whole words, case-insensitively, so that "go"
        is not found inside "algorithm" or "good". A trailing digit is still
        accepted ("python3", "c++17"). Keywords are tried in priority order,
        not in order of appearance in the question.
        """
        import re  # local import so this cell does not depend on edits to the import cell

        # (keyword to look for, language to report), in priority order.
        keywords = (
            ("python", "python"),
            ("javascript", "javascript"),
            ("java", "java"),
            ("c++", "c++"),
            ("rust", "rust"),
            ("golang", "go"),
            ("go", "go"),
        )
        text = question.lower()
        for keyword, language in keywords:
            # Not preceded by a word character and not followed by a letter.
            if re.search(rf"(?<![a-z0-9_]){re.escape(keyword)}(?![a-z_])", text):
                return language
        return default

    async def _single_turn_ascore(self, sample, callbacks) -> float:
        """Score one sample with the evaluation prompt.

        Args:
            sample: single-turn sample providing ``user_input``, ``response``
                and, optionally, ``retrieved_contexts``.
            callbacks: callbacks forwarded unchanged to the prompt's ``generate``.

        Returns:
            The ``score`` field of the prompt's parsed output.

        Raises:
            AssertionError: if no LLM has been set on the metric.
        """
        assert self.llm is not None, "LLM must be set"

        question = sample.user_input
        response = sample.response
        programming_language = self._detect_language(question)

        # Contexts are optional: fall back to an empty string when none were retrieved.
        context = "\n".join(sample.retrieved_contexts) if sample.retrieved_contexts else ""

        prompt_input = TechnicalAccuracyInput(
            question=question,
            context=context,
            response=response,
            programming_language=programming_language,
        )

        evaluation = await self.evaluation_prompt.generate(
            data=prompt_input, llm=self.llm, callbacks=callbacks
        )

        return evaluation.score

In [18]:
from langchain_openai import ChatOpenAI
from ragas import SingleTurnSample
from ragas.llms import LangchainLLMWrapper

# Initialize the LLM, you are going to OPENAI API key
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o")) 

test_data = {
    "user_input": "Write a function to calculate the factorial of a number in Python.",
    "retrieved_contexts": ["Python is a programming language.", "A factorial of a number n is the product of all positive integers less than or equal to n."],
    "response": "def factorial(n):\n    if n == 0:\n        return 1\n    else:\n        return n * factorial(n-1)",
}

# Create a sample
sample = SingleTurnSample(**test_data)  # Unpack the dictionary into the constructor
technical_accuracy = TechnicalAccuracy(llm=evaluator_llm)
score = await technical_accuracy.single_turn_ascore(sample)
print(f"Technical Accuracy Score: {score}")
# Note: The above code is a simplified example. In a real-world scenario, you would need to handle exceptions,

Technical Accuracy Score: 9.0
