# G-Eval

In [None]:
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams, LLMTestCase
from deepeval.models.base_model import DeepEvalBaseLLM

import os, requests

In [None]:
# Load the source document and the two candidate summaries that are evaluated below.
# The encoding is given explicitly so the text decodes the same way on every platform
# instead of depending on the locale default.
# NOTE(review): assumes the example documents are stored as UTF-8 - confirm.
with open("example_documents/nhs_history.txt", "r", encoding="utf-8") as file:
    data = file.read()
with open("example_documents/summary_1.txt", "r", encoding="utf-8") as file:
    summary_1 = file.read()
with open("example_documents/summary_2.txt", "r", encoding="utf-8") as file:
    summary_2 = file.read()

In [None]:
class AzureOpenAI(DeepEvalBaseLLM):
    """DeepEval model wrapper that sends prompts to a chat-completions HTTP endpoint.

    Each prompt is POSTed as a single user message (preceded by a fixed system
    message) to ``endpoint_url``, authenticated with an ``api-key`` header. The
    text of the first returned choice is the result.
    """

    def __init__(self, model, endpoint_url, api_key):
        """Store the model handle and the connection settings.

        Args:
            model: Value returned by ``load_model``. It is not sent in the request.
            endpoint_url: Full URL the chat request is POSTed to.
            api_key: Value sent in the ``api-key`` request header.
        """
        # ``load_model`` reads ``self.model``, so it has to be stored here.
        self.model = model
        self.endpoint_url = endpoint_url
        self.api_key = api_key

    def get_model_name(self):
        """Return the fixed display name of this model wrapper."""
        return "Azure OpenAI Model"

    def load_model(self):
        """Return the ``model`` value that was passed to the constructor."""
        return self.model

    def _request_headers(self) -> dict:
        """Build the HTTP headers for a chat request."""
        return {
            "Content-Type": "application/json",
            "api-key": self.api_key,
        }

    @staticmethod
    def _request_payload(prompt: str) -> dict:
        """Build the JSON body: a fixed system message plus ``prompt`` as the user message."""
        return {
            "messages": [
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": "You are a helpful AI assistant",
                        }
                    ],
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            "temperature": 0.4,
            "top_p": 0.95,
            "max_tokens": 800,
        }

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` to the endpoint and return the first choice's message content.

        Args:
            prompt: Text sent as the user message.

        Returns:
            The ``content`` of ``choices[0].message`` in the JSON response.

        Raises:
            SystemExit: If the request fails or the server answers with an
                unsuccessful HTTP status. The underlying ``requests`` error is
                attached as the cause.
        """
        try:
            response = requests.post(
                self.endpoint_url,
                headers=self._request_headers(),
                json=self._request_payload(prompt),
            )
            # Raises an HTTPError if the HTTP request returned an unsuccessful status code.
            response.raise_for_status()
        except requests.RequestException as e:
            # SystemExit is kept for backward compatibility; chaining preserves the traceback.
            raise SystemExit(f"Failed to make the request. Error: {e}") from e

        json_response = response.json()

        return json_response["choices"][0]["message"]["content"]

    async def a_generate(self, prompt: str) -> str:
        """Asynchronous counterpart of ``generate``.

        ``requests`` is a blocking library, so the synchronous call is run in the
        event loop's default executor rather than directly inside the coroutine.
        Nothing from ``requests`` is awaited, since none of its calls return an
        awaitable.

        Args:
            prompt: Text sent as the user message.

        Returns:
            The same value ``generate`` returns for ``prompt``.

        Raises:
            SystemExit: Propagated from ``generate`` when the request fails.
        """
        # Imported locally so the block does not depend on a file-level import.
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.generate, prompt)

In [None]:
# Connection settings are read from the environment; a value is None when its
# variable is not set.
ENDPOINT = os.environ.get("ENDPOINT_URL")
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")

# Client instance used for the direct generate() call in the next cell.
model = AzureOpenAI(
    model="model",
    endpoint_url=ENDPOINT,
    api_key=API_KEY,
)

In [None]:
# Quick connectivity check: send one prompt through the wrapper and show the reply.
model.generate(prompt="Tell me a fact")

In [None]:
# G-Eval metric that judges the actual output against the supplied context,
# using the endpoint-backed model wrapper as the evaluator.
groundedness_metric = GEval(
    name="Groundedness",
    criteria=(
        "Determine whether each sentence in the actual output is grounded based on the context. "
        "For the actual output to be grounded, each sentence must have clear support within the context."
    ),
    evaluation_params=[
        LLMTestCaseParams.ACTUAL_OUTPUT,
        LLMTestCaseParams.CONTEXT,
    ],
    model=AzureOpenAI(
        model="model",
        endpoint_url=ENDPOINT,
        api_key=API_KEY,
    ),
)

In [None]:
# Test case pairing the first summary (the output under evaluation) with the
# full source document as its only context entry.
test_case = LLMTestCase(
    input="Test Prompt",
    actual_output=summary_1,
    context=[data],
)

In [None]:
# Run the metric on the test case and print whatever measure() returns.
groundedness_score = groundedness_metric.measure(test_case)
print(groundedness_score)