パフォーマンスと品質評価ツールの使用

In [1]:
import os
from azure.ai.evaluation import GroundednessProEvaluator, GroundednessEvaluator, RetrievalEvaluator, RelevanceEvaluator
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
#credential = DefaultAzureCredential()

# Azure OpenAI connection settings, passed as `model_config` to the
# model-based evaluators. Each value is None when its environment
# variable is not set.
model_config = dict(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    azure_deployment=os.getenv("AZURE_OPENAI_CHAT_MODEL"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

# Azure AI project identifiers, used by the Groundedness Pro evaluator.
azure_ai_project = dict(
    subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("AZURE_RESOURCE_GROUP"),
    project_name=os.getenv("AZURE_PROJECT_NAME"),
)

# Browser-based interactive sign-in, scoped to the tenant named by TENANT_ID.
tenant_id = os.getenv("TENANT_ID")
credential = InteractiveBrowserCredential(tenant_id=tenant_id)


In [2]:
# Read the sample answer and context text from disk and bundle them with
# the query into one dict of evaluator inputs.
query = "パーキングブレーキについて教えてください。"

with open("./answer_sample.md", mode="r", encoding="utf-8") as answer_file:
    answer = answer_file.read()

with open("./context_sample.txt", mode="r", encoding="utf-8") as context_file:
    context = context_file.read()

query_response = {
    "query": query,
    "context": context,
    "response": answer,
}

### GroundednessEvaluator

In [3]:
# Build the groundedness evaluator from the Azure OpenAI model settings.
groundedness_eval = GroundednessEvaluator(model_config)

# Evaluate the inputs held in `query_response` and print the raw result dict.
groundedness_score = groundedness_eval(**query_response)
print(groundedness_score)


{'groundedness': 5.0, 'gpt_groundedness': 5.0, 'groundedness_reason': 'The RESPONSE is fully grounded in the CONTEXT, providing a thorough and accurate answer to the QUERY with all relevant details included.'}


### RetrievalEvaluator

In [4]:
# The retrieval evaluator is given only the query and the context (no response).
# Dedicated variable names are used so the shared `query_response` dict is not
# overwritten and the result is not mislabeled as a relevance score.
retrieval_eval = RetrievalEvaluator(model_config)
retrieval_inputs = dict(query=query, context=context)

retrieval_score = retrieval_eval(**retrieval_inputs)
print(retrieval_score)
# Last expression: display just the numeric retrieval score.
retrieval_score["retrieval"]

{'retrieval': 5.0, 'gpt_retrieval': 5.0, 'retrieval_reason': 'The context fully addresses the query with highly relevant information about the parking brake, and the most pertinent chunks are well-ranked at the top. This aligns with the definition of a Score of 5.'}


5.0

### RelevanceEvaluator

In [5]:
# Relevance is evaluated on the query and the generated answer, so `answer`
# (not the retrieved `context`) is passed as the response.
# A dedicated input dict avoids overwriting the shared `query_response`.
relevance_eval = RelevanceEvaluator(model_config)
relevance_inputs = dict(query=query, response=answer)

relevance_score = relevance_eval(**relevance_inputs)
print(relevance_score)



### OPTIONAL

### Groundedness Pro evaluator

In [None]:
# Initializing the Groundedness Pro evaluator.
# Supported regions are eastus2 and swedencentral.
groundedness_pro_eval = GroundednessProEvaluator(azure_ai_project=azure_ai_project, credential=credential)

# Pass the inputs explicitly rather than unpacking the shared `query_response`
# variable, whose contents depend on which cells were run before this one.
groundedness_pro_score = groundedness_pro_eval(
    query=query,
    context=context,
    response=answer,
)
print(groundedness_pro_score)