In [11]:
!pip install azure-ai-projects azure-identity azure-ai-evaluation azure-ai-inference

Collecting azure-ai-inference
  Downloading azure_ai_inference-1.0.0b9-py3-none-any.whl.metadata (34 kB)
Downloading azure_ai_inference-1.0.0b9-py3-none-any.whl (124 kB)
Installing collected packages: azure-ai-inference
Successfully installed azure-ai-inference-1.0.0b9


以下の環境変数をセット。

- PROJECT_CONNECTION_STRING - The project connection string, as found in the overview page of your Azure AI Foundry project.
- MODEL_DEPLOYMENT_NAME - The deployment name of the model for AI-assisted evaluators, as found under the "Name" column in the "Models + endpoints" tab in your Azure AI Foundry project.
- AZURE_OPENAI_ENDPOINT - Azure Open AI Endpoint to be used for evaluation.
- AZURE_OPENAI_API_KEY - Azure Open AI Key to be used for evaluation.
- AZURE_OPENAI_API_VERSION - Azure Open AI Api version to be used for evaluation.
- AZURE_SUBSCRIPTION_ID - Azure Subscription Id of Azure AI Project
- PROJECT_NAME - Azure AI Project Name
- RESOURCE_GROUP_NAME - Azure AI Project Resource Group Name
- AGENT_MODEL_DEPLOYMENT_NAME - The deployment name of the model for your Azure AI agent, as found under the "Name" column in the "Models + endpoints" tab in your Azure AI Foundry project.

In [1]:
from dotenv import load_dotenv

load_dotenv('../agent_evaluation/.env')

True

## Initialize Project Client

In [7]:
import os
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from azure.ai.agents.models import FunctionTool, ToolSet, AgentThreadCreationOptions
from user_functions import user_functions

project_client = AIProjectClient(
    credential=DefaultAzureCredential(),
    endpoint=os.environ["PROJECT_CONNECTION_STRING"],
)

AGENT_NAME = "Seattle Tourist Assistant"

# Add Tools to be used by Agent
functions = FunctionTool(user_functions)

toolset = ToolSet()
toolset.add(functions)

### AI Foundry Agent Service の作成

In [8]:
agent = project_client.agents.create_agent(
    model=os.environ["AGENT_MODEL_DEPLOYMENT_NAME"],
    name=AGENT_NAME,
    instructions="You are helpful agent",
    toolset=toolset,
)
print(f"Created agent, agent ID: {agent.id}")

Created agent, agent ID: asst_yTgoigfFAZyzdIBbUTTD0gCu


### Thread 作成、実行

In [12]:
thread_run = project_client.agents.create_thread_and_process_run(
    agent_id=agent.id,
    thread=AgentThreadCreationOptions(
        messages=[
            {
                "role": "user",
                "content": "What are some good places to visit in Seattle?",
            }
        ]
    )
)
print(f"Thread run status: {thread_run.status}")

for message in project_client.agents.messages.list(thread_run.thread_id):
    print(f"Role: {message.role}, Content: {message.content[0].text.value}")

Thread run status: RunStatus.COMPLETED
Role: MessageRole.AGENT, Content: Seattle is a vibrant city with plenty of attractions and activities for visitors. Here are some great places to explore:

### Iconic Attractions:
1. **Space Needle**: A must-visit landmark offering panoramic views of the city, Puget Sound, and surrounding mountains.
2. **Pike Place Market**: A lively public market famous for fresh produce, handmade goods, and the iconic fish-throwing tradition.
3. **Chihuly Garden and Glass**: A stunning exhibit showcasing Dale Chihuly's exquisite glass art installations.
4. **Museum of Pop Culture (MoPOP)**: An immersive museum featuring exhibits on music, film, gaming, and pop culture.

### Nature and Outdoor Destinations:
5. **Discovery Park**: A beautiful park with walking trails, beaches, and views of Puget Sound.
6. **Gas Works Park**: A quirky green space with industrial structures and a picturesque city skyline view.
7. **Green Lake Park**: Perfect for walking, biking, boa

### 評価

In [17]:
from azure.ai.evaluation import AIAgentConverter

# Initialize the converter that will be backed by the project.
converter = AIAgentConverter(project_client)

thread_id = thread_run.thread_id
run_id = thread_run.id
file_name = "evaluation_data.jsonl"

# Get a single agent run data
evaluation_data_single_run = converter.convert(thread_id=thread_id, run_id=run_id)
print(f"Single run evaluation data: {evaluation_data_single_run}")
evaluation_data = converter.prepare_evaluation_data(thread_ids=thread_id, filename=file_name)

Single run evaluation data: {'query': [{'role': 'system', 'content': 'You are helpful agent'}, {'createdAt': '2025-06-03T05:47:02Z', 'role': 'user', 'content': [{'type': 'text', 'text': 'What are some good places to visit in Seattle?'}]}], 'response': [{'createdAt': '2025-06-03T05:47:03Z', 'run_id': 'run_ADvtl4IeeUcHJHzFglrhq9EO', 'role': 'assistant', 'content': [{'type': 'text', 'text': "Seattle is a vibrant city with plenty of attractions and activities for visitors. Here are some great places to explore:\n\n### Iconic Attractions:\n1. **Space Needle**: A must-visit landmark offering panoramic views of the city, Puget Sound, and surrounding mountains.\n2. **Pike Place Market**: A lively public market famous for fresh produce, handmade goods, and the iconic fish-throwing tradition.\n3. **Chihuly Garden and Glass**: A stunning exhibit showcasing Dale Chihuly's exquisite glass art installations.\n4. **Museum of Pop Culture (MoPOP)**: An immersive museum featuring exhibits on music, film

### evaluator のセットアップ

In [18]:
from azure.ai.evaluation import (
    ToolCallAccuracyEvaluator,
    AzureOpenAIModelConfiguration,
    IntentResolutionEvaluator,
    TaskAdherenceEvaluator,
)
from pprint import pprint

model_config = AzureOpenAIModelConfiguration(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AGENT_MODEL_DEPLOYMENT_NAME"],
)
# Needed to use content safety evaluators
azure_ai_project = {
    "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
    "project_name": os.environ["PROJECT_NAME"],
    "resource_group_name": os.environ["RESOURCE_GROUP_NAME"],
}

intent_resolution = IntentResolutionEvaluator(model_config=model_config)

tool_call_accuracy = ToolCallAccuracyEvaluator(model_config=model_config)

task_adherence = TaskAdherenceEvaluator(model_config=model_config)

In [19]:
from azure.ai.evaluation import evaluate

response = evaluate(
    data=file_name,
    evaluators={
        "tool_call_accuracy": tool_call_accuracy,
        "intent_resolution": intent_resolution,
        "task_adherence": task_adherence,
    },
    azure_ai_project={
        "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
        "project_name": os.environ["PROJECT_NAME"],
        "resource_group_name": os.environ["RESOURCE_GROUP_NAME"],
    },
)
pprint(f'AI Foundary URL: {response.get("studio_url")}')

[2025-06-03 15:01:14 +0900][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_intent_resolution_20250603_150113_397455, log path: C:\Users\koheisaito\.promptflow\.runs\azure_ai_evaluation_evaluators_intent_resolution_20250603_150113_397455\logs.txt
[2025-06-03 15:01:14 +0900][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_task_adherence_20250603_150113_398454, log path: C:\Users\koheisaito\.promptflow\.runs\azure_ai_evaluation_evaluators_task_adherence_20250603_150113_398454\logs.txt
[2025-06-03 15:01:14 +0900][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_tool_call_accuracy_20250603_150113_396454, log path: C:\Users\koheisaito\.promptflow\.runs\azure_ai_evaluation_evaluators_tool_call_accuracy_20250603_150113_396454\logs.txt
 Please check out C:/Users/koheisaito/.promptflow/.runs/azure_ai_evaluation_evaluators_tool_call_accuracy_2025

2025-06-03 15:01:14 +0900   31560 execution.bulk     INFO     Current thread is not main thread, skip signal handler registration in BatchEngine.
2025-06-03 15:01:14 +0900   31560 execution.bulk     INFO     Finished 1 / 1 lines.
2025-06-03 15:01:14 +0900   31560 execution.bulk     INFO     Average execution time for completed lines: 0.07 seconds. Estimated time for incomplete lines: 0.0 seconds.
2025-06-03 15:01:14 +0900   31560 execution          ERROR    1/1 flow run failed, indexes: [0], exception of index 0: (UserError) response does not have tool calls. Either provide tool_calls or response with tool calls.

Run name: "azure_ai_evaluation_evaluators_tool_call_accuracy_20250603_150113_396454"
Run status: "Completed"
Start time: "2025-06-03 15:01:13.428011+09:00"
Duration: "0:00:02.234097"
Output path: "C:\Users\koheisaito\.promptflow\.runs\azure_ai_evaluation_evaluators_tool_call_accuracy_20250603_150113_396454"

2025-06-03 15:01:18 +0900   31560 execution.bulk     INFO     Finish

EvaluationException: (InternalError) The get 'prj-build2025-demo-eastus2-001' workspace request failed with HTTP 404 - (ResourceNotFound) The Resource 'Microsoft.MachineLearningServices/workspaces/prj-build2025-demo-eastus2-001' under resource group 'rg-build2025-demo-eastus-001' was not found. For more details please go to https://aka.ms/ARMResourceNotFoundFix

In [None]:
from azure.ai.projects.models import AgentEvaluation, AgentEvaluationRequest


eval_job: AgentEvaluation = project_client.evaluations.create_agent_evaluation(
    evaluation=AgentEvaluationRequest(
        evaluators={
            
        },
        run_id=run_id
    )
)
print(f"Evaluation job created with ID: {eval_job.id}")

DeserializationError: 

In [12]:
import time

from azure.ai.projects.models import (
    AgentEvaluationRequest,
    EvaluatorIds,
    EvaluatorConfiguration,
    AgentEvaluationSamplingConfiguration,
    AgentEvaluationRedactionConfiguration,
)


thread = project_client.agents.threads.create()
print(f"Created thread, thread ID: {thread.id}")

message = project_client.agents.messages.create(
            thread_id=thread.id, role="user", content="Hello, tell me a joke"
        )
print(f"Created message, message ID: {message.id}")

run = project_client.agents.runs.create(thread_id=thread.id, agent_id=agent.id)

while run.status in ["queued", "in_progress", "requires_action"]:
    # Wait for a second
    time.sleep(1)
    run = project_client.agents.runs.get(thread_id=thread.id, run_id=run.id)
    print(f"Run status: {run.status}")

agent_evaluation_request = AgentEvaluationRequest(
    run_id=run.id,
    thread_id=thread.id,
    evaluators={
        "violence": EvaluatorConfiguration(
            id=EvaluatorIds.VIOLENCE,
        )
    },
    sampling_configuration=AgentEvaluationSamplingConfiguration(
        name="test",
        sampling_percent=100,
        max_request_rate=100,
    ),
    redaction_configuration=AgentEvaluationRedactionConfiguration(
        redact_score_properties=False,
    ),
    app_insights_connection_string=project_client.telemetry.get_connection_string(),
)

agent_evaluation_response = project_client.evaluations.create_agent_evaluation(
    evaluation=agent_evaluation_request
)

print(agent_evaluation_response)

Created thread, thread ID: thread_x3QbARV4THrjiomFrjLvgRyQ
Created message, message ID: msg_9qZ9b4ULrRWqZPhlOtIhmVch
Run status: RunStatus.COMPLETED
{'id': 'thread_x3QbARV4THrjiomFrjLvgRyQ;run_xyS5eIN9CMoxAjnAE28gEKXm', 'status': 'Running', 'result': None, 'error': None}


In [13]:
print(
    "Get an authenticated Azure OpenAI client for the parent AI Services resource, and perform a chat completion operation:"
)
with project_client.inference.get_azure_openai_client(api_version="2024-10-21") as client:

    response = client.chat.completions.create(
        model=os.environ["AGENT_MODEL_DEPLOYMENT_NAME"],
        messages=[
            {
                "role": "user",
                "content": "How many feet are in a mile?",
            },
        ],
    )

    print(response.choices[0].message.content)


Get an authenticated Azure OpenAI client for the parent AI Services resource, and perform a chat completion operation:
There are **5,280 feet** in a mile.
