# NASCAR Chatbot Sandbox - Agent Evaluation

In [1]:
import os
import getpass

# Collect the API keys used by the rest of the notebook. A key that is already
# present in the environment is kept as-is, so re-running this cell (or running
# the notebook with pre-exported keys) neither prompts again nor overwrites a
# working configuration.
for env_var, service in (('TAVILY_API_KEY', 'Tavily'), ('OPENAI_API_KEY', 'OpenAI')):
    if not os.environ.get(env_var):
        # getpass keeps the secret out of the notebook source and cell output.
        os.environ[env_var] = getpass.getpass(f'Enter your {service} API key: ')

In [8]:
from agent import Agent
from services.chat_service import ChatService

agent = Agent()
chat_service = ChatService(agent)
result = await chat_service.chat("Can you give me an overview of the NASCAR race that took place in Chicago in 2025?")
print(result["messages"])

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_8zAEak4iOQOY7GQ5c57s8oZL', 'function': {'arguments': '{"__arg1":"NASCAR race in Chicago 2025"}', 'name': 'tavily_web_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 265, 'total_tokens': 292, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-nano-2025-04-14', 'system_fingerprint': 'fp_38343a2f8f', 'id': 'chatcmpl-C1cfPOrmvt3AhkugqsWmWzCPpnYaA', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--b7127f4f-1fd0-4910-b5fa-5e551f080e9e-0', tool_calls=[{'name': 'tavily_web_search', 'args': {'__arg1': 'NASCAR race in Chicago 2025'}, 'id': 'call_8zAEak4iOQOY7GQ5c57s8oZL', 'type': 'tool_call'}], usage_metadata={'input_tokens': 265, 'output

In [11]:
from langchain_openai import ChatOpenAI
from ragas.metrics import ToolCallAccuracy, AgentGoalAccuracyWithReference
from ragas.dataset_schema import MultiTurnSample
import ragas.messages as r
from ragas.integrations.langgraph import convert_to_ragas_messages

llm_evaluator = ChatOpenAI(model="gpt-4o-mini")
ragas_trace = convert_to_ragas_messages(result["messages"])
print(ragas_trace)

# Tool Call Accuracy
tool_call_sample = MultiTurnSample(
  user_input=ragas_trace,
  reference_tool_calls=[
    r.ToolCall(
      name="tavily_web_search", 
      args={"__arg1": "NASCAR race in Chicago 2025"}
    ),
  ],
)
tool_call_scorer = ToolCallAccuracy()
tool_call_scorer.llm = llm_evaluator
tool_call_score = await tool_call_scorer.multi_turn_ascore(tool_call_sample)
print(f"Tool Call Score: {tool_call_score}")

[AIMessage(content='', metadata=None, type='ai', tool_calls=[ToolCall(name='tavily_web_search', args={'__arg1': 'NASCAR race in Chicago 2025'})]), ToolMessage(content='1. 2025 NASCAR Cup Series Chicago Street Course Race Page - Jayski\n2025 NASCAR Cup Series Chicago Street Course Race Page - Jayski\'s NASCAR Silly Season Site **Pit Stall Selections:** The selections of pit stalls for the Grant Park 165 NASCAR Cup Series race at the Chicago Street Course have been made. **#88-Shane Van Gisbergen** won the Busch pole for the Grant Park 165 NASCAR Cup Series race at the Chicago Street Course with a speed of 88.338mph for his second pole of the year, first at Chicago and third of his career. Practice for the Grant Park 165 NASCAR Cup Series race at the Chicago Street Course is over. The entry list for the Grant Park 165 NASCAR Cup Series race at the Chicago Street Course is posted, 41 teams/drivers [for 40 spots] are listed.\nSource: https://www.jayski.com/nascar-cup-series/2025-nascar-cup

In [18]:
from ragas.metrics import AgentGoalAccuracyWithReference
from ragas.llms import LangchainLLMWrapper

# Accuracy Goal
accuracy_sample = MultiTurnSample(
  user_input=ragas_trace,
  reference="Details of the Chicago NASCAR race in 2025",
)
accuracy_scorer = AgentGoalAccuracyWithReference()
accuracy_scorer.llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
accuracy_score = await accuracy_scorer.multi_turn_ascore(accuracy_sample)
print(f"Accuracy Score: {accuracy_score}")

Accuracy Score: 1.0


In [20]:
from ragas.metrics import TopicAdherenceScore

# Topic Adherence
adherence_sample = MultiTurnSample(
  user_input=ragas_trace,
  reference_topics=["NASCAR", "Chicago", "2025"],
)
accuracy_scorer = TopicAdherenceScore()
accuracy_scorer.llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
adherence_score = await accuracy_scorer.multi_turn_ascore(adherence_sample)
print(f"Topic Adherence Score: {adherence_score}")

Topic Adherence Score: 0.599999999946
