### Import Dependencies

In [1]:
from pydantic import BaseModel, Field

from qdrant_client import QdrantClient
from qdrant_client.models import Prefetch, Filter, FieldCondition, MatchAny, FusionQuery, Document


from langsmith import traceable, get_current_run_tree

from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langgraph.types import Send, Command

from langchain_core.messages import AIMessage, ToolMessage, convert_to_openai_messages

from jinja2 import Template
from typing import Literal, Dict, Any, Annotated, List, Optional, Sequence
from IPython.display import Image, display
from operator import add
from openai import OpenAI

import openai

import random
import ast
import inspect
import instructor
import json

from utils.utils import get_tool_descriptions, format_ai_message
from utils.tools import get_shopping_cart, add_to_shopping_cart, remove_from_cart, get_formatted_item_context, get_formatted_reviews_context

from langgraph.checkpoint.postgres import PostgresSaver

  from .autonotebook import tqdm as notebook_tqdm


### Create Coordinator Evaluation Dataset

In [2]:
class Delegation(BaseModel):
    agent: str
    task: str

class CoordinatorAgentResponse(BaseModel):
    next_agent: str
    plan: List[Delegation]
    final_answer: bool
    answer: str

In [3]:
def coordinator_agent(state):

   prompt_template =  """You are a Coordinator Agent as part of a shopping assistant.

Your role is to create plans for solving user queries and delegate the tasks accordingly.
You will be given a conversation history, your task is to create a plan for solving the user's query.
After the plan is created, you should output the next agent to invoke and the task to be performed by that agent.
Once an agent finishes its task, you will be handed the control back, you should then review the conversation history and revise the plan.
If there is a sequence of tasks to be performed by a single agent, you should combine them into a single task.

The possible agents are:

- product_qa_agent: The user is asking a question about a product. This can be a question about available products, their specifications, user reviews etc.
- shopping_cart_agent: The user is asking to add or remove items from the shopping cart or questions about the current shopping cart.

CRITICAL RULES:
- If next_agent is "", final_answer MUST be false
(You cannot delegate the task to an agent and return to the user in the same response)
- If final_answer is true, next_agent MUST be ""
(You must wait for agent results before returning to user)
- If you need to call other agents before answering, set:
next_agent="...", final_answer=false
- After receiving agent results, you can then set:
next_agent="", final_answer=true
- One of the following has to be true:
next_agent is "" and final_answer is true
next_agent is not "" and final_answer is false

Additional instructions:

- Do not route to any agent if the user's query needs clarification. Do it yourself.
- Write the plan to the plan field.
- Write the next agent to invoke to the next_agent field.
- Once you have all the information needed to answer the user's query, you should set the final_answer field to True and output the answer to the user's query.
- The final answer to the user query should be a comprehensive answer that explains the actions that were performed to answer the query.
- Never set final_answer to true if the plan is not complete.
- You should output the next_agent field as well as the plan field.
"""

   template = Template(prompt_template)
   
   prompt = template.render()

   messages = state.messages

   conversation = []

   for message in messages:
        conversation.append(convert_to_openai_messages(message))

   client = instructor.from_openai(OpenAI())

   response, raw_response = client.chat.completions.create_with_completion(
        model="gpt-4.1-mini",
        response_model=CoordinatorAgentResponse,
        messages=[{"role": "system", "content": prompt}, *conversation],
        temperature=0,
   )

   if response.final_answer:
      ai_message = [AIMessage(
         content=response.answer,
      )]
   else:
      ai_message = []

   return {
      "messages": ai_message,
      "answer": response.answer,
      "coordinator_agent": {
         "iteration": state.coordinator_agent.iteration + 1,
         "final_answer": response.final_answer,
         "next_agent": response.next_agent,
         "plan": [data.model_dump() for data in response.plan]
      }
   }

In [4]:
class ToolCall(BaseModel):
    name: str
    arguments: dict

class RAGUsedContext(BaseModel):
    id: str = Field(description="ID of the item used to answer the question.")
    description: str = Field(description="Short description of the item used to answer the question.")

class AgentProperties(BaseModel):
    iteration: int = 0
    final_answer: bool = False
    available_tools: List[Dict[str, Any]] = []
    tool_calls: List[ToolCall] = []

class CoordinatorAgentProperties(BaseModel):
    iteration: int = 0
    final_answer: bool = False
    plan: List[Delegation] = []
    next_agent: str = ""

class State(BaseModel):
    messages: Annotated[List[Any], add] = []
    user_intent: str = ""
    product_qa_agent: AgentProperties = Field(default_factory=AgentProperties)
    shopping_cart_agent: AgentProperties = Field(default_factory=AgentProperties)
    coordinator_agent: CoordinatorAgentProperties = Field(default_factory=AgentProperties)
    answer: str = ""
    references: Annotated[List[RAGUsedContext], add] = []
    user_id: str = ""
    cart_id: str = ""

### Run Evaluator

In [5]:
from langsmith import Client
import os

client = Client()

In [6]:
def next_agent_evaluator(run, example):

    next_agent_match = run.outputs["coordinator_agent"]["next_agent"] == example.outputs["next_agent"] 
    final_answer_match = run.outputs["coordinator_agent"]["final_answer"] == example.outputs["coordinator_final_answer"]
    
    return next_agent_match and final_answer_match

In [7]:
results = client.evaluate(
    lambda x: coordinator_agent(State(messages=x["messages"])),
    data="coordinator-eval-dataset",
    evaluators=[
        next_agent_evaluator
    ],
    experiment_prefix="coordinator-eval-dataset"
)

View the evaluation results for experiment: 'coordinator-eval-dataset-e9393808' at:
https://smith.langchain.com/o/eda70326-53b4-40a9-8f01-1ae7702e6112/datasets/457e4b89-ef35-4823-a10c-09acc5e64afb/compare?selectedSessions=88ec02d0-d344-4fe8-bda2-646f8dd709ca




10it [00:13,  1.35s/it]


### Extract Evaluation Results

In [8]:
results.experiment_name

'coordinator-eval-dataset-e9393808'

In [9]:
results_resp = client.read_project(
    project_name = results.experiment_name,
    include_stats = True
)

In [10]:
results_resp

TracerSessionResult(id=UUID('88ec02d0-d344-4fe8-bda2-646f8dd709ca'), start_time=datetime.datetime(2025, 11, 25, 17, 44, 57, 293911, tzinfo=datetime.timezone.utc), end_time=None, description=None, name='coordinator-eval-dataset-e9393808', extra={'metadata': {'git': {'tags': None, 'dirty': True, 'branch': 'main', 'commit': '2e111397bebfb86ef557f428fef3e67ea2990c7b', 'repo_name': '00-ai-engineering-bootcamp-cohort-2', 'remote_url': 'git@github.com-personal:tkanodia/00-ai-engineering-bootcamp.git', 'author_name': 'tkanodia', 'commit_time': '1764087368', 'author_email': 'tkanodia@adobe.com'}, 'revision_id': '2e11139-dirty', 'dataset_splits': ['base'], 'dataset_version': '2025-11-23T20:01:12.487507+00:00', 'num_repetitions': 1}}, tenant_id=UUID('eda70326-53b4-40a9-8f01-1ae7702e6112'), reference_dataset_id=UUID('457e4b89-ef35-4823-a10c-09acc5e64afb'), run_count=10, latency_p50=datetime.timedelta(seconds=1, microseconds=483000), latency_p99=datetime.timedelta(seconds=1, microseconds=660010), t

In [11]:
results_resp.feedback_stats

{'next_agent_evaluator': {'n': 10,
  'avg': 0.6,
  'stdev': 0.4898979485566356,
  'errors': 0,
  'values': {},
  'type': 'primary',
  'contains_thread_feedback': False}}

In [12]:
results_resp.feedback_stats.get("next_agent_evaluator")

{'n': 10,
 'avg': 0.6,
 'stdev': 0.4898979485566356,
 'errors': 0,
 'values': {},
 'type': 'primary',
 'contains_thread_feedback': False}

In [13]:
results_resp.feedback_stats.get("next_agent_evaluator").get("avg")

0.6

In [14]:
results_resp.feedback_stats.get("next_agent_evaluator").get("errors")

0