In [None]:
!pip install langchain langchain_openai langchain_community langgraph-checkpoint-sqlite langgraph ipykernel python-dotenv deepagents deepeval

In [2]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from deepagents import create_deep_agent
from pprint import pprint

# Read key/value pairs from a local .env file into the process environment
# before the client is built (presumably where the OpenAI API key lives --
# confirm against your own setup).
load_dotenv()

# Chat model handed to create_deep_agent() for the supervisor agent.
llm = ChatOpenAI(
    model="gpt-5.2",
    base_url="https://api.openai.com/v1",
    max_tokens=1000,   # upper bound on tokens generated per reply
    temperature=0.1,   # low temperature keeps replies close to deterministic
)

Create two tools for the sub-agents.

In [3]:
from langchain.tools import tool

# Demo stub for the risk-analyst sub-agent: no real assessment is performed and
# every account number is reported as high risk.
#
# Parameters: account_number -- the applicant's account number (only echoed).
# Returns:    one of the bare levels advertised in the description; always "High".
#
# Documented with comments (not a docstring) so the function object handed to
# @tool is otherwise the same as before.
@tool(
    "get_risk_level",
    description="Given the account number, assess and return the risk level. Eg. Low, Medium or High."
)
def get_risk_level(account_number: str) -> str:
    # Trace line so tool invocations are visible in the notebook output.
    print(f">>>>>>>>>>> Account number:{account_number}")
    # Return the bare level, as the description promises. The previous value
    # "High risk" did not match any level get_interest_rate recognises and
    # would have fallen through to its 20% fallback if forwarded verbatim.
    return "High"

# Maps a risk level to the interest rate quoted by the loan-officer sub-agent.
#
# Parameters: risk_level -- "Low", "Medium" or "High". Matching ignores case,
#             surrounding whitespace and a trailing "risk" (e.g. "High risk",
#             "high-risk"), because the value is produced by an LLM and is not
#             guaranteed to be the bare word.
# Returns:    "5%", "10%" or "15%" for low / medium / high; "20%" for anything
#             that is not recognised (unchanged fallback).
#
# Documented with comments (not a docstring) so the function object handed to
# @tool is otherwise the same as before.
@tool(
    "get_interest_rate",
    description="Given the risk level [Low, Medium or High], return the interest rate."
)
def get_interest_rate(risk_level: str) -> str:
    # Trace line so tool invocations are visible in the notebook output.
    print(f">>>>>>>>>>> Risk Level:{risk_level}")

    # Normalise once instead of lower-casing in every branch.
    level = risk_level.strip().lower()
    if level.endswith("risk"):
        # "high risk" / "high-risk" / "high_risk" -> "high"
        level = level[: -len("risk")].rstrip(" -_")

    rates = {"low": "5%", "medium": "10%", "high": "15%"}
    return rates.get(level, "20%")

Create two sub-agents: a Risk Analyst that assesses the applicant's risk level, and a Loan Officer that sets the interest rate based on that risk level.

In [4]:
# Sub-agent specification: looks up the applicant's risk level through the
# get_risk_level tool.
risk_analyst_subagent = dict(
    name="risk-analyst",
    description="The purpose of this agent is to assess the risk of a loan application.",
    system_prompt="You are a risk analyst that assess the risk level of loan application. Given the account number, use the tools provided to get the risk level.",
    tools=[get_risk_level],
)

# Sub-agent specification: turns a risk level into an interest rate through the
# get_interest_rate tool.
loan_officer_subagent = dict(
    name="loan-officer",
    description="The purpose of this agent is to decide on the interest rate based on the risk level.",
    system_prompt="You are a loan officer that decide on the interest rate based on the risk level of loan application.",
    tools=[get_interest_rate],
)

# Both specifications, in the order they are handed to the supervisor agent.
subagents = [
    risk_analyst_subagent,
    loan_officer_subagent,
]

Create the supervisor (main) agent and assign both sub-agents to it.

In [5]:
# Instructions for the supervisor: it names the two sub-agents and fixes the
# order in which they must be consulted (risk level first, then interest rate).
CUSTOMER_OFFICER_PROMPT = """
      You are bank customer officer that handles loan application.
      The following agents are also involved in processing loan application.
      1. risk-analyst - The purpose of this agent is to assess the risk of a loan application.
      2. loan-officer - The purpose of this agent is to decide on the interest rate based on the risk level.
      The business process for assessing risk and deciding interest rate STRICTLY follows the steps below:
      1. Given the account number, check with risk-analyst to get the risk level.
      2. Use the risk level to check with loan-officer to decide on the interest rate.
      3. Return the interest rate.
    """

# Supervisor ("customer officer") agent that delegates to the sub-agents.
customer_officer_agent = create_deep_agent(
    subagents=subagents,
    system_prompt=CUSTOMER_OFFICER_PROMPT,
    model=llm,
)

Now we can test the multi-agent application.

In [None]:
# Conversation seed: a single user turn asking for a loan.
messages = [
    {
        "role": "user",
        "content": "Account number 123 would like to take a loan.",
    }
]

# Holds the most recent message printed by the loop (None if nothing streamed).
last_message = None
# Number of history entries that have already been printed.
seen_messages_count = 0

print("--- Starting Agent Stream ---")

# The loop treats every chunk's "messages" entry as the history so far and
# prints only the part beyond what earlier chunks already showed.
for chunk in customer_officer_agent.stream({"messages": messages}, stream_mode="values"):
    if "messages" not in chunk:
        continue

    for message in chunk["messages"][seen_messages_count:]:
        print("\n---")
        if message.content:
            print(f"Role: {message.type.capitalize()}\nContent: {message.content}")
        if getattr(message, "tool_calls", None):
            print(f"Tool Calls: {message.tool_calls}")
        # NOTE(review): no message in this notebook is seen setting `tool_output`;
        # tool results appear to arrive via `content` -- confirm before removing.
        if getattr(message, "tool_output", None):
            print(f"Tool Output: {message.tool_output}")
        if getattr(message, "name", None):
            print(f"Name: {message.name}")
        last_message = message

    seen_messages_count = len(chunk["messages"])

print("\n--- Agent Stream Finished ---")


Traditional unit testing, as used for conventional applications, does not work well for agentic AI. To test an agent's output, the testing tool needs to understand the meaning of that output in context, not just compare strings. Therefore, we use an LLM to evaluate the agent.

In [None]:
from deepeval.metrics import (
    GEval,
    TaskCompletionMetric,
    AnswerRelevancyMetric
)
from deepeval.dataset import EvaluationDataset, Golden
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from deepeval import evaluate

# Judge model used by every LLM-based metric below; defined once so the two
# metrics cannot drift apart.
JUDGE_MODEL = "gpt-4o-mini"

# Scores how relevant the agent's reply is to the user's request.
answer_relevancy = AnswerRelevancyMetric(threshold=0.8, model=JUDGE_MODEL)

# Custom accuracy metric.
# The criteria only mention facts the agent can actually produce: the account
# number, the risk level and the interest rate. The earlier wording also asked
# for an "accurate credit score", which no tool or prompt in this notebook
# provides, so correct answers could be marked down for omitting it.
accuracy_metric = GEval(
    name="Loan Assessment Accuracy",
    criteria=(
        "The output refers to the same account as the input and provides the "
        "correct interest rate, matching the expected output. Any risk level "
        "it mentions must be consistent with that interest rate."
    ),
    evaluation_params=[
        LLMTestCaseParams.INPUT,
        LLMTestCaseParams.ACTUAL_OUTPUT,
        LLMTestCaseParams.EXPECTED_OUTPUT,
    ],
    threshold=0.8,
    model=JUDGE_MODEL,
)

# Create test cases list to collect all evaluations
test_cases = []

# Golden examples: the user request and the answer the agent is expected to give.
dataset = EvaluationDataset(
    goldens=[
        Golden(
            input="Account number 123 would like to take a loan.",
            expected_output="For account number 123, the applicable annual interest rate for the new loan is: 15%"
        ),
    ]
)

def run_agent_and_get_output(agent_instance, user_input):
    """Run the agent on a single user message and return its final answer.

    Args:
        agent_instance: Object exposing ``stream(inputs, stream_mode=...)`` that
            yields dict chunks; chunks with a ``"messages"`` key hold message
            objects with ``type`` and ``content`` attributes.
        user_input: Text of the user's request.

    Returns:
        The content of the most recent AI message with non-empty content, or
        ``""`` if the agent produced none. Human and tool messages are never
        returned, so a run without an answer is not scored on the echoed
        prompt or on a raw tool result.
    """
    request = {"messages": [{"role": "user", "content": user_input}]}

    # Keep only the newest message list instead of rescanning the whole history
    # on every chunk (the caller relies on each chunk holding the history so far).
    final_messages = []
    for chunk in agent_instance.stream(request, stream_mode="values"):
        if "messages" in chunk:
            final_messages = chunk["messages"]

    # Walk backwards so the first hit is the agent's latest answer.
    for message in reversed(final_messages):
        if getattr(message, "type", None) == "ai" and message.content:
            return message.content
    return ""

# Build test cases from dataset
# Turn every golden into a test case by running the agent on the golden's input
# and pairing the reply with the expected output.
for golden in dataset.goldens:
    test_cases.append(
        LLMTestCase(
            input=golden.input,
            actual_output=run_agent_and_get_output(customer_officer_agent, golden.input),
            expected_output=golden.expected_output,
        )
    )

# Score all collected test cases with both metrics.
evaluate(test_cases, metrics=[accuracy_metric, answer_relevancy])
