# Agent Case Study with OpenAI SDK

This notebook compares an agent **with vs. without planning**, using the same user prompt for both:
- Direct Agent (no planning loop)
- Planning Agent (Planner -> Executor -> Final)

Also includes Tool and Memory demos.

## 1) Install

In [None]:
%pip -q install openai python-dotenv chromadb

## 2) Setup

In [1]:
import os
import json
import re
import shutil

from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI, AsyncOpenAI
from agents import Agent, Runner, OpenAIResponsesModel, function_tool

load_dotenv()  # load keys from .env in current working directory

# Validate the key BEFORE building any client. The OpenAI client constructors
# raise their own error when no key is available, so a check placed after them
# never gets the chance to show this friendlier message for an unset key.
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY is not set. Put it in env or .env file.")

# Clients for the hosted OpenAI API (sync + async).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
gpt_async_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Open-model endpoint settings (for vLLM/Ollama/OpenRouter-compatible APIs)
OPEN_MODEL_BASE_URL = os.getenv("OPEN_MODEL_BASE_URL", "http://localhost:8000/v1")
OPEN_MODEL_API_KEY = os.getenv("OPEN_MODEL_API_KEY", "EMPTY")
open_async_client = AsyncOpenAI(base_url=OPEN_MODEL_BASE_URL, api_key=OPEN_MODEL_API_KEY)

# Hosted embedding endpoint (e.g., vLLM --task embed)
EMBED_BASE_URL = os.getenv("EMBED_BASE_URL", "http://localhost:8001/v1")
EMBED_API_KEY = os.getenv("EMBED_API_KEY", "EMPTY")
emb_client = OpenAI(base_url=EMBED_BASE_URL, api_key=EMBED_API_KEY)

MODEL_GPT = "gpt-4.1-mini"
MODEL_OPEN = os.getenv("OPEN_MODEL", "qwen3-4b-instruct")
ACTIVE_PROVIDER = os.getenv("ACTIVE_PROVIDER", "gpt")  # set to 'open' to use qwen/open model
# NOTE(review): MODEL is always the GPT model and does not follow ACTIVE_PROVIDER;
# the helpers below pick their model from MODEL_GPT / MODEL_OPEN instead.
MODEL = MODEL_GPT
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-m3")

## 2.5) Start vLLM Servers in Notebook (Optional)

In [None]:
# Run these in a terminal (shell), not in a notebook cell:

# pip install vllm
# python -c "import sys; print(sys.executable)"
# python -c "import vllm; print(vllm.__version__)"
# nohup "$(python -c 'import sys; print(sys.executable)')" -m vllm.entrypoints.openai.api_server --model Qwen/Qwen3-4B-Instruct --served-model-name qwen3-4b-instruct --host 127.0.0.1 --port 8000 > /tmp/vllm_llm.log 2>&1 &
# nohup "$(python -c 'import sys; print(sys.executable)')" -m vllm.entrypoints.openai.api_server --model BAAI/bge-m3 --task embed --host 127.0.0.1 --port 8001 > /tmp/vllm_emb.log 2>&1 &
# sleep 5
# tail -n 80 /tmp/vllm_llm.log
# tail -n 80 /tmp/vllm_emb.log

## 3) Helper

In [2]:
async def run_agent(agent_name, instructions, user_input, model=None, provider="gpt", tools=None, temperature=None):
    """Build a one-off Agent and run it once on ``user_input``.

    Args:
        agent_name: Name given to the Agent.
        instructions: Instructions (system prompt) given to the Agent.
        user_input: Input passed to ``Runner.run``.
        model: Model name; defaults to MODEL_GPT or MODEL_OPEN depending on ``provider``.
        provider: "gpt" (hosted OpenAI client) or "open" (open-model endpoint client).
        tools: Optional list of tools for the Agent; ``None`` means no tools.
        temperature: Optional sampling temperature. ``None`` (default) leaves the
            model settings untouched, exactly as before this parameter existed.

    Returns:
        Tuple ``(final_output, result)`` where ``result`` is the object returned by ``Runner.run``.

    Raises:
        ValueError: if ``provider`` is neither "gpt" nor "open".
    """
    if provider == "gpt":
        selected_model = model or MODEL_GPT
        api_client = gpt_async_client
    elif provider == "open":
        selected_model = model or MODEL_OPEN
        api_client = open_async_client
    else:
        raise ValueError("provider must be 'gpt' or 'open'")

    # NOTE(review): both providers go through the Responses API wrapper. Some
    # OpenAI-compatible servers only implement Chat Completions; if the open
    # endpoint rejects these calls, consider OpenAIChatCompletionsModel - confirm.
    model_obj = OpenAIResponsesModel(selected_model, api_client)

    agent_kwargs = {
        "name": agent_name,
        "instructions": instructions,
        "model": model_obj,
        "tools": tools or [],
    }
    if temperature is not None:
        # Imported lazily so the notebook's top-level import cell stays unchanged.
        from agents import ModelSettings

        agent_kwargs["model_settings"] = ModelSettings(temperature=temperature)

    agent = Agent(**agent_kwargs)
    result = await Runner.run(agent, user_input)
    return result.final_output, result

async def ask(messages_or_text, model=None, provider="gpt", temperature=0):
    """Send a plain prompt to an instruction-less "Assistant" agent.

    Args:
        messages_or_text: Either a prompt string, or a list of chat-style messages.
            For a list, each dict contributes its "content" value and any other
            item contributes ``str(item)``; the non-empty pieces are joined with
            newlines. Message roles are not forwarded.
        model: Optional model name override (see ``run_agent``).
        provider: "gpt" or "open" (see ``run_agent``).
        temperature: Sampling temperature forwarded to the model (default 0).
            Previously this argument was accepted but never used.

    Returns:
        Tuple ``(final_output, result)`` as returned by ``run_agent``.
    """
    if isinstance(messages_or_text, list):
        text_parts = []
        for message in messages_or_text:
            content = message.get("content", "") if isinstance(message, dict) else message
            # A missing/None content must not become the literal text "None".
            piece = "" if content is None else str(content)
            if piece:
                text_parts.append(piece)
        user_input = "\n".join(text_parts)
    else:
        user_input = messages_or_text
    return await run_agent(
        "Assistant",
        "",
        user_input,
        model=model,
        provider=provider,
        temperature=temperature,
    )

def embed_texts(texts):
    """Embed one or more texts with the configured embedding endpoint.

    Args:
        texts: A single string or an iterable of strings.

    Returns:
        A list with one embedding per input text, taken from the response's
        ``data`` items in the order they are returned. Empty input yields ``[]``
        without contacting the endpoint.
    """
    batch = [texts] if isinstance(texts, str) else list(texts)
    if not batch:
        # Nothing to embed; skip the request instead of sending an empty input list.
        return []
    emb = emb_client.embeddings.create(model=EMBED_MODEL, input=batch)
    return [item.embedding for item in emb.data]


def extract_json_object(text):
    """Extract the first decodable JSON object embedded in ``text``.

    Every ``{`` in the text is tried in order as a possible start of an object,
    and the first one that decodes is returned. Unlike a greedy "first { to last }"
    match, this tolerates braces in prose before or after the JSON (code fences,
    notes such as "use {placeholder}", etc.).

    Args:
        text: Model output that should contain a JSON object.

    Returns:
        The decoded object as a dict.

    Raises:
        TypeError: if ``text`` is not a string.
        ValueError: if the text contains no ``{`` at all. If braces exist but
            none starts a valid object, the first ``json.JSONDecodeError``
            (a ``ValueError`` subclass) is re-raised.
    """
    if not isinstance(text, str):
        raise TypeError("expected string or bytes-like object")

    decoder = json.JSONDecoder()
    first_error = None
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the object and ignores trailing text.
            obj, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc
        else:
            return obj
        start = text.find("{", start + 1)

    if first_error is not None:
        raise first_error
    raise ValueError("No JSON object found in output:\n" + text)

## 4) Tool Demo (Without Tool vs With Tool)

In [3]:
@function_tool
def get_current_date() -> dict:
    """Return today's date and weekday from the local system clock.

    Use this whenever the user asks what the current date or day of the week is.

    Returns:
        A dict with "date" formatted as YYYY-MM-DD and "weekday" as the full
        weekday name.
    """
    # The docstring above doubles as the tool description shown to the model.
    now = datetime.now()
    print("The agent is calling this tool!")  # visible marker for the demo output
    return {"date": now.strftime("%Y-%m-%d"), "weekday": now.strftime("%A")}

q = "What day is today? Return exact date as YYYY-MM-DD and weekday."
without_tool, _ = await ask([{"role": "user", "content": q}], provider=ACTIVE_PROVIDER)
print("WITHOUT TOOL:", without_tool)


with_tool, _ = await run_agent(
    "Tool Agent",
    "Use get_current_date when user asks about current date or weekday.",
    q,
    provider=ACTIVE_PROVIDER,
    tools=[get_current_date],
)

print("WITH TOOL:", with_tool)

WITHOUT TOOL: Today is 2024-04-27, Saturday.
The agent is calling this tool!
WITH TOOL: Today is 2026-02-13, and the day is Friday.


## 5) Memory Demo with Vector Database (Without Memory vs With Vector Memory)

In [43]:
import numpy as np
import faiss

# 1) start from an empty store: `records` keeps the raw facts, `index` will hold the FAISS index
records, index = [], None

# 2) store a single fact
record = dict(
    id="fact_jeff_birthday",
    text="Jeff's birthday is February 1.",
    metadata=dict(type="birthday", person="Jeff"),
)

# Embed the fact text and shape it as a one-row float32 matrix.
vec = embed_texts([record["text"]])[0]
x = np.asarray([vec], dtype=np.float32)

# With unit-length rows, the inner-product index ranks by cosine similarity.
faiss.normalize_L2(x)
index = faiss.IndexFlatIP(x.shape[1])
index.add(x)

# Row i of the index corresponds to records[i].
records.append(record)

In [44]:
# 3) ask without memory
q = "When is Jeff's birthday?"
without_memory, _ = await run_agent(
    "QA Agent",
    "Answer from your own knowledge. If unsure, say you don't know.",
    q,
    provider=ACTIVE_PROVIDER,
)
print("WITHOUT MEMORY:", without_memory)

WITHOUT MEMORY: I don't know when Jeff's birthday is.


In [45]:
# 4) retrieve + ask with memory
qvec = np.array([embed_texts([q])[0]], dtype="float32")
faiss.normalize_L2(qvec)
k = min(1, len(records))

docs = []
if index is not None and k > 0:
    _, I = index.search(qvec, k)
    docs = [records[i]["text"] for i in I[0] if i >= 0]
ctx = "\n".join(f"- {d}" for d in docs)

with_memory, _ = await run_agent(
    "Memory Agent",
    f"Use retrieved facts to answer.\nRetrieved facts:\n{ctx}",
    q,
    provider=ACTIVE_PROVIDER,
)
print("WITH MEMORY:", with_memory)

WITH MEMORY: Jeff's birthday is February 1.


## 6) Planning Demo: Same Prompt, Different Agent Policy

We use the **same user task** and compare:
- Direct Agent: one-shot answer
- Planning Agent: Planner -> Executor -> Final

In [4]:
# Single user task given to both the direct agent and the planning agent below.
TASK = "Plan a birthday party for 10 people with a total budget of $200."


async def direct_agent(task, provider="gpt", model=None):
    """Answer ``task`` in a single model call, with no planning loop.

    Returns only the final answer from ``ask``.
    """
    user_message = {"role": "user", "content": task}
    reply = await ask([user_message], model=model, provider=provider, temperature=0)
    return reply[0]


async def planning_agent(task, provider="gpt", model=None):
    """Solve ``task`` in three stages: Planner -> Executor (per step) -> Finalizer.

    1. Planner: asks for a JSON plan (goal, scope, steps).
    2. Executor: one model call per plan step, run sequentially.
    3. Finalizer: merges the plan and the step outputs into one final answer.

    The plan comes from a model, so individual fields may be missing. Missing
    ``goal``/``scope``/``id``/``task``/``done_definition`` fall back to defaults
    instead of raising ``KeyError`` halfway through the run.

    Args:
        task: The user task, in natural language.
        provider: "gpt" or "open", forwarded to ``ask``.
        model: Optional model name override, forwarded to ``ask``.

    Returns:
        Dict with keys "plan" (parsed planner JSON), "step_results" (list of
        ``{"id", "task", "result"}`` dicts) and "final" (finalizer text).

    Raises:
        ValueError: if the planner output contains no JSON object, or the plan
            has no non-empty ``steps`` list.
    """
    planner_prompt = (
        "You are Event Planning Planner.\n"
        "Return strict JSON only with this schema:\n"
        "{\n"
        "  \"goal\": \"...\",\n"
        "  \"scope\": {\"headcount\": 10, \"budget_usd\": 200, \"location_type\": \"home|park|indoor\", \"must_have\": [\"...\"]},\n"
        "  \"steps\": [\n"
        "    {\"id\": 1, \"task\": \"...\", \"done_definition\": \"...\"}\n"
        "  ]\n"
        "}\n"
        "Rules: create 5-7 non-overlapping steps and include budget allocation, shopping list, timeline, activities, and risk fallback.\n"
        f"Task: {task}"
    )

    plan_text, _ = await ask(planner_prompt, temperature=0, provider=provider, model=model)
    plan = extract_json_object(plan_text)

    # Validate the one field the loop cannot work without, before spending more calls.
    steps = plan.get("steps")
    if not isinstance(steps, list) or not steps:
        raise ValueError("Planner output has no usable 'steps' list:\n" + str(plan_text))
    goal = plan.get("goal", task)
    scope = plan.get("scope", {})

    step_results = []
    for position, step in enumerate(steps, start=1):
        if not isinstance(step, dict):
            # Tolerate a bare string step such as "Buy the cake".
            step = {"task": str(step)}
        step_id = step.get("id", position)
        step_task = step.get("task", "")
        done_definition = step.get("done_definition", "")

        exec_prompt = (
            "You are Event Planning Executor.\n"
            f"Goal: {goal}\n"
            f"Scope: {json.dumps(scope)}\n"
            f"Step: {step_task}\n"
            f"Done definition: {done_definition}\n"
            "Provide concise, practical output with concrete numbers, prices, and options for this step."
        )
        out, _ = await ask(exec_prompt, temperature=0, provider=provider, model=model)
        step_results.append({"id": step_id, "task": step_task, "result": out})

    final_prompt = (
        "You are Event Planning Finalizer.\n"
        "Produce an execution-ready party plan with:\n"
        "1) assumptions and constraints\n"
        "2) budget table by category with subtotal and total <= $200\n"
        "3) shopping list with quantities and estimated prices\n"
        "4) day-of timeline (setup -> party -> cleanup)\n"
        "5) activity/game plan for 10 people\n"
        "6) backup options if a store item is unavailable or over budget\n"
        "7) final checklist\n"
        f"Task: {task}\n"
        f"Plan: {json.dumps(plan, ensure_ascii=False)}\n"
        f"Step results: {json.dumps(step_results, ensure_ascii=False)}"
    )
    final_text, _ = await ask(final_prompt, temperature=0, provider=provider, model=model)

    return {
        "plan": plan,
        "step_results": step_results,
        "final": final_text,
    }

baseline = await direct_agent(TASK, provider=ACTIVE_PROVIDER)
print("=== SAME TASK ===")
print(TASK)
print("\n=== DIRECT AGENT (WITHOUT PLANNING) ===")
print(baseline)

=== SAME TASK ===
Plan a birthday party for 10 people with a total budget of $200.

=== DIRECT AGENT (WITHOUT PLANNING) ===
Sure! Here's a simple and fun birthday party plan for 10 people with a $200 budget:

### Theme
- Casual and cozy small gathering, either at home or a local park.

---

### Budget Breakdown

| Item               | Estimated Cost | Details                             |
|--------------------|----------------|-----------------------------------|
| Food & Drinks      | $100           | Pizza, snacks, drinks             |
| Cake               | $30            | Store-bought or homemade          |
| Decorations        | $20            | Balloons, streamers, tableware    |
| Entertainment      | $30            | Games, music playlist, party favors|
| Miscellaneous      | $20            | Tips, unexpected expenses          |

---

### Details

**Food & Drinks:**
- Order 3 large pizzas (~$15 each = $45)
- Chips, pretzels, or a veggie tray ($15)
- Soft drinks, juice, and wat

In [5]:
planned = await planning_agent(TASK, provider=ACTIVE_PROVIDER)
print("\n=== PLANNING AGENT (WITH PLANNING) - FINAL ===")
print(planned["final"])


=== PLANNING AGENT (WITH PLANNING) - FINAL ===
### Final Execution-Ready Birthday Party Plan for 10 People - Budget ≤ $200

---

## 1) Assumptions and Constraints

- **Date & Time:** Set by organizer; rental time 4 hours at park.
- **Location:** Lakeside Community Park Picnic Area, $50 permit/reservation.
- **Number of guests:** 10 people.
- **Budget:** $200 max, includes permit, food, drinks, decorations, seating, games, and contingency.
- **Park amenities:** Picnic tables with benches, open lawn space.
- **Seating:** Prefer to borrow or rent folding chairs; picnic blankets as backup.
- **Weather:** Backup indoor location is a friend’s home; umbrellas/tent rental option if needed.
- **Food:** Store-bought cake + light snacks, non-alcoholic refreshments.
- **Games:** Low-to-moderate cost, suitable for outdoor setting.
- **No alcohol, smoking, or pets at party site (park rules).**

---

## 2) Budget Table by Category

| Category             | Description                               |

## 7) Optional: Quick scorecard

In [6]:
def simple_score(text):
    """Score an answer by keyword coverage of five planning criteria.

    A criterion counts as covered when any of its keywords occurs as a
    substring of the lower-cased text.

    Returns:
        Tuple ``(score, checks)``: ``checks`` maps each criterion name to a bool
        and ``score`` is the number of criteria that are True.
    """
    keyword_groups = {
        "budget_breakdown": ("budget", "subtotal", "total", "$200", "usd"),
        "shopping_list": ("shopping list", "buy", "quantity", "price"),
        "timeline": ("timeline", "schedule", "setup", "cleanup"),
        "activities": ("activity", "game", "music", "entertainment"),
        "risk_fallback": ("backup", "fallback", "if unavailable", "contingency", "over budget"),
    }
    lowered = text.lower()
    checks = {}
    for criterion, keywords in keyword_groups.items():
        checks[criterion] = any(word in lowered for word in keywords)
    # Each True counts as 1 when summed.
    return sum(checks.values()), checks

# Score both answers with the same keyword rubric and print them side by side.
s1, c1 = simple_score(baseline)
s2, c2 = simple_score(planned["final"])
for label, score, checks in (("DIRECT", s1, c1), ("PLANNED", s2, c2)):
    print(f"{label} score:", score, checks)

DIRECT score: 4 {'budget_breakdown': True, 'shopping_list': True, 'timeline': True, 'activities': True, 'risk_fallback': False}
PLANNED score: 5 {'budget_breakdown': True, 'shopping_list': True, 'timeline': True, 'activities': True, 'risk_fallback': True}
