# LLM API – End-to-End Examples (Single Notebook)

This notebook shows a minimal client and step-by-step examples for:

1. Create a new account
2. Login
3. Change models (admin privilege example)
4. Start a new chat and get a response
5. Continue a chat
6. See chat history
7. Websearch with agentic tool selection
8. Agentic math calculation (LLM decides to use math tool)
9. Sequential reasoning with ReAct agent (step-by-step thinking)
10. Plan-and-Execute agent (parallel tool usage)
11. Auto agent selection (smart router picks best agent)
12. Complex JSON data analysis
13. RAG: Upload and query Excel files

Set `API_BASE_URL` in the configuration cell below if your server is not running at the default address (`http://127.0.0.1:8000`).

In [None]:
import sys
# Install httpx using the interpreter this kernel is running on (sys.executable),
# so the package is installed for the same Python that executes the cells below.
!{sys.executable} -m pip install httpx

In [None]:
# Base URL of the LLM API server; every request in this notebook is built from it.
# Edit this value if your server listens somewhere else.
API_BASE_URL = "http://127.0.0.1:8000"
print("Using:", API_BASE_URL)

In [None]:
import httpx

class LLMApiClient:
    """Minimal synchronous client for the LLM API used by this notebook.

    Every method sends one ``httpx`` request, calls ``raise_for_status()`` on
    the response (so HTTP error responses surface as exceptions instead of
    being silently ignored), and returns the decoded JSON body or one field
    of it. Once :meth:`login` has succeeded, the access token it received is
    sent as a bearer token on all calls that pass ``headers=self._headers()``.
    """

    def __init__(self, base_url: str, timeout: float = 300.0):
        """
        Initialize the LLM API client.

        Args:
            base_url: API base URL; a trailing "/" is stripped.
            timeout: Read/write/pool timeout in seconds used for chat
                requests (default: 300s/5min for LLM requests).
        """
        self.base_url = base_url.rstrip("/")
        # No token until login() succeeds; _headers() omits Authorization meanwhile.
        self.token = None
        # Timeout config for chat requests: 50s to establish the connection,
        # the caller-supplied timeout for read/write/pool.
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self) -> dict:
        """Return the request headers, including the bearer token when logged in."""
        h = {"Content-Type": "application/json"}
        if self.token:
            h["Authorization"] = f"Bearer {self.token}"
        return h

    def _chat(self, payload: dict) -> tuple:
        """POST *payload* to the chat-completions endpoint.

        Shared by chat_new() and chat_continue(), which differ only in the
        payload they build.

        Returns:
            ``(content, session_id)``: the content of the first choice's
            message and the ``x_session_id`` field of the response body.
        """
        # Chat requests use the long, client-wide timeout configured in __init__.
        r = httpx.post(
            f"{self.base_url}/v1/chat/completions",
            json=payload,
            headers=self._headers(),
            timeout=self.timeout,
        )
        r.raise_for_status()
        data = r.json()
        return data["choices"][0]["message"]["content"], data["x_session_id"]

    def signup(self, username: str, password: str, role: str = "guest"):
        """Create an account via /api/auth/signup and return the JSON response."""
        r = httpx.post(f"{self.base_url}/api/auth/signup", json={
            "username": username, "password": password, "role": role
        }, timeout=10.0)
        r.raise_for_status()
        return r.json()

    def login(self, username: str, password: str):
        """Log in via /api/auth/login and remember the returned access token.

        The ``access_token`` field of the response replaces ``self.token``,
        so later requests are made as this user. Returns the full JSON response.
        """
        r = httpx.post(f"{self.base_url}/api/auth/login", json={
            "username": username, "password": password
        }, timeout=10.0)
        r.raise_for_status()
        data = r.json()
        self.token = data["access_token"]
        return data

    def list_models(self):
        """Return the JSON response of GET /v1/models."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def change_model(self, model: str):
        """POST the model name to /api/admin/model and return the JSON response."""
        r = httpx.post(f"{self.base_url}/api/admin/model", json={"model": model}, headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto") -> tuple:
        """Send *user_message* without a session id.

        Returns:
            ``(reply_text, session_id)``; pass the session id to
            chat_continue() to keep talking in the same conversation.
        """
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": user_message}],
            "agent_type": agent_type
        }
        return self._chat(payload)

    def chat_continue(self, model: str, session_id: str, user_message: str, agent_type: str = "auto") -> tuple:
        """Send *user_message* with an existing *session_id*.

        Returns:
            ``(reply_text, session_id)`` as reported by the server.
        """
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": user_message}],
            "session_id": session_id,
            "agent_type": agent_type
        }
        return self._chat(payload)

    def chat_sessions(self):
        """Return the ``sessions`` field of GET /api/chat/sessions."""
        r = httpx.get(f"{self.base_url}/api/chat/sessions", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["sessions"]

    def chat_history(self, session_id: str):
        """Return the ``messages`` field of GET /api/chat/history/<session_id>."""
        r = httpx.get(f"{self.base_url}/api/chat/history/{session_id}", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["messages"]

    def tools(self):
        """Return the ``tools`` field of GET /api/tools/list."""
        r = httpx.get(f"{self.base_url}/api/tools/list", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["tools"]

    def math(self, expression: str):
        """POST *expression* to /api/tools/math and return the ``result`` field."""
        r = httpx.post(f"{self.base_url}/api/tools/math", json={"expression": expression}, headers=self._headers(), timeout=30.0)
        r.raise_for_status()
        return r.json()["result"]

    def websearch(self, query: str, max_results: int = 5):
        """POST the query to /api/tools/websearch and return the ``results`` field."""
        r = httpx.post(f"{self.base_url}/api/tools/websearch", json={"query": query, "max_results": max_results}, headers=self._headers(), timeout=60.0)
        r.raise_for_status()
        return r.json()["results"]

    def answer_from_json(self, model: str, json_blob: dict, question: str):
        """Ask *question* about *json_blob* in a new chat and return the reply text.

        The blob is embedded in the prompt as real JSON (double quotes,
        ``true``/``false``/``null``) rather than as a Python ``repr``, so the
        prompt matches its own "Given this JSON" wording. Non-ASCII text is
        kept readable instead of being escaped.
        """
        import json  # local import so the class does not depend on a file-level import

        try:
            serialized = json.dumps(json_blob, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serializable (e.g. contains a set): fall back to the
            # plain string form rather than failing the request.
            serialized = str(json_blob)
        prompt = f"Given this JSON: {serialized}\nAnswer: {question}"
        return self.chat_new(model, prompt)[0]

# 3000 seconds is 50 minutes (ten times the constructor's 300s default), which
# leaves room for long-running agent requests.
client = LLMApiClient(API_BASE_URL, timeout=3000.0)  # 50 minute timeout
print("Client ready with 3000s timeout for chat requests")

In [None]:
# 1) Create a new account
# Example credentials: replace with your own. `role` is not passed, so
# signup() uses its default ("guest"). Later cells reuse `username`/`password`.
username = "leesihun"
password = "s.hun.lee"
client.signup(username, password)

In [None]:
# 2) Login
# login() stores the returned access token on the client, so the calls that
# follow are authenticated; the bare `login` expression displays the response.
login = client.login(username, password)
login


In [None]:
# 3) Change models (admin only) – optional
# NOTE: this login replaces the token stored on `client`, so every request
# after this cell is sent with the admin's token until login() is called again.
client.login("admin", "administrator")
client.change_model("gemma3:12b")


In [None]:
# List models (OpenAI-compatible)
# The response is kept in `models`; the chat cell below reads models["data"][0]["id"].
models = client.list_models()
models


In [None]:
# 4) Start a new chat and get a response
# Use the id of the first entry in the model listing for all chat cells.
MODEL = models["data"][0]["id"]
reply, session_id = client.chat_new(
    model=MODEL,
    user_message="Hello! Give me a short haiku about autumn.",
)
# Show both the reply text and the session id needed to continue the chat.
(reply, session_id)


In [None]:
# 5) Continue a chat
# Reuse the session id from the previous cell so the server links both turns.
reply2, _ = client.chat_continue(
    model=MODEL,
    session_id=session_id,
    user_message="Now do one about winter.",
)
reply2


In [None]:
# 6) See chat history
# Displays a tuple: the session list from /api/chat/sessions, then the
# messages recorded for `session_id`.
client.chat_sessions(), client.chat_history(session_id)


In [None]:
# 7) Websearch with agentic tool selection
# Let the agent decide to use the web search tool
# Cell 3 may have switched the client to the admin token, so log back in as
# the regular user. Reuse the credentials defined in cell 1 instead of
# re-typing them, so the two cells cannot drift apart.
client.login(username, password)
search_query = "Search the web and tell me who is SiHun Lee, Ph. D. Include sources."
search_reply, _ = client.chat_new(MODEL, search_query)
print("Websearch Response:")
print(search_reply)

In [None]:
# 8) Agentic tool usage - Let the LLM decide which tool to use
# Simple math question (agent will automatically use math_calculator tool)
from IPython.display import display, Math, Latex

math_reply, _ = client.chat_new(
    model=MODEL,
    user_message="What is 11.951 divided by 3.751? Please calculate this precisely.",
)
print("Math Question Response:")
# Render the reply as LaTeX first, then print the raw text as well.
display(Latex(math_reply))
print(math_reply)
print("\n" + "=" * 80 + "\n")

In [None]:
# 9) Sequential reasoning with ReAct agent
# The agent is requested explicitly with agent_type="react"; the prompt asks
# for a search step followed by a calculation step.
sequential_query = """
First, search the web to find the current population of Tokyo.
Then, calculate what 15% of that population would be.
Finally, tell me the result.
Think hard, try to answer to best of your knowledge
"""
react_reply, _ = client.chat_new(
    model=MODEL,
    user_message=sequential_query,
    agent_type="react",
)
print("Sequential Reasoning (ReAct) Response:")
print(react_reply)
print("\n" + "=" * 80 + "\n")

In [None]:
# 10) Plan-and-Execute agent with multiple tools
# The agent is requested explicitly with agent_type="plan_execute"; the prompt
# joins independent tasks with "AND".
parallel_query = """
Search for the latest news about artificial intelligence AND
calculate the result of (100 * 0.15 + 25) / 2 AND
tell me what you found.
"""
plan_reply, _ = client.chat_new(
    model=MODEL,
    user_message=parallel_query,
    agent_type="plan_execute",
)
print("Plan-and-Execute (Parallel) Response:")
print(plan_reply)
print("\n" + "=" * 80 + "\n")

In [None]:
# 11) Auto agent selection - Let the router decide
# agent_type="auto" leaves the choice of agent to the server.
auto_query = "If the capital of France has a population of 2.1 million, and we need to allocate 500 euros per person for a project, what's the total budget needed? First search for the actual population, then calculate."
auto_reply, _ = client.chat_new(
    model=MODEL,
    user_message=auto_query,
    agent_type="auto",
)
print("Auto Agent Selection Response:")
print(auto_reply)

In [None]:
# 12) Complex JSON data analysis
import json

# Create a realistic e-commerce dataset
complex_json = {
    "company": "TechMart Inc",
    "quarter": "Q3 2025",
    "departments": [
        {
            "name": "Electronics",
            "employees": 45,
            "sales": [
                {"product": "Laptop", "units_sold": 320, "price": 1200, "revenue": 384000},
                {"product": "Smartphone", "units_sold": 856, "price": 800, "revenue": 684800},
                {"product": "Tablet", "units_sold": 142, "price": 500, "revenue": 71000}
            ]
        },
        {
            "name": "Home Appliances",
            "employees": 32,
            "sales": [
                {"product": "Refrigerator", "units_sold": 89, "price": 1500, "revenue": 133500},
                {"product": "Washing Machine", "units_sold": 124, "price": 900, "revenue": 111600},
                {"product": "Microwave", "units_sold": 267, "price": 200, "revenue": 53400}
            ]
        },
        {
            "name": "Furniture",
            "employees": 28,
            "sales": [
                {"product": "Desk", "units_sold": 178, "price": 450, "revenue": 80100},
                {"product": "Chair", "units_sold": 432, "price": 150, "revenue": 64800},
                {"product": "Bookshelf", "units_sold": 95, "price": 300, "revenue": 28500}
            ]
        }
    ]
}

# Ask the LLM to analyze this complex data structure
analysis_query = """
Based on this company data, please analyze and tell me:
1. Which department has the highest total revenue?
2. What is the average revenue per employee across all departments?
3. Which single product generated the most revenue?
4. Calculate the total units sold across all departments.
"""

# Serialize with json.dumps so the model receives real JSON (double quotes,
# true/false/null) instead of the Python repr that f-string formatting of a
# dict would produce.
json_reply, _ = client.chat_new(
    MODEL,
    f"Here is the data:\n{json.dumps(complex_json, indent=2, ensure_ascii=False)}\n\n{analysis_query}",
)
print("Complex JSON Analysis Response:")
print(json_reply)

In [None]:
# Upload (optional): the next cell posts files to /api/files/upload with httpx, using your JWT.
# This cell assumes the spreadsheets are already saved under the upload directory and only
# checks that they can be found before any upload/indexing is attempted.

import os
from pathlib import Path

# Example upload paths (adjust if needed). They are relative, so they resolve
# against the working directory of the process running this notebook.
# `username` comes from the account-creation cell (cell 1).
pos_path = Path(f"data/uploads/{username}/폴드 긍정.xlsx")
neg_path = Path(f"data/uploads/{username}/폴드 부정.xlsx")

print(pos_path.exists(), neg_path.exists())


In [None]:
# If files exist, upload via API (to ensure user-scoped indexing)
# The token is read once, when this cell runs; re-run the cell after a new
# login() so that `headers` carries the current token.
headers = {"Authorization": f"Bearer {client.token}"}

def upload_file(path: Path):
    """Upload one spreadsheet to /api/files/upload and return the JSON response.

    Args:
        path: Location of the .xlsx file to send; its file name is used as
            the uploaded file name.

    The file is opened in a ``with`` block so the handle is closed once the
    request has completed, including when the request raises.
    """
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    with open(path, "rb") as fh:
        files = {"file": (path.name, fh, xlsx_mime)}
        r = httpx.post(f"{API_BASE_URL}/api/files/upload", headers=headers, files=files)
    r.raise_for_status()
    return r.json()

# Upload whichever of the two example spreadsheets is present, positive file
# first, and collect the server responses.
uploads = []
for candidate in (pos_path, neg_path):
    if candidate.exists():
        uploads.append(upload_file(candidate))

uploads


In [None]:
# Query RAG across all indexed docs
# Ask for the five best matches (top_k=5) and display the raw JSON response.
r = httpx.get(
    f"{API_BASE_URL}/api/tools/rag/search",
    params=dict(query="폴드에 대한 긍정/부정 기준을 요약해줘", top_k=5),
    headers=headers,
)
r.raise_for_status()
r.json()
