# LLM API – End-to-End Examples (Single Notebook)

In [None]:
import sys
# !{sys.executable} -m pip install httpx
# ! pip install pip-system-certs

# API_BASE_URL = "http://10.198.112.203:10007"
API_BASE_URL = 'http://localhost:10007'
print("Using:", API_BASE_URL)

In [None]:
import httpx
import json
from pathlib import Path
from typing import Iterator

class LLMApiClient:
    """Synchronous httpx client for the LLM API server.

    Covers sign-up/login, model listing and switching, chat (blocking and
    Server-Sent-Events streaming, each with optional file attachments),
    session history, tool listing and web search.  After a successful
    ``login`` the bearer token is attached to every request built through
    ``_headers``.
    """

    def __init__(self, base_url: str, timeout: float = 3600.0):
        """Store the server root and build the timeout used for chat calls.

        Args:
            base_url: Server root URL; trailing slashes are stripped.
            timeout: Seconds allowed for the read, write and pool phases of
                chat requests.  The remaining (connect) phase uses the
                50-second default passed as the first argument.
        """
        self.base_url = base_url.rstrip("/")
        self.token = None
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _headers(self):
        """Return the bearer ``Authorization`` header, or ``{}`` before login."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    @staticmethod
    def _user_messages(user_message: str) -> str:
        """Serialize a single user turn as the JSON string the form field expects."""
        return json.dumps([{"role": "user", "content": user_message}])

    def _open_uploads(self, files):
        """Open each path in *files* for binary reading as multipart parts.

        Returns a list of ``("files", (filename, handle))`` tuples.  If any
        path cannot be opened, the handles opened so far are closed before
        the error propagates, so one bad path never leaks earlier handles.
        """
        uploads = []
        try:
            for file_path in files or []:
                filename = Path(file_path).name
                uploads.append(("files", (filename, open(file_path, "rb"))))
        except BaseException:
            self._close_uploads(uploads)
            raise
        return uploads

    @staticmethod
    def _close_uploads(uploads):
        """Close every file handle produced by ``_open_uploads``."""
        for _, (_, handle) in uploads:
            handle.close()

    @staticmethod
    def _error_message(error) -> str:
        """Extract readable text from a streamed ``error`` payload.

        Accepts both the ``{"message": ...}`` object form and a bare value.
        """
        if isinstance(error, dict):
            return str(error.get("message", error))
        return str(error)

    @classmethod
    def _parse_sse(cls, lines, emit_session_id: bool):
        """Turn an iterable of SSE text lines into content tokens.

        Only ``data: `` lines are considered; ``[DONE]`` ends the stream,
        payloads that are not valid JSON objects are skipped, and ``null``
        content deltas are dropped so that only strings are yielded.

        Args:
            lines: Iterable of decoded lines from the response.
            emit_session_id: When true, the last ``x_session_id`` seen is
                yielded at the end as ``"SESSION_ID:<id>"``.

        Raises:
            RuntimeError: If a chunk carries an ``error`` field.
        """
        session_id = None
        for line in lines:
            if not line.startswith("data: "):
                continue
            payload = line[6:]  # strip the "data: " prefix
            if payload == "[DONE]":
                break
            try:
                chunk = json.loads(payload)
            except json.JSONDecodeError:
                continue  # tolerate malformed frames
            if not isinstance(chunk, dict):
                continue

            if "error" in chunk:
                raise RuntimeError(f"Streaming error: {cls._error_message(chunk['error'])}")
            if "x_session_id" in chunk:
                session_id = chunk["x_session_id"]

            choices = chunk.get("choices")
            if choices:
                delta = choices[0].get("delta") or {}
                content = delta.get("content")
                if content is not None:
                    yield content

        if emit_session_id and session_id:
            yield f"SESSION_ID:{session_id}"

    def _post_chat(self, data: dict, files):
        """POST a blocking chat completion; return ``(content, session_id)``."""
        uploads = self._open_uploads(files)
        try:
            r = httpx.post(
                f"{self.base_url}/v1/chat/completions",
                data=data,
                files=uploads or None,
                headers=self._headers(),
                timeout=self.timeout,
            )
            r.raise_for_status()
            result = r.json()
            return result["choices"][0]["message"]["content"], result["x_session_id"]
        finally:
            self._close_uploads(uploads)

    def _stream_chat(self, data: dict, files, emit_session_id: bool):
        """POST a streaming chat completion and yield tokens as they arrive."""
        uploads = self._open_uploads(files)
        try:
            with httpx.stream(
                "POST",
                f"{self.base_url}/v1/chat/completions",
                data=data,
                files=uploads or None,
                headers=self._headers(),
                timeout=self.timeout,
            ) as response:
                response.raise_for_status()
                yield from self._parse_sse(response.iter_lines(), emit_session_id)
        finally:
            self._close_uploads(uploads)

    @staticmethod
    def _format_json_prompt(json_blob, question: str) -> str:
        """Build the question prompt with *json_blob* rendered as real JSON.

        Falls back to ``str(json_blob)`` when the value is not JSON
        serializable, so inputs that worked before keep working.
        """
        try:
            blob_text = json.dumps(json_blob, ensure_ascii=False)
        except (TypeError, ValueError):
            blob_text = str(json_blob)
        return f"Given this JSON: {blob_text}\nAnswer: {question}"

    # ------------------------------------------------------------------
    # Auth
    # ------------------------------------------------------------------

    def signup(self, username: str, password: str, role: str = "guest"):
        """Create an account and return the server's JSON response."""
        r = httpx.post(f"{self.base_url}/api/auth/signup", json={
            "username": username, "password": password, "role": role
        }, timeout=10.0)
        r.raise_for_status()
        return r.json()

    def login(self, username: str, password: str):
        """Log in, remember ``access_token`` for later calls, return the JSON body."""
        r = httpx.post(f"{self.base_url}/api/auth/login", json={
            "username": username, "password": password
        }, timeout=10.0)
        r.raise_for_status()
        data = r.json()
        self.token = data["access_token"]
        return data

    # ------------------------------------------------------------------
    # Models
    # ------------------------------------------------------------------

    def list_models(self):
        """Return the JSON body of ``GET /v1/models``."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def change_model(self, model: str):
        """POST the new model name to ``/api/admin/model`` and return the JSON body."""
        # httpx sets Content-Type: application/json itself when json= is used.
        r = httpx.post(f"{self.base_url}/api/admin/model", json={"model": model},
                       headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    # ------------------------------------------------------------------
    # Chat
    # ------------------------------------------------------------------

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto", files: list = None):
        """Start a new chat session.

        Args:
            model: Model name.
            user_message: User message.
            agent_type: Agent type sent to the server.
            files: Optional list of file paths to attach.

        Returns:
            ``(response_text, session_id)``.
        """
        data = {
            "model": model,
            "messages": self._user_messages(user_message),
            "agent_type": agent_type,
        }
        return self._post_chat(data, files)

    def chat_new_streaming(self, model: str, user_message: str, agent_type: str = "auto", files: list = None) -> Iterator[str]:
        """
        Start new chat with streaming response (Server-Sent Events)

        Args:
            model: Model name
            user_message: User message
            agent_type: Agent type (auto, react, plan_execute) - Note: streaming only works for simple chat
            files: Optional list of file paths to attach

        Yields:
            Response tokens as they're generated, then the session id as a
            final value with prefix "SESSION_ID:" (when the server sent one).

        Raises:
            RuntimeError: If the stream carries an error chunk.
        """
        data = {
            "model": model,
            "messages": self._user_messages(user_message),
            "agent_type": agent_type,
            "stream": "true",  # Enable streaming
        }
        yield from self._stream_chat(data, files, emit_session_id=True)

    def chat_continue(self, model: str, session_id: str, user_message: str, agent_type: str = "auto", files: list = None):
        """Send another user message in an existing session.

        Returns:
            ``(response_text, session_id)``.
        """
        data = {
            "model": model,
            "messages": self._user_messages(user_message),
            "session_id": session_id,
            "agent_type": agent_type,
        }
        return self._post_chat(data, files)

    def chat_continue_streaming(self, model: str, session_id: str, user_message: str, agent_type: str = "auto", files: list = None) -> Iterator[str]:
        """Continue an existing session with a streaming (SSE) response.

        Yields response tokens only; unlike ``chat_new_streaming`` no
        ``SESSION_ID:`` item is emitted, since the caller already has the id.

        Raises:
            RuntimeError: If the stream carries an error chunk.
        """
        data = {
            "model": model,
            "messages": self._user_messages(user_message),
            "session_id": session_id,
            "agent_type": agent_type,
            "stream": "true",
        }
        yield from self._stream_chat(data, files, emit_session_id=False)

    # ------------------------------------------------------------------
    # Sessions, tools, search
    # ------------------------------------------------------------------

    def chat_sessions(self):
        """Return the ``sessions`` list from ``/api/chat/sessions``."""
        r = httpx.get(f"{self.base_url}/api/chat/sessions", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["sessions"]

    def chat_history(self, session_id: str):
        """Return the ``messages`` list for *session_id*."""
        r = httpx.get(f"{self.base_url}/api/chat/history/{session_id}", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["messages"]

    def tools(self):
        """Return the ``tools`` list from ``/api/tools/list``."""
        r = httpx.get(f"{self.base_url}/api/tools/list", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()["tools"]

    def websearch(self, query: str, max_results: int = 5):
        """Run a web search and return the full JSON response."""
        # Fixed 1 hour (3600s) timeout for web search + LLM answer generation
        r = httpx.post(f"{self.base_url}/api/tools/websearch",
                       json={"query": query, "max_results": max_results},
                       headers=self._headers(), timeout=3600.0)
        r.raise_for_status()
        return r.json()  # Returns full response with answer, results, and sources_used

    def answer_from_json(self, model: str, json_blob: dict, question: str):
        """Ask *question* about *json_blob* in a fresh chat; return the reply text."""
        prompt = self._format_json_prompt(json_blob, question)
        return self.chat_new(model, prompt)[0]

# Shared client for the cells below; chat calls get the 1 hour timeout.
client = LLMApiClient(API_BASE_URL, timeout=3600.0)  # 1 hour timeout
print("Client ready with 3600s (1 hour) timeout for all requests")
# The check mark is written as an escape so it prints correctly regardless of
# how this file is encoded on disk.
print("\u2713 Now supports multipart/form-data with optional file attachments")
print("\u2713 Now supports streaming responses via chat_new_streaming() and chat_continue_streaming()")

# 1) Create a new account (skip if user already exists)

In [None]:
# NOTE(review): credentials are hard-coded in the notebook; consider reading
# them from the environment or a prompt before sharing this file.
username = "leesihun"
password = "s.hun.lee"
try:
    result = client.signup(username, password)
except httpx.HTTPStatusError as e:
    # The server answered with an error status, which is what an already
    # registered username produces. Connection problems are deliberately
    # not caught here: they would otherwise be misreported as "user exists".
    print(f"Signup skipped (user may already exist): {e}")
    print("Continuing with existing account...")
else:
    print(f"Account created: {result}")

# 2) Login

In [None]:
login = client.login(username, password)
login

# 3) Change models (admin only) – optional

In [None]:
client.login("admin", "administrator")

In [None]:
models = client.list_models()
models

MODEL = models["data"][0]["id"]

# 4) Start a new chat and get a response

In [None]:
reply, session_id = client.chat_new(MODEL, "Hello! Give me a short haiku about autumn.")
reply, session_id

# 5) Continue an existing chat

In [None]:
reply2, _ = client.chat_continue(MODEL, session_id, "Now do one about winter.")
reply2

# 6) See chat history

In [None]:
client.chat_sessions(), client.chat_history(session_id)

# 7) Websearch with LLM-generated answer

In [None]:
# Disabled because the current server doesn't have an internet connection

In [None]:
# # 7b) Websearch example - Sports news
# # Another example showing the LLM answer generation
# client.login("leesihun", "s.hun.lee")
# search_query = "What was the latest game of Liverpool FC and who won? The current date is 2025/12/12"
# search_response, _ = client.chat_new(MODEL, search_query, agent_type = 'react')

# print("=== LLM-Generated Answer ===")
# print(search_response)

# 8) Agentic tool usage - Let the LLM decide which tool to use

In [None]:
math_reply, _ = client.chat_continue(MODEL, session_id, "What is 11.951/3.751?", agent_type='react')
print("Math Question Response:")
from IPython.display import display, Math, Latex
display(Latex(math_reply))
print(math_reply)
print("\n" + "="*80 + "\n")

# 9) Sequential reasoning

In [None]:
# No agent_type is passed here, so chat_continue sends its default ("auto").
# The step-by-step wording is presumably intended to make the server's router
# choose the ReAct agent -- confirm against the server's routing rules.
sequential_query = """
First, search the web to find the latest population of Tokyo.
Then, calculate what 15% of that population would be.
Finally, tell me the result.
Think hard, try to answer to best of your knowledge
"""
react_reply, _ = client.chat_continue(MODEL, session_id, sequential_query)
print("Sequential Reasoning (ReAct) Response:")

# Render the reply through IPython's Latex display object.
display(Latex(react_reply))
print("\n" + "="*80 + "\n")

# 10) Plan-and-Execute agent with multiple tools

In [None]:
# The Plan-and-Execute agent is requested explicitly via
# agent_type="plan_execute" below; the query simply chains several
# independent tasks with "and" to give the planner multiple steps.
parallel_query = """
Search for the latest news about artificial intelligence and
calculate the result of (100 * 0.15 + 25) / 2 and
Think about what god is and
What the best smart phone is and
what is 1007*1007/4524753.
"""
plan_reply, _ = client.chat_continue(MODEL,session_id,  parallel_query, agent_type="plan_execute")
print("Plan-and-Execute Response:")

# Render the reply through IPython's Latex display object.
display(Latex(plan_reply))
print("\n" + "="*80 + "\n")

# 11) Auto agent selection - Let the router decide

In [None]:
# The smart router will analyze the query and pick the best agent
auto_query = "If the capital of France has a population of 2.1 million, and we need to allocate 500 euros per person for a project, what's the total budget needed? First search for the actual population, then calculate."
auto_reply, _ = client.chat_continue(MODEL, session_id, auto_query, agent_type="auto")
print("Auto Agent Selection Response:")
print(auto_reply)

# Auto Agent + RAG Documents Demo

This notebook demonstrates how to **upload local documents to RAG** and use the **Auto Agent** to query RAG document collections.

Key concepts:
- Upload your own local documents (PDF, TXT, MD, DOCX, CSV, etc.) to RAG collections
- Browse and inspect RAG collections and their documents
- Manage collections (create, delete, remove specific documents)
- Use the `auto` agent type which intelligently routes queries to the appropriate agent
- When the query involves document retrieval, the auto agent selects the **ReAct agent**, which calls the **RAG tool**

### How the Auto Agent Routes to RAG

```
User Query
  -> Auto Agent (LLM decides: chat / react / plan_execute)
    -> ReAct Agent (if tools are needed)
      -> RAG Tool (if query involves internal documents)
        -> Semantic Search over existing collection
          -> LLM synthesizes answer from retrieved chunks
```

### Prerequisites

1. Both servers must be running (`python tools_server.py` then `python server.py`)
2. At least one RAG collection must already exist with uploaded documents
3. `RAG_DEFAULT_COLLECTION` in `config.py` must match the target collection name

In [None]:
import httpx
import json
from IPython.display import display, Markdown


class LLMApiClient:
    """Unified client for the LLM API server.

    Chat and auth calls go to ``base_url``; the ``rag_*`` methods take the
    tools-server root explicitly as ``tools_base``.  The bearer token stored
    by ``login`` is sent with every request.
    """

    def __init__(self, base_url: str, timeout: float = 6000.0):
        """Store the server root and build the long-running request timeout.

        Args:
            base_url: Main server root URL; trailing slashes are stripped.
            timeout: Seconds allowed for the read, write and pool phases;
                the remaining (connect) phase uses the 50-second default.
        """
        self.base_url = base_url.rstrip("/")
        self.token = None
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self):
        """Return the bearer ``Authorization`` header, or ``{}`` before login."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    @staticmethod
    def _rag_url(tools_base: str, *segments) -> str:
        """Build ``<tools_base>/api/tools/rag/<segments...>``.

        Every segment is percent-encoded (including ``/``), so collection
        names or document ids containing ``/``, ``?``, ``#`` or spaces stay
        inside their own path segment.  A trailing slash on *tools_base* is
        ignored.
        """
        from urllib.parse import quote

        parts = [f"{tools_base.rstrip('/')}/api/tools/rag"]
        parts.extend(quote(str(segment), safe="") for segment in segments)
        return "/".join(parts)

    # ---- Auth ----

    def login(self, username: str, password: str):
        """Log in, remember ``access_token``, and return the JSON body."""
        r = httpx.post(
            f"{self.base_url}/api/auth/login",
            json={"username": username, "password": password},
            timeout=10.0,
        )
        r.raise_for_status()
        body = r.json()  # parse once; reused for the token and the return value
        self.token = body["access_token"]
        return body

    def list_models(self):
        """Return the JSON body of ``GET /v1/models``."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    # ---- Chat (auto agent) ----

    def _chat(self, data: dict):
        """POST form *data* to the chat endpoint; return ``(text, session_id)``."""
        r = httpx.post(
            f"{self.base_url}/v1/chat/completions",
            data=data,
            headers=self._headers(),
            timeout=self.timeout,
        )
        r.raise_for_status()
        result = r.json()
        return result["choices"][0]["message"]["content"], result["x_session_id"]

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto"):
        """Start a new chat session. Returns (response_text, session_id)."""
        messages = [{"role": "user", "content": user_message}]
        return self._chat({
            "model": model,
            "messages": json.dumps(messages),
            "agent_type": agent_type,
        })

    def chat_continue(self, model: str, session_id: str, user_message: str, agent_type: str = "auto"):
        """Continue an existing session. Returns (response_text, session_id)."""
        messages = [{"role": "user", "content": user_message}]
        return self._chat({
            "model": model,
            "messages": json.dumps(messages),
            "session_id": session_id,
            "agent_type": agent_type,
        })

    # ---- RAG management (direct tools-server calls) ----

    def rag_list_collections(self, tools_base: str):
        """List all RAG collections for the authenticated user."""
        r = httpx.get(
            self._rag_url(tools_base, "collections"),
            headers=self._headers(),
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json()

    def rag_list_documents(self, tools_base: str, collection_name: str):
        """List documents in a RAG collection."""
        r = httpx.get(
            self._rag_url(tools_base, "collections", collection_name, "documents"),
            headers=self._headers(),
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json()

    def rag_query_direct(self, tools_base: str, query: str, collection_name: str, max_results: int = 5):
        """Query RAG directly via the tools server (bypasses the agent)."""
        r = httpx.post(
            self._rag_url(tools_base, "query"),
            headers=self._headers(),
            json={
                "query": query,
                "collection_name": collection_name,
                "max_results": max_results,
            },
            timeout=self.timeout,
        )
        r.raise_for_status()
        return r.json()

    def rag_upload_document(self, tools_base: str, collection_name: str, file_path: str):
        """Upload a local document to a RAG collection.

        Raises:
            FileNotFoundError: If *file_path* does not exist.
        """
        from pathlib import Path

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        # The handle only needs to stay open for the duration of the request.
        with open(path, "rb") as f:
            r = httpx.post(
                self._rag_url(tools_base, "upload"),
                headers=self._headers(),
                files={"file": (path.name, f, "application/octet-stream")},
                data={"collection_name": collection_name},
                timeout=self.timeout,
            )

        r.raise_for_status()
        return r.json()

    def rag_create_collection(self, tools_base: str, collection_name: str):
        """Create a new RAG collection."""
        r = httpx.post(
            self._rag_url(tools_base, "collections"),
            headers=self._headers(),
            json={"collection_name": collection_name},
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json()

    def rag_delete_collection(self, tools_base: str, collection_name: str):
        """Delete a RAG collection."""
        r = httpx.delete(
            self._rag_url(tools_base, "collections", collection_name),
            headers=self._headers(),
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json()

    def rag_delete_document(self, tools_base: str, collection_name: str, document_id: str):
        """Delete a specific document from a collection."""
        r = httpx.delete(
            self._rag_url(tools_base, "collections", collection_name, "documents", document_id),
            headers=self._headers(),
            timeout=10.0,
        )
        r.raise_for_status()
        return r.json()


# -------------------------------------------------------------------
# Configuration  (adjust to your environment)
# -------------------------------------------------------------------
API_BASE_URL   = "http://localhost:10007"   # Main API server
TOOLS_BASE_URL = "http://localhost:10007"   # Tools API server
USERNAME = "admin"
PASSWORD = "administrator"

client = LLMApiClient(API_BASE_URL, timeout=6000.0)
print("\u2713 Client initialized")
print(f"  Main server : {API_BASE_URL}")
print(f"  Tools server: {TOOLS_BASE_URL}")

In [None]:
## Step 1: Authenticate and Discover Model
client.login(USERNAME, PASSWORD)
models = client.list_models()
MODEL = models["data"][0]["id"]

print(f"\u2713 Logged in as: {USERNAME}")
print(f"\u2713 Using model : {MODEL}")
# ## Step 2: Upload Local Documents to RAG (Optional)

# If you want to add your own documents to RAG, use this section. Skip to Step 3 if you already have documents uploaded.

# ### Supported File Formats

# - **Text**: `.txt`, `.md`
# - **Documents**: `.pdf`, `.docx`
# - **Data**: `.json`, `.csv`, `.xlsx`, `.xls`
# # Create a new RAG collection and upload documents
from pathlib import Path

# Step 1: Create collection
collection_name = "default"  # Change this to your desired collection name

# Status glyphs are written as escapes (\u2713 check, \u2717 cross,
# \u26a0\ufe0f warning, \U0001F4E4 outbox) so they print correctly
# regardless of how this file is encoded on disk.
try:
    print(f"Creating collection '{collection_name}'...")
    result = client.rag_create_collection(TOOLS_BASE_URL, collection_name)

    if result.get("success"):
        print("\u2713 Collection created successfully!")
        print(f"  Collection name: {collection_name}\n")
    else:
        print(f"\u2717 Failed to create collection: {result.get('error')}\n")
except Exception as e:
    # Best effort: the collection may already exist, so report and move on.
    print(f"\u2717 Error creating collection: {e}\n")

# Step 2: Upload your PDF files
# Replace with your actual PDF file paths
custom_pdf_files = [
    "./USB 3.2 Revision 1.0.pdf",
    "./usb_20.pdf",
]

print(f"Uploading {len(custom_pdf_files)} documents...\n")

for pdf_file in custom_pdf_files:
    # Skip missing files with a warning instead of aborting the whole batch
    if not Path(pdf_file).exists():
        print(f"\u26a0\ufe0f  File not found: {pdf_file}\n")
        continue

    print(f"\U0001F4E4 Uploading: {pdf_file}")

    try:
        result = client.rag_upload_document(TOOLS_BASE_URL, collection_name, pdf_file)

        if result.get("success"):
            print(f"  \u2713 Success! Chunks created: {result.get('chunks_created')}")
            print(f"  Total chunks in collection: {result.get('total_chunks')}\n")
        else:
            print(f"  \u2717 Failed: {result.get('error')}\n")
    except Exception as e:
        # One failed upload should not stop the remaining files.
        print(f"  \u2717 Error: {e}\n")

## Step 3: Browse Existing RAG Collections

Let's see which collections and documents are available after any uploads.

In [None]:
# Ask the tools server which RAG collections exist and print a short summary
# of each one; on failure, dump the raw response for inspection.
collections_result = client.rag_list_collections(TOOLS_BASE_URL)

if not collections_result.get("success"):
    print("ERROR: Could not list collections.")
    print(collections_result)
else:
    collections = collections_result["collections"]
    print(f"Found {len(collections)} collection(s):\n")
    for coll in collections:
        # Labels are left-aligned to 11 columns so the colons line up.
        for _label, _key in (
            ("Collection", "name"),
            ("Documents", "documents"),
            ("Chunks", "chunks"),
            ("Created", "created_at"),
        ):
            print(f"  {_label:<11}: {coll[_key]}")
        print()

In [None]:
# Initialize comparison utilities
from IPython.display import HTML, Markdown
import time

def compare_responses(query, collection_name, model, session_id=None, use_markdown=False):
    """Run *query* through direct RAG and the auto agent, then display both.

    Relies on the notebook globals ``client`` and ``TOOLS_BASE_URL``.

    Args:
        query: Question sent to both back ends.
        collection_name: RAG collection searched by the direct query.
        model: Model id used for the agent call.
        session_id: Existing chat session to continue; ``None`` starts a
            new session.
        use_markdown: Render a stacked Markdown report when true, otherwise
            a two-column HTML comparison.

    Returns:
        The session id reported by the agent call, for use in follow-ups.
    """
    import html  # local import: only needed for escaping in the HTML view

    print(f"Query: {query}\n")
    print("=" * 140)

    # --- Direct RAG Query ---
    # perf_counter is monotonic, so elapsed times cannot be skewed by
    # wall-clock adjustments.
    rag_start = time.perf_counter()
    direct_result = client.rag_query_direct(
        TOOLS_BASE_URL,
        query=query,
        collection_name=collection_name,
        max_results=5,
    )
    rag_time = time.perf_counter() - rag_start

    # --- Auto Agent Query ---
    agent_start = time.perf_counter()
    if session_id is None:
        agent_response, new_session_id = client.chat_new(
            model=model,
            user_message=query,
            agent_type="auto",
        )
    else:
        agent_response, new_session_id = client.chat_continue(
            model=model,
            session_id=session_id,
            user_message=query,
            agent_type="auto",
        )
    agent_time = time.perf_counter() - agent_start

    # --- Format RAG Result ---
    if direct_result.get("success"):
        rag_answer = direct_result['answer']
        data = direct_result.get("data") or {}
        num_results = data.get('num_results', 0)
        rag_sources = [
            f"  [{i}] {doc['document']} chunk {doc['chunk_index']} (score {doc.get('score', 0):.3f})"
            for i, doc in enumerate(data.get("documents", []), 1)
        ]
    else:
        rag_answer = f"ERROR: {direct_result.get('error')}"
        num_results = 0
        rag_sources = []

    # --- Display based on format preference ---
    if use_markdown:
        sources_md = "\n".join(rag_sources) if rag_sources else "N/A"
        markdown_output = f"""
## \U0001F50D Direct RAG Query

**Answer:**

{rag_answer}

**Metadata:**
- Results: {num_results} chunks retrieved
- Exec time: {rag_time:.2f}s

**Sources:**
{sources_md}

---

## \U0001F916 Auto Agent Query

**Answer:**

{agent_response}

**Metadata:**
- Exec time: {agent_time:.2f}s
- Session ID: {new_session_id}
"""
        display(Markdown(markdown_output))
    else:
        # Answers and source names come from the model / document store and
        # may contain '<', '>' or '&'; escape them before building markup so
        # they are shown literally instead of being interpreted as HTML.
        rag_answer_html = html.escape(str(rag_answer)).replace("\n", "<br>")
        agent_answer_html = html.escape(str(agent_response)).replace("\n", "<br>")
        sources_html = "".join(f"{html.escape(src)}<br>" for src in rag_sources)

        rag_metadata = f"""
        <div style='font-size: 0.9em; color: #666; margin-top: 10px;'>
        \u2713 Results: {num_results} chunks retrieved<br>
        \u2713 Exec time: {rag_time:.2f}s<br>
        <strong>Sources:</strong><br>
        {sources_html}</div>"""

        agent_metadata = f"""
        <div style='font-size: 0.9em; color: #666; margin-top: 10px;'>
        \u2713 Exec time: {agent_time:.2f}s<br>
        \u2713 Session ID: {html.escape(str(new_session_id))}
        </div>
        """

        html_output = f"""
        <div style='display: flex; gap: 20px; margin-top: 20px;'>
            <div style='flex: 1; border: 2px solid #4CAF50; border-radius: 8px; padding: 15px; background-color: #f9f9f9;'>
                <h3 style='color: #4CAF50; margin-top: 0;'>\U0001F50D Direct RAG Query</h3>
                <div style='background: white; padding: 10px; border-radius: 5px; margin-bottom: 10px;'>
                    {rag_answer_html}
                </div>
                {rag_metadata}
            </div>
            <div style='flex: 1; border: 2px solid #2196F3; border-radius: 8px; padding: 15px; background-color: #f9f9f9;'>
                <h3 style='color: #2196F3; margin-top: 0;'>\U0001F916 Auto Agent Query</h3>
                <div style='background: white; padding: 10px; border-radius: 5px; margin-bottom: 10px;'>
                    {agent_answer_html}
                </div>
                {agent_metadata}
            </div>
        </div>
        """
        display(HTML(html_output))

    print("=" * 140 + "\n")

    return new_session_id

# Initialize session for first query
print("‚úì Comparison utility loaded")

## Question 4: USB3.2 LTSSM and RX.Detect

In [None]:
query_4 = "USB3.2Ïùò LTSSMÏóê ÎåÄÌï¥ÏÑú ÏûêÏÑ∏Ìûà ÏÑ§Î™ÖÌï¥Ï£ºÍ≥†, ÌäπÌûà RX.DetectÏóê ÎåÄÌï¥ÏÑú ÏûêÏÑ∏Ìûà ÏÑ§Î™ÖÌï¥Ï§ò"
# NOTE(review): COLLECTION_NAME is not defined in any cell visible in this
# notebook (the upload cell assigns lowercase `collection_name`); confirm it
# is set elsewhere, otherwise this line raises NameError.
# The returned session id is kept so follow-up questions can continue the chat.
session_id = compare_responses(query_4, COLLECTION_NAME, MODEL, use_markdown=True)

# AI-Orchestrated Report Generator

## 1. Setup & Configuration

In [None]:
import httpx
import json
from pathlib import Path
from datetime import datetime
import time
from IPython.display import display, Latex

# Build Universal LLM API Client
class LLMApiClient:
    """Small HTTP client for the LLM API used by the report-generation cells.

    Covers login, model listing, chat completions (new or continued session,
    optionally with file uploads) and retrieval of session artifacts.  Every
    request calls ``raise_for_status()``, so a non-success HTTP status surfaces
    as ``httpx.HTTPStatusError``.
    """

    def __init__(self, base_url: str, timeout: float = 360000.0):
        """Remember the server address and build the chat-request timeout.

        Args:
            base_url: Root URL of the API server; trailing "/" is removed.
            timeout: Seconds allowed for the read, write and pool phases of
                chat requests.  The 50 s default passed to ``httpx.Timeout``
                covers the phase that is not overridden (connect).
        """
        self.base_url = base_url.rstrip("/")
        self.token = None  # set by login(); None means "send no auth header"
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self):
        """Return the bearer-token header dict, or ``{}`` when not logged in."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    def login(self, username: str, password: str):
        """Authenticate and store the returned access token on the client.

        Returns:
            The decoded JSON body of the login response.

        Raises:
            httpx.HTTPStatusError: On a non-success status.
            KeyError: If the response has no ``access_token`` field.
        """
        r = httpx.post(f"{self.base_url}/api/auth/login",
                       json={"username": username, "password": password}, timeout=10.0)
        r.raise_for_status()
        payload = r.json()  # decode once and reuse for both token and return value
        self.token = payload["access_token"]
        return payload

    def list_models(self):
        """Return the decoded JSON body of ``GET /v1/models``."""
        r = httpx.get(f"{self.base_url}/v1/models", headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def _post_chat(self, data: dict, files: list = None):
        """POST a chat-completion form, uploading ``files`` if any are given.

        Shared implementation of ``chat_new`` and ``chat_continue``.

        Args:
            data: Form fields to send.
            files: Optional iterable of local paths to upload under the
                ``files`` multipart field.

        Returns:
            ``(content, session_id)`` taken from
            ``choices[0].message.content`` and ``x_session_id`` of the response.
        """
        uploads = []
        try:
            # Files are opened inside the try block so that a failure on a
            # later path still closes the handles that were already opened.
            for file_path in files or []:
                upload_name = Path(file_path).name
                handle = open(file_path, "rb")
                uploads.append(("files", (upload_name, handle)))

            r = httpx.post(f"{self.base_url}/v1/chat/completions", data=data,
                           files=uploads if uploads else None,
                           headers=self._headers(), timeout=self.timeout)
            r.raise_for_status()
            result = r.json()
            return result["choices"][0]["message"]["content"], result["x_session_id"]
        finally:
            for _, (_, handle) in uploads:
                handle.close()

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto", files: list = None):
        """Start a new chat session with a single user message.

        Args:
            model: Model identifier to send in the ``model`` field.
            user_message: Text of the user turn.
            agent_type: Value for the ``agent_type`` form field.
            files: Optional list of local file paths to upload.

        Returns:
            ``(assistant_content, session_id)``.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages), "agent_type": agent_type}
        return self._post_chat(data, files)

    def chat_continue(self, model: str, session_id: str, user_message: str,
                      agent_type: str = "auto", files: list = None):
        """Send a further user message within an existing session.

        Same as ``chat_new`` except that ``session_id`` is included in the
        form so the server can associate the request with that session.

        Returns:
            ``(assistant_content, session_id)``.
        """
        messages = [{"role": "user", "content": user_message}]
        data = {"model": model, "messages": json.dumps(messages),
                "session_id": session_id, "agent_type": agent_type}
        return self._post_chat(data, files)

    def get_session_artifacts(self, session_id: str):
        """Return the server's JSON listing of files generated during the session."""
        r = httpx.get(f"{self.base_url}/api/chat/sessions/{session_id}/artifacts",
                      headers=self._headers(), timeout=10.0)
        r.raise_for_status()
        return r.json()

    def download_artifact(self, session_id: str, filename: str, save_to: str = None):
        """
        Download a generated artifact file to local disk.

        Args:
            session_id: The session ID that generated the file
            filename: Name of the file to download (can include subdirectory, e.g., 'temp_charts/chart.png')
            save_to: Local path to save the file (default: current directory with original filename)

        Returns:
            str: Path to the downloaded file

        Example:
            client.download_artifact(session_id, "Warpage_Report_20250126.pptx", "./downloads/report.pptx")
        """
        r = httpx.get(
            f"{self.base_url}/api/chat/sessions/{session_id}/artifacts/{filename}",
            headers=self._headers(),
            timeout=60.0
        )
        r.raise_for_status()

        # Default target: the bare file name, dropping any server-side subdirectory
        if save_to is None:
            save_to = Path(filename).name

        # Make sure the destination directory exists before writing
        save_path = Path(save_to)
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # The whole response body is held in memory and written in one call
        with open(save_path, "wb") as f:
            f.write(r.content)

        return str(save_path)

# Configuration
# Environment variables take precedence so real credentials and server
# addresses do not have to be stored in the notebook; the previous literal
# values remain as defaults, so running without any variables set behaves
# as before.
import os

API_BASE_URL = os.environ.get("LLM_API_BASE_URL", 'http://10.198.112.203:10007')
USERNAME = os.environ.get("LLM_API_USERNAME", "ppt")
PASSWORD = os.environ.get("LLM_API_PASSWORD", "ppt")

# Initialize and login
client = LLMApiClient(API_BASE_URL, timeout=36000.0)  # 10 hours
client.login(USERNAME, PASSWORD)
models = client.list_models()

# Fail with a clear message instead of an IndexError/KeyError when the server
# reports no models.
if not models.get("data"):
    raise RuntimeError(f"No models returned by {API_BASE_URL}/v1/models")
MODEL = models["data"][0]["id"]  # first listed model is used for all phases

print(f"‚úì Logged in as: {USERNAME}")
print(f"‚úì Using model: {MODEL}")

## 2. Configure Data Files

In [None]:
# Input statistics files that the later phases upload with each request
stats_paths = [Path(name) for name in ("B8_1021_stats.json", "B8_1027_stats.json")]

# Report, per configured file, whether it is present and how large it is
print(f"Configured {len(stats_paths)} data file(s):\n")
for idx, stats_file in enumerate(stats_paths, start=1):
    if not stats_file.exists():
        print(f"  [{idx}] {stats_file.name} - ‚úó NOT FOUND")
        continue
    kilobytes = stats_file.stat().st_size / 1024
    print(f"  [{idx}] {stats_file.name} ({kilobytes:.1f} KB) - ‚úì")

# String form of the same paths, as passed to the client's `files` argument
file_paths_str = list(map(str, stats_paths))

## 3. Phase 1: Data Analysis

The AI will analyze your data and identify key patterns.

In [None]:
# Prompt for the first phase; the only interpolated value is the number of
# configured stats files.
analysis_prompt = f"""
Analyze {len(stats_paths)} warpage measurement JSON files attached.

Input Data Structure:
- Each file contain warpage statistics per PCB board
- Statistics: min, max, range (warpage value), mean, median, std, skewness, kurtosis
- PCA values (pc1, pc2) calculated within each source_pdf
- Filenames contain acquisition date/time (e.g., 1021 = October 21th)
- Note that usually, mean, median is not important. To assess warpage, range is the single most important feature.

Tasks:
1. Calculate overall statistics (mean, std, min, max of range across all files)
2. Identify PCA-based outliers using pc1, pc2 values. Look for PCA values that are quite a far from others
3. Compare production dates - which is better quality and why?
4. List specific outlier filenames with reasons
5. Save your results to a numpy array locally

**Required Output:**
- Total measurements count
- Outlier list with full filenames
- Production date comparison (winner + reason)
- Key concerns or patterns

Think HARD!
"""
banner = "=" * 80
print(banner, "PHASE 1: DATA ANALYSIS", banner, sep="\n")

# Opens a fresh session; the returned session_id is what later phases continue
start = time.time()
analysis_result, session_id = client.chat_new(
    MODEL,
    analysis_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

print(f"\n‚úì Analysis completed in {elapsed:.1f}s\n")
print(banner)
display(Latex(analysis_result))
print(banner)

## 4. Phase 2: Generate Visualizations

**Key:** AI reuses Phase 1 findings from conversation memory (not raw files).

In [None]:
# Prompt for the second phase; it asks the model to build on its Phase 1
# results and only interpolates the number of configured stats files.
viz_prompt = f"""
**PRIORITY: Use your Phase 1 analysis from conversation memory and saved numpy array.**

In Phase 1, you already:
- Analyzed {len(stats_paths)} datasets and loaded all data
- Identified PCA outliers with pc1, pc2 values
- Compared production dates
- Listed specific outlier filenames

**Avoid re-analyze raw files if possible. Use your Phase 1 findings and file.**
Files attached are ONLY for verification if needed.

**Task:** Create visualizations and classify outliers

**Outlier Classification:**
- **BAD outliers:** High mean/std/range (critical quality issues)
- **GOOD outliers:** Unusual PCA position but acceptable metrics
- **Normal:** Within PCA cluster, standard metrics

**Required Charts** (save to temp_charts/):
1. `pca_outliers_classified.png` - PC1 vs PC2 scatter (Blue=normal, Orange=good outlier, RED=bad outlier)
2. `bad_outliers_detail.png` - Bar chart comparing bad outliers vs average
3. `production_comparison.png` - Production date quality comparison
4. Additional charts as appropriate (distributions, trends, control charts, etc.)

**Style:** 300 DPI, seaborn whitegrid, professional colors

**Required Output:**
- List of generated chart files
- Bad outlier summary (file IDs + reasons)
- Production date insights

THINK HARD!
"""

banner = "=" * 80
print(banner, "PHASE 2: VISUALIZATION GENERATION", banner, sep="\n")

# Continues the session opened in Phase 1; the returned session id is discarded
start = time.time()
viz_result, _ = client.chat_continue(
    MODEL,
    session_id,
    viz_prompt,
    agent_type="auto",
    files=file_paths_str,
)
elapsed = time.time() - start

print(f"\n‚úì Visualizations completed in {elapsed:.1f}s\n")
print(banner)
display(Latex(viz_result))
print(banner)

## 5. Phase 3: PDF Report Assembly

**Key:** AI uses Phase 1 & 2 findings from conversation memory to create a beautiful, comprehensive PDF.

In [None]:
# Count the entries under the "files" key of every stats JSON.  The total is
# passed to the model in the prompt below so the cover page can state it
# (previously the value was computed but never used).
total_files = 0
for path in stats_paths:
    # JSON is read as UTF-8 explicitly rather than with the platform default
    with open(path, 'r', encoding="utf-8") as f:
        data = json.load(f)
    total_files += len(data.get('files', []))

# `{{timestamp}}` is escaped on purpose: the literal placeholder is sent to
# the model, which chooses the actual timestamp for the output file name.
pdf_prompt = f"""
**Task:** Generate a professional PDF report using ReportLab with the warpage analysis findings.

**Key Requirements:**
- Use A4 portrait orientation with 2.5cm margins
- Include: cover page, table of contents, executive summary, PCA scatter plot, bad outliers detail chart, production comparison chart, any additional charts from temp_charts/
- Consistent styling: blue headings (#1f4788), justified body text
- One section per page with PageBreak() between sections
- Center all images, maintaining aspect ratio (max 12cm height)
- Add header/footer on all pages except cover (page numbers)

**Structure:**
1. Cover page with title and total measurements (pre-computed count of entries in the attached JSON files: {total_files})
2. Table of contents
3. Executive summary with bullet points
4. PCA outlier classification (with image)
5. Bad outlier details (with image)
6. Production comparison (with image)
7. Additional charts from temp_charts/ directory
8. Recommendations page

**Output:** PDF file named `Warpage_Report_{{timestamp}}.pdf`

Use the attached files for visualization and context.
"""

print("=" * 80)
print("PHASE 3: PDF REPORT ASSEMBLY")
print("=" * 80)

# Continues the existing session; the returned session id is discarded
start = time.time()
pdf_result, _ = client.chat_continue(
    MODEL, session_id, pdf_prompt, agent_type="auto", files=file_paths_str
)

print(f"\n‚úì PDF report completed in {time.time() - start:.1f}s\n")
print("=" * 80)
display(Latex(pdf_result))
print("=" * 80)

# Hyperparameter Tuning

In [None]:
import os

# Root of the ML code tree; expanduser resolves the leading "~"
mother_dir = os.path.expanduser('~/scratch1/MeshGraphNets')
# Guide that the prompt instructs the model to read first
docs_path = os.path.join(mother_dir, 'CONFIG_AND_EXECUTION_GUIDE.md')

# Both paths above are interpolated into the request text
prompt = f"""
Write a python code that builds various configs for MeshGraphNets
The docs are located at {docs_path}, So first read the docs and then write the code.
First, think of various hyperparameters to tune.
Then, make a set of those hyperparameters.
Using the python code, make various config files using those hyperparameters.
Be aware that GPU time is limited, so don't make too many configs, choose your hyperparameters wisely.

When you are done, create a python script that copies the created config files to the {mother_dir} directory.
Change the names of the config files to the right ones and run the ML code.
"""

# No file is uploaded with this request: the guide is referenced by path in
# the prompt only.
response, session_id = client.chat_new(MODEL, prompt)

print(response)

In [None]:
import os

# Root of the ML code tree; expanduser resolves the leading "~"
mother_dir = os.path.expanduser('~/scratch1/MeshGraphNets')
# Guide that is uploaded together with the request below
docs_path = os.path.join(mother_dir, 'CONFIG_AND_EXECUTION_GUIDE.md')

# Instructions for the run; mother_dir is interpolated as the working directory
prompt = f"""
You are executing a long-running hyperparameter tuning experiment for ML models.

## Your Task
1. Read the attached documentation file carefully: use python coder.
2. Understand the configuration options and execution workflow
3. Plan out the hyperparameter tuning experiment
4. Build an example config file 
5. Execute the example config file with ML code.

## Important Requirements
- **Working Directory**: All code should run from `{mother_dir}`
- **Logging**: Use the log file implemented in the code and with timestamps, distinguish filenames with config
- **Error Handling**: If a single hyperparameter combination fails, log the error and continue with the next combination
- **Results**: Save final results to a CSV/JSON file with all hyperparameter combinations and their metrics

## Execution Guidelines
- Use `cd {mother_dir}` at the start of your Python code
- Do NOT stop until all hyperparameter combinations are tested
- At the end, provide a summary of the best hyperparameters found

Begin by reading the documentation, then execute the training.
Be sure to actually execute the code, not just plan.
"""

# New session with the guide attached as the single uploaded file
attachments = [docs_path]
response, session_id = client.chat_new(MODEL, prompt, files=attachments)

# Persist the session id so the run can be picked up again later
with open("active_session.txt", "w") as session_file:
    session_file.write(session_id)
print(f"Session ID saved: {session_id}")
print(response)