# Auto Agent + RAG Documents Demo

This notebook demonstrates how to **upload local documents to RAG** and use the **Auto Agent** to query RAG document collections.

Key concepts:
- Upload your own local documents (PDF, TXT, MD, DOCX, CSV, etc.) to RAG collections
- Browse and inspect RAG collections and their documents
- Manage collections (create, delete, remove specific documents)
- Use the `auto` agent type which intelligently routes queries to the appropriate agent
- When the query involves document retrieval, the auto agent selects the **ReAct agent**, which calls the **RAG tool**

### How the Auto Agent Routes to RAG

```
User Query
  -> Auto Agent (LLM decides: chat / react / plan_execute)
    -> ReAct Agent (if tools are needed)
      -> RAG Tool (if query involves internal documents)
        -> Semantic Search over existing collection
          -> LLM synthesizes answer from retrieved chunks
```

### Prerequisites

1. Both servers must be running (`python tools_server.py` then `python server.py`)
2. At least one RAG collection with uploaded documents must exist (create and populate one in Step 2 if you have none yet)
3. `RAG_DEFAULT_COLLECTION` in `config.py` must match the target collection name

## Setup and Configuration

In [None]:
import json
import os
from urllib.parse import quote

import httpx
from IPython.display import display, Markdown


class LLMApiClient:
    """Unified client for the LLM API server.

    Auth, model listing and chat calls go to ``base_url``.  The ``rag_*``
    helpers call a separate tools server whose base URL is supplied per call.
    Every method calls ``raise_for_status()`` on the response, so a 4xx/5xx
    reply raises ``httpx.HTTPStatusError`` instead of returning a body.
    """

    # Timeout (seconds) used for short auth/metadata/management requests.
    _SHORT_TIMEOUT = 10.0

    def __init__(self, base_url: str, timeout: float = 600.0):
        """Store the server address and build the long-request timeout.

        Args:
            base_url: Root URL of the main API server; a trailing slash is dropped.
            timeout: Seconds allowed for read/write/pool on long-running calls
                (chat, RAG query, upload).
        """
        self.base_url = base_url.rstrip("/")
        self.token = None
        # 50.0 is the default for phases not named explicitly (here: connect).
        self.timeout = httpx.Timeout(50.0, read=timeout, write=timeout, pool=timeout)

    def _headers(self):
        """Return the bearer-token header, or an empty dict before login."""
        return {"Authorization": f"Bearer {self.token}"} if self.token else {}

    @staticmethod
    def _segment(value) -> str:
        """Percent-encode *value* so it is safe as ONE URL path segment.

        ``safe=""`` also escapes ``/`` so a name such as ``"a/b"`` cannot be
        interpreted as two path components (or ``?``/``#`` as query/fragment).
        """
        return quote(str(value), safe="")

    @staticmethod
    def _tools_url(tools_base: str, path: str) -> str:
        """Join the tools-server base URL and an absolute *path* without ``//``."""
        return f"{tools_base.rstrip('/')}{path}"

    # ---- Auth ----

    def login(self, username: str, password: str):
        """Authenticate and remember the access token for later requests.

        Returns:
            The decoded login response (includes ``access_token``).
        """
        r = httpx.post(
            f"{self.base_url}/api/auth/login",
            json={"username": username, "password": password},
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        payload = r.json()  # decode once; reused for both the token and the return value
        self.token = payload["access_token"]
        return payload

    def list_models(self):
        """Return the decoded response of ``GET /v1/models``."""
        r = httpx.get(
            f"{self.base_url}/v1/models",
            headers=self._headers(),
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()

    # ---- Chat (auto agent) ----

    def _post_chat(self, form: dict):
        """POST *form* to the chat endpoint and return ``(response_text, session_id)``.

        The payload is sent as form fields (``data=``), not as a JSON body.
        """
        r = httpx.post(
            f"{self.base_url}/v1/chat/completions",
            data=form,
            headers=self._headers(),
            timeout=self.timeout,
        )
        r.raise_for_status()
        result = r.json()
        return result["choices"][0]["message"]["content"], result["x_session_id"]

    def chat_new(self, model: str, user_message: str, agent_type: str = "auto"):
        """Start a new chat session. Returns (response_text, session_id)."""
        messages = [{"role": "user", "content": user_message}]
        return self._post_chat(
            {
                "model": model,
                # The message list travels as a JSON string inside a form field.
                "messages": json.dumps(messages),
                "agent_type": agent_type,
            }
        )

    def chat_continue(self, model: str, session_id: str, user_message: str, agent_type: str = "auto"):
        """Continue an existing session. Returns (response_text, session_id)."""
        messages = [{"role": "user", "content": user_message}]
        return self._post_chat(
            {
                "model": model,
                "messages": json.dumps(messages),
                "session_id": session_id,
                "agent_type": agent_type,
            }
        )

    # ---- RAG management (direct tools-server calls) ----

    def rag_list_collections(self, tools_base: str):
        """List all RAG collections for the authenticated user."""
        r = httpx.get(
            self._tools_url(tools_base, "/api/tools/rag/collections"),
            headers=self._headers(),
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()

    def rag_list_documents(self, tools_base: str, collection_name: str):
        """List documents in a RAG collection."""
        path = f"/api/tools/rag/collections/{self._segment(collection_name)}/documents"
        r = httpx.get(
            self._tools_url(tools_base, path),
            headers=self._headers(),
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()

    def rag_query_direct(self, tools_base: str, query: str, collection_name: str, max_results: int = 5):
        """Query RAG directly via the tools server (bypasses the agent)."""
        r = httpx.post(
            self._tools_url(tools_base, "/api/tools/rag/query"),
            headers=self._headers(),
            json={
                "query": query,
                "collection_name": collection_name,
                "max_results": max_results,
            },
            timeout=self.timeout,
        )
        r.raise_for_status()
        return r.json()

    def rag_upload_document(self, tools_base: str, collection_name: str, file_path: str):
        """Upload a local document to a RAG collection.

        Raises:
            FileNotFoundError: If *file_path* does not exist.
        """
        from pathlib import Path

        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Keep the file handle open for the duration of the multipart request.
        with open(file_path, "rb") as f:
            files = {"file": (file_path.name, f, "application/octet-stream")}
            data = {"collection_name": collection_name}

            r = httpx.post(
                self._tools_url(tools_base, "/api/tools/rag/upload"),
                headers=self._headers(),
                files=files,
                data=data,
                timeout=self.timeout,
            )

        r.raise_for_status()
        return r.json()

    def rag_create_collection(self, tools_base: str, collection_name: str):
        """Create a new RAG collection."""
        r = httpx.post(
            self._tools_url(tools_base, "/api/tools/rag/collections"),
            headers=self._headers(),
            json={"collection_name": collection_name},
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()

    def rag_delete_collection(self, tools_base: str, collection_name: str):
        """Delete a RAG collection."""
        path = f"/api/tools/rag/collections/{self._segment(collection_name)}"
        r = httpx.delete(
            self._tools_url(tools_base, path),
            headers=self._headers(),
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()

    def rag_delete_document(self, tools_base: str, collection_name: str, document_id: str):
        """Delete a specific document from a collection."""
        path = (
            f"/api/tools/rag/collections/{self._segment(collection_name)}"
            f"/documents/{self._segment(document_id)}"
        )
        r = httpx.delete(
            self._tools_url(tools_base, path),
            headers=self._headers(),
            timeout=self._SHORT_TIMEOUT,
        )
        r.raise_for_status()
        return r.json()


# -------------------------------------------------------------------
# Configuration  (adjust to your environment)
#
# Each value may be overridden through an environment variable
# (LLM_DEMO_API_BASE_URL, LLM_DEMO_TOOLS_BASE_URL, LLM_DEMO_USERNAME,
# LLM_DEMO_PASSWORD) so real credentials never have to be saved in the
# notebook.  The fallbacks are the local-development defaults.
# -------------------------------------------------------------------
API_BASE_URL = os.environ.get("LLM_DEMO_API_BASE_URL", "http://localhost:10007")      # Main API server
TOOLS_BASE_URL = os.environ.get("LLM_DEMO_TOOLS_BASE_URL", "http://localhost:10006")  # Tools API server
USERNAME = os.environ.get("LLM_DEMO_USERNAME", "admin")
PASSWORD = os.environ.get("LLM_DEMO_PASSWORD", "administrator")

# 600 s read/write budget: agent and RAG calls can take minutes.
client = LLMApiClient(API_BASE_URL, timeout=600.0)
print("\u2713 Client initialized")
print(f"  Main server : {API_BASE_URL}")
print(f"  Tools server: {TOOLS_BASE_URL}")

## Step 1: Authenticate and Discover Model

In [None]:
# Authenticate first: list_models sends the bearer token obtained here.
client.login(USERNAME, PASSWORD)
models = client.list_models()

# Fail with an actionable message instead of a bare IndexError when the
# server has no model registered.
available_models = models.get("data") or []
if not available_models:
    raise RuntimeError(
        "The server returned no models from /v1/models; cannot select one for this demo."
    )

# Use the first model the server advertises for every chat call below.
MODEL = available_models[0]["id"]

print(f"\u2713 Logged in as: {USERNAME}")
print(f"\u2713 Using model : {MODEL}")

## Step 2: Upload Local Documents to RAG (Optional)

If you want to add your own documents to RAG, use this section. Skip to Step 3 if you already have documents uploaded.

### Supported File Formats

- **Text**: `.txt`, `.md`
- **Documents**: `.pdf`, `.docx`
- **Data**: `.json`, `.csv`, `.xlsx`, `.xls`

In [None]:
# # Create a new RAG collection and upload documents
# from pathlib import Path

# # Step 1: Create collection
# collection_name = "USB"  # Change this to your desired collection name

# try:
#     print(f"Creating collection '{collection_name}'...")
#     result = client.rag_create_collection(TOOLS_BASE_URL, collection_name)
    
#     if result.get("success"):
#         print(f"✓ Collection created successfully!")
#         print(f"  Collection name: {collection_name}\n")
#     else:
#         print(f"✗ Failed to create collection: {result.get('error')}\n")
# except Exception as e:
#     print(f"✗ Error creating collection: {e}\n")

# # Step 2: Upload your PDF files
# # Replace with your actual PDF file paths
# custom_pdf_files = [
#     "./USB 3.2 Revision 1.1.pdf",
#     "./usb_20.pdf"
# ]

# print(f"Uploading {len(custom_pdf_files)} documents...\n")

# for pdf_file in custom_pdf_files:
#     # Check if file exists
#     if not Path(pdf_file).exists():
#         print(f"⚠️  File not found: {pdf_file}\n")
#         continue
    
#     print(f"📤 Uploading: {pdf_file}")
    
#     try:
#         result = client.rag_upload_document(TOOLS_BASE_URL, collection_name, pdf_file)
        
#         if result.get('success'):
#             print(f"  ✓ Success! Chunks created: {result.get('chunks_created')}")
#             print(f"  Total chunks in collection: {result.get('total_chunks')}\n")
#         else:
#             print(f"  ✗ Failed: {result.get('error')}\n")
#     except Exception as e:
#         print(f"  ✗ Error: {str(e)}\n")

## Step 3: Browse Existing RAG Collections

Let's see which collections and documents are available after any uploads.

In [None]:
# Ask the tools server which collections the logged-in user can see.
collections_result = client.rag_list_collections(TOOLS_BASE_URL)

if not collections_result.get("success"):
    # Show the raw payload so the failure reason is visible.
    print("ERROR: Could not list collections.")
    print(collections_result)
else:
    collections = collections_result["collections"]
    print(f"Found {len(collections)} collection(s):\n")
    # (label, response key) pairs, printed in this order for every collection.
    summary_fields = (
        ("Collection", "name"),
        ("Documents", "documents"),
        ("Chunks", "chunks"),
        ("Created", "created_at"),
    )
    for entry in collections:
        for label, key in summary_fields:
            # Labels are padded to 10 columns so the colons line up.
            print(f"  {label:<10} : {entry[key]}")
        print()

## Step 4: Inspect Documents in a Collection

Pick the collection you want the auto agent to query.  
Set `COLLECTION_NAME` below to match one of the collections listed above.

In [None]:
# === Set this to the collection you want to use ===
COLLECTION_NAME = "USB"

# Fetch the per-document listing for the chosen collection.
docs_result = client.rag_list_documents(TOOLS_BASE_URL, COLLECTION_NAME)

if not docs_result.get("success"):
    # Dump the raw response to help diagnose the failure.
    print(f"ERROR: Could not list documents in '{COLLECTION_NAME}'.")
    print(docs_result)
else:
    # Collection-level totals first, labels padded to 12 columns.
    for label, key in (
        ("Collection", "collection_name"),
        ("Total docs", "total_documents"),
        ("Total chunks", "total_chunks"),
    ):
        print(f"{label:<12} : {docs_result[key]}")

    print("\nDocuments:")
    for item in docs_result["documents"]:
        name, chunk_count, uploaded = item["name"], item["chunks"], item["uploaded_at"]
        print(f"  - {name}  ({chunk_count} chunks, uploaded {uploaded})")

## Step 5: Verify RAG_DEFAULT_COLLECTION

The **Auto Agent -> ReAct -> RAG tool** pipeline uses `RAG_DEFAULT_COLLECTION` from `config.py` to decide which collection to search.  
Make sure the value in `config.py` matches the collection you want to query **before starting the servers**.

```python
# In config.py:
RAG_DEFAULT_COLLECTION = "default"  # <-- must match your target collection
```

If you need to change it, edit `config.py` and restart both servers.

In [None]:
# Quick sanity check: read the current config value.
# NOTE: this inspects the local config.py file; the running servers only see
# the value they loaded at start-up, so restart them after any edit.
import importlib
import sys

# Add project root so we can import config.  Guarded so that re-running this
# cell does not keep stacking duplicate ".." entries onto sys.path.
if ".." not in sys.path:
    sys.path.insert(0, "..")
import config as cfg

# Reload so edits made to config.py after the first import are picked up.
importlib.reload(cfg)

print(f"RAG_DEFAULT_COLLECTION = \"{cfg.RAG_DEFAULT_COLLECTION}\"")
print(f"Target collection      = \"{COLLECTION_NAME}\"")

if cfg.RAG_DEFAULT_COLLECTION == COLLECTION_NAME:
    print("\n\u2713 Config matches. The auto agent will query the correct collection.")
else:
    print(f"\n\u2717 MISMATCH! Edit config.py to set RAG_DEFAULT_COLLECTION = \"{COLLECTION_NAME}\"")
    print("  Then restart both servers.")

## Step 6: Query with the Auto Agent

Now send a question through the **auto agent**.  
The auto agent will:
1. Analyze the query and decide it needs document retrieval
2. Route to the **ReAct agent**
3. The ReAct agent calls the **RAG tool** to search the collection
4. Return a synthesized answer based on retrieved document chunks

> **Tip**: Phrasing the query to mention "documents" or "internal docs" helps the ReAct agent select the RAG tool over web search.

In [None]:
# The wording mentions "internal documents" on purpose, to steer the agent
# toward the RAG tool rather than web search.
query = "Search the internal documents and tell me what they contain."

print("Query: {}".format(query))
print("Agent: auto")
print("Waiting for response...\n")

# Opens a fresh session; session_id is reused by the follow-up cell.
response, session_id = client.chat_new(MODEL, query, agent_type="auto")

print(f"Session ID: {session_id}", end="\n\n")

# Render the answer as Markdown in the notebook output.
display(Markdown(response))

In [None]:
query = "C-PHYÍ∞Ä 3.9GspsÎ°ú ÎèôÏûëÌï† Îïå Insertion Loss Ïä§ÌéôÏùÑ ÏïåÎ†§Ï§ò"

print(f"Query: {query}")
print(f"Agent: auto")
print("Waiting for response...\n")

response, session_id = client.chat_new(
    model=MODEL,
    user_message=query,
    agent_type="auto",
)

print(f"Session ID: {session_id}\n")
display(Markdown(response))

## Step 7: Follow-Up Query (Same Session)

Continue the conversation in the same session.  
The agent retains previous context so follow-up questions work naturally.

In [None]:
# Follow-up question sent into the session opened by the previous query cell.
followup = "Based on the documents, give me a more detailed summary of the key technical specifications."

print("Follow-up: {}".format(followup))
print("Waiting for response...\n")

# The returned session id is not needed again, hence the throwaway name.
response2, _ = client.chat_continue(MODEL, session_id, followup, agent_type="auto")

display(Markdown(response2))

## Step 8: Compare — Direct RAG Query vs Auto Agent

To illustrate the difference, here we call the RAG tool **directly** (bypassing the agent) and compare it with the auto agent response for the same question.

In [None]:
comparison_query = "C-PHYÍ∞Ä 3.9GspsÎ°ú ÎèôÏûëÌï† Îïå Insertion Loss Ïä§ÌéôÏùÑ ÏïåÎ†§Ï§ò"

# --- Direct RAG query (tools server) ---
print("=" * 70)
print("[A] Direct RAG Query (via tools server)")
print("=" * 70)

direct_result = client.rag_query_direct(
    TOOLS_BASE_URL,
    query=comparison_query,
    collection_name=COLLECTION_NAME,
    max_results=5,
)

if direct_result.get("success"):
    display(Markdown(f"**Answer:**\n\n{direct_result['answer']}"))
    data = direct_result.get("data", {})
    print(f"\nResults: {data.get('num_results', 0)} chunks retrieved")
    print(f"Exec time: {direct_result.get('metadata', {}).get('execution_time', 0):.2f}s")
    for i, doc in enumerate(data.get("documents", []), 1):
        print(f"  [{i}] {doc['document']} chunk {doc['chunk_index']} (score {doc.get('score', 0):.3f})")
else:
    print(f"ERROR: {direct_result.get('error')}")

In [None]:
# --- Auto Agent query (main server) ---
rule = "=" * 70
print(rule, "[B] Auto Agent Query (via main server)", rule, sep="\n")

# Same question as the direct query, prefixed so the agent picks the RAG tool.
agent_response, agent_sid = client.chat_new(
    MODEL,
    "Search internal documents: {}".format(comparison_query),
    agent_type="auto",
)

display(Markdown("**Answer:**\n\n{}".format(agent_response)))
print("\nSession: {}".format(agent_sid))

### Observations

| Aspect | Direct RAG | Auto Agent |
|--------|-----------|------------|
| **How it works** | Calls the RAG tool endpoint directly | LLM decides which tool to use |
| **Collection** | You specify the collection name explicitly | Uses `RAG_DEFAULT_COLLECTION` from config |
| **Extra reasoning** | None — raw retrieval + synthesis | ReAct agent adds reasoning steps around the RAG call |
| **Session memory** | Stateless (each call is independent) | Maintains conversation history |
| **Best for** | Programmatic retrieval, batch queries | Interactive Q&A, multi-turn conversations |

## Step 9: Custom Query — Try Your Own

Edit the query below and run to ask anything about your existing documents.

In [None]:
my_query = "Search the internal documents and explain ..."  # <-- edit this

# Every run of this cell starts a brand-new session with the auto agent.
response, sid = client.chat_new(MODEL, my_query, agent_type="auto")

print(f"Session: {sid}", end="\n\n")
display(Markdown(response))

## Summary

This notebook demonstrated:

1. **Uploading local documents to RAG** — Add your own documents (PDF, TXT, MD, DOCX, CSV, etc.) to RAG collections
2. **Browsing existing RAG collections** — List collections and inspect their documents
3. **Config verification** — Ensure `RAG_DEFAULT_COLLECTION` points to the right collection
4. **Auto Agent routing** — The `auto` agent analyzes the query and routes to `react` when document retrieval is needed
5. **Multi-turn conversation** — Follow-up questions within the same session retain context
6. **Direct vs Agent comparison** — Direct RAG calls are stateless and explicit; auto agent adds reasoning and session memory
7. **Collection management** — Create/delete collections and remove specific documents

### Agent Routing Flow

```
auto agent
  ├── "chat"          → Simple Q&A (no tools)
  ├── "react"         → Single tool call (websearch / python_coder / rag)
  └── "plan_execute"  → Multi-step tasks with planning
```

### Tips for Reliable RAG Routing

- Include phrases like *"search internal documents"*, *"from the documents"*, or *"based on uploaded files"* in your query
- The ReAct system prompt instructs: *"Use rag only when explicitly asked to search internal documents"*
- If the auto agent routes to `chat` instead of `react`, rephrase to make the document-search intent explicit

### API Endpoints Used

| Endpoint | Purpose |
|----------|--------|
| `POST /api/auth/login` | Authentication |
| `GET /v1/models` | Discover available models |
| `POST /api/tools/rag/upload` | Upload document to RAG collection |
| `POST /api/tools/rag/collections` | Create new collection |
| `GET /api/tools/rag/collections` | List existing collections |
| `GET /api/tools/rag/collections/{name}/documents` | List documents in a collection |
| `DELETE /api/tools/rag/collections/{name}/documents/{id}` | Delete specific document |
| `DELETE /api/tools/rag/collections/{name}` | Delete entire collection |
| `POST /api/tools/rag/query` | Direct RAG query (tools server) |
| `POST /v1/chat/completions` | Chat via auto agent (main server) |

## Step 10: Collection Management (Optional)

You can also create new collections, delete collections, or delete specific documents.

In [None]:
# Create a new collection
# Uncomment to create

# new_collection_name = "my_new_collection"
# try:
#     result = client.rag_create_collection(TOOLS_BASE_URL, new_collection_name)
#     if result.get("success"):
#         print(f"✓ Collection '{new_collection_name}' created successfully")
#     else:
#         print(f"✗ Failed: {result.get('error')}")
# except Exception as e:
#     print(f"✗ Error: {e}")

In [None]:
# Delete a document from a collection
# First, list documents to get the document ID, then uncomment to delete

# collection_name = "default"
# document_id = "abc123..."  # Get this from list_documents result

# try:
#     result = client.rag_delete_document(TOOLS_BASE_URL, collection_name, document_id)
#     if result.get("success"):
#         print(f"✓ Document deleted successfully")
#         print(f"  Deleted: {result.get('deleted_document')}")
#         print(f"  Remaining documents: {result.get('remaining_documents')}")
#     else:
#         print(f"✗ Failed: {result.get('error')}")
# except Exception as e:
#     print(f"✗ Error: {e}")

In [None]:
# Delete an entire collection (use with caution!)
# Uncomment to delete

# collection_to_delete = "test_collection"
# try:
#     result = client.rag_delete_collection(TOOLS_BASE_URL, collection_to_delete)
#     if result.get("success"):
#         print(f"✓ Collection '{collection_to_delete}' deleted successfully")
#     else:
#         print(f"✗ Failed: {result.get('error')}")
# except Exception as e:
#     print(f"✗ Error: {e}")