In [None]:
# ========== COMPLETE TEST NOTEBOOK CELL ==========
import os
import sys
import json
import asyncio
from dotenv import load_dotenv

# Put the parent of the current working directory on the import path
# (appended once only) so modules located there can be imported.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.append(PROJECT_ROOT)

load_dotenv()

from langchain_core.documents import Document
from langchain_chroma import Chroma
from ingestion.ingestion import HFEmbeddings
from agents.supervisor_agent import get_or_create_supervisor, run_supervisor_pipeline

print("üöÄ Starting comprehensive DeepAgents test...")

# 1. SETUP ENVIRONMENT
print("\nüì¶ 1. Loading Vector Store...")
CHROMA_DB_DIR = os.path.join(PROJECT_ROOT, "data", "chromadb")
print(f"Loading from: {CHROMA_DB_DIR}")

embeddings = HFEmbeddings()
if os.path.exists(CHROMA_DB_DIR):
    vectordb = Chroma(
        persist_directory=CHROMA_DB_DIR,
        embedding_function=embeddings
    )
    print("‚úÖ Vector Store loaded successfully.")
else:
    print("‚ùå ERROR: Chroma DB directory not found!")
    raise FileNotFoundError(CHROMA_DB_DIR)

# 2. FETCH DOCUMENTS FOR BM25
# Read every stored chunk back out of the vector store and rebuild Document
# objects from the raw texts and their metadata.
print("\nüìã 2. Fetching documents from Vector Store for BM25...")
collection_data = vectordb.get()
# `or []` also covers keys that are present but hold None.
texts = collection_data.get('documents') or []
metadatas = collection_data.get('metadatas') or []
if not metadatas:
    # No metadata returned at all: keep the texts rather than dropping
    # every document, pairing each one with an empty metadata dict below.
    metadatas = [None] * len(texts)

docs = [
    # A per-item metadata entry may be None; substitute an empty dict so the
    # Document is always built with a mapping.
    Document(page_content=text, metadata=meta or {})
    for text, meta in zip(texts, metadatas)
    if text  # skip None/empty texts
]
print(f"‚úÖ Retrieved {len(docs)} documents from Chroma.")

# 3. TEST RETRIEVAL DIRECTLY (CRITICAL VALIDATION)
# Run one hybrid query against HybridRetriever and preview the first hit.
# Any failure is reported and the cell keeps going.
print("\nüîç 3. Direct HybridRetriever test...")
try:
    from ingestion.retrieval import HybridRetriever

    retriever = HybridRetriever(vectordb, docs)
    test_query = "personal protective equipment PPE servicing washer"
    chunks, diagnostics = retriever.retrieve(test_query, top_k=3, strategy="hybrid")

    print(f"‚úÖ Direct retrieval: {len(chunks)} chunks")
    if not chunks:
        print("‚ö†Ô∏è  WARNING: Direct retrieval returned EMPTY chunks!")
    else:
        print("üìÑ First chunk preview:")
        top_chunk = chunks[0]
        chunk_meta = getattr(top_chunk, 'metadata', {})
        print(f"Source: {chunk_meta.get('source', 'N/A')}")
        print(f"Text:  {top_chunk.page_content[:200]}...")
        print(f"Score: {getattr(top_chunk, 'score', 'N/A')}")
except Exception as e:
    print(f"‚ùå Retrieval test FAILED: {e}")

# 4. INSTANTIATE SUPERVISOR AGENT
# Obtain the supervisor agent from the project helper, handing it the vector
# store and the Document list rebuilt above.
# NOTE(review): the helper's name suggests it caches/reuses an existing
# instance — confirm in agents.supervisor_agent.
print("\nü§ñ 4. Initializing DeepAgents Supervisor...")
agent = get_or_create_supervisor(vectordb, docs)
print("‚úÖ Agent initialized. Starting stream...")

# 5. SAFE AGENT EXECUTION WITH TIMEOUT
async def safe_agent_run(agent, question, timeout=90):
    """Stream one question through ``agent`` and print each state update.

    The whole stream is consumed inside a single coroutine that is bounded by
    ``asyncio.wait_for``. ``wait_for`` accepts an awaitable, not an async
    generator, so the iteration has to live in its own coroutine.

    Args:
        agent: Object exposing ``astream(inputs, config=..., stream_mode=...)``
            as an async iterator of state dicts.
            NOTE(review): assumed to be a LangGraph-style compiled graph —
            confirm against ``get_or_create_supervisor``.
        question: User question sent as the single user message.
        timeout: Maximum number of seconds allowed for the entire stream.

    Returns:
        True when the stream finished normally; False on timeout or on any
        exception (which is printed together with its traceback).
    """
    print(f"\n‚ùì 5. Testing question: '{question}'")
    print("‚è≥ Streaming with timeout...")

    stream_config = {"configurable": {"thread_id": "test"}}
    inputs = {"messages": [{"role": "user", "content": question}]}

    async def _consume_stream():
        # "values" mode yields the full state after each step, which is the
        # shape the "messages" lookup below expects.
        async for chunk in agent.astream(
            inputs,
            config=stream_config,
            stream_mode="values",
        ):
            if "messages" in chunk and chunk["messages"]:
                msg = chunk["messages"][-1]
                if hasattr(msg, "pretty_print"):
                    msg.pretty_print()
                elif isinstance(msg, dict) and "content" in msg:
                    print(f"[AI] {msg['content'][:200]}...")

    try:
        await asyncio.wait_for(_consume_stream(), timeout=timeout)
    except asyncio.TimeoutError:
        print(f"\n‚ùå TIMEOUT after {timeout}s - Agent is hanging!")
        print("üîç Check: DeepAgents tool binding, HybridRetriever.retrieve(), LLM timeouts")
        return False
    except Exception as e:
        print(f"\n‚ùå Agent ERROR: {e}")
        import traceback
        traceback.print_exc()
        return False
    return True

# 6. RUN TESTS
question = "What personal protective equipment (PPE) is recommended while servicing the washer?"
success = await safe_agent_run(agent, question)

# 7. FALLBACK SYNCHRONOUS TEST
if not success:
    print("\nüîÑ 7. Fallback synchronous test...")
    try:
        result = run_supervisor_pipeline(question, vectordb, docs)
        print("\nüìä Final result:")
        print(json.dumps(json.loads(result), indent=2) if result.startswith('{') else result)
    except Exception as e:
        print(f"‚ùå Sync test also failed: {e}")

print("\nüéâ Test complete!")


Loading Vector Store from: c:\Users\saich\Downloads\assignment\data\chroma_db
Vector Store loaded successfully.
Fetching documents from Vector Store for BM25...
Retrieved 353 documents from Chroma.
Agent initialized. Starting stream...

User Question: What personal protective equipment (PPE) is recommended while servicing the washer?


What personal protective equipment (PPE) is recommended while servicing the washer?

What personal protective equipment (PPE) is recommended while servicing the washer?
Tool Calls:
  task (call_2UPATYUYuJu4QCsPnuJazqxA)
 Call ID: call_2UPATYUYuJu4QCsPnuJazqxA
  Args:
    description: Determine the recommended personal protective equipment (PPE) while servicing washers, including safety measures and specific gear required.
    subagent_type: query-analyzer
Name: task

{
  "intent": "missing_data",
  "retrieval_strategy": "bm25",
  "top_k": 5,
  "query": "recommended personal protective equipment PPE while servicing washers safety measures specific gear re

CancelledError: 