In [None]:
import os
import sys
import json
import asyncio
from dotenv import load_dotenv

# Treat the current working directory as the project root and make it
# importable, so the project-local packages imported below (`ingestion`,
# `agents`) resolve. This assumes the notebook is launched from that directory.
PROJECT_ROOT = os.path.abspath(os.getcwd())
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from langchain_core.documents import Document
from langchain_community.vectorstores import Chroma
from ingestion.ingestion import HFEmbeddings
from agents.supervisor_agent import get_or_create_supervisor

# 1. Setup Environment
load_dotenv()

# 2. Load Existing Vector Store
CHROMA_DB_DIR = os.path.join(PROJECT_ROOT, "data", "chroma_db")
print(f"Loading Vector Store from: {CHROMA_DB_DIR}")

# Initialize Embeddings
embeddings = HFEmbeddings()

if os.path.exists(CHROMA_DB_DIR):
    vectordb = Chroma(
        persist_directory=CHROMA_DB_DIR, 
        embedding_function=embeddings
    )
    print("Vector Store loaded successfully.")
    
    # 3. Fetch Documents (Required for BM25 in Supervisor)
    # We need to reconstruct the list of Documents from the vector store for the HybridRetriever
    print("Fetching documents from Vector Store for BM25...")
    start = asyncio.get_event_loop().time()
    
    # collection.get() returns dict with 'documents', 'metadatas', 'ids'
    collection_data = vectordb.get()
    texts = collection_data.get("documents", [])
    metadatas = collection_data.get("metadatas", [])
    
    docs = []
    if texts and metadatas:
        for text, meta in zip(texts, metadatas):
            if text: # ensure text is not None
                docs.append(Document(page_content=text, metadata=meta))
    
    print(f"Retrieved {len(docs)} documents from Chroma.")
    
    # 4. Instantiate the Agent
    agent = get_or_create_supervisor(vectordb, docs)
    print("Agent initialized. Starting stream...\n")

    # 5. Stream the agent
    async def run_stream():
        # You can change the question here to something relevant to your documents
        question = "how to start machine?"
        print(f"User Question: {question}\n")
        
        async for chunk in agent.astream(
            {"messages": [{"role": "user", "content": question}]},
            stream_mode="values"
        ):
            if "messages" in chunk:
                chunk["messages"][-1].pretty_print()

    await run_stream()

else:
    print(f"ERROR: Chroma DB directory not found at {CHROMA_DB_DIR}")
    print("Please make sure you have run the ingestion process or have the 'data/chroma_db' folder.")

  from .autonotebook import tqdm as notebook_tqdm
