In [1]:
# Put the project root (two levels above the current working directory) at the
# front of sys.path so absolute imports of project packages resolve.
import os
import sys

project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

In [2]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the API keys used by the agents imported below — confirm).
load_dotenv()

True

In [3]:
# import importlib
# import agents.rag_ingest as rag_ingest
# importlib.reload(rag_ingest)

from agents.rag_ingest import initialize_vectorstore_with_rag_chain

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [4]:
from pathlib import Path

# PDF to ingest into the vector store, given as a remote URL.
FILE_PATH = "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"
TOP_K = 2  # reduce retrieved docs per query for speed
# Local path handed to Milvus as its URI. A fixed location is used instead of a
# temporary directory from mkdtemp() so the store persists between runs.
milvus_uri = "./milvus_demo.db"
# Collection name passed to initialize_vectorstore_with_rag_chain.
collection_name="vectordb"

In [5]:
import os
import shutil

# Clear the store left over from a previous run so the collection is rebuilt
# with the current embedding dimension. The path is derived from `milvus_uri`
# itself (not a separately spelled literal) so the cleanup always targets the
# store that is actually opened by the ingestion step.
stale_store = os.path.normpath(milvus_uri)
if os.path.lexists(stale_store):
    if os.path.isdir(stale_store) and not os.path.islink(stale_store):
        shutil.rmtree(stale_store)
    else:
        # A local store may be a single file; rmtree only works on directories.
        os.remove(stale_store)
    print("✓ Deleted old Milvus database to resolve dimension mismatch")

In [6]:
# Build the RAG chain by handing the source document, the retrieval depth and
# the Milvus location/collection to the project's ingestion helper.
# NOTE(review): presumably this fetches, parses, embeds and indexes the PDF —
# the helper's body is not visible here; confirm in agents.rag_ingest.
rag_chain = initialize_vectorstore_with_rag_chain(
    FILE_PATH=FILE_PATH,
    TOP_K=TOP_K,
    milvus_uri=milvus_uri,
    collection_name=collection_name,
)

2026-01-08 13:12:11,671 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2026-01-08 13:12:11,755 - INFO - Going to convert document batch...
2026-01-08 13:12:11,755 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2026-01-08 13:12:11,764 - INFO - Loading plugin 'docling_defaults'
2026-01-08 13:12:11,765 - INFO - Registered picture descriptions: ['vlm', 'api']
2026-01-08 13:12:11,772 - INFO - Loading plugin 'docling_defaults'
2026-01-08 13:12:11,776 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2026-01-08 13:12:12,562 - INFO - Auto OCR model selected ocrmac.
2026-01-08 13:12:12,569 - INFO - Loading plugin 'docling_defaults'
2026-01-08 13:12:12,571 - INFO - Registered layout engines: ['docling_layout_default', 'docling_experimental_table_crops_layout']
2026-01-08 13:12:12,575 - INFO - Accelerator device: 'mps'
2026-01-08 13:12:15,182 - INFO - Loading plugin 'docling_defaul

In [7]:
from agents.retrieval_orchestrator_agent import create_retrieval_orchestrator_agent, Context
from agents.resoning_agent import create_reasoning_agent

  class TavilyResearch(BaseTool):  # type: ignore[override, override]
  class TavilyResearch(BaseTool):  # type: ignore[override, override]


In [8]:
# Instantiate the two agents that the graph nodes below call.
retrieval_orchestrator_agent = create_retrieval_orchestrator_agent()
# The reasoning factory returns both the agent and the prompt template that
# invoke_reasoning formats with the user question and the retrieved context.
reasoning_agent, reasoning_prompt = create_reasoning_agent()

In [9]:
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.types import Command
from typing import Annotated, Any

In [10]:
class MainState(MessagesState):
    """Shared state passed between the nodes of the main graph.

    Extends ``MessagesState`` with the retrieval output, the final answer and
    the human-in-the-loop bookkeeping fields declared below.
    """

    # NOTE(review): if MessagesState is a TypedDict, the `= None` defaults below
    # are not applied to state instances — confirm no node reads these keys
    # before another node has written them.
    retrieval_results: Annotated[dict, "The retrieval results from the retrieval orchestrator agent"]
    final_answer: Annotated[str, "The final answer generated by the reasoning agent"]
    human_approval: Annotated[bool | None, "Whether the human approved the final answer"] = None
    rag_chain: Annotated[Any, "The RAG chain to use for retrieval"]
    pending_review: Annotated[Any | None, "HITL review configs returned on interrupt"] = None

In [11]:
def invoke_retrieval_orchestration(state: MainState):
    """Run the retrieval orchestrator agent on the user's question.

    Only the first message in ``state["messages"]`` is forwarded to the agent.
    The RAG chain is read from ``state["rag_chain"]`` so the node uses the
    chain supplied as graph input; the notebook-level ``rag_chain`` is used
    only as a fallback when the state does not carry one.

    Returns:
        A state update containing the agent's messages and, under
        ``retrieval_results``, the ``results`` entry of its structured response.
    """
    chain = state.get("rag_chain")
    if chain is None:
        # Backward-compatible fallback for callers that did not pass the chain as input.
        chain = rag_chain

    result = retrieval_orchestrator_agent.invoke(
        {"messages": [state["messages"][0]]},
        context=Context(chain),
    )
    return {
        "messages": result["messages"],
        "retrieval_results": result["structured_response"]["results"],
    }

In [12]:
def invoke_reasoning(state: MainState):
    """Ask the reasoning agent to answer the question from the retrieved context.

    The reasoning prompt is formatted with the content of the first message in
    the state and with ``state["retrieval_results"]``, then sent as a single
    system message on the fixed "reasoning-thread" thread.

    Returns:
        If the agent result contains ``"__interrupt__"``: the agent messages
        plus the ``review_configs`` of the last interrupt under
        ``pending_review``. Otherwise: the agent messages, the content of the
        last message (or "" when there are none) as ``final_answer``, and
        ``pending_review`` reset to ``None``.
    """
    system_prompt = reasoning_prompt.format(
        user_question=state["messages"][0].content,
        context=state["retrieval_results"],
    )
    run_config = {"configurable": {"thread_id": "reasoning-thread"}}

    outcome = reasoning_agent.invoke(
        {"messages": [("system", system_prompt)]},
        config=run_config,
    )
    agent_messages = outcome["messages"]

    # No interrupt: the last message carries the answer and nothing awaits review.
    if "__interrupt__" not in outcome:
        answer = agent_messages[-1].content if agent_messages else ""
        return {
            "messages": agent_messages,
            "final_answer": answer,
            "pending_review": None,
        }

    # Interrupted for human review: keep the review configs; the graph edges decide routing.
    last_interrupt = outcome["__interrupt__"][-1]
    return {
        "messages": agent_messages,
        "pending_review": last_interrupt.value["review_configs"],
    }

In [13]:
def human_review(state: MainState):
    """Resume the interrupted reasoning agent with an approval decision.

    No human is consulted yet: a single ``{"type": "approve"}`` decision is
    sent automatically to demonstrate the resume flow. ``state["pending_review"]``
    is where the material to present to a reviewer would come from.

    Returns:
        A state update with the resumed agent's messages, the content of the
        last message (or "" when there are none) as ``final_answer``,
        ``human_approval`` set to ``True`` and ``pending_review`` cleared.
    """
    approval = {"type": "approve"}
    resume_command = Command(resume={"decisions": [approval]})

    # Same thread id that invoke_reasoning uses, so the interrupted run is the one resumed.
    outcome = reasoning_agent.invoke(
        resume_command,
        config={"configurable": {"thread_id": "reasoning-thread"}},
    )

    agent_messages = outcome["messages"]
    if agent_messages:
        answer = agent_messages[-1].content
    else:
        answer = ""

    # Only a state update is returned; the builder edge sends execution to END.
    return dict(
        messages=agent_messages,
        final_answer=answer,
        human_approval=True,
        pending_review=None,
    )

In [14]:
def route_after_reasoning(state: MainState) -> str:
    """Choose the node that follows ``invoke_reasoning``.

    Returns "human_review" when the reasoning node stored review configs in
    ``pending_review``; otherwise returns END. ``.get`` is used so a state
    without the key is treated as "nothing pending".
    """
    if state.get("pending_review") is not None:
        return "human_review"
    return END


# Main graph: retrieval -> reasoning -> (optional human review) -> END.
builder = StateGraph(MainState)

builder.add_node("invoke_retrieval_orchestration", invoke_retrieval_orchestration)
builder.add_node("invoke_reasoning", invoke_reasoning)
builder.add_node("human_review", human_review)

builder.add_edge(START, "invoke_retrieval_orchestration")
builder.add_edge("invoke_retrieval_orchestration", "invoke_reasoning")

# add_conditional_edges takes ONE routing callable that returns the destination,
# plus an optional mapping from its return values to node names.
builder.add_conditional_edges(
    "invoke_reasoning",
    route_after_reasoning,
    {"human_review": "human_review", END: END},
)

builder.add_edge("human_review", END)

graph = builder.compile()

In [15]:
# Question sent to the graph as the single user message.
user_question = "How does the self-attention mechanism work?"

In [16]:
# Live execution logs: stream the graph and report each step as it completes.
inputs = {
    "messages": [{"role": "user", "content": user_question}],
    "rag_chain": rag_chain,
}
# With a list of stream modes, graph.stream yields (mode, chunk) tuples.
# Progress is printed explicitly rather than through `print_mode`, which expects
# stream-mode names (e.g. "updates") and not a formatting option.
final_state = None
for mode, chunk in graph.stream(inputs, stream_mode=["updates", "values"]):
    if mode == "updates":
        # An "updates" chunk is keyed by the name of the node that just ran.
        for node_name in chunk:
            print(f"[update] {node_name}")
    elif mode == "values":
        # Keep the most recent full state so the result can be inspected afterwards.
        final_state = chunk

if final_state is not None:
    print(final_state.get("final_answer", ""))

2026-01-08 13:12:35,540 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  PydanticSerializationUnexpectedValue(Expected `none` - serialized value may not be as expected [field_name='context', input_value=Context(rag_chain=Runnabl...}, config_factories=[])), input_type=Context])
  return self.__pydantic_serializer__.to_python(
2026-01-08 13:12:36,754 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2026-01-08 13:12:38,192 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
  PydanticSerializationUnexpectedValue(Expected `none` - serialized value may not be as expected [field_name='context', input_value=Context(rag_chain=Runnabl...}, config_factories=[])), input_type=Context])
  return self.__pydantic_serializer__.to_python(
2026-01-08 13:12:39,519 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2026-01-08 13:12:40,341 - INFO - HTT

 => vector dimension mismatch, expected vector size(byte) 12288, actual 6144. at /Users/zilliz/zilliz/milvus-lite-develop/milvus-lite/thirdparty/milvus/internal/core/src/query/Plan.cpp:68



MilvusException: <MilvusException: (code=2000, message= => vector dimension mismatch, expected vector size(byte) 12288, actual 6144. at /Users/zilliz/zilliz/milvus-lite-develop/milvus-lite/thirdparty/milvus/internal/core/src/query/Plan.cpp:68
: segcore error)>