In [1]:
import os
import logging

processed_docs_path = "C:\BlueAI_bkp\data\processed\wikiextractor"


  processed_docs_path = "C:\BlueAI_bkp\data\processed\wikiextractor"


In [2]:
from phoenix.otel import register

# configure the Phoenix tracer
tracer_provider = register(
    project_name="Base-RAG",                 # your project
    endpoint="http://localhost:6006/v1/traces",  # Phoenix Docker HTTP collector
    protocol="grpc",               # force HTTP instead of gRPC
    auto_instrument=True,                   # auto-instrument LangChain + others
    batch=True, 
)

OpenTelemetry Tracing Details
|  Phoenix Project: Base-RAG
|  Span Processor: BatchSpanProcessor
|  Collector Endpoint: http://localhost:6006/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [3]:
from pathlib import Path
from typing import Iterator, Dict, Any, List
import json
from dotenv import load_dotenv
import os

from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# NEW
from langchain_text_splitters import RecursiveCharacterTextSplitter
import tiktoken

load_dotenv('.env')


BASE_DIR = Path("data/processed/wikiextractor")
CHROMA_DIR = Path("chromadb")
EMBEDDING_MODEL = "text-embedding-3-small"


In [4]:
# os.getenv("OPENAI_API_KEY")

In [5]:
from typing import Any
from typing_extensions import Optional

from langchain.agents import create_agent, AgentState
from langchain.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableConfig

## Chat Bot pre-setup

1. State
2. Tools
3. Memory Layer
4. Pre model hook - if needed
5. post model hook - if needed

### Model Setup


In [6]:

MODEL = "gpt-oss:20b"

from langchain_ollama import ChatOllama

llm_obj = ChatOllama(
    model=MODEL,
    temperature=0,
    reasoning=False,
)


### State 

In [7]:

class ChatAgentState(AgentState):
    initial_user_query: str

### Tools

1. similarity_search_tool : Used to run a similarity search on the chroma collection. 

In [8]:
from typing import List, Tuple

from langchain_core.documents import Document
from langchain.tools import tool

@tool
def similarity_search_tool(user_query: str, k: int = 4) -> Tuple[str, List[Document]]:
    """
    Perform a similarity search on a 

    Inputs: 
        1. Query(str) -> User query to perform vector search over vectorDB. 
        2. k:int (default 4) 

    Returns:
        A (content, artifact) tuple:
        - content: serialized text for the chat model
        - artifact: raw List[Document] for downstream use / debugging
    """

    embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)
    print(F"Input to Similarity search tool: {user_query}")
    vectordb = Chroma(
        persist_directory="chromadb",
        embedding_function=embeddings,
        collection_name="wiki_short_150",
    )

    doc_score_pairs = vectordb.similarity_search_with_relevance_scores(
        query=user_query,
        k=k,
    )

    docs: List[Document] = [doc for doc, _ in doc_score_pairs]

    # 3) Serialize in an LLM-friendly, structured way
    parts = []
    for idx, (doc, score) in enumerate(doc_score_pairs, start=1):
        part = (
            f"### Document {idx}\n"
            f"relevance_score: {score:.4f}\n"
            f"metadata: {doc.metadata}\n"
            f"content:\n{doc.page_content}\n"
        )
        parts.append(part)

    content_for_llm = "\n\n".join(parts) if parts else "No documents found."

    # 4) Return string for LLM + raw docs for downstream use
    return content_for_llm, docs

In [9]:
# similarity_search_tool.run("Who was Julius Caesar?")

### Memory Layer (Short Term Memory for turn based conversations.)

Memory layer built on top of LangMem.  

In [10]:
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langgraph.runtime import Runtime
from langmem import create_memory_store_manager, ReflectionExecutor
from langgraph.store.memory import InMemoryStore
from langchain.embeddings import init_embeddings
from langchain.messages import AIMessage, RemoveMessage, SystemMessage, ToolMessage
from pydantic import BaseModel
from langchain.agents.middleware import AgentMiddleware, ContextEditingMiddleware, ClearToolUsesEdit, ModelRequest, before_model

class SematicBreakUpMemory(BaseModel):
    title: str
    subject: str
    predicate: str
    object_of_interest: str

memory_manager = create_memory_store_manager(
    # llm_obj,
    "openai:gpt-5-mini",
    schemas=[SematicBreakUpMemory],
    instructions="Capture the title and summarize the content.",
    namespace=("conversational_memory",)
)

memory_store = InMemoryStore(
    # index = {
    #     "dims": 1536,
    #     "embed": init_embeddings(model="text-embedding-3-small", provider="openai")
    # }
)
refl_executor = ReflectionExecutor(memory_manager)

@before_model
def trim_messages(state: ChatAgentState, runtime: Runtime):
    """
    Trimming the context window to keep only the last few messages. 
    """
    messages = state['messages']
    if len(messages) <= 5: 
        return None
    # first_message = messages[0]
    recent_messages = messages[-3:]
    # new_messages = [first_message] + recent_messages
    new_messages = recent_messages
    return {
        "messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES),] + new_messages
    }

class BgMemoryProcessingMiddleWare(AgentMiddleware):

    def before_model(self, state:ChatAgentState, runtime):
        conversational_memory = list(runtime.store.search(
            ("conversational_memory",),
            limit=5
        ))
        mem_text = "\n".join(str(m.value) for m in conversational_memory)

        conversational_memory_msg = SystemMessage(
            content=f"\n\nRelevant long-term memory:\n{mem_text}"
        )
        mutated_history = [conversational_memory_msg]
        return {
            "messages": mutated_history
        }

    def modify_model_request(self, state:ChatAgentState, request: ModelRequest):
        most_recent_messages = state['messages'][-4:]
        request.messages = most_recent_messages
        return request    

    def after_model(self, state, runtime):
        last_message = state['messages'][-1]
        if isinstance(last_message, ToolMessage):
            return {
                "message": {last_message}
            }
        elif isinstance(last_message, AIMessage) and len(last_message.tool_calls) > 0:
            return {
                "messages": [last_message]
            }
        else:
            memory_manager.invoke({
                "messages": [last_message]
            })
            return None 

### Agent Layer

In [11]:


# --- 5. Build the agent -------------------------------------------------------
tools = [similarity_search_tool]

# #### Rules: 
# 1. You cannot answer from beyond the retrieved documents. 
# 2. Use the similarity_search_tool when you need documents to answer a user's question. DO NOT embellish the question with your own knowledge. All knwoledge must come from chat history and retreived documents                 
# 3. When the retrieved documents do not contain the data to a question, use the auditor tool and audit remarks to improve the query being sent to the similarity_search_tool. 
# 4. Always provide the title and url as wel as the chunk_index of the passage that is being used. 
# 5. Provide the user's query directly to the similiarty_search_tool
# 6. Once audit status turns to True, you can end the process

# #### Situations where context is you are unable to answer a question
# 1. Call the RAG Auditor tool. Rephrase the question based on Audit and provide the answer 
# 2. Retry a maximum of 2 times, If data is not found, return with "I am sorry, I am unable to answer the question.

system_prompt = """
#### Role: 
- You are a chat assistant that is developed to answer the user's question. 

#### Rules: 
1. You cannot answer from beyond the retrieved documents.
2. Pass the user's question to the similarity_search_tool, **DO NOT** use your own data to rephrase questions.
3. Always provide the title and url as wel as the chunk_index of the passage that is being used. 


Inputs: 
Initial user query: {initial_user_query}
"""

agent = create_agent(
    model=llm_obj,
    tools=tools,
    system_prompt=system_prompt,
    state_schema=ChatAgentState,          # ensures state has messages + remaining_steps
    store=memory_store,
    middleware=[BgMemoryProcessingMiddleWare(),
        ContextEditingMiddleware(
            edits=[
                ClearToolUsesEdit(
                    trigger=2000,     # token threshold to start clearing
                    keep=2,           # keep last 3 tool results
                    clear_tool_inputs=True,
                    exclude_tools=[],
                    placeholder="[cleared]",
                )
            ]
        ),
        trim_messages
    ]
)



# --- 6. Simple helper for calling the agent -----------------------------------
def run_agent(user_query: str, thread_id: str = "default") -> str:
    """
    Thin wrapper to send a message into the agent and get the final reply content.
    `thread_id` controls the short-term memory thread.
    """
    config: RunnableConfig = {
        "configurable": {
            "thread_id": thread_id,
        }
    }

    state = agent.invoke(
        {
            "initial_user_query": user_query,
        },
        config=config,
    )

    final_msg = state["messages"][-1]
    return final_msg.content


In [12]:
# run_agent("Who was Julius Caesar?")

In [13]:

# --- State scaffold (adapt to your AgentState if you have a TypedDict) ---

from langchain.messages import AIMessage


def make_initial_state(max_steps: int = 8) -> Dict[str, Any]:
    return {
        "messages": [],          # type: list[BaseMessage]
        "remaining_steps": max_steps,
        'audit_status': None,
        "audit_remarks": None,
        "initial_user_query": ""
    }


# --- Single turn runner (with checkpointer config) ---

def run_one_turn(
    agent,
    state: Dict[str, Any],
    thread_id: str,
) -> Dict[str, Any]:
    """
    Runs a single 'turn' of your agent given the current state.
    Adds the required LangGraph config for the checkpointer.
    """
    config = {
        "configurable": {
            "thread_id": thread_id,
            # add "checkpoint_ns" or "checkpoint_id" here if your graph needs them
        }
    }

    new_state = agent.invoke(state, config=config)
    return new_state


# --- Turn-based conversation over a list of questions ---

def simulate_turn_based_conversation(
    agent,
    questions: List[str],
    max_steps: int = 8,
    thread_id: str | None = None,
) -> Dict[str, Any]:
    """
    1. Creates an initial state.
    2. For each question:
       - appends a HumanMessage
       - calls the agent
       - prints the latest AIMessage
    """
    if thread_id is None:
        thread_id = f"test-thread-{uuid.uuid4()}"

    state = make_initial_state(max_steps=max_steps)

    for turn_idx, question in enumerate(questions, start=1):
        print(f"\n========== TURN {turn_idx} ==========")
        print(f"User: {question}")

        # append HumanMessage instead of dict
        state["messages"].append(HumanMessage(content=question))
        state['initial_user_query'] = question

        # run the agent for this turn
        state = run_one_turn(agent, state, thread_id=thread_id)

        # find the last AIMessage and print it
        assistant_msgs = [m for m in state["messages"] if isinstance(m, AIMessage)]
        if assistant_msgs:
            last_assistant = assistant_msgs[-1]
            print(f"Assistant: {last_assistant.content}")
        else:
            print("Assistant: <no AIMessage found in state>")

        if "remaining_steps" in state:
            print(f"(remaining_steps: {state['remaining_steps']})")

    return state


### Testing

In [14]:
questions = [
    # --- PHASE 1: History Base Context ---
    "Who was Julius Caesar?",
    "Which major battle marked the end of his civil war?",
    "Without naming Caesar, tell me the river he famously crossed.",
    
    # --- PHASE 2: Geography Shift ---
    "What is the capital of Argentina?",
    "Name one UNESCO World Heritage site in that country.",
    "Compare the population of Buenos Aires to the city where Caesar was assassinated.",
    
    # --- PHASE 3: Science Injection ---
    "What is the chemical formula of water?",
    "What property of water allows insects like water striders to walk on its surface?",
    "Is that property more related to cohesion or adhesion?",
    
    # --- PHASE 4: Literature Divergence ---
    "Who wrote 'Pride and Prejudice'?",
    "Without naming the author, describe the central theme.",
    "Does that theme relate in any way to the political alliances Caesar formed?",
    
    # --- PHASE 5: Return to Geography ---
    "Earlier we spoke about a capital city. Which city was it?",
    "Now tell me one major river running through that city.",
    
    # --- PHASE 6: Animals / Biology ---
    "What is the largest species of shark?",
    "Where in the world's oceans is it most commonly found?",
    "Compare the size of this shark to the height of the tallest mountain in the world.",
    
    # --- PHASE 7: Aviation ---
    "What is the Boeing 747 commonly nicknamed?",
    "Which airline was the first to operate it commercially?",
    "How does its typical cruising altitude compare to the elevation of Mount Everest?",
    
    # --- PHASE 8: Sports ---
    "Who holds the record for the most goals in World Cup history?",
    "Which national team did he represent?",
    "Does that team share a continent with the capital city we mentioned earlier?",
    
    # --- PHASE 9: Return to Early Context ---
    "Back to chemistry: what is the pH of pure water at room temperature?",
    "And how does that compare to the acidity of lemon juice?",
    
    # --- PHASE 10: Movies ---
    "Who directed the movie 'Inception'?",
    "Name one major theme of this film.",
    "Is that theme conceptually similar to the literary theme we discussed earlier?",
    
    # --- PHASE 11: Space ---
    "What is the largest planet in our solar system?",
    "What is the name of its most famous storm?",
    "Is that storm larger or smaller than Earth?",
    
    # --- PHASE 12: Politics / Return to Caesar ---
    "Which Roman leader succeeded Caesar as the first Emperor?",
    "What relationship did he have with Caesar?",
    "Does this familial relationship relate to any theme discussed in the novel earlier?",
    
    # --- PHASE 13: Mathematics ---
    "What is the value of Pi rounded to 5 decimal places?",
    "Is Pi a rational or irrational number?",
    "Compare this mathematical concept to the precision required in aviation altimeters.",
    
    # --- PHASE 14: Companies / Technology ---
    "Who founded Microsoft?",
    "Which operating system became its early mainstream success?",
    "Is that operating system older or younger than the movie 'Inception'?",
    
    # --- PHASE 15: FINAL CONTEXT STRESS ---
    "Earlier, we talked about an animal, a city, a storm, and a political alliance. List all four without explanation.",
    "Now, from those four, which one existed first historically?",
    "Finally, relate that oldest entity to the theme of 'power' we discussed in one of the earlier topics."
]


final_state = simulate_turn_based_conversation(
    agent,
    questions,
    max_steps=8,
    thread_id="dev-session-2",  # or let it auto-generate
)



User: Who was Julius Caesar?
Input to Similarity search tool: Who was Julius Caesar?


  vectordb = Chroma(


Assistant: **Answer**

Julius Caesar (12 July 100 BC – 15 March 44 BC) was a Roman military commander, politician, and author who rose to become the dictator of the Roman Republic. He was a key member of the First Triumvirate, fought a civil war against Pompey the Great, and after winning the war, he held the title of dictator until his assassination on the Ides of March. His life and career had a profound impact on the transition from the Roman Republic to the Roman Empire.  

**Sources**

1. **Title:** Julius Caesar  
   **URL:** https://simple.wikipedia.org/wiki?curid=5940  
   **Chunk index:** 0  
   *Content excerpt:* “Gaius Julius Caesar (12 July 100 BC – 15 March 44 BC) was a military commander, politician and author at the end of the Roman Republic. Caesar became a member of the First Triumvirate. When that broke up, he fought a civil war against Pompey the Great. Winning the war, Caesar became Roman dictator until his death. On March 15, 44 BC, he was stabbed to death by a gro

Could not find existing schema in dict for global_memory_store


Assistant: The documents you retrieved focus on **literary elements, techniques, and terms**—all of which are the building blocks for discussing a work’s theme.

- **Literary element** (chunk 1, URL: <https://simple.wikipedia.org/wiki?curid=464782>)  
  *“Literary elements help in the discussion and understanding of a work of literature as basic categories of critical analysis…”*  
  Themes are one of those basic categories; they are the central ideas or messages that a text explores.

- **Literary technique** (chunk 0, URL: <https://simple.wikipedia.org/wiki?curid=464786>)  
  *“A literary technique is any method an author uses to tell their message.”*  
  Techniques are the tools authors use to convey themes, so a theme is conceptually linked to these techniques.

- **Literary term** (chunk 0, URL: <https://simple.wikipedia.org/wiki?curid=464793>)  
  *“A literary term is a word that is used in talking or writing about books and stories.”*  
  “Theme” itself is a literary term, reinf

Could not find existing schema in dict for existing


Assistant: The novel’s central theme is the **interplay between family ties and the pursuit of political power**—how the personal relationship between Julius Caesar and his adopted son‑turned‑emperor, Augustus (Octavian), shapes the course of Roman history.

- **Source 1**  
  - **Title:** *Augustus*  
  - **URL:** https://simple.wikipedia.org/wiki?curid=15583  
  - **Chunk index:** 0  
  - **Excerpt used:** “Octavian, as he was originally called, was the adopted son of the dictator of the Roman Republic, Julius Caesar.”  

- **Source 2**  
  - **Title:** *Gaius Caesar*  
  - **URL:** https://simple.wikipedia.org/wiki?curid=169183  
  - **Chunk index:** 0  
  - **Excerpt used:** “Gaius Julius Caesar (20 BC–4 AD) was one of two brothers adopted by the first Roman emperor, Augustus.”  

These passages show how the novel foregrounds the **familial bond**—adoption and lineage—as a driving force behind the consolidation of power and the transition from Republic to Empire.

User: What is the

Could not find existing schema in dict for memory_store_root


Assistant: Microsoft was founded by **Bill Gates** and **Paul Allen** in 1975.  
The founding information is found in the following source:

- **Title:** Bill Gates  
- **URL:** https://simple.wikipedia.org/wiki?curid=107672  
- **Chunk index:** 2 (contains the sentence “Gates started Microsoft in 1975 with Paul Allen in the city of Albuquerque, New Mexico.”)

User: Which operating system became its early mainstream success?
Input to Similarity search tool: Microsoft early mainstream success operating system


Could not find existing schema in dict for global_memory_store


Assistant: **Microsoft’s early mainstream‑success operating system**

| Source | URL | Chunk index | Key points |
|--------|-----|-------------|------------|
| *Windows 1.0* | <https://simple.wikipedia.org/wiki?curid=10289> | 4 | Microsoft first presented Windows on 10 Nov 1983. It required only two floppy drives and 192 KB of RAM, and acted as a device driver for MS‑DOS 2.0. It supported cooperative multitasking in tiled windows and allowed non‑well‑behaved applications to run in full screen. |
| *Windows 1.0* | <https://simple.wikipedia.org/wiki?curid=10289> | 0 | Windows 1.0 was released 20 Nov 1985 as the first version of the Windows line. It ran as a 16‑bit, multi‑tasking shell on top of MS‑DOS, providing an environment for both Windows‑specific and existing DOS software. |
| *Windows 1.0* | <https://simple.wikipedia.org/wiki?curid=10289> | 5 | Early support from major manufacturers (Compaq, Zenith, DEC) and software companies (Ashton‑Tate, Lotus). BYTE magazine noted its “remarka

In [15]:
namespaces = list(memory_store.list_namespaces(prefix=("conversational_memory",)))
print(namespaces)

[('conversational_memory',)]


In [16]:
memory_store.search(("conversational_memory"))

[]