In [1]:
from langchain_ollama import ChatOllama

# Chat model accessed through Ollama; it is handed to `create_agent` further down
# and drives both the tool-calling decisions and the final answers.
llm = ChatOllama(model="gpt-oss:20b")


In [2]:
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Recursively load every Markdown file under knowledge-base/ as a UTF-8 text document.
documents = DirectoryLoader(
    "knowledge-base",
    glob="**/*.md",
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"},
).load()

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

import os

# Re-running the notebook would otherwise add the same chunks to the persisted
# store again, so drop the existing collection before it is rebuilt.
if os.path.exists("./vector_db"):
    Chroma(persist_directory="./vector_db").delete_collection()

In [4]:
# NOTE(review): `mode` is not referenced in any visible cell and comes from a
# private pandas module — this looks like an accidental editor auto-import.
# Confirm nothing relies on it and remove the line.
from pandas._libs.hashtable import mode
from langchain_ollama import OllamaEmbeddings

# Embedding model passed to the Chroma store when the chunks are indexed.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [5]:
# Splitter used to cut the loaded documents into retrieval-sized chunks.
# Sizes are measured with `len`, i.e. in characters.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # upper bound on the length of a single chunk
    chunk_overlap=200,  # text shared between neighbouring chunks to keep context across cuts
    length_function=len,
    is_separator_regex=False,  # separators are treated as literal strings, not regex patterns
)

In [6]:
# Apply the splitter to every loaded document; the result is what gets embedded and indexed.
chunks = splitter.split_documents(documents)

In [7]:
# Embed all chunks and write them to a Chroma store persisted in ./vector_db.
vector_store = Chroma.from_documents(
    chunks,
    embedding=embeddings,
    persist_directory="vector_db",
)

In [8]:
# Sanity check: the top hits should come from the relevant employee record.
vector_store.search(
    "Who aims to become marketing manager?",
    search_type="similarity",
)

[Document(id='df7541b4-c0d7-4651-ab80-a1e03675f7b9', metadata={'source': 'knowledge-base\\employees\\Emily Tran.md'}, page_content="- **Professional Development Goals**:  \n  - Emily Tran aims to become a Marketing Manager within the next two years, focusing on leading larger campaigns and developing junior team members.\n\n- **Hobbies**:  \n  - Emily enjoys photography and regularly contributes to Insurellm's social media content with her own high-quality images.\n  - She is also passionate about sustainability and organizes monthly team volunteer events for environmental awareness. \n\n---\n\nEmily Tran continues to be a valuable asset to Insurellm, driving innovative marketing strategies that resonate with a diverse customer base. Her contributions have significantly enhanced the company's branding and customer outreach efforts."),
 Document(id='c71fa998-e789-48e0-b9c1-a3c7463838fc', metadata={'source': 'knowledge-base\\employees\\Lisa Anderson.md'}, page_content='## Other HR Notes\

In [13]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above doubles as the tool description the model sees, so it
    # is kept short and unchanged; implementation notes live in comments instead.
    #
    # Returns a (content, artifact) pair as required by
    # response_format="content_and_artifact": the text handed to the model and
    # the raw documents it was built from.
    hits = vector_store.similarity_search(query, k=4)
    rendered_hits = [
        f"Source: {hit.metadata}\nContent: {hit.page_content}" for hit in hits
    ]
    return "\n\n".join(rendered_hits), hits

In [14]:
from langchain.agents import create_agent


# Tools the agent is allowed to call.
tools = [retrieve_context]

# System prompt for the agent. The two literals are concatenated by Python, so
# the first one must end its sentence explicitly; without the period the model
# receives the run-on text "...company documents Use the tool...".
prompt = (
    "You have access to a tool that retrieves context from employee, product, contract and company documents. "
    "Use the tool when necessary to help answer user queries."
)

# Agent that lets `llm` decide when to call the retrieval tool.
agent = create_agent(llm, tools, system_prompt=prompt)

In [15]:
query = "Who has aspirations to become marketing manager?"

# Single-turn conversation payload for the agent.
agent_input = {"messages": [{"role": "user", "content": query}]}

# Walk through the run step by step: dump the raw event, then pretty-print the
# most recent message (user question, tool call, tool result, final answer).
for event in agent.stream(agent_input, stream_mode="values"):
    print("event ==================================", event)
    latest_message = event["messages"][-1]
    latest_message.pretty_print()


Who has aspirations to become marketing manager?
Tool Calls:
  retrieve_context (65e467c0-2d55-4f88-afa0-d928594f45f3)
 Call ID: 65e467c0-2d55-4f88-afa0-d928594f45f3
  Args:
    query: aspirations to become marketing manager employee
Name: retrieve_context

Source: {'source': 'knowledge-base\\employees\\Emily Tran.md'}
Content: - **Professional Development Goals**:  
  - Emily Tran aims to become a Marketing Manager within the next two years, focusing on leading larger campaigns and developing junior team members.

- **Hobbies**:  
  - Emily enjoys photography and regularly contributes to Insurellm's social media content with her own high-quality images.
  - She is also passionate about sustainability and organizes monthly team volunteer events for environmental awareness. 

---

Emily Tran continues to be a valuable asset to Insurellm, driving innovative marketing strategies that resonate with a diverse customer base. Her contributions have significantly enhanced the company's brandi

In [None]:
# import gradio as gr

# def predict(message, history):
#     # Standard LangGraph stream usage
#     response_content = ""
#     history.append({"role": "user", "content": message})
#     for event in agent.stream(
#         {"messages": history},
#         stream_mode="values",
#     ):
#         if event["messages"]:
#             response_content = event["messages"][-1].content
    
#     return response_content

# demo = gr.ChatInterface(
#     predict,
#     title="🏢 InsureLLM Knowledge Assistant",
#     description="Ask anything about our company documents and employees!",
#     examples=[
#         "Who aims to become marketing manager?",
#         "What are Emily Tran's hobbies?",
#         "Tell me about Lisa Anderson's career progression."
#     ]
# )

# if __name__ == "__main__":
#     demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


In [None]:
import gradio as gr
import json
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage

def chat_logic(message, history, agent_state):
    """Run one chat turn through the agent and build the UI and debug views.

    Args:
        message: Text the user typed into the input box.
        history: Chatbot value supplied by Gradio. It is not read; the visible
            history is rebuilt from the agent's messages on every turn.
        agent_state: LangChain message objects accumulated over earlier turns
            (``None`` or empty on the first turn). The list is not mutated.

    Returns:
        A 4-tuple ``(cleared_input, ui_history, debug_json, new_agent_state)``:
        an empty string that clears the textbox, a list of
        ``{"role", "content"}`` dicts for the chatbot, a JSON string describing
        every message in the conversation, and the message list to store for
        the next turn.
    """
    # Copy so the caller's list stays intact even if the agent call raises.
    messages = list(agent_state or [])

    # A blank submission is not sent to the model; the current conversation is
    # simply re-rendered.
    if message and message.strip():
        # Pass the message objects themselves (not dicts) so tool-call ids
        # from earlier turns are preserved.
        messages.append(HumanMessage(content=message))

        # With stream_mode="values" every event carries the message list so
        # far; keep the latest one. If no event arrives, `messages` still
        # holds the prior turns plus the new question instead of being wiped.
        for event in agent.stream({"messages": messages}, stream_mode="values"):
            if "messages" in event:
                messages = event["messages"]

    # Debug view: one plain dict per message, including tool-call details.
    debug_entries = []
    for msg_obj in messages:
        entry = {"role": msg_obj.type, "content": msg_obj.content}
        if getattr(msg_obj, "tool_calls", None):
            entry["tool_calls"] = msg_obj.tool_calls
        if hasattr(msg_obj, "tool_call_id"):
            entry["tool_call_id"] = msg_obj.tool_call_id
        debug_entries.append(entry)

    # UI view: the chatbot only accepts user/assistant roles, so tool and
    # system messages are dropped. AI messages that only carry a tool call have
    # empty content and would render as blank bubbles, so they are skipped too.
    ui_roles = {"human": "user", "ai": "assistant"}
    ui_history = [
        {"role": ui_roles[entry["role"]], "content": entry["content"]}
        for entry in debug_entries
        if entry["role"] in ui_roles and entry["content"]
    ]

    # default=str keeps the debug pane working if a message carries a value
    # that json cannot encode natively.
    debug_json = json.dumps(debug_entries, indent=2, default=str)

    return "", ui_history, debug_json, messages

# Two-column debug UI: the conversation on the left, the raw message state as
# JSON on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 🏢 InsureLLM Debugger")

    # Hidden state holding the actual LangChain message objects between turns.
    agent_state = gr.State([])

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation")
            msg = gr.Textbox(placeholder="Ask a question...", label="Input")
            submit = gr.Button("Send", variant="primary")

        with gr.Column(scale=1):
            debug_json_view = gr.Code(
                label="Full Context (JSON sent to LLM)",
                language="json"
            )

    # Both the button and pressing Enter in the textbox run the same turn
    # handler, passing in and receiving back the hidden agent_state.
    turn_inputs = [msg, chatbot, agent_state]
    turn_outputs = [msg, chatbot, debug_json_view, agent_state]
    submit.click(chat_logic, turn_inputs, turn_outputs)
    msg.submit(chat_logic, turn_inputs, turn_outputs)

# Start the Gradio server for the debug UI when this code is run as the main module.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.


: 