In [52]:
!pip install -U langchain --no-deps
!pip install -U langchain-community --no-deps
!pip install -U langchain-groq --no-deps
!pip install -U langchain-chroma --no-deps
!pip install -U langchain-openai --no-deps
!pip install -U llama-index --no-deps

# Extra runtime libraries (safe ones, no conflicts)
!pip install -U faiss-cpu chromadb gradio sentence-transformers pypdf unstructured[all-docs] python-pptx openpyxl duckduckgo-search ddgs




from kaggle_secrets import UserSecretsClient
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# load the Groq API key from Kaggle Secrets
secrets = UserSecretsClient()
os.environ["GROQ_API_KEY"] = secrets.get_secret("GROQ_API_KEY")
print("Groq API key loaded ✅")

Groq API key loaded ✅


In [33]:
from __future__ import annotations
import os, re, json, ast
from typing import TypedDict, List, Dict, Any, Optional

In [34]:
# === Kaggle Secrets ===
try:
    from kaggle_secrets import UserSecretsClient
    secrets = UserSecretsClient()
    os.environ["GROQ_API_KEY"] = secrets.get_secret("GROQ_API_KEY")
    print("Groq API key loaded ✅")
except Exception as e:
    print("[WARN] Could not load GROQ_API_KEY from Kaggle Secrets:", e)

Groq API key loaded ✅


In [35]:
# === LangChain / LLM ===
from langchain_groq import ChatGroq
from langchain.schema import HumanMessage, SystemMessage, AIMessage


# === Vector Memory ===
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

from langchain.text_splitter import RecursiveCharacterTextSplitter

# === LangGraph ===
from langgraph.graph import StateGraph, END


# === Search ===  duck duck go search
from ddgs import DDGS

In [36]:
# ------------------------
# GLOBALS / CONFIG
# ------------------------
LLM_MODEL = os.environ.get("GROQ_MODEL", "qwen/qwen3-32b")
TEMPERATURE = float(os.environ.get("LLM_TEMPERATURE", "0"))
MAX_REFLECTIONS = int(os.environ.get("MAX_REFLECTIONS", "2"))
MEM_COLLECTION = os.environ.get("MEM_COLLECTION", "mini_manus_memory")
EMBED_MODEL = os.environ.get("EMBED_MODEL", "all-MiniLM-L6-v2")


# Persistent directory for Kaggle (working dir is saved in session)
PERSIST_DIR = "/kaggle/working/agent_memory"


# Initialize LLM (requires GROQ_API_KEY)
_llm = ChatGroq(model=LLM_MODEL, temperature=TEMPERATURE)


# Initialize vector memory
_embedding_fn = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
_vectorstore = Chroma(
collection_name=MEM_COLLECTION,
embedding_function=_embedding_fn,
persist_directory=PERSIST_DIR
)

# ✅ DEFINE the variable, but leave it empty. We will create it in the loop.
_vectorstore: Optional[Chroma] = None


In [37]:
from langchain_community.document_loaders import (
    TextLoader, PyPDFLoader, CSVLoader, UnstructuredExcelLoader,
    UnstructuredPowerPointLoader, UnstructuredWordDocumentLoader
)

RAG_PERSIST_DIR = "/kaggle/working/"
RAG_COLLECTION = "rag_docs"

def ingest_knowledge_base(file_path: str):
    """
    Ingests a user-uploaded file (txt, pdf, csv, excel, ppt, docx).
    """
    print(f"📂 Ingesting file: {file_path}")

    # Pick loader based on file extension
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        loader = TextLoader(file_path, encoding="utf-8")
    elif ext == ".pdf":
        loader = PyPDFLoader(file_path)
    elif ext == ".csv":
        loader = CSVLoader(file_path)
    elif ext in [".xls", ".xlsx"]:
        loader = UnstructuredExcelLoader(file_path)
    elif ext in [".ppt", ".pptx"]:
        loader = UnstructuredPowerPointLoader(file_path)
    elif ext in [".doc", ".docx"]:
        loader = UnstructuredWordDocumentLoader(file_path)
    else:
        raise ValueError(f"❌ Unsupported file type: {ext}")

    # Load documents
    documents = loader.load()

    # Split into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)
    
    print(f"✅ File split into {len(chunks)} chunks.")

    # Update vector store
    rag_db = Chroma.from_documents(
        documents=chunks,
        embedding=_embedding_fn,
        persist_directory=RAG_PERSIST_DIR,
        collection_name=RAG_COLLECTION
    )

    print("✅ Knowledge base updated and saved.")
    return f"File '{file_path}' ingested successfully!"


In [38]:
# ------------------------
# Memory helpers
# ------------------------
def mem_add(text: str, kind: str = "note"):
    """Adds text to the vector store. Persistence is handled automatically."""
    _vectorstore.add_texts([f"{kind}: {text}"])
    
    # The erroneous .persist() line has been removed.
    
    # We can keep the debug print statement to see it working.
    print(f"🧠 Memory Add Request Sent: '{kind}: {text[:60]}...'")

def mem_recall(query: str, k: int = 3):
    """Recalls k most similar documents from the vector store."""
    docs = _vectorstore.similarity_search(query, k=k)
    return [d.page_content for d in docs]

# Place this with your other node functions

# Replace your global DIRECT_ANSWER_SYS variable with this:

DIRECT_ANSWER_SYS = """
You are a helpful assistant that answers questions ONLY from the provided memory context.
The memory may contain structured facts in JSON format, like {"entity": "user", "attribute": "name", "value": "haad"}.

- To answer the user's question, you MUST parse these JSON facts.
- When asked 'what is my name' or about the 'user', look for facts where 'entity' is 'user'.
- When asked 'what is your name' or about the 'agent', look for facts where 'entity' is 'agent'.
- Use this information to answer precisely. Do not mention the JSON structure in your answer.

- If the memory does NOT contain enough information to answer, you MUST respond with the exact phrase: 'NO_DIRECT_ANSWER' and nothing else.
"""

def node_direct_answer(state: GraphState) -> GraphState:
    """Checks for a direct answer in memory before planning."""
    # ✅ CHANGE: Get the log and append actions
    log = state.get("log", [])
    
    mem_list = mem_recall(state.get("user_input", ""), k=5)
    facts = mem_recall("fact:", k=5)
    mem_list.extend(facts)
    mem_list = list(set(mem_list))
    mem_text = "\n".join(mem_list) if mem_list else "<none>"

    prompt = (DIRECT_ANSWER_SYS + "\nRelevant memory:\n" + mem_text + "\nUser input:\n" + state.get("user_input", ""))
    response = _llm.invoke(prompt).content.strip()

    if "NO_DIRECT_ANSWER" in response:
        log.append("🤔 Direct Answer: No direct answer found in memory. Proceeding to planner.")
        return {"log": log}
    else:
        log.append("✅ Direct Answer: Found a direct answer in memory. Finalizing.")
        return {"final": response, "log": log}


In [39]:
# ------------------------
# Safe JSON utilities
# ------------------------
def extract_json_block(text: str) -> Optional[str]:
    import re
    match = re.search(r"(\{.*\}|\[.*\])", text, flags=re.S)
    return match.group(1) if match else None


def safe_json_loads(text: str) -> Any:
    try:
        return json.loads(text)
    except Exception:
        block = extract_json_block(text)
        if block:
            try:
                cleaned = block.replace("\n", " ").replace("\t", " ")
                return json.loads(cleaned)
            except Exception as e:
                return {"error": f"json parse fail: {e}", "raw": block}
        return {"error": "no json found", "raw": text}

In [40]:
# ------------------------
# Tools (web_search, calculator)
# ------------------------

from ddgs import DDGS

def tool_web_search(query: str, max_results: int = 5) -> Dict[str, Any]:
    results: List[Dict[str, Any]] = []
    try:
        with DDGS() as ddgs:
            for r in ddgs.text(query, max_results=max_results):
                results.append({
                    "title": r.get("title"),
                    "href": r.get("href"),
                    "body": r.get("body"),
                })
    except Exception as e:
        return {"error": f"search_failed: {e}"}
    return {"results": results}


class _MathVisitor(ast.NodeVisitor):
    allowed_nodes = (
        ast.Expression, ast.BinOp, ast.UnaryOp, ast.Num, ast.Load,
        ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Mod, ast.Pow,
        ast.USub, ast.UAdd, ast.FloorDiv
    )
    def visit(self, node):
        if not isinstance(node, self.allowed_nodes):
            raise ValueError(f"disallowed expression: {type(node).__name__}")
        return super().visit(node)


def tool_calculator(expression: str) -> Dict[str, Any]:
    try:
        node = ast.parse(expression, mode="eval")
        _MathVisitor().visit(node)
        value = eval(compile(node, "<calc>", "eval"), {"__builtins__": {}}, {})
        return {"ok": True, "result": value}
    except Exception as e:
        return {"ok": False, "error": str(e)}


# ===============================================================
# ✅ Update tool_rag_search to always use the latest persisted DB
# ===============================================================
def tool_rag_search(query: str) -> Dict[str, Any]:
    try:
        rag_db = Chroma(
            persist_directory=RAG_PERSIST_DIR,
            embedding_function=_embedding_fn,
            collection_name=RAG_COLLECTION
        )
        docs = rag_db.similarity_search(query, k=3)
        results_text = "\n---\n".join([d.page_content for d in docs])
        return {"results": results_text or "No relevant info found in the uploaded file."}
    except Exception as e:
        return {"error": f"rag_search_failed: {e}"}

        

TOOLS = {
    "web_search": {
        "desc": "Search the web for general information, current events, or real-world people and places.",
        "func": lambda args: tool_web_search(args.get("query", ""), int(args.get("max_results", 3)))
    },
    "calculator": {
        "desc": "Evaluate arithmetic expressions.",
        "func": lambda args: tool_calculator(args.get("expression", ""))
    },
    # ✅ UPDATE THIS DESCRIPTION
    "rag_search": {
        "desc": "Use this tool to answer questions about the fantasy story 'The Lantern of Aetheria'. It contains specific knowledge about characters like Kael and Elara, and places like the city of Aetheria.",
        "func": lambda args: tool_rag_search(args.get("query", ""))
    }
}

# # This will automatically update the string passed to the Planner
# TOOL_LIST_STR = "\n".join([f"- {name}: {meta['desc']}" for name, meta in TOOLS.items()])


In [41]:
# ------------------------
# Graph State
# ------------------------
# In your GraphState TypedDict definition

class GraphState(TypedDict, total=False):
    user_input: str
    memory_context: str
    plan: List[Dict[str, Any]]
    observations: List[Dict[str, Any]]
    draft: str
    feedback: str
    reflections: int
    final: str
    # ✅ ADD THIS LINE: This will be our flight recorder.
    log: List[str]


# ------------------------
# Node: Planner
# ------------------------

# ✅ NEW: A prompt specifically for understanding the user's true intent.
# Replace your old INTENT_DISTILLER_PROMPT with this one
INTENT_DISTILLER_PROMPT = """
You are an intent distiller. Your job is to analyze the conversation and determine the user's real, actionable task.
- If the user has provided a file, the primary task is to process that file using the instructions in the user's text.
- If the user refers to a specific file (e.g., "in summary.txt"), separate the core question from the file reference.

Your output should be a clear, actionable instruction for the Planner AI.

Example 1:
User input: "summarize the main points of the attached file"
File Path: "/path/to/doc.txt"
Result: "The user has uploaded a file at /path/to/doc.txt and wants a summary. The first step is to add the file to the knowledge base, then search it for the main points."

Example 2:
User input: "whats the poem name in summary.txt"
File Path: null
Result: "The user wants to know the name of the poem inside the file 'summary.txt'. The task is to search within that specific file for the poem's title."

Now, distill the intent from the following:
"""

# Replace your old PLANNER_SYS prompt with this corrected version

# ✅ THIS IS THE CORRECTED PROMPT. PLEASE REPLACE THE OLD ONE WITH THIS.

# ✅ STEP 1: Replace your old PLANNER_SYS variable with this one.

PLANNER_SYS = """
You are the Planner. Your goal is to create a step-by-step plan to answer the user's task.
**VERY IMPORTANT: Before creating a new plan, check 'Relevant memory'. If the answer is already there, your plan should be a single step to state the answer directly without using tools.**

Available tools:
{tool_list}

**CRITICAL RULES for rag_search:**
1.  When the user asks about a specific file (e.g., "in summary.txt"), you MUST use the `rag_search` tool with the `source_file` argument set to the filename (e.g., "summary.txt").
2.  When using `source_file`, the `query` argument MUST be a question that represents the user's core goal. **DO NOT leave the query empty.** Reformulate the user's request into a proper question for the search.

**Example of a good plan:**
User Task: "The user wants to know the name of the poem inside the file 'summary.txt'."
Correct Plan:
{
  "steps": [
    {
      "id": 1,
      "thought": "I need to find the name of the poem inside 'summary.txt'. I will use rag_search and filter by the source file. I will also formulate a query to find the poem's name.",
      "tool": "rag_search",
      "args": {
        "query": "What is the name of the poem?",
        "source_file": "summary.txt"
      },
      "output_key": "poem_content"
    }
  ]
}

Return a STRICT JSON array named 'steps'.
"""

# ✅ STEP 2: Replace your entire old node_planner function with this one.

def node_planner(state: GraphState) -> GraphState:
    log = state.get("log", [])
    user_input = state.get("user_input", "")
    file_path = state.get("file_path")

    # --- Intent Distiller Step ---
    mem_list = mem_recall(user_input, k=4)
    mem_text = "\n".join(mem_list) if mem_list else "<none>"
    file_info = f"\nFile Path: \"{file_path}\"" if file_path else "\nFile Path: null"

    distiller_prompt = (
        INTENT_DISTILLER_PROMPT
        + "\nRelevant memory:\n" + mem_text
        + "\nUser input:\n" + user_input
        + file_info
    )

    distilled_task = _llm.invoke(distiller_prompt).content.strip()
    log.append(f"🎯 Planner: Distilled user intent to: '{distilled_task}'")

    # --- Planning Step ---
    tool_list_str = "\n".join([f"- {name}: {meta['desc']}" for name, meta in TOOLS.items()])
    
    # ✅ CHANGED: We now use the safe .replace() method instead of .format()
    planner_prompt_template = PLANNER_SYS.replace("{tool_list}", tool_list_str)

    prompt = (
        planner_prompt_template
        + "\nRelevant memory (may be empty):\n" + mem_text
        + "\nUser task:\n" + distilled_task
        + "\nRespond with ONLY JSON in the format: {\"steps\":[...] }\n"
    )
    raw = _llm.invoke(prompt).content.strip()
    parsed = safe_json_loads(raw)
    steps = parsed.get("steps") if isinstance(parsed, dict) and isinstance(parsed.get("steps"), list) else []

    log.append(f"📝 Planner: Generated a plan with {len(steps)} step(s).")
    return {"plan": steps, "log": log}



# ------------------------
# Node: Executors
# ------------------------
EXECUTOR_SYS = """
You are the Executor. Given the user's request, the plan, and tool observations,
write a clear, helpful draft answer. If observations include search results, cite them inline textually (titles/domains), but do not fabricate links.
If no tools were used, answer from general knowledge + memory context. Keep it concise unless the user asked for depth.
"""


def _run_tools(steps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    observations: List[Dict[str, Any]] = []
    for step in steps:
        tool_name = step.get("tool")
        args = step.get("args", {}) or {}
        if tool_name and tool_name in TOOLS:
            try:
                obs = TOOLS[tool_name]["func"](args)
            except Exception as e:
                obs = {"error": f"tool_error: {e}"}
        else:
            obs = {"note": "no_tool"}
        observations.append({
            "id": step.get("id"),
            "tool": tool_name,
            "args": args,
            "observation": obs,
        })
    return observations


def node_executor(state: GraphState) -> GraphState:
    """Runs tools and generates a draft answer."""
    # ✅ CHANGE: Get the log and append actions
    log = state.get("log", [])
    plan = state.get("plan", [])
    
    tool_steps = [step for step in plan if step.get("tool")]
    if tool_steps:
        tools_str = ", ".join([step['tool'] for step in tool_steps])
        log.append(f"🛠️ Executor: Running tool(s): {tools_str}")
    else:
        log.append("🧠 Executor: No tools to run. Answering from general knowledge.")

    observations = []
    for step in plan:
        tool_name = step.get("tool")
        args = step.get("args", {}) or {}
        if tool_name and tool_name in TOOLS:
            obs = TOOLS[tool_name]["func"](args)
        else:
            obs = {"note": "no_tool"}
        observations.append({"id": step.get("id"), "tool": tool_name, "args": args, "observation": obs})

    context_blob = json.dumps({"plan": plan, "observations": observations, "user_input": state.get("user_input", "")}, ensure_ascii=False)
    draft_prompt = "You are the Executor... \nContext JSON:\n" + context_blob + "\nDraft the answer now."
    draft = _llm.invoke(draft_prompt).content.strip()
    
    return {"observations": observations, "draft": draft, "log": log}


# ------------------------
# Node: Verifier (reflection loop)
# ------------------------
VERIFIER_SYS = """
You are the Verifier. Check the draft for factuality, clarity, safety, and task completion.
Return ONLY JSON: {"approved": bool, "feedback": "...", "final": "..."}
- If approved: polish the draft lightly and place in 'final'.
- If NOT approved: explain issues in 'feedback' and leave 'final' empty.
Be conservative; prefer one more revision if unsure.
"""


def node_verifier(state: GraphState) -> GraphState:
    """Verifies the draft answer."""
    # ✅ CHANGE: Get the log and append actions
    log = state.get("log", [])
    
    prompt = (VERIFIER_SYS + "\nUser input:\n" + state.get("user_input", "") + "\nDraft:\n" + (state.get("draft", "") or "") + "\nObservations (for verification):\n" + json.dumps(state.get("observations", []), ensure_ascii=False))
    raw = _llm.invoke(prompt).content.strip()
    parsed = safe_json_loads(raw)
    
    if isinstance(parsed, dict) and parsed.get("approved") and parsed.get("final"):
        log.append("✅ Verifier: Draft approved.")
        return {"final": parsed.get("final"), "log": log}

    fb = parsed.get("feedback", "") if isinstance(parsed, dict) else "needs another pass"
    log.append(f"❌ Verifier: Draft not approved. Feedback: '{fb[:50]}...'. Replanning.")
    mem_add(f"verifier_feedback: {fb}", kind="feedback")
    return {"feedback": fb, "reflections": (state.get("reflections", 0) + 1), "log": log}


In [42]:
# ------------------------
# Node: Memory Updater (end)
# ------------------------

def node_memory_update(state: GraphState) -> GraphState:
    """Saves the conversation and extracts structured facts to memory."""
    log = state.get("log", [])
    log.append("💾 Memory: Saving final answer and user query to long-term memory.")
    
    ui = state.get("user_input", "")
    final = state.get("final", "")
    if ui: mem_add(ui, kind="user")
    if final: mem_add(final, kind="agent")

    # ✅ CHANGE: Upgraded to structured JSON fact extraction
    # This block replaces the old, simple fact extraction.
    low = ui.lower()
    
    # For the user's name
    if "my name is" in low:
        name = low.split("my name is", 1)[-1].strip().strip(".?! ")
        if name:
            fact_json = json.dumps({"entity": "user", "attribute": "name", "value": name})
            mem_add(fact_json, kind="fact")
            
    # For the agent's name
    if "your name is" in low or "ur name is" in low:
        name = low.split(" is ", 1)[-1].strip().strip(".?! ")
        if name:
            fact_json = json.dumps({"entity": "agent", "attribute": "name", "value": name})
            mem_add(fact_json, kind="fact")

    # For other facts, like a brother's name
    if "my brother name is" in low:
        name = low.split("my brother name is", 1)[-1].strip().strip(".?! ")
        if name:
            fact_json = json.dumps({"entity": "user's brother", "attribute": "name", "value": name})
            mem_add(fact_json, kind="fact")

    return {"log": log}

def finalizer(state: GraphState) -> GraphState:
    final_answer = state.get("draft") or "[Agent had no result]"
    return {**state, "final": final_answer}


In [43]:
# ------------------------
# Graph assembly
# ------------------------

def run_agent_once(user_input: str) -> Dict[str, Any]:
    global _vectorstore
    _vectorstore = Chroma(
        collection_name=MEM_COLLECTION,
        embedding_function=_embedding_fn,
        persist_directory=PERSIST_DIR
    )
    graph = build_graph()
    # ✅ CHANGE: Initialize the log in the state
    state: GraphState = {"user_input": user_input, "reflections": 0, "log": []}
    result = graph.invoke(state)
    return result

    
# Place this with your _should_reflect function
# ✅ THIS IS THE MISSING FUNCTION
def _should_reflect(state: GraphState) -> str:
    if state.get("final"):
        return "approved"
    if int(state.get("reflections", 0)) >= MAX_REFLECTIONS:
        return "give_up"
    return "replan"

def should_plan_or_finish(state: GraphState) -> str:
    """Decides whether to plan or end the process."""
    if state.get("final"):
        # A direct answer was found by the previous node.
        return "finish"
    else:
        # No direct answer, so we must proceed to planning.
        return "plan"
        
# Replace your build_graph function with this final version

def build_graph():
    workflow = StateGraph(GraphState)

    # Add the new node
    workflow.add_node("direct_answer", node_direct_answer)
    
    workflow.add_node("planner", node_planner)
    workflow.add_node("executor", node_executor)
    workflow.add_node("verifier", node_verifier)
    workflow.add_node("finalizer", finalizer)
    workflow.add_node("memory", node_memory_update)

    # The entry point is now our direct_answer node
    workflow.set_entry_point("direct_answer")

    # Add the new conditional edge
    workflow.add_conditional_edges(
        "direct_answer",
        should_plan_or_finish,
        {
            "finish": "memory", # If we have an answer, just save the convo and end.
            "plan": "planner"   # If no answer, start the main planning loop.
        }
    )
    
    # This is the original agent loop
    workflow.add_edge("planner", "executor")
    workflow.add_edge("executor", "verifier")
    workflow.add_conditional_edges(
        "verifier",
        _should_reflect,
        {
            "replan": "planner",
            "approved": "memory",
            "give_up": "finalizer"
        }
    )
    workflow.add_edge("finalizer", "memory")
    workflow.add_edge("memory", END)

    return workflow.compile()

In [44]:
# ------------------------
# Public API
# ------------------------
def run_agent_once(user_input: str) -> Dict[str, Any]:
    global _vectorstore
    
    # ✅ THE FIX: Create a new Chroma instance, loading the latest data from disk.
    # Assign it to the global variable so all your node functions can use it.
    _vectorstore = Chroma(
        collection_name=MEM_COLLECTION,
        embedding_function=_embedding_fn,
        persist_directory=PERSIST_DIR
    )
    
    graph = build_graph()
    state: GraphState = {"user_input": user_input, "reflections": 0}
    result = graph.invoke(state)
    return result


if __name__ == "__main__":
    if not os.environ.get("GROQ_API_KEY"):
        print("[WARN] GROQ_API_KEY is not set. Set it before running for LLM calls.")
    demo_q = "Give me 3 bullet points on why AI agents are useful for students."
    out = run_agent_once(demo_q)
    print("\n=== FINAL ANSWER ===\n", out.get("final"))
    print("\n--- Debug state keys ---\n", list(out.keys()))

🧠 Memory Add Request Sent: 'user: Give me 3 bullet points on why AI agents are useful for stud...'
🧠 Memory Add Request Sent: 'agent: <think>
Okay, the user is asking "who is Sahil Mulla" after ...'

=== FINAL ANSWER ===
 <think>
Okay, the user is asking "who is Sahil Mulla" after I provided bullet points on AI agents for students. Let me check the memory context.

Looking back, there's a memory entry about Sahil Mulla related to a Network Security Monitoring System project from 2022–2023. The details include his work on session tracking, user management, key logging, and a secure workspace. Also, his contact info: phone +91 9152754549 and email sahilmulla9152@gmail.com. There's a feedback note mentioning a possible name discrepancy with 'Sahil M' in a podcast URL, but it's unclear if they're the same person.

The user might be asking about the Sahil Mulla mentioned in the document. Since the memory has structured facts about him, I should use that. The answer should summarize his proj

In [45]:
# =======================
# Chat Loop for Agent
# =======================
def chat_loop():
    print("🤖 Agent ready! Type 'exit' to quit.\n")
    while True:
        user_input = input("You: ")
        if user_input.strip().lower() in ["exit", "quit", "q"]:
            print("👋 Goodbye!")
            break

        result = run_agent_once(user_input)
        
        # ✅ CHANGE: Added the block to print the log
        print("\n--- 🕵️ Agent's Thought Process ---")
        log = result.get("log", [])
        for step in log:
            print(f"➡️ {step}")
        print("---------------------------------")
        
        print("\nAgent:\n", result.get("final", "[No final output]"))

        recalls = mem_recall(user_input, k=2)
        if recalls:
            print("\n💾 Memory recall:\n", "\n".join(recalls))

# === Run chat ===
if __name__ == "__main__":
    chat_loop()


🤖 Agent ready! Type 'exit' to quit.



You:  quit


👋 Goodbye!


In [46]:
# The text of your story
knowledge_base_text = """
"Moonlit Night"

The stars shine bright in the midnight sky
A gentle breeze whispers by
The world is hushed, in quiet sleep
As the moon's soft light begins to creep

The shadows dance upon the wall
A midnight serenade, for one and all
The moon's sweet beams, illuminate the night
A peaceful scene, a wondrous sight.
"""

# The Fix: Use a relative path to save the file in /kaggle/working/
file_path = "Moonlit.txt"
with open(file_path, "w") as f:
    f.write(knowledge_base_text)

print(f"✅ '{file_path}' created successfully in /kaggle/working/.")


✅ 'Moonlit.txt' created successfully in /kaggle/working/.


In [53]:
import gradio as gr
import markdown

# Function to handle file ingestion
import os
import traceback

def upload_file(files):
    # This function will now receive a list of files
    if not files:
        return "⚠️ Please upload at least one file."

    results = []
    # Loop through each file in the list
    for file in files:
        file_path = file.name
        try:
            # Call the ingestion function for each file
            status = ingest_knowledge_base(file_path)
            results.append(status)
        except Exception as e:
            error_msg = "".join(traceback.format_exception_only(type(e), e))
            print("❌ Ingestion failed:", error_msg)
            results.append(f"❌ Error ingesting {os.path.basename(file_path)}: {error_msg}")
    
    # Return a single string with the status of all files
    return "\n".join(results)



# Function for chat
def run_agent_for_ui(user_input, history):
    print(f"Received input: {user_input}")
    result = run_agent_once(user_input)

    # Format output
    log_steps = result.get("log", [])
    final_answer = result.get("final", "I'm sorry, I encountered an error.")
    
    log_html = "<details><summary>🕵️ Click to see Agent's Thought Process</summary><ul>"
    for step in log_steps:
        safe_step = markdown.markdown(step.replace("\n", "<br>"))
        log_html += f"<li>{safe_step}</li>"
    log_html += "</ul></details>"

    return f"{final_answer}\n\n{log_html}"

print("Launching Gradio UI...")

# Build UI with file upload + chat
with gr.Blocks() as demo:
    gr.Markdown("## 📘 General AI Agent with Custom Knowledge Base")
    
    with gr.Tab("📂 Upload File"):
        file_input = gr.File(label="Upload documents", file_types=[".txt", ".pdf", ".csv", ".xlsx", ".pptx"], file_count="multiple")
        upload_output = gr.Textbox(label="Ingestion Status")
        file_input.change(upload_file, inputs=file_input, outputs=upload_output)
    
    with gr.Tab("💬 Chat with Agent"):
        gr.ChatInterface(
            fn=run_agent_for_ui,
            title="Chat with AI Agent",
            description="You can now query on your uploaded file."
        )

demo.launch(share=True, debug=True)


Launching Gradio UI...


  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://81a9c0827d7643b215.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


📂 Ingesting file: /tmp/gradio/2e753e19037bfac12b4cfef9d06ded1c984bd699c2d97669576e097961227428/2.pdf
✅ File split into 154 chunks.
✅ Knowledge base updated and saved.
📂 Ingesting file: /tmp/gradio/2e753e19037bfac12b4cfef9d06ded1c984bd699c2d97669576e097961227428/2.pdf
✅ File split into 154 chunks.
✅ Knowledge base updated and saved.
📂 Ingesting file: /tmp/gradio/7cd182f8f41b1b58b775ce1d685121caf257ad283e9e64b25b6369f3e9d49278/asjjsjsjs 1.txt
✅ File split into 1 chunks.
✅ Knowledge base updated and saved.
📂 Ingesting file: /tmp/gradio/7cd182f8f41b1b58b775ce1d685121caf257ad283e9e64b25b6369f3e9d49278/asjjsjsjs.txt
✅ File split into 1 chunks.
✅ Knowledge base updated and saved.
📂 Ingesting file: /tmp/gradio/2e753e19037bfac12b4cfef9d06ded1c984bd699c2d97669576e097961227428/2.pdf
✅ File split into 154 chunks.
✅ Knowledge base updated and saved.
📂 Ingesting file: /tmp/gradio/7cd182f8f41b1b58b775ce1d685121caf257ad283e9e64b25b6369f3e9d49278/asjjsjsjs 1.txt
✅ File split into 1 chunks.
✅ Knowledge 

