In [None]:
from typing import TypedDict
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import(
    PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader, ArxivLoader
)
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import StateGraph, END

In [None]:
import os
from dotenv import load_dotenv

# Read the .env file into the process environment before looking up the key.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Only report whether a key was found; the key itself is never printed.
print("‚úÖ API key loaded:", bool(api_key))

True

In [None]:
# Create the working folders for uploads and the vector store if they are missing.
for _folder in ("./notebooks/uploads", "./notebooks/vectorstore"):
    os.makedirs(_folder, exist_ok=True)

In [None]:
# One chat model per pipeline role. All use the same model name and differ only in
# sampling temperature; the temperatures are listed in the same order as the names.
orchestrator, plan_llm, draft_llm, critic_llm, final_llm = (
    ChatOpenAI(model="gpt-4o-mini", temperature=temp)
    for temp in (0, 0.3, 0.7, 0, 0.5)
)

In [None]:
# Embedding model shared by indexing and querying.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Persist the collection in the folder this notebook creates for it
# ("./notebooks/vectorstore"). The previous value "./vectorstore" pointed at a
# second, separate directory and left the created one unused.
# NOTE(review): data already persisted under "./vectorstore" is not migrated.
vectordb = Chroma(
    persist_directory="./notebooks/vectorstore",
    embedding_function=embeddings,
)

In [None]:
class AgentState(TypedDict, total=False):
    """Shared state dictionary passed between the graph nodes.

    Declared with ``total=False`` because a run is started from ``{"query": ...}``
    alone and every other key is filled in by a node as the pipeline advances;
    several nodes accordingly read keys with ``state.get``.
    """

    query: str  # the user's question; the only key supplied when a run starts
    intent: str  # routing label written by route_query
    context: str  # retrieved passages joined into a single string
    plan: str  # outline produced by plan_writer
    draft: str  # essay produced by draft_writer
    critique: str  # feedback produced by critic_agent
    final: str  # text shown to the user at the end of a run
    research_mode: str  # declared, but not read or written by any node in this notebook

In [None]:
# Guardrails
FORBIDDEN_TOPICS = ["politics", "religion", "violence", "illegal", "personal"]

def is_out_of_scope(query: str) -> bool:
    """Return True when the query mentions a forbidden topic as a whole word.

    The query is lower-cased and split on every non-alphanumeric character, and
    the resulting words are compared with ``FORBIDDEN_TOPICS``. Matching whole
    words (rather than substrings) keeps legitimate queries such as
    "personalized medicine" or "geopolitics" from being blocked just because
    they contain a forbidden term. Inflected forms such as "illegally" are
    therefore not matched either.

    Args:
        query: Raw user question; ``None`` or an empty string is never out of scope.

    Returns:
        True if at least one forbidden topic appears as a word in the query.
    """
    lowered = (query or "").lower()
    # Replace punctuation with spaces so "politics?" and "religion," still match.
    words = set("".join(ch if ch.isalnum() else " " for ch in lowered).split())
    return not words.isdisjoint(FORBIDDEN_TOPICS)

# System prompts, one per agent role. safe_invoke selects one of these through the
# role keys of its system_map ("orchestrator", "analyzer", "planner", "writer",
# "critic", "finalizer").
# NOTE(review): several characters inside these prompts (the arrows after the intent
# labels, the dash in the planner prompt, the word-count range in the writer prompt)
# look mis-encoded in this file; confirm the file encoding before relying on them.

# Intent classifier: asks for a single-word answer (general / research / blocked).
SYSTEM_ORCHESTRATOR = """
You are the Orchestrator Agent.
Classify the user's query as:
- 'general' ‚Üí for simple factual questions.
- 'research' ‚Üí for analytical or academic topics.
- 'blocked' ‚Üí if unrelated to factual or academic work.
Respond ONLY with one word: general, research, or blocked.
"""

# Research summariser: objective summary with short source indicators.
SYSTEM_ANALYZER = """
You are the Analyzer Agent.
Search for factual and academic information, summarize objectively,
and include short source indicators like (source: arxiv, local doc, or web).
"""

# Outline writer: produces the plan only, not the essay.
SYSTEM_PLANNER = """
You are the Plan Writer Agent.
Create a clear academic outline (Introduction, Body, Conclusion)
based on the given question and retrieved context.
Do NOT write the essay ‚Äî only the plan.
"""

# Essay writer: expands a plan into a full draft.
SYSTEM_WRITER = """
You are the Draft Writer Agent.
Expand the plan into a coherent, well-structured essay (400‚Äì600 words)
with academic tone, logical flow, and factual precision.
"""

# Reviewer: short improvement suggestions for a draft.
SYSTEM_CRITIC = """
You are the Critic Agent.
Review the essay for clarity, coherence, structure, and evidence quality.
Offer concise suggestions for improvement (under 150 words).
"""

# Final polish: grammar, tone and formatting of the essay.
SYSTEM_FINALIZER = """
You are the Final Drafter Agent.
Polish the essay for grammar, tone, and academic conciseness.
Ensure clear formatting, strong argumentation, and no redundancy.
"""

def safe_invoke(llm, query: str, system_role: str, context: str = "") -> str:
    """Build one combined prompt and return the model's reply text.

    Args:
        llm: Object exposing ``invoke(prompt)`` whose result has a ``content`` attribute.
        query: Text placed in the "User query" section of the prompt.
        system_role: Either a known role key (matched case-insensitively against
            "orchestrator", "analyzer", "planner", "writer", "critic",
            "finalizer") or free-form text that is used verbatim as the system
            instructions when no key matches.
        context: Optional text placed in the "Context" section.

    Returns:
        The ``content`` of the response returned by ``llm.invoke``.
    """
    known_roles = {
        "orchestrator": SYSTEM_ORCHESTRATOR,
        "analyzer": SYSTEM_ANALYZER,
        "planner": SYSTEM_PLANNER,
        "writer": SYSTEM_WRITER,
        "critic": SYSTEM_CRITIC,
        "finalizer": SYSTEM_FINALIZER,
    }

    role_key = system_role.lower()
    # Unknown roles fall through unchanged and act as an ad-hoc system prompt.
    instructions = known_roles[role_key] if role_key in known_roles else system_role

    # Assembled piece by piece; the resulting text (including the trailing
    # newline plus four spaces) is the same single string sent to the model.
    message = (
        "\nSystem Role:\n"
        f"{instructions}\n"
        "\nContext:\n"
        f"{context}\n"
        "\nUser query:\n"
        f"{query}\n"
        "\nRespond academically, factually, and concisely.\n"
        "    "
    )
    response = llm.invoke(message)
    return response.content

In [None]:
def load_uploaded_documents(upload_folder="./notebooks/uploads"):
    """Load every supported file in ``upload_folder`` and index it in ``vectordb``.

    Supported extensions are ``.pdf``, ``.txt`` and ``.docx``, matched
    case-insensitively both when selecting files and when choosing the loader,
    so ``REPORT.PDF`` is read with the PDF loader rather than falling through
    to the Word loader.

    Args:
        upload_folder: Directory to scan (not recursive).

    Returns:
        List of all loaded documents, as returned by the loaders (before
        metadata filtering).

    Raises:
        OSError: If ``upload_folder`` cannot be listed.
    """
    print("üìÇ Loading uploaded documents...")
    loaders = {
        ".pdf": PyPDFLoader,
        ".txt": TextLoader,
        ".docx": UnstructuredWordDocumentLoader,
    }
    docs = []
    # sorted() keeps the indexing order independent of the OS directory order.
    for file in sorted(os.listdir(upload_folder)):
        lowered = file.lower()
        loader_cls = next(
            (cls for ext, cls in loaders.items() if lowered.endswith(ext)), None
        )
        if loader_cls is None:
            continue  # unsupported file type
        loaded = loader_cls(os.path.join(upload_folder, file)).load()
        indexable = filter_complex_metadata(loaded)
        # Skip the store call when a file yielded nothing to index.
        if indexable:
            vectordb.add_documents(indexable)
        docs.extend(loaded)
    print(f"‚úÖ {len(docs)} uploaded docs indexed.")
    return docs


def route_query(state: AgentState):
    """Classify the query and store the routing intent in ``state``.

    A keyword guardrail (``is_out_of_scope``) runs first and blocks the query
    without calling the model. Otherwise the orchestrator model is asked for a
    one-word label, which is normalised (trimmed, lower-cased, surrounding
    quotes and full stops removed) and stored in ``state["intent"]``.

    Whenever the intent ends up as "blocked", by either route, ``state["final"]``
    is set to the refusal message so the run still has something to show.

    Args:
        state: Graph state; ``query`` is read, ``intent`` and possibly ``final``
            are written.

    Returns:
        The same ``state`` object, updated in place.
    """
    blocked_message = "‚ö†Ô∏è This question is out of academic scope."
    query = state.get("query", "")

    if is_out_of_scope(query):
        state["intent"] = "blocked"
        state["final"] = blocked_message
        # Return only on this branch; otherwise the classification below
        # would never run.
        return state

    raw_intent = safe_invoke(orchestrator, query, "orchestrator")
    # Tolerate answers such as "Research." or "'general'".
    state["intent"] = raw_intent.strip().lower().strip(".'\"`")
    if state["intent"] == "blocked":
        state["final"] = blocked_message
    print(f"üß≠ Intent classified as: {state['intent']}")
    return state



def general_answer(state: AgentState):
    """Answer the query directly, with no retrieval or drafting steps.

    ``state["query"]`` is sent to ``plan_llm`` through ``safe_invoke`` with the
    free-form role text "General academic explanation", and the reply is stored
    in ``state["final"]``.

    Returns:
        The same ``state`` object, updated in place.
    """
    answer = safe_invoke(plan_llm, state["query"], "General academic explanation")
    state["final"] = answer
    print("üí¨ General answer complete.")
    return state


def analyzer_collect(state: AgentState):
    """Fetch arXiv papers for the query and build a retrieval context from them.

    Up to three papers are loaded, split into 1000-character chunks with a
    200-character overlap, and added to ``vectordb``. The five best matches for
    the query are then retrieved from ``vectordb`` and joined into
    ``state["context"]``.

    When no papers come back, ``context`` is set to a fixed notice. Any exception
    raised along the way is caught and its message is stored in ``context``
    instead of propagating.

    Returns:
        The same ``state`` object, updated in place.
    """
    question = state["query"]
    print("üîç Searching arXiv for relevant preprints...")
    try:
        papers = ArxivLoader(query=question, load_max_docs=3).load()
        if papers:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            pieces = text_splitter.split_documents(papers)
            vectordb.add_documents(pieces)
            hits = vectordb.as_retriever(search_kwargs={"k": 5}).invoke(question)
            state["context"] = "\n\n".join(hit.page_content for hit in hits)
            print(f"üìö {len(pieces)} arXiv chunks added.")
        else:
            state["context"] = "No academic sources found."
    except Exception as err:
        state["context"] = f"Error loading Arxiv: {err}"
    return state

def local_doc_search(state: AgentState):
    """Retrieve the five best matches for the query from ``vectordb``.

    The page contents of the matches are joined with blank lines and stored in
    ``state["context"]``.

    Returns:
        The same ``state`` object, updated in place.
    """
    question = state["query"]
    print("üìÅ Searching uploaded docs...")
    matches = vectordb.as_retriever(search_kwargs={"k": 5}).invoke(question)
    passages = [match.page_content for match in matches]
    state["context"] = "\n\n".join(passages)
    print(f"üìò Retrieved {len(matches)} local docs.")
    return state


def list_documents(state: AgentState):
    """Write a numbered preview of the best-matching stored documents to ``final``.

    Up to five matches for the query are retrieved from ``vectordb``; each entry
    shows the first 300 characters of the match followed by an ellipsis. When
    nothing is retrieved, ``final`` is set to a "not found" notice instead.

    Returns:
        The same ``state`` object, updated in place.
    """
    question = state.get("query", "")
    print("üìë Retrieving top documents...")
    hits = vectordb.as_retriever(search_kwargs={"k": 5}).invoke(question)

    if hits:
        entries = []
        for rank, hit in enumerate(hits, start=1):
            entries.append(f"üìÑ {rank}. {hit.page_content[:300]}...")
        listing = "\n\n".join(entries)
        state["final"] = f"Top 5 relevant documents:\n\n{listing}"
        print("üìÑ Generated top 5 relevant document list.")
    else:
        state["final"] = "‚ö†Ô∏è No relevant documents found."
    return state

def plan_writer(state: AgentState):
    """Produce an essay outline from the query and any retrieved context.

    The request goes to ``plan_llm`` through ``safe_invoke`` and the reply is
    stored in ``state["plan"]``. A missing ``context`` is treated as empty.

    Returns:
        The same ``state`` object, updated in place.
    """
    query, context = state["query"], state.get("context", "")
    # "planner" is the safe_invoke role key that selects SYSTEM_PLANNER. The earlier
    # free-form role text matched no key, so the planner prompt was never applied.
    state["plan"] = safe_invoke(plan_llm, query, "planner", context)
    print("üìù Plan written.")
    return state


def draft_writer(state: AgentState):
    """Expand the outline in ``state["plan"]`` into an essay draft.

    The plan goes to ``draft_llm`` through ``safe_invoke`` and the reply is
    stored in ``state["draft"]``.

    Returns:
        The same ``state`` object, updated in place.
    """
    plan = state["plan"]
    # "writer" is the safe_invoke role key that selects SYSTEM_WRITER. The earlier
    # free-form role text matched no key, so the writer prompt was never applied.
    state["draft"] = safe_invoke(draft_llm, plan, "writer")
    print("‚úçÔ∏è Draft complete.")
    return state


def critic_agent(state: AgentState):
    """Review the draft in ``state["draft"]`` and store the feedback.

    The draft goes to ``critic_llm`` through ``safe_invoke`` and the reply is
    stored in ``state["critique"]``.

    Returns:
        The same ``state`` object, updated in place.
    """
    draft = state["draft"]
    # "critic" is the safe_invoke role key that selects SYSTEM_CRITIC. The earlier
    # free-form role text matched no key, so the critic prompt was never applied.
    state["critique"] = safe_invoke(critic_llm, draft, "critic")
    print("üßæ Critique done.")
    return state


def final_drafter(state: AgentState):
    """Polish the draft into the final essay, taking the critique into account.

    The draft goes to ``final_llm`` through ``safe_invoke``; when a critique is
    present in the state it is supplied as context, so the feedback produced by
    the critic step is actually used. The result is stored in ``state["final"]``
    and also added to ``vectordb``.

    Returns:
        The same ``state`` object, updated in place.
    """
    draft = state["draft"]
    critique = state.get("critique", "")
    feedback = f"Critic feedback to address:\n{critique}" if critique else ""
    # "finalizer" is the safe_invoke role key that selects SYSTEM_FINALIZER. The earlier
    # free-form role text matched no key, so the finalizer prompt was never applied.
    state["final"] = safe_invoke(final_llm, draft, "finalizer", feedback)
    # NOTE(review): generated essays go into the same store that retrieval reads
    # from, so they can come back as "context" for later queries. Confirm intended.
    vectordb.add_texts([state["final"]])
    print("‚úÖ Final draft ready.")
    return state

In [None]:
# --- Graph Definition ---
graph = StateGraph(AgentState)

# Register every pipeline step as a node named after the function implementing it.
for _step in (
    route_query,
    general_answer,
    analyzer_collect,
    local_doc_search,
    list_documents,
    plan_writer,
    draft_writer,
    critic_agent,
    final_drafter,
):
    graph.add_node(_step.__name__, _step)

def route_decision(state):
    """Choose the node that follows ``route_query``.

    Order of checks:
      1. intent "blocked"  -> END
      2. intent "general"  -> "general_answer"
      3. the query contains "list", "show" or "papers" as a whole word
                           -> "list_documents"
      4. the uploads folder holds at least one .pdf/.txt/.docx file
                           -> "local_doc_search"
      5. otherwise         -> "analyzer_collect"

    Keywords are matched as whole words, so "realistic" or "shown" no longer
    trigger the listing route. Only files with an indexable extension count as
    uploads, and a missing uploads folder is treated as empty instead of
    raising.

    Args:
        state: Graph state; ``intent`` and ``query`` are read with defaults of "".

    Returns:
        A node name, or ``END``.
    """
    upload_dir = "./notebooks/uploads"
    indexable_suffixes = (".pdf", ".txt", ".docx")
    listing_keywords = {"list", "show", "papers"}

    intent = state.get("intent", "")
    query = state.get("query", "").lower()

    if intent == "blocked":
        return END
    if intent == "general":
        return "general_answer"

    # Split on anything non-alphanumeric so punctuation does not hide a keyword.
    words = set("".join(ch if ch.isalnum() else " " for ch in query).split())
    if words & listing_keywords:
        return "list_documents"

    has_uploads = os.path.isdir(upload_dir) and any(
        name.lower().endswith(indexable_suffixes) for name in os.listdir(upload_dir)
    )
    return "local_doc_search" if has_uploads else "analyzer_collect"

# route_decision returns END for blocked queries, so END must be one of the keys
# of the path map; without it that return value has no destination to resolve to.
graph.add_conditional_edges(
    "route_query",
    route_decision,
    {
        "general_answer": "general_answer",
        "list_documents": "list_documents",
        "local_doc_search": "local_doc_search",
        "analyzer_collect": "analyzer_collect",
        END: END,
    },
)


# Both retrieval nodes feed list_documents, which leads into the linear
# plan -> draft -> critique -> final pipeline.
# NOTE(review): list_documents writes its listing to state["final"], and
# final_drafter later overwrites that key, so the listing is never what a run
# returns. Confirm whether list_documents should end the run instead.
graph.add_edge("local_doc_search", "list_documents")
graph.add_edge("analyzer_collect", "list_documents")
graph.add_edge("list_documents", "plan_writer")
graph.add_edge("plan_writer", "draft_writer")
graph.add_edge("draft_writer", "critic_agent")
graph.add_edge("critic_agent", "final_drafter")
graph.add_edge("final_drafter", END)
graph.add_edge("general_answer", END)

graph.set_entry_point("route_query")
app = graph.compile()
app


In [None]:
if __name__ == "__main__":
    # Index whatever is in the uploads folder before taking a question.
    load_uploaded_documents("./notebooks/uploads")
    query = input("üîç Enter your academic question: ").strip()
    if query:
        state = {"query": query}
        final_state = app.invoke(state)
        print("\nüéì --- FINAL OUTPUT ---")
        # Read defensively: a run may finish without any node having set "final".
        print(final_state.get("final", "No final answer was produced."))
    else:
        # A blank question would otherwise be sent through the whole pipeline.
        print("No question entered; nothing to do.")

In [None]:
# Demo 1: a short factual question.
state = dict(query="Who developed the first generative AI model?")
final_state = app.invoke(state)
print("\n--- RESULT 1: General Question ---")
print(final_state["final"])

In [None]:
# Demo 2: a research-style request.
state = dict(query="5 Researches about GenAI and Agentic AI")
final_state = app.invoke(state)
print("\n--- RESULT 2: Research Question ---")
print(final_state["final"])

In [None]:
# Demo 3: a request phrased as "list ... papers".
state = dict(query="List 5 papers about large language models and reinforcement learning")
final_state = app.invoke(state)
print("\n--- RESULT 3: List Documents ---")
print(final_state["final"])