#### **langgraph_agentic_rag_with_history_aware**

In [None]:
pip install langchain_community langchain langchain_huggingface langchain-core langgraph langchain_google_genai pypdf faiss-cpu

In [None]:
import os
from typing import TypedDict, List, Literal
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph, START, END
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

# Configuration
# PDF that is indexed when the caller supplies no (or an empty) pdf_path.
# NOTE(review): the "/content/" prefix looks like a Colab-style filesystem — confirm
# before running in another environment.
DEFAULT_PDF_PATH = "/content/50 LLM Interview Questions.pdf"
# Directory the FAISS index is saved to and loaded from.
FAISS_INDEX_PATH = "./faiss_index"

# Enhanced State Definition
class AgentState(TypedDict):
    """State dictionary handed to, and updated by, the nodes of the graph."""
    # Conversation so far; the nodes read it and return it with the reply appended.
    messages: list
    # Routing selector: exactly "rag" selects the RAG pipeline, any other value
    # selects the direct LLM node.
    mode: str
    # PDF to index for retrieval; a falsy value falls back to DEFAULT_PDF_PATH.
    pdf_path: str
    # Text content of the model's reply.
    response: str
    # Documents retrieved for the reply (the direct LLM node returns an empty list).
    source_documents: List[Document]

# Initialize LLM with streaming enabled.
# Resolve the Gemini API key first: use an `api_key` variable if an earlier
# cell already defined one, otherwise fall back to the GOOGLE_API_KEY
# environment variable. Without this the cell fails with a bare NameError,
# because `api_key` is not defined anywhere before this point.
api_key = globals().get("api_key") or os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "No Gemini API key found: define `api_key` in an earlier cell or set "
        "the GOOGLE_API_KEY environment variable."
    )

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",
    temperature=0.3,
    api_key=api_key,
    streaming=True
)

# Initialize embeddings globally
# One shared instance is used both when a FAISS index is built and when a saved
# index is loaded, so both paths use the same embedding model.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Node 1: Direct LLM
def direct_llm_node(state: AgentState):
    """Answer the conversation straight from the LLM, without retrieval.

    Args:
        state: Current graph state; only ``state["messages"]`` is read and it
            is passed to the model unchanged.

    Returns:
        A state update holding the reply text, the message list extended by
        the reply as an ``AIMessage``, and an empty ``source_documents`` list.
    """
    history = state["messages"]
    reply = llm.invoke(history)
    reply_text = reply.content

    # A new list is built; the incoming message list is not mutated.
    updated_history = history + [AIMessage(content=reply_text)]
    return dict(
        response=reply_text,
        messages=updated_history,
        source_documents=[],
    )

# Helpers for Node 2 (private to the RAG pipeline)
def _index_path_for(pdf_path: str) -> str:
    """Return the on-disk FAISS index directory that belongs to *pdf_path*.

    The default PDF keeps using FAISS_INDEX_PATH, so an index saved earlier is
    still found. Every other PDF gets its own directory, derived from a hash
    of its absolute path, so an index built from one document is never reused
    to answer questions about another.
    """
    import hashlib  # local import: only needed for non-default PDFs

    absolute = os.path.abspath(pdf_path)
    if absolute == os.path.abspath(DEFAULT_PDF_PATH):
        return FAISS_INDEX_PATH
    digest = hashlib.sha256(absolute.encode("utf-8")).hexdigest()[:12]
    return f"{FAISS_INDEX_PATH}_{digest}"


def _load_or_build_vectorstore(pdf_path: str):
    """Load the FAISS index for *pdf_path*, or build it from the PDF and save it.

    Raises:
        ValueError: If the PDF yields no text chunks to index.
    """
    index_path = _index_path_for(pdf_path)

    if os.path.exists(index_path):
        print("Loading existing FAISS index...")
        # SECURITY: load_local deserializes pickled data. This is acceptable
        # only because the index directory is written by this code; never
        # point it at an index obtained from an untrusted source.
        return FAISS.load_local(
            index_path,
            embeddings,
            allow_dangerous_deserialization=True
        )

    print("Creating new FAISS index...")
    documents = PyPDFLoader(pdf_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=150
    )
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Fail with a clear message instead of an obscure error inside FAISS.
        raise ValueError(f"No text could be extracted from PDF: {pdf_path}")

    vectorstore = FAISS.from_documents(chunks, embeddings)
    vectorstore.save_local(index_path)
    print(f"FAISS index saved to {index_path}")
    return vectorstore


def _format_history(messages) -> str:
    """Render earlier Human/AI messages as 'User:' / 'Assistant:' lines."""
    lines = []
    for msg in messages:
        if isinstance(msg, HumanMessage):
            lines.append(f"User: {msg.content}")
        elif isinstance(msg, AIMessage):
            lines.append(f"Assistant: {msg.content}")
    return "\n".join(lines) if lines else "No previous conversation."


# Node 2: RAG Pipeline with conversation history
def rag_pipeline_node(state: AgentState):
    """Answer the latest question from retrieved PDF chunks plus chat history.

    Args:
        state: Current graph state. ``state["messages"]`` must be non-empty;
            its last entry is the current question. ``state["pdf_path"]``
            selects the PDF and falls back to DEFAULT_PDF_PATH when missing
            or empty.

    Returns:
        A state update holding the reply text, the message list extended by
        the reply as an ``AIMessage``, and the retrieved documents.

    Raises:
        ValueError: If ``messages`` is empty, or the PDF yields no text.
    """
    messages = state["messages"]
    if not messages:
        raise ValueError("rag_pipeline_node requires at least one message (the current query)")
    current_query = messages[-1].content

    pdf_path = state.get("pdf_path") or DEFAULT_PDF_PATH
    vectorstore = _load_or_build_vectorstore(pdf_path)

    # Retrieval
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    relevant_docs = retriever.invoke(current_query)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    # Everything except the current query is rendered as prior conversation.
    history_text = _format_history(messages[:-1])

    # Enhanced prompt with conversation history
    prompt = f"""Based on the conversation history and context below, answer the current question.

Conversation History:
{history_text}

Context from documents:
{context}

Current Question: {current_query}

Answer (be specific and helpful):"""

    response = llm.invoke(prompt)

    return {
        "response": response.content,
        "messages": messages + [AIMessage(content=response.content)],
        "source_documents": relevant_docs
    }

# Routing Function
def route_query(state: AgentState) -> Literal["direct_llm", "rag_pipeline"]:
    """Choose the node that handles this request.

    Returns ``"rag_pipeline"`` only when ``state["mode"]`` is exactly
    ``"rag"``; every other mode value is sent to ``"direct_llm"``.
    """
    return "rag_pipeline" if state["mode"] == "rag" else "direct_llm"

# Build Graph
# START fans out to exactly one node (chosen by route_query), and each node
# leads straight to END.
workflow = StateGraph(AgentState)

_NODES = {
    "direct_llm": direct_llm_node,
    "rag_pipeline": rag_pipeline_node,
}
for _node_name, _node_fn in _NODES.items():
    workflow.add_node(_node_name, _node_fn)

# route_query returns a node name, so each key maps onto the node of that name.
workflow.add_conditional_edges(
    START,
    route_query,
    {name: name for name in _NODES},
)

for _node_name in _NODES:
    workflow.add_edge(_node_name, END)

# Compile
app = workflow.compile()

# ==================== CHAT FUNCTION ====================
# Maintain conversation history across multiple turns
# Module-level list of HumanMessage / AIMessage objects: chat() appends to it
# and reset_conversation() rebinds the name to a fresh empty list.
conversation_history = []

def chat(query: str, mode: str = "rag", pdf_path: str = ""):
    """
    Run one conversational turn and record it in the module-level history.

    The user message is appended to ``conversation_history`` before the graph
    runs. If the graph fails, or the run is interrupted, that message is
    removed again so the history never holds an unanswered question.

    Args:
        query: User's question
        mode: "rag" or "direct"
        pdf_path: Path to PDF (empty for default)

    Returns:
        dict with response and source_documents

    Raises:
        Whatever ``app.invoke`` raises; the history is rolled back first.
    """
    # Add user message to history
    conversation_history.append(HumanMessage(content=query))

    try:
        # Invoke with a copy of the full history, so the graph cannot mutate
        # the global list and the rollback below always removes this turn.
        result = app.invoke({
            "messages": list(conversation_history),
            "mode": mode,
            "pdf_path": pdf_path,
            "response": "",
            "source_documents": []
        })
        reply = AIMessage(content=result["response"])
    except BaseException:
        # BaseException so an interrupted notebook cell (KeyboardInterrupt)
        # also rolls back; the error is always re-raised.
        conversation_history.pop()
        raise

    # Add AI response to history
    conversation_history.append(reply)

    return result

def reset_conversation():
    """Start a fresh conversation.

    Rebinds the module-level ``conversation_history`` to a new empty list
    (the previous list object is left untouched) and prints a confirmation.
    """
    global conversation_history
    fresh_history: list = []
    conversation_history = fresh_history
    print(" Conversation history cleared!")

In [None]:
# Example 1 - Direct LLM
# Standalone question answered by the model alone (mode="direct").

result0 = chat("What is Generative AI", mode="direct")

# The answer is written in one call; the text on stdout is the same as
# printing it character by character.
print("\nResponse: ", end="")
print(result0["response"], end="", flush=True)
print("\n")


In [None]:
# Example 2 - Direct LLM - follow-up question
# The question has no explicit subject, so a sensible answer needs the
# previous turn, which chat() sends along as conversation history.

result00 = chat("How it was different from tradition machine learning?", mode="direct")

# Single write; stdout text is the same as a per-character loop.
print("\nResponse: ", end="")
print(result00["response"], end="", flush=True)
print("\n")

#### **RAG**

In [None]:
# Example 1: First RAG query
banner = "=" * 50
print(banner)
print("Example 1: RAG Mode (First query)")
print(banner)

result1 = chat("What is Beam Search?", mode="rag")

# Single write; stdout text is the same as a per-character loop.
print("\nResponse: ", end="")
print(result1["response"], end="", flush=True)

# List each retrieved chunk: page, a 250-character preview, and source.
print("\n\n--- Source Documents ---")
retrieved = result1["source_documents"]
for idx, doc in enumerate(retrieved, start=1):
    print(f"\n Source {idx}:")
    print(f"   Page: {doc.metadata.get('page', 'N/A')}")
    print(f"   Content: {doc.page_content[:250]}...")
    print(f"   Source: {doc.metadata.get('source', 'N/A')}")

In [None]:
# Example 2: Follow-up question (uses conversation history!)
# The question has no explicit subject; the RAG prompt includes the earlier
# turns kept by chat(), which is what lets the model resolve it.
print("\n" + "=" * 50)
print("Example 2: Follow-up Question")
print("=" * 50)

result2 = chat("How it is different from other models?", mode="rag")

# Written in one call rather than one print/flush per character; the text on
# stdout is the same.
print("\nResponse: ", end="")
print(result2["response"], end="", flush=True)
print("\n")

# List each retrieved chunk: page, a 250-character preview, and source.
print("\n--- Source Documents ---")
for idx, doc in enumerate(result2["source_documents"], 1):
    # The label previously contained mojibake (the UTF-8 bytes of U+1F4C4 read
    # as cp1252). The escape restores the intended character and keeps this
    # line ASCII-only so it cannot be mangled again.
    print(f"\n\N{PAGE FACING UP} Source {idx}:")
    print(f"   Page: {doc.metadata.get('page', 'N/A')}")
    print(f"   Content: {doc.page_content[:250]}...")
    print(f"   Source: {doc.metadata.get('source', 'N/A')}")
