## Loading the Dataset

In [1]:
import json

# Location of the FAQ dump; the content is parsed as JSON despite the .txt extension.
file_path = "HDFC_Faq.txt"

with open(file_path, "r", encoding="utf-8") as faq_file:
    data = json.loads(faq_file.read())

print("Number of items loaded:", len(data))

# Show the first entry as a quick check that items expose "question" and "answer".
first_item = data[0]
print(first_item["question"])
print(first_item["answer"])

Number of items loaded: 2236
How do I change my password?
After you have logged in, you can change your password using the "Change password" option in the top part of the screen. You have to type your current password and the new password you have chosen in their respective boxes.


In [2]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this provides the API key for the Gemini client created below — confirm.
load_dotenv()

True

## LLM Setup and Imports

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List, Dict, Any

# Settings for the single chat model shared by every LLM call in this notebook.
# NOTE(review): the same instance serves the classifier nodes, which compare its
# output against exact labels — confirm 0.7 is the intended temperature for them.
GEMINI_MODEL_NAME = "gemini-2.5-flash"
GEMINI_TEMPERATURE = 0.7

llm = ChatGoogleGenerativeAI(model=GEMINI_MODEL_NAME, temperature=GEMINI_TEMPERATURE)

print("✓ Gemini API initialized successfully")

  from .autonotebook import tqdm as notebook_tqdm


✓ Gemini API initialized successfully


## Vector Store for FAQ Retrieval

In [43]:
# Sentence-embedding model used to vectorise the FAQ questions (runs on CPU).
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Single store: only the question text is embedded, so matching is
# question-to-question; the answer travels along as metadata.
texts = [entry["question"] for entry in data]
metadata_list = [
    {
        "question": entry["question"],
        "answer": entry["answer"],
        "full_block": f"Question: {entry['question']}\nAnswer: {entry['answer']}",
    }
    for entry in data
]

vector_store = FAISS.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadata_list,
)

print("✓ FAISS store created (question-only embeddings, full Q&A metadata)")

✓ FAISS store created (question-only embeddings, full Q&A metadata)


In [44]:
def support_retriever(query: str, k: int = 5):
    """
    Look up the FAQ entries whose questions are closest to ``query``.

    This is retrieval only: nothing here judges validity or escalates. The
    calling workflow decides what to do with the scores.

    Args:
        query: The user's question as free text.
        k: Number of nearest FAQ entries to request from the vector store.

    Returns:
        A dict with the original ``query``, the requested ``k`` and a
        ``results`` list in the order the vector store returned it. Each
        result carries ``query``, ``question``, ``answer`` and ``full_block``
        (copied from the stored metadata) plus ``similarity`` (the relevance
        score as a float, higher = better).

    NOTE(review): the score is whatever
    ``similarity_search_with_relevance_scores`` reports. It is expected to lie
    in 0-1, but confirm that holds for the configured index/distance metric.
    """
    scored_docs = vector_store.similarity_search_with_relevance_scores(query, k=k)

    hits = [
        {
            "query": query,
            "question": doc.metadata["question"],
            "answer": doc.metadata["answer"],
            "full_block": doc.metadata["full_block"],
            "similarity": float(score),
        }
        for doc, score in scored_docs
    ]

    return {"query": query, "k": k, "results": hits}

In [45]:
# Smoke-test the retriever with a question that appears verbatim in the FAQ data
# (it is the first loaded item), so the top hit should be an exact match.
query = "How do I change my password?"
response = support_retriever(query)

# Prints the whole response dict (query, k and every retrieved hit).
print(response)


{'query': 'How do I change my password?', 'k': 5, 'results': [{'query': 'How do I change my password?', 'question': 'How do I change my password?', 'answer': 'After you have logged in, you can change your password using the "Change password" option in the top part of the screen. You have to type your current password and the new password you have chosen in their respective boxes.', 'full_block': 'Question: How do I change my password?\nAnswer: After you have logged in, you can change your password using the "Change password" option in the top part of the screen. You have to type your current password and the new password you have chosen in their respective boxes.', 'similarity': 1.0}, {'query': 'How do I change my password?', 'question': 'How do I reset my password?', 'answer': 'An immediate email will be sent to you after the card is issued to you for setting up your password. For resetting or changing the password you will have to login to www.timespoint.com and visit the settings ta

## LangGraph Implementation

In [46]:
from typing import TypedDict, Optional

class AgentState(TypedDict, total=False):
    """State dictionary passed between the graph nodes.

    ``total=False`` makes every key optional: each node returns only the keys
    it sets, and the initial state only needs ``user_query``.
    """

    # Raw question typed by the user.
    user_query: str
    # Full payload returned by support_retriever (query, k, results).
    retrieval: Dict[str, Any]
    # Number of FAQ entries requested in the most recent retrieval.
    k: int
    # Best-scoring retrieval hit, or None when the retriever returned nothing
    # (node_retrieve / node_expand_retrieval store None in that case).
    top_doc: Optional[Dict[str, Any]]
    # Outcome of the most recent node; the routing functions branch on it.
    status: str
    # User-facing reply text.
    message: str
    # Why the query was escalated to a human, when it was.
    escalation_reason: Optional[str]
    # True once the escalation email has been handed to send_email.
    email_sent: bool

def send_email(to: str, subject: str, body: str) -> None:
    """
    Stand-in email sender: writes the message to stdout instead of sending it.

    Swap in a real integration (SMTP, SendGrid, etc.) when one is available.

    Args:
        to: Recipient address.
        subject: Subject line.
        body: Message body.
    """
    rendered_lines = [
        f"[EMAIL] To: {to}",
        f"Subject: {subject}",
        "",
        f"{body}",
        "",
    ]
    print("\n".join(rendered_lines))


In [47]:
def node_retrieve(state: AgentState) -> AgentState:
    """
    Entry node: run the first FAQ retrieval for the user's query.

    Uses ``state["k"]`` when the caller supplied it and falls back to 3
    otherwise.

    Returns:
        State update with the full ``retrieval`` payload, the ``k`` that was
        used, and ``top_doc`` (first hit, or None when nothing was retrieved).
    """
    query = state["user_query"]
    k = state.get("k", 3)

    retrieval = support_retriever(query, k=k)
    hits = retrieval["results"]
    best_hit = hits[0] if hits else None

    return {"retrieval": retrieval, "k": k, "top_doc": best_hit}


In [48]:
# Prompt template used by node_check_validity to phrase a polite out-of-scope reply.
# Placeholder: {query} — the user's original question.
INVALID_PROMPT = """
You are a polite support assistant.

A user asked the following question:
"{query}"

You checked the FAQ knowledge base and found that this question does NOT match your support domain.

Your task:
- Politely inform the user the question is outside your support scope.
- Encourage them to rephrase OR contact human support.
- Keep the response short, friendly, and helpful.
"""

def node_check_validity(state: AgentState) -> AgentState:
    """
    Decide whether the query falls inside the FAQ support domain.

    The query is treated as out-of-domain when retrieval produced no top
    document, or when the top document's similarity is below 0.35. In that
    case the LLM is asked (via ``INVALID_PROMPT``) to write a polite
    out-of-scope reply.

    Returns:
        ``{"status": "valid"}`` when the query is in-domain, otherwise
        ``{"status": "invalid", "message": <reply text>}``.
    """
    top = state.get("top_doc")

    # No results at all, or a best match under the hard threshold.
    if not top or top["similarity"] < 0.35:
        reply = llm.invoke(INVALID_PROMPT.format(query=state["user_query"]))
        return {
            "status": "invalid",
            # llm.invoke returns a message object; keep only its text so that
            # "message" is a plain string like the one node_answer produces.
            "message": reply.content.strip(),
        }

    # Valid domain → continue normal flow
    return {"status": "valid"}


In [49]:
def node_check_docs_enough(state: AgentState) -> AgentState:
    """
    Decide whether the current retrieval is good enough to answer from.

    A wider search is requested when the top hit's similarity is below 0.60
    and fewer than 10 entries have been retrieved so far.

    Returns:
        ``{"status": "need_more_docs"}`` to request expansion, otherwise
        ``{"status": "docs_ok"}``.
    """
    top_similarity = state["top_doc"]["similarity"]
    retrieved_so_far = state["k"]

    should_expand = top_similarity < 0.60 and retrieved_so_far < 10
    return {"status": "need_more_docs" if should_expand else "docs_ok"}

def node_expand_retrieval(state: AgentState) -> AgentState:
    """
    Re-run retrieval for the same query with a wider net (k = 10).

    Returns:
        State update with the new ``retrieval`` payload, ``k`` set to 10,
        ``top_doc`` (first hit, or None when nothing was retrieved) and
        ``status`` set to ``"docs_expanded"``.
    """
    expanded_k = 10

    retrieval = support_retriever(state["user_query"], k=expanded_k)
    hits = retrieval["results"]

    return {
        "retrieval": retrieval,
        "k": expanded_k,
        "top_doc": hits[0] if hits else None,
        "status": "docs_expanded",
    }


In [50]:
# Prompt template used by node_check_relevance to classify a retrieved FAQ
# against the user's query.
# Placeholders: {query} — the user's question; {faq} — the retrieved FAQ text.
# Note the two allowed labels are shown wrapped in double quotes, so a reply
# may come back quoted as well.
RELEVANCE_PROMPT = """
You are a relevance classifier for a banking support system.

Check if the following FAQ answer is truly relevant to the user's query.

User Query:
"{query}"

FAQ:
"{faq}"

Respond with ONLY one of these:
"relevant"
"not_relevant"
"""

def node_check_relevance(state: AgentState) -> AgentState:
    """
    Ask the LLM whether the top retrieved FAQ actually addresses the query.

    Returns:
        ``{"status": "relevant"}`` unless the model answers ``not_relevant``,
        in which case ``{"status": "irrelevant", "message": <clarification
        request>}`` is returned.

    NOTE(review): this node is defined but not registered in the graph built
    later in this notebook — confirm whether that is intentional.
    """
    query = state["user_query"]
    faq_block = state["top_doc"]["full_block"]

    reply = llm.invoke(RELEVANCE_PROMPT.format(query=query, faq=faq_block))

    # llm.invoke returns a message object, so the text lives in .content.
    # The prompt shows the labels in double quotes and models often add
    # punctuation or markdown, so strip that decoration before comparing.
    judgment = reply.content.strip(" \t\r\n\"'`*.!").lower()

    if judgment == "not_relevant":
        # CALM fallback, not critical
        return {
            "status": "irrelevant",
            "message": "I found related information but it does not seem to answer your specific question. Could you clarify a bit more?"
        }

    return {
        "status": "relevant"
    }


In [51]:
# Prompt template used by node_check_critical to classify a query as a
# high-risk banking scenario or not.
# Placeholders: {query} — the user's question; {faq} — the retrieved FAQ text.
# Expected reply: the single word "critical" or "non_critical".
# NOTE(review): "Retrievd" below is a typo for "Retrieved"; it is left as-is
# here because the prompt text is sent to the model at runtime.
CRITICALITY_PROMPT = """
You are a BANKING RISK classifier.

Your ONLY job is to decide if the user query describes a high-risk scenario.

High-risk scenarios include:
- stolen card
- hacked account
- unauthorized transactions
- fraud or scam attempts
- suspicious money movement
- emergency financial danger
- security breach

Non-risk (NOT critical) examples:
- questions about loans
- questions about repayment
- password reset
- login issues
- interest rates
- how-to questions
- general banking queries
- account information

User Query:
"{query}"

FAQ Retrievd:
"{faq}"

Respond with ONLY one word:
critical
non_critical
"""



def node_check_critical(state: AgentState) -> AgentState:
    """
    Ask the LLM whether the query describes a high-risk banking scenario.

    Returns:
        ``{"status": "critical", "escalation_reason": ...}`` when the model
        answers ``critical``; ``{"status": "non_critical"}`` for any other
        reply.
    """
    query = state["user_query"]
    faq_block = state["top_doc"]["full_block"]

    ai_msg = llm.invoke(
        CRITICALITY_PROMPT.format(
            query=query,
            faq=faq_block
        )
    )

    # Extract the text and drop surrounding whitespace, quotes, markdown and
    # trailing punctuation. Without this, replies such as "Critical." or
    # '"critical"' fail the exact comparison and a genuinely risky query
    # would silently skip escalation.
    judgment = ai_msg.content.strip(" \t\r\n\"'`*.!").lower()

    if judgment == "critical":
        return {
            "status": "critical",
            "escalation_reason": "Detected real risk or emergency."
        }

    return {
        "status": "non_critical"
    }


In [52]:
def node_send_email(state: AgentState) -> AgentState:
    """
    Escalate the query to a human by handing a summary email to ``send_email``.

    The email body contains the user's query, the top matched FAQ (or a
    "No FAQ match" marker) and the escalation reason stored in the state.

    Returns:
        State update with ``email_sent`` set to True and a user-facing
        ``message`` saying the issue was forwarded.
    """
    top = state.get("top_doc")
    query = state["user_query"]

    faq_context = top["full_block"] if top else "No FAQ match"
    reason = state.get("escalation_reason", "Not provided")

    # Sections are separated by one blank line.
    body = "\n\n".join(
        [
            f"User query:\n{query}",
            f"Top matched FAQ (for context):\n\n{faq_context}",
            f"Escalation reason: {reason}",
        ]
    )

    send_email("rahul@gmail.com", "Escalated Support Issue from Assistant", body)

    return {
        "email_sent": True,
        "message": "This looks critical. I’ve forwarded your issue to a human support specialist.",
    }


In [53]:
# Prompt template used by node_answer to produce the final, FAQ-grounded reply.
# Placeholders: {query} — the user's question; {faq} — the retrieved FAQ text
# the answer must be based on.
ANSWER_PROMPT = """
You are a support Q/A assistant for HDFC bank strictly answering based on the retrieved FAQ content.

User Query:
"{query}"

Relevant FAQ Information:
"{faq}"

Your task:
- Provide a clear, friendly answer to the user.
- ONLY use information found inside the FAQ above.
- DO NOT guess, assume, or create new facts.
- If the FAQ does not fully answer the question, say so politely.

Now write the final answer:
"""

def node_answer(state: AgentState) -> AgentState:
    """
    Produce the final user-facing answer, grounded in the retrieved FAQ text.

    Normally only the top FAQ entry is given to the LLM. When the search was
    widened (``status == "docs_expanded"``), every expanded hit is supplied
    instead. Otherwise the expansion has no effect on the answer, because the
    best-scoring entry stays the same when more entries are requested.

    Returns:
        ``{"status": "answered", "message": <answer text>}``, or
        ``{"status": "invalid", "message": ...}`` when there is no top
        document to answer from.
    """
    top = state.get("top_doc")

    if not top:
        return {
            "status": "invalid",
            "message": "I couldn't find a suitable answer to your question."
        }

    query = state["user_query"]

    faq_blocks = [top["full_block"]]
    if state.get("status") == "docs_expanded":
        expanded_hits = (state.get("retrieval") or {}).get("results") or []
        # Fall back to the top entry alone if the expanded payload is empty.
        faq_blocks = [hit["full_block"] for hit in expanded_hits] or faq_blocks

    # Entries are separated by a blank line inside the prompt's FAQ section.
    faq_context = "\n\n".join(faq_blocks)

    # Create grounded response
    final_answer = llm.invoke(
        ANSWER_PROMPT.format(
            query=query,
            faq=faq_context
        )
    ).content.strip()

    return {
        "status": "answered",
        "message": final_answer
    }



In [54]:
from langgraph.graph import StateGraph, END

graph = StateGraph(AgentState)

# Register each node under the name the edges and routing tables refer to.
for node_name, node_fn in (
    ("retrieve", node_retrieve),
    ("check_validity", node_check_validity),
    ("check_critical", node_check_critical),
    ("check_docs_enough", node_check_docs_enough),
    ("expand_retrieval", node_expand_retrieval),
    ("send_email", node_send_email),
    ("answer", node_answer),
):
    graph.add_node(node_name, node_fn)

# Every run starts with retrieval, which always feeds the validity check.
graph.set_entry_point("retrieve")
graph.add_edge("retrieve", "check_validity")

# validity routing
def route_validity(state):
    """Return the routing key "invalid" when status is "invalid", else "valid"."""
    if state.get("status") == "invalid":
        return "invalid"
    return "valid"

# Out-of-domain queries end the run immediately (their reply is already in the
# state); in-domain queries go to the criticality check next.
graph.add_conditional_edges(
    "check_validity",
    route_validity,
    {
        "invalid": END,
        "valid": "check_critical",      # criticality is checked before document sufficiency
    }
)

# critical routing
def route_critical(state):
    """Return the routing key "critical" when status is "critical", else "non_critical"."""
    if state.get("status") == "critical":
        return "critical"
    return "non_critical"

# Critical queries are escalated by email; everything else continues to the
# document-sufficiency check.
graph.add_conditional_edges(
    "check_critical",
    route_critical,
    {
        "critical": "send_email",       # emergency → escalate
        "non_critical": "check_docs_enough",  # continue
    }
)

# docs enough routing
def route_docs(state):
    """Return the routing key "expand" when status is "need_more_docs", else "ok"."""
    return "expand" if state.get("status") == "need_more_docs" else "ok"

# check_docs_enough either widens the search or goes straight to answering.
graph.add_conditional_edges(
    "check_docs_enough",
    route_docs,
    {"expand": "expand_retrieval", "ok": "answer"},
)

# Unconditional hops: an expanded search feeds the answer node, and both
# terminal nodes end the run.
for source_node, target_node in (
    ("expand_retrieval", "answer"),
    ("send_email", END),
    ("answer", END),
):
    graph.add_edge(source_node, target_node)

agent = graph.compile()


In [55]:
# Render the diagram before opening the file: opening in "wb" mode truncates
# it, so rendering inside the with-block would leave an empty PNG behind if
# the render failed.
workflow_png = agent.get_graph().draw_mermaid_png()

with open("agent_workflow.png", "wb") as f:
    f.write(workflow_png)

In [56]:
# One-off run of the compiled agent on a sample question.
initial_state = dict(user_query="How do I take a new loan")

final_state = agent.invoke(initial_state)

# Header and reply go on separate lines.
print("\n=== AGENT RESPONSE ===", final_state["message"], sep="\n")



=== AGENT RESPONSE ===
I understand you're looking to know how to take a new loan. However, the information I have available only details how to repay an existing home loan.

Therefore, I cannot provide details on how to take a new loan based on the provided FAQ.


In [57]:
# Interactive chat loop: each line typed by the user is run through the agent
# until the user types 'exit' (or input is closed / interrupted).
print("Chatbot ready! Type 'exit' to quit.\n")

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt: leave as if 'exit' was typed
        # instead of ending the cell with a traceback.
        print("Goodbye!")
        break

    cleaned_input = user_input.strip()

    # Exit condition
    if cleaned_input.lower() == "exit":
        print("Goodbye!")
        break

    # Nothing to answer: don't spend a retrieval and LLM calls on a blank line.
    if not cleaned_input:
        continue

    # Build state for agent
    state = {
        "user_query": user_input,
        "k": 3
    }

    # Run LangGraph agent
    final_state = agent.invoke(state)

    # Print response
    print("\nHuman:", user_input)
    print("Agent:", final_state.get("message", "(no message)"))
    print()


Chatbot ready! Type 'exit' to quit.


Human: what's the home loan policy
Agent: The provided FAQ information states that "No policy loans are available for this product." It does not contain details regarding the bank's general home loan policy.


Human: i have forgotten my password
Agent: If you have forgotten your password, you can re-generate it using our Instant IPIN facility online.


Human: who do i reset my password
Agent: To reset your password, please log in to www.timespoint.com and visit the settings tab. If you have forgotten your password, you can use the "Forget Password" option, and a password reset email will be sent to you.


Human: How do I change my password
Agent: To change your password, first log in to your account. After logging in, you can use the "Change password" option, which is located in the top part of the screen. You will need to type your current password and then enter the new password you have chosen in their respective boxes.

Goodbye!
