In [29]:
!pip install -U requests==2.32.5 --quiet
!pip install -U "langgraph>=0.2.26"  --quiet
!pip install langchain langchain-community sentence-transformers faiss-cpu transformers accelerate torch --quiet
!python -m pip show langgraph


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you have langchain-core 1.0.4 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langgraph-prebuilt 1.0.2 requires langchain-core>=1.0.0, but you have langchain-core 0.3.79 which is incompatible.[0m[31m
[0mName: langgraph
Version: 1.0.2
Summary: Building stateful, multi-actor applications with LLMs
Home-page: 
Author: 
Author-email: 
License: 
Location: /usr/local/lib/python3.12/dist-packages
Requires: langchain-core, langgraph-checkpoint, langgraph-prebuilt, langgraph-sdk, pydantic, xxhash
Required-by: 


In [30]:
from typing import TypedDict
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langgraph.graph import StateGraph
from langgraph.checkpoint.memory import MemorySaver
from datetime import datetime
import pandas as pd



notes_db = [
    {"id": 1, "tags": ["AI", "LangGraph"], "text": "LangGraph lets you build agent workflows as graphs with memory and control flow."},
    {"id": 2, "tags": ["AI"], "text": "Retrieval-Augmented Generation (RAG) connects LLMs to external knowledge to reduce hallucinations."},
    {"id": 3, "tags": ["Python"], "text": "FastAPI is a fast, modern framework for building web APIs in Python."},
    {"id": 4, "tags": ["AI", "Embeddings"], "text": "Sentence-Transformers provide high-quality text embeddings for semantic search."}
]


In [31]:
# Initialize embedding model wrapper for LangChain
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

texts = [n["text"] for n in notes_db]
metas = [{"tags": n["tags"], "id": n["id"]} for n in notes_db]

# Build FAISS vector store (LangChain handles embeddings internally)
vectorstore = FAISS.from_texts(texts, embedding=embedding_model, metadatas=metas)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [32]:
from langgraph.graph import StateGraph
from typing import TypedDict

class RAGState(TypedDict):
    query: str
    context: str
    answer: str

builder = StateGraph(state_schema=RAGState)

def retriever_node(state):
    docs = retriever.get_relevant_documents(state["query"])
    state["context"] = "\n".join([d.page_content for d in docs])
    return state

def summarizer_node(state):
    prompt = f"Answer concisely using only this context:\n{state['context']}\n\nQuestion: {state['query']}\nAnswer:"
    response = llm(prompt, max_new_tokens=150)[0]["generated_text"]
    state["answer"] = response
    return state

builder.add_node("retriever", retriever_node)
builder.add_node("summarizer", summarizer_node)
builder.add_edge("retriever", "summarizer")
builder.set_entry_point("retriever")
builder.set_finish_point("summarizer")

checkpointer = MemorySaver()

app = builder.compile(checkpointer=checkpointer)


In [41]:
from datetime import datetime
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import json

# same embedding model you used for FAISS
semantic_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


metrics = []

def run_query(query, session_id="default"):
  """Run a quey through RAG graph with session memory + metrics tracking"""
  config = {"configurable":{"thread_id": session_id}}
  state = app.invoke({"query": query, "context": "", "answer": ""}, config=config)

  # Print human-readable response
  print(f"\nüß† Session: {session_id}")
  print(f"‚ùì Query: {query}")
  print(f"üí¨ Answer: {state['answer'][:250]}\n")

  # completeness = round(min(1.0, len(state["context"].split()) / (len(query.split()) * 5)), 2)
  # faithfulness = 1.0 if any(w in state["answer"].lower() for w in query.lower().split()[:2]) else 0.8
  # mismatch = 1-faithfulness

  query_emb = semantic_model.encode(query, convert_to_tensor=True)
  answer_emb = semantic_model.encode(state["answer"], convert_to_tensor=True)
  context_emb = semantic_model.encode(state["context"], convert_to_tensor=True)

  # Faithfulness ‚Üí how close the answer is to the retrieved context
  faithfulness = float(util.cos_sim(answer_emb, context_emb))
  # Retrieval relevance ‚Üí how close the retrieved context is to the user‚Äôs query
  retrieval_relevance = float(util.cos_sim(query_emb, context_emb))
  # Completeness ‚Üí estimated by context length ratio
  completeness = round(min(1.0, len(state["context"].split()) / (len(query.split()) * 5)), 2)

  print(f"Completeness: {completeness}")
  print(f"Faithfulness: {faithfulness}")
  print(f"Retrieval Relevence: {retrieval_relevance}")

  metrics.append({
      "session": session_id,
      "query": query,
      "completeness": completeness,
      "faithfulness": faithfulness,
      "Retrieval Relevence": retrieval_relevance,
      "timestamp": datetime.now().isoformat()
  })

  print(json.dumps(metrics))

  return state


In [42]:
run_query("What is LangGraph?", "user001")
run_query("How does RAG reduce hallucinations?", "user001")
run_query("Explain FastAPI briefly", "user001")



üß† Session: user001
‚ùì Query: What is LangGraph?
üí¨ Answer: LangGraph lets you build agent workflows as graphs with memory and control flow. Sentence-Transformers provide high-quality text embeddings for semantic search

Completeness: 1.0
Faithfulness: 0.996911883354187
Retrieval Relevence: 0.4405069947242737
[{"session": "user001", "query": "What is LangGraph?", "completeness": 1.0, "faithfulness": 0.996911883354187, "Retrieval Relevence": 0.4405069947242737, "timestamp": "2025-11-08T08:10:27.510445"}]

üß† Session: user001
‚ùì Query: How does RAG reduce hallucinations?
üí¨ Answer: connects LLMs to external knowledge

Completeness: 0.96
Faithfulness: 0.445770263671875
Retrieval Relevence: 0.44643956422805786
[{"session": "user001", "query": "What is LangGraph?", "completeness": 1.0, "faithfulness": 0.996911883354187, "Retrieval Relevence": 0.4405069947242737, "timestamp": "2025-11-08T08:10:27.510445"}, {"session": "user001", "query": "How does RAG reduce hallucinations?", "com

{'query': 'Explain FastAPI briefly',
 'context': 'FastAPI is a fast, modern framework for building web APIs in Python.\nLangGraph lets you build agent workflows as graphs with memory and control flow.',
 'answer': 'FastAPI is a fast, modern framework for building web APIs in Python. LangGraph lets you build agent workflows as graphs with memory and control flow.'}