In [11]:
test2 = PyPDFLoader('sample.pdf')

In [14]:
import os
from dotenv import load_dotenv

# load env file if you use .env (optional)
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import HumanMessage

# Load & split
loader = TextLoader("notes.txt")
docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

# Embeddings + vectorstore
# <-- no key in code if you use env var
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

# Retriever & QA chain
retriever = vector_store.as_retriever()

# Pass key here only if you didn't set env var
llm = ChatOpenAI(temperature=0)  

qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

def self_rag_query(question):
    print("First attempt without retrieval")
    # simple way to get answer text (langchain versions differ; 'predict' is simpler)
    try:
        first_answer = llm.predict(f"Q: {question}\nA:")
    except AttributeError:
        # fallback if predict not available in your version
        raw = llm.invoke([HumanMessage(content=f"Q: {question}\nA:")])
        first_answer = raw.content if hasattr(raw, "content") else str(raw)

    print("First answer:", first_answer)

    if "I'm not sure" in first_answer or len(first_answer.strip()) < 30:
        print("Low confidence. Retrieving context and trying again...")
        improved_answer = qa.run(question)
        return improved_answer
    else:
        return first_answer

response = self_rag_query("What is the capital of France?")
print("\nFinal answer:", response)


First attempt without retrieval


  first_answer = llm.predict(f"Q: {question}\nA:")


First answer: The capital of France is Paris.

Final answer: The capital of France is Paris.


In [None]:
import os
from dotenv import load_dotenv

# load env file if you use .env (optional)
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import HumanMessage
from langchain.docstore.document import Document

# Load & split
loader = TextLoader("notes.txt")
docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

# Embeddings + vectorstore
# <-- no key in code if you use env var
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

# Retriever & QA chain
retriever = vector_store.as_retriever()

# Pass key here only if you didn't set env var
llm = ChatOpenAI(temperature=0)  

qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
def corrective_rag(question):
	first_guess =llm.predict (f"Try to answer : {question}")
	docs = [Document(page_content ="The largest cat is a tiger.")]
	db =FAISS.from_documents(docs, embeddings)
	
	qa_chain = RetrievalQA.from_chain_type(llm=llm , retriever =db.as_retriever())
	correction = qa_chain.run(question)
	
	return f"Original answer :{first_guess}\n Corrected using documents: {correction}"
	
	print(corrective_rag("what is the largest cat?"))

In [7]:
import os
import json
import re
from dotenv import load_dotenv

# load env file if you use .env (optional)
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.docstore.document import Document

# ---------------------------
# Config / API key handling
# ---------------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY not found. Please set it in your environment or in a .env file.\n\n"
        "Example .env content:\n"
        "  OPENAI_API_KEY=sk-your_api_key_here\n"
    )

# ---------------------------
# Corrective RAG System Prompt (JSON output requested)
# Note: all literal braces are doubled so .format() won't treat them as placeholders,
# except {user_query} and {retrieved_context} which we intend to replace.
# ---------------------------
CORRECTIVE_RAG_SYSTEM_PROMPT = """
You are a Corrective RAG system that evaluates retrieved context quality and corrects retrieval when necessary.

Primary Workflow:

Step 1: Context Evaluation
EVALUATE_CONTEXT: Rate the following retrieved context for the given query.

Query: {user_query}
Retrieved Context: {retrieved_context}

Return a JSON object EXACTLY with these fields:
{{
  "relevance": <float 0.0-1.0>,
  "completeness": <float 0.0-1.0>,
  "accuracy": <float 0.0-1.0>,
  "specificity": <float 0.0-1.0>,
  "overall": "<EXCELLENT|GOOD|FAIR|POOR>",
  "justification": "<one-sentence justification>",
  "decision": {{
     "action": "<RETRIEVE_AGAIN|PROCEED_WITH_ANSWER>",
     "new_query": "<refined_query or empty string>",
     "reasoning": "<short reason>",
     "confidence": "<high|medium|low or empty>"
  }}
}}

Important:
- All numeric scores must be between 0.0 and 1.0.
- "overall" must be one of: EXCELLENT, GOOD, FAIR, POOR.
- Fill "new_query" only when action == "RETRIEVE_AGAIN" (otherwise empty string).
- Return ONLY the JSON object (no extra commentary).
"""

# ---------------------------
# Setup: load docs, embeddings, vectorstore
# ---------------------------
loader = TextLoader("notes.txt")  # make sure this file exists
docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vector_store = FAISS.from_documents(chunks, embeddings)
retriever = vector_store.as_retriever()

llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)

# ---------------------------
# Helper: extract JSON substring
# ---------------------------
def extract_json_substring(s: str) -> str:
    brace_stack = []
    start_idx = None
    for i, ch in enumerate(s):
        if ch == "{":
            if start_idx is None:
                start_idx = i
            brace_stack.append(i)
        elif ch == "}":
            if brace_stack:
                brace_stack.pop()
                if not brace_stack and start_idx is not None:
                    return s[start_idx:i + 1]
    m = re.search(r"\{.*\}", s, flags=re.S)
    return m.group(0) if m else ""

# ---------------------------
# Defensive evaluation function
# ---------------------------
def evaluate_retrieved_context_json(user_query: str, retrieved_context: str) -> dict:
    prompt = CORRECTIVE_RAG_SYSTEM_PROMPT.format(
        user_query=user_query.replace('"', "'"),
        retrieved_context=retrieved_context.replace('"', "'")
    ) + "\n\nReturn the JSON now."

    try:
        raw = llm.predict(prompt)
    except Exception as e:
        return {
            "relevance": 0.0,
            "completeness": 0.0,
            "accuracy": 0.0,
            "specificity": 0.0,
            "overall": "POOR",
            "justification": f"LLM call failed: {str(e)}",
            "raw_evaluation": "",
            "decision": {"action": "RETRIEVE_AGAIN", "new_query": "", "reasoning": "LLM call error", "confidence": ""}
        }

    parsed = None
    try:
        parsed = json.loads(raw)
    except Exception:
        candidate = extract_json_substring(raw)
        if candidate:
            try:
                parsed = json.loads(candidate)
            except Exception:
                parsed = None

    if not parsed or not isinstance(parsed, dict):
        return {
            "relevance": 0.0,
            "completeness": 0.0,
            "accuracy": 0.0,
            "specificity": 0.0,
            "overall": "POOR",
            "justification": "Evaluation JSON parse failed. See raw_evaluation for details.",
            "raw_evaluation": raw,
            "decision": {"action": "RETRIEVE_AGAIN", "new_query": "", "reasoning": "Parsing failure", "confidence": ""}
        }

    safe = {}
    for key in ("relevance", "completeness", "accuracy", "specificity"):
        try:
            val = float(parsed.get(key, 0.0))
            safe[key] = max(0.0, min(1.0, val))
        except Exception:
            safe[key] = 0.0

    overall = str(parsed.get("overall", "")).upper()
    safe["overall"] = overall if overall in ("EXCELLENT", "GOOD", "FAIR", "POOR") else "POOR"
    safe["justification"] = str(parsed.get("justification", "")) or ""
    decision = parsed.get("decision", {}) or {}
    safe["decision"] = {
        "action": str(decision.get("action", "RETRIEVE_AGAIN")).upper(),
        "new_query": str(decision.get("new_query", "") or ""),
        "reasoning": str(decision.get("reasoning", "") or ""),
        "confidence": str(decision.get("confidence", "") or "")
    }
    safe["raw_evaluation"] = parsed.get("raw_evaluation", "") or raw
    return safe

# ---------------------------
# Refined-query helper
# ---------------------------
def generate_refined_query(user_query: str, retrieved_context: str) -> str:
    refine_prompt = (
        "You are a query-refinement assistant. Produce a short keyword-focused query (one line) "
        "that will retrieve more relevant documents for the user's intent.\n\n"
        f"User query: {user_query}\n\nRetrieved context:\n{retrieved_context}\n\nRefined query:"
    )
    try:
        out = llm.predict(refine_prompt)
        return out.splitlines()[0].strip()
    except Exception:
        return user_query

# ---------------------------
# Helper to run QA chain and extract result + sources
# ---------------------------
def run_qa_chain(qa_chain, question: str):
    """
    Invoke the chain safely and return tuple (answer_text, source_documents_list)
    """
    # Newer LangChain chains accept a dict input and return a dict of outputs
    outputs = qa_chain.invoke({"query": question})
    # fallback to calling chain as callable if invoke not present
    if outputs is None:
        outputs = qa_chain({"query": question})

    # outputs is a dict; get the main result
    answer = outputs.get("result") or outputs.get("output_text") or outputs.get("answer") or ""
    sources = outputs.get("source_documents") or outputs.get("source_documents", []) or []
    return answer, sources

# ---------------------------
# Main corrective_rag function (fixed to use run_qa_chain)
# ---------------------------
def corrective_rag(question: str, debug=False) -> str:
    hits = retriever.get_relevant_documents(question)
    if not hits:
        return "No documents retrieved to evaluate."

    top_context = "\n\n---\n\n".join([d.page_content for d in hits[:1]])
    eval_json = evaluate_retrieved_context_json(question, top_context)

    if debug and eval_json.get("raw_evaluation"):
        print("DEBUG - raw_evaluation:\n", eval_json.get("raw_evaluation"))

    overall = (eval_json.get("overall") or "POOR").upper()
    relevance = eval_json.get("relevance", 0.0)
    completeness = eval_json.get("completeness", 0.0)
    accuracy = eval_json.get("accuracy", 0.0)
    specificity = eval_json.get("specificity", 0.0)
    justification = eval_json.get("justification", "")

    eval_summary_lines = [
        f"🔍 Context Quality: {overall}",
        f"Relevance Score: {relevance}",
        f"Completeness Score: {completeness}",
        f"Accuracy Score: {accuracy}",
        f"Specificity Score: {specificity}",
        f"Justification: {justification}",
    ]
    eval_block = "\n".join(eval_summary_lines)

    decision = eval_json.get("decision", {}) or {}
    action = (decision.get("action") or "RETRIEVE_AGAIN").upper()
    refined_query_from_decision = decision.get("new_query") or ""
    reasoning = decision.get("reasoning") or ""

    if action == "RETRIEVE_AGAIN" or overall in ("POOR", "FAIR"):
        refined_query = refined_query_from_decision or generate_refined_query(question, top_context)
        reasoning = reasoning or "Retrieved context rated low — re-retrieving with refined query."

        new_hits = retriever.get_relevant_documents(refined_query)
        if not new_hits:
            return (
                eval_block
                + "\n\nACTION: RETRIEVE_AGAIN\nNEW_QUERY: "
                + refined_query
                + "\nREASONING: "
                + reasoning
                + "\n\nNo documents found for the refined query."
            )

        db = FAISS.from_documents(new_hits, embeddings)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever(), return_source_documents=True)

        # Use the safe runner
        final_answer, sources = run_qa_chain(qa_chain, question)

        # Optionally format sources for display
        sources_text = ""
        if sources:
            sources_text = "\n\nSOURCES:\n" + "\n".join(
                [f"- (len={len(getattr(s, 'page_content', ''))}) {getattr(s, 'metadata', {})}" for s in sources]
            )

        return (
            eval_block
            + "\n\nACTION: RETRIEVE_AGAIN\nNEW_QUERY: "
            + refined_query
            + "\nREASONING: "
            + reasoning
            + "\n\nCorrected Answer (from re-retrieved docs):\n"
            + final_answer
            + sources_text
        )

    else:
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
        final_answer, sources = run_qa_chain(qa_chain, question)

        sources_text = ""
        if sources:
            sources_text = "\n\nSOURCES:\n" + "\n".join(
                [f"- (len={len(getattr(s, 'page_content', ''))}) {getattr(s, 'metadata', {})}" for s in sources]
            )

        confidence = decision.get("confidence") or "medium"
        return (
            eval_block
            + f"\n\nACTION: PROCEED_WITH_ANSWER\nCONFIDENCE: {confidence}\n\nAnswer:\n"
            + final_answer
            + sources_text
        )

# ---------------------------
# Example usage
# ---------------------------
if __name__ == "__main__":
    q = "what is the largest cat?"
    print("Question:", q)
    print("Running corrective RAG...\n")
    result = corrective_rag(q, debug=True)
    print(result)


Question: what is the largest cat?
Running corrective RAG...

DEBUG - raw_evaluation:
 {
  "relevance": 0.0,
  "completeness": 0.0,
  "accuracy": 0.0,
  "specificity": 0.0,
  "overall": "POOR",
  "justification": "The retrieved context is completely irrelevant to the query.",
  "decision": {
     "action": "RETRIEVE_AGAIN",
     "new_query": "",
     "reasoning": "The current retrieved context does not provide any information related to the query.",
     "confidence": "low"
  }
}
🔍 Context Quality: POOR
Relevance Score: 0.0
Completeness Score: 0.0
Accuracy Score: 0.0
Specificity Score: 0.0
Justification: The retrieved context is completely irrelevant to the query.

ACTION: RETRIEVE_AGAIN
NEW_QUERY: largest feline species size characteristics
REASONING: The current retrieved context does not provide any information related to the query.

Corrected Answer (from re-retrieved docs):
The largest cat is the Siberian tiger, also known as the Amur tiger. They can weigh up to 660 pounds and gro

In [10]:
import os
import json
import re
import math
from dotenv import load_dotenv

# load env file if you use .env (optional)
load_dotenv()

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# ---------------------------
# Config / API key handling
# ---------------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY not found. Please set it in your environment or in a .env file.\n\n"
        "Example .env content:\n"
        "  OPENAI_API_KEY=sk-your_api_key_here\n"
    )

# ---------------------------
# System prompt for corrective evaluation (kept as reference)
# Note: braces doubled so .format() won't treat them as placeholders.
# ---------------------------
CORRECTIVE_RAG_SYSTEM_PROMPT = """
You are a Corrective RAG system that evaluates retrieved context quality and corrects retrieval when necessary.

Primary Workflow:

Step 1: Context Evaluation
EVALUATE_CONTEXT: Rate the following retrieved context for the given query.

Query: {user_query}
Retrieved Context: {retrieved_context}

Return a JSON object EXACTLY with these fields:
{{
  "relevance": <float 0.0-1.0>,
  "completeness": <float 0.0-1.0>,
  "accuracy": <float 0.0-1.0>,
  "specificity": <float 0.0-1.0>,
  "overall": "<EXCELLENT|GOOD|FAIR|POOR>",
  "justification": "<one-sentence justification>",
  "decision": {{
     "action": "<RETRIEVE_AGAIN|PROCEED_WITH_ANSWER>",
     "new_query": "<refined_query or empty string>",
     "reasoning": "<short reason>",
     "confidence": "<high|medium|low or empty>"
  }}
}}

Important:
- All numeric scores must be between 0.0 and 1.0.
- "overall" must be one of: EXCELLENT, GOOD, FAIR, POOR.
- Fill "new_query" only when action == "RETRIEVE_AGAIN" (otherwise empty string).
- Return ONLY the JSON object (no extra commentary).
"""

# ---------------------------
# Load docs and build vectorstore
# ---------------------------
loader = TextLoader("notes.txt")  # ensure notes.txt exists
docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

# embeddings + FAISS
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vector_store = FAISS.from_documents(chunks, embeddings)

# High-k retriever so we can rerank locally
retriever = vector_store.as_retriever(search_kwargs={"k": 10})

# LLM (deterministic)
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)

# ---------------------------
# Utility helpers
# ---------------------------
def extract_json_substring(s: str) -> str:
    brace_stack = []
    start_idx = None
    for i, ch in enumerate(s):
        if ch == "{":
            if start_idx is None:
                start_idx = i
            brace_stack.append(i)
        elif ch == "}":
            if brace_stack:
                brace_stack.pop()
                if not brace_stack and start_idx is not None:
                    return s[start_idx:i + 1]
    m = re.search(r"\{.*\}", s, flags=re.S)
    return m.group(0) if m else ""

def _cosine_sim(a, b):
    # expects iterable of floats
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a)) if a else 0.0
    norm_b = math.sqrt(sum(y * y for y in b)) if b else 0.0
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)

def run_qa_chain(qa_chain, question: str):
    """
    Safely invoke a QA chain that returns multiple outputs.
    Returns tuple: (answer_text, source_documents_list)
    """
    outputs = None
    try:
        outputs = qa_chain.invoke({"query": question})
    except Exception:
        try:
            outputs = qa_chain({"query": question})
        except Exception:
            outputs = None

    if not outputs:
        return "", []

    # keys differ across versions; choose best available
    answer = outputs.get("result") or outputs.get("output_text") or outputs.get("answer") or outputs.get("result_text") or ""
    sources = outputs.get("source_documents") or outputs.get("source_document") or outputs.get("sources") or []
    return answer, sources

# ---------------------------
# Reranker: use embeddings cosine similarity
# ---------------------------
def rerank_docs_by_similarity(query: str, docs: list, top_n: int):
    """
    Return top_n documents from docs sorted by cosine similarity to the query.
    """
    # embed query
    try:
        q_vec = embeddings.embed_query(query)
    except Exception:
        q_vec = embeddings.embed_documents([query])[0]

    texts = [d.page_content for d in docs]
    try:
        doc_vecs = embeddings.embed_documents(texts)
    except Exception:
        # fallback zero vectors
        doc_vecs = [[0.0] * len(q_vec) for _ in texts]

    sims = [_cosine_sim(q_vec, dv) for dv in doc_vecs]
    scored = list(zip(sims, docs))
    scored.sort(key=lambda x: x[0], reverse=True)
    return [doc for _, doc in scored[:top_n]]

# ---------------------------
# Query refinement helper
# ---------------------------
def generate_refined_query(user_query: str, retrieved_context: str) -> str:
    """
    Short keyword style refined query
    """
    refine_prompt = (
        "You are a query-refinement assistant. Produce a short keyword-focused query (one line) "
        "that will retrieve more relevant documents for the user's intent.\n\n"
        f"User query: {user_query}\n\nRetrieved context:\n{retrieved_context}\n\nRefined query:"
    )
    try:
        out = llm.predict(refine_prompt)
        return out.splitlines()[0].strip()
    except Exception:
        return user_query

# ---------------------------
# Verifier prompt (returns JSON) - escape braces by doubling
# ---------------------------
VERIFY_PROMPT = """
You are an objective verifier. Given a model answer and a set of source snippets, check each factual claim in the answer and determine whether it is:
- SUPPORTED (directly backed by at least one source snippet),
- PARTIALLY_SUPPORTED (some support but not precise),
- UNSUPPORTED (no support),
- CONTRADICTED (source contradicts claim).

Return a JSON object exactly like:
{{
  "verdict": "<OK|ISSUES>",
  "claims": [
    {{"claim": "<short text of claim>", "status": "<SUPPORTED|PARTIALLY_SUPPORTED|UNSUPPORTED|CONTRADICTED>", "evidence": "<which snippet index(es) or empty>"}}
  ],
  "summary": "<one-sentence summary>"
}}

Model answer:
\"\"\"{answer}\"\"\"

Source snippets (numbered): 
{snippets}
Return only the JSON object.
"""

def verify_answer_against_sources(answer: str, snippets: list) -> dict:
    """
    Ask the LLM to verify the answer against the snippets; returns parsed JSON with claims statuses.
    """
    processed = []
    for i, s in enumerate(snippets):
        # truncate snippet and remove double quotes safely
        clean = s[:800].replace('"', '')
        processed.append(f"{i+1}. {clean}")
    numbered = "\n".join(processed)

    prompt = VERIFY_PROMPT.format(answer=answer.replace('"', "'"), snippets=numbered)
    try:
        raw = llm.predict(prompt)
    except Exception as e:
        return {
            "verdict": "ISSUES",
            "claims": [],
            "summary": f"Verification failed: {e}",
            "raw": str(e)
        }

    # parse JSON robustly
    try:
        parsed = json.loads(raw)
    except Exception:
        candidate = extract_json_substring(raw)
        try:
            parsed = json.loads(candidate) if candidate else {
                "verdict": "ISSUES",
                "claims": [],
                "summary": "Unable to parse verifier output",
                "raw": raw
            }
        except Exception:
            parsed = {
                "verdict": "ISSUES",
                "claims": [],
                "summary": "Unable to parse verifier output",
                "raw": raw
            }
    return parsed

# ---------------------------
# High-accuracy answer routine (rerank + QA + verify + optional retry)
# ---------------------------
TOP_K_FOR_ANSWER = 5
MAX_VERIFICATION_ATTEMPTS = 2

def answer_with_verification(question: str, max_attempts: int = MAX_VERIFICATION_ATTEMPTS, debug: bool = False):
    """
    Retrieve, rerank, answer, verify and optionally re-retrieve & re-answer if verification fails.
    Returns (final_answer, verification_result, used_snippets)
    """
    # initial retrieval
    candidates = retriever.get_relevant_documents(question)
    if not candidates:
        return "No documents retrieved", {"verdict": "ISSUES", "claims": [], "summary": "No candidates"}, []

    # rerank locally and pick top_k
    top_docs = rerank_docs_by_similarity(question, candidates, top_n=TOP_K_FOR_ANSWER)
    snippets = [d.page_content for d in top_docs]
    context_for_prompt = "\n\n---\n\n".join(snippets[:TOP_K_FOR_ANSWER])

    # build a temporary FAISS for the top docs and run QA
    db = FAISS.from_documents(top_docs, embeddings)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever(), return_source_documents=True)
    final_answer, sources = run_qa_chain(qa_chain, question)

    verification = verify_answer_against_sources(final_answer, snippets)
    if debug:
        print("INITIAL VERIFICATION:", json.dumps(verification, indent=2))

    attempts = 1
    while attempts < max_attempts and verification.get("verdict", "ISSUES") != "OK":
        # refine query using verification summary
        refine_hint = verification.get("summary") or "find supporting evidence for claims"
        refined_q = generate_refined_query(question + " " + refine_hint, context_for_prompt)
        if debug:
            print(f"Verification found issues. Attempt {attempts+1} refined query ->", refined_q)
        candidates = retriever.get_relevant_documents(refined_q)
        if not candidates:
            break
        top_docs = rerank_docs_by_similarity(refined_q, candidates, top_n=TOP_K_FOR_ANSWER)
        snippets = [d.page_content for d in top_docs]
        db = FAISS.from_documents(top_docs, embeddings)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever(), return_source_documents=True)
        final_answer, sources = run_qa_chain(qa_chain, question)
        verification = verify_answer_against_sources(final_answer, snippets)
        if debug:
            print("RE-VERIFICATION:", json.dumps(verification, indent=2))
        attempts += 1

    return final_answer, verification, snippets

# ---------------------------
# Example usage
# ---------------------------
if __name__ == "__main__":
    q = "what is the largest cat?"
    print("Question:", q)
    print("Running high-accuracy pipeline...\n")
    ans, verification, used_snippets = answer_with_verification(q, max_attempts=2, debug=True)

    print("\nFINAL ANSWER:\n", ans)
    print("\nVERIFICATION:\n", json.dumps(verification, indent=2))
    if used_snippets:
        print("\nSOURCES (first 300 chars each):")
        for i, s in enumerate(used_snippets):
            # preprocess to avoid backslashes in f-string expression
            snippet_preview = s[:300].replace("\n", " ")
            print(f"{i+1}. {snippet_preview}\n")


Question: what is the largest cat?
Running high-accuracy pipeline...

INITIAL VERIFICATION: {
  "verdict": "ISSUES",
  "claims": [
    {
      "claim": "The largest cat is the Siberian tiger, also known as the Amur tiger.",
      "status": "UNSUPPORTED",
      "evidence": ""
    },
    {
      "claim": "They can weigh up to 660 pounds and grow up to 11 feet in length, including their tail.",
      "status": "UNSUPPORTED",
      "evidence": ""
    }
  ],
  "summary": "The model answer makes unsupported claims about the Siberian tiger."
}
Verification found issues. Attempt 2 refined query -> What is the largest species of cat in the world?
RE-VERIFICATION: {
  "verdict": "ISSUES",
  "claims": [
    {
      "claim": "The largest cat is the Siberian tiger, also known as the Amur tiger.",
      "status": "UNSUPPORTED",
      "evidence": ""
    },
    {
      "claim": "They can weigh up to 660 pounds and grow up to 11 feet in length, including their tail.",
      "status": "UNSUPPORTED",
   

In [None]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.docstore.document import Document

docs =[
       Document(page_content ="The largest cat is a tiger."),
       Document(page_content ="The largest dog is a great dane."),
       Document(page_content ="The largest animal is a blue whale.")
       ]

embeddings = OpenAIEmbeddings()
db =FAISS.from_documents(docs, embeddings)

fusion_rag_chain = RetrievalQA.from_chain_type(llm=llm , retriever =db.as_retriever(),return_source_documents=False)

print(fusion_rag_chain.run("what is the largest cat?"))


In [12]:
pip install python-dotenv


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
from dotenv import load_dotenv
load_dotenv()


True

In [7]:
pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp311-cp311-win_amd64.whl (18.2 MB)
     ---------------------------------------- 18.2/18.2 MB 4.6 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.11.0-cp311-cp311-win_amd64.whl (884 kB)
     -------------------------------------- 884.4/884.4 kB 6.2 MB/s eta 0:00:00
Installing collected packages: tiktoken
Successfully installed tiktoken-0.11.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
pip install openai

Collecting openai
  Downloading openai-1.108.0-py3-none-any.whl (948 kB)
     -------------------------------------- 948.1/948.1 kB 6.7 MB/s eta 0:00:00
Collecting jiter<1,>=0.4.0
  Downloading jiter-0.11.0-cp311-cp311-win_amd64.whl (204 kB)
     -------------------------------------- 204.3/204.3 kB 6.1 MB/s eta 0:00:00
Installing collected packages: jiter, openai
Successfully installed jiter-0.11.0 openai-1.108.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
test2.load()

[Document(metadata={'producer': 'Mac OS X 10.5.4 Quartz PDFContext', 'creator': 'Pages', 'creationdate': "D:20080701052447Z00'00'", 'title': 'sample', 'author': 'Philip Hutchison', 'moddate': "D:20080701052447Z00'00'", 'source': 'sample.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Sample PDFThis is a simple PDF ﬁle. Fun fun fun.\nLorem ipsum dolor sit amet, consectetuer adipiscing elit. Phasellus facilisis odio sed mi. \nCurabitur suscipit. Nullam vel nisi. Etiam semper ipsum ut lectus. Proin aliquam, erat eget \npharetra commodo, eros mi condimentum quam, sed commodo justo quam ut velit. \nInteger a erat. Cras laoreet ligula cursus enim. Aenean scelerisque velit et tellus. \nVestibulum dictum aliquet sem. Nulla facilisi. Vestibulum accumsan ante vitae elit. Nulla \nerat dolor, blandit in, rutrum quis, semper pulvinar, enim. Nullam varius congue risus. \nVivamus sollicitudin, metus ut interdum eleifend, nisi tellus pellentesque elit, tristique \naccumsan eros qu

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") 

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [3]:
texts =["The quick brows fox jumps over the lazy dog", 
"Langchain makes it easy to work with LLM",
"chroma is another vector store similar to FAISS"]
vectorstore = Chroma.from_texts(texts,embedding = embeddings)

NameError: name 'Chroma' is not defined

In [1]:
pip install chromadb

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
pip install langchain-huggingface sentence-transformers


Collecting sentence-transformers
  Downloading sentence_transformers-5.1.0-py3-none-any.whl (483 kB)
     -------------------------------------- 483.4/483.4 kB 7.5 MB/s eta 0:00:00
Collecting transformers<5.0.0,>=4.41.0
  Downloading transformers-4.56.1-py3-none-any.whl (11.6 MB)
     ---------------------------------------- 11.6/11.6 MB 5.5 MB/s eta 0:00:00
Collecting torch>=1.11.0
  Downloading torch-2.8.0-cp311-cp311-win_amd64.whl (241.4 MB)
     -------------------------------------- 241.4/241.4 MB 2.8 MB/s eta 0:00:00
Collecting sympy>=1.13.3
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
     ---------------------------------------- 6.3/6.3 MB 7.2 MB/s eta 0:00:00
Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
     ---------------------------------------- 2.0/2.0 MB 8.7 MB/s eta 0:00:00
Collecting regex!=2019.12.17
  Downloading regex-2025.9.1-cp311-cp311-win_amd64.whl (276 kB)
     -------------------------------------- 276.2/276.2 kB 2.4 MB/s eta


[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install langchain-huggingface

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl (27 kB)
Collecting tokenizers>=0.19.1
  Downloading tokenizers-0.22.0-cp39-abi3-win_amd64.whl (2.7 MB)
     ---------------------------------------- 2.7/2.7 MB 5.2 MB/s eta 0:00:00
Collecting huggingface-hub>=0.33.4
  Downloading huggingface_hub-0.34.6-py3-none-any.whl (562 kB)
     -------------------------------------- 562.6/562.6 kB 5.1 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.19.1-py3-none-any.whl (15 kB)
Collecting fsspec>=2023.5.0
  Downloading fsspec-2025.9.0-py3-none-any.whl (199 kB)
     ------------------------------------- 199.3/199.3 kB 11.8 MB/s eta 0:00:00
Collecting tqdm>=4.42.1
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ---------------------------------------- 78.5/78.5 kB ? eta 0:00:00
Installing collected packages: tqdm, fsspec, filelock, huggingface-hub, tokenizers, langchain-huggingface
Successfully installed filelock-3.19.1 fsspec-2025.9.0 h


[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
from langchain_community.document_loaders import PyPDFLoader

In [3]:
test.load()

[Document(metadata={'source': 'notes.txt'}, page_content='Hello Satheeshkumar Subramanian, GENAI Architect!')]

In [2]:
test = TextLoader('notes.txt')

In [1]:
from langchain_community.document_loaders import TextLoader

In [5]:
pip install pypdf


Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
     -------------------------------------- 310.5/310.5 kB 1.1 MB/s eta 0:00:00
Installing collected packages: pypdf
Successfully installed pypdf-6.0.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
