In [3]:
from dotenv import load_dotenv 
load_dotenv()

True

In [None]:
qa_chain("what is langchain and how to write factorial program?")
|
Your RAG is "hallucinating" when the query mixes two unrelated topics.
|
contains two completely different questions:

What is LangChain? → present in your documents

How to write factorial program? → NOT in your documents

Your RetrievalQA chain does this:

Looks for relevant documents

Finds only LangChain-related chunks

Sends your FULL question + LangChain context to the LLM

LLM tries to answer both parts

Because context is only about LangChain, it fuses them → hallucination

That's why it produced this nonsense
To write a factorial program using LangChain...
This is a classic RAG hallucination.
|
✔️ Why this happens (important to understand)

Retriever result for your long question:

Relevant chunks = only LangChain text
Missing = factorial program chunk

But the prompt to LLM still contains:
======================================
Context:
<LangChain text>
Question:
"what is langchain and how to write factorial program?"

So LLM thinks:

“The question asks two things. But the only context I see is LangChain. So maybe factorial program is also related to LangChain?”

Therefore, hallucination.
|
|
 Solution: Prevent LLM from answering questions NOT supported by context

You need a strict RAG safety prompt:

Strict RAG Prompt
|
template = """
You are a Retrieval based QA assistant.

Answer ONLY from the given context.
If the answer is not present in the context, say:
"I don't know, the document does not contain this information."

Context:
{context}

Question:
{question}

Answer:
"""
Now LLM will NOT hallucinate.
|
|
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a Retrieval QA assistant.

Answer ONLY using the information in the context.
If the answer is not present in the context, reply:
"I don't know, the document does not contain this information."

Context:
{context}

Question:
{question}

Answer:
"""
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm_obj,
    retriever=retriever_obj,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True
)
Now try:
qa_chain("what is langchain and how to write factorial program?")
 Expected output:
I don't know, the document does not contain this information.
And NOT hallucination.

If you want hybrid behavior (RAG + fallback to model), use this:
If retrieval fails, LLM uses its own knowledge.
Hybrid Prompt
template = """
Use the context to answer the question.
If the context does not contain the answer, answer from your general knowledge.

Context:
{context}

Question:
{question}

Answer:
"""
 Summary
| Prompt Type    | Behavior                                         |
| -------------- | ------------------------------------------------ |
| Strict RAG     | Avoid hallucination. Only answer from docs.      |
| Hybrid RAG     | Answer from docs + fallback to LLM knowledge.    |
| Default Prompt | High chance of hallucination in mixed questions. |


In [4]:
import os

In [5]:
# Retrive from loaded documents only not from trained LLM
# ------------------------------------------------------------
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os

# Step-1 Load your text file
loader = TextLoader("my_docs.txt")
documents = loader.load()

# Step-2 Split into chunks
splitter = CharacterTextSplitter(chunk_size=250, chunk_overlap=20)
docs = splitter.split_documents(documents)

# Step-3 Embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Step-4 Vector DB
vectorstore = FAISS.from_documents(docs, embeddings)

# Step-5 Retriever
retriever = vectorstore.as_retriever()

# Step-6 Groq LLM
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=os.getenv("GROQ_API_KEY")
)

# Step-7 STRICT RAG Prompt
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a strict Retrieval QA assistant.
Answer ONLY using the information given in the context below.
If the answer is not present in the context, reply exactly with:
"I don't know, the document does not contain this information."

Context:
{context}

Question:
{question}

Answer:
"""
)

# Step-8 Build the RAG Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True
)

# Try Questions
print(qa_chain("What is LangChain?"))
print(qa_chain("How to write factorial program in C?"))
print(qa_chain("What is LangChain and how to write factorial program?"))

Created a chunk of size 467, which is longer than the specified 250
  return forward_call(*args, **kwargs)
  print(qa_chain("What is LangChain?"))
  return forward_call(*args, **kwargs)


{'query': 'What is LangChain?', 'result': 'LangChain is a framework for developing applications powered by large language models (LLMs).', 'source_documents': [Document(id='e567c9c0-8a0e-4e80-8867-12b5ce3ade2b', metadata={'source': 'my_docs.txt'}, page_content='LangChain is a framework for developing applications powered by large language models (LLMs).\n\nLangChain simplifies every stage of the LLM application lifecycle:'), Document(id='19de1199-a38c-4a8f-86a9-21ca17aa2878', metadata={'source': 'my_docs.txt'}, page_content="Development: Build your applications using LangChain's open-source components and third-party integrations. Use LangGraph to build stateful agents with first-class streaming and human-in-the-loop support.\nProductionization: Use LangSmith to inspect, monitor and evaluate your applications, so that you can continuously optimize and deploy with confidence.\nDeployment: Turn your LangGraph applications into production-ready APIs and Assistants with LangGraph Platform.

  return forward_call(*args, **kwargs)


{'query': 'How to write factorial program in C?', 'result': "I don't know, the document does not contain this information.", 'source_documents': [Document(id='b4088c1c-743a-4525-8226-f4c9c772e400', metadata={'source': 'my_docs.txt'}, page_content='factorial value 5! is 120'), Document(id='19de1199-a38c-4a8f-86a9-21ca17aa2878', metadata={'source': 'my_docs.txt'}, page_content="Development: Build your applications using LangChain's open-source components and third-party integrations. Use LangGraph to build stateful agents with first-class streaming and human-in-the-loop support.\nProductionization: Use LangSmith to inspect, monitor and evaluate your applications, so that you can continuously optimize and deploy with confidence.\nDeployment: Turn your LangGraph applications into production-ready APIs and Assistants with LangGraph Platform."), Document(id='e567c9c0-8a0e-4e80-8867-12b5ce3ade2b', metadata={'source': 'my_docs.txt'}, page_content='LangChain is a framework for developing applic

  return forward_call(*args, **kwargs)


In [6]:
# Hybrid RAG Answer from docs + fallback to LLM knowledge
#
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
import os

# Step-1 load data
loader = TextLoader("my_docs.txt")
documents = loader.load()

# Step-2 split
splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = splitter.split_documents(documents)

# Step-3 embeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Step-4 vector DB
vectorstore = FAISS.from_documents(docs, embeddings)

# Step-5 retriever
retriever = vectorstore.as_retriever()

# Step-6 Groq LLM
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=os.getenv("GROQ_API_KEY")
)

# Step-7 Better Prompt → fallback enabled
template = """
Use the following context to answer the question.
If the answer is not present in the context, answer from your general knowledge.

Context:
{context}

Question:
{question}

Answer:
"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)

# Step-8 Questions
print(qa_chain("what is langchain?"))
print(qa_chain("How to write factorial program in C language?"))

Created a chunk of size 467, which is longer than the specified 200
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


{'query': 'what is langchain?', 'result': 'Based on the given context, LangChain is a framework for developing applications powered by large language models (LLMs).', 'source_documents': [Document(id='1066df08-024c-4d54-880a-905510d320b8', metadata={'source': 'my_docs.txt'}, page_content='LangChain is a framework for developing applications powered by large language models (LLMs).\n\nLangChain simplifies every stage of the LLM application lifecycle:'), Document(id='2a2b5331-94aa-4820-a847-3fe56c35b050', metadata={'source': 'my_docs.txt'}, page_content="Development: Build your applications using LangChain's open-source components and third-party integrations. Use LangGraph to build stateful agents with first-class streaming and human-in-the-loop support.\nProductionization: Use LangSmith to inspect, monitor and evaluate your applications, so that you can continuously optimize and deploy with confidence.\nDeployment: Turn your LangGraph applications into production-ready APIs and Assista

  return forward_call(*args, **kwargs)


{'query': 'How to write factorial program in C language?', 'result': 'The given context does not provide any information about writing a factorial program in C language. However, I can provide a general answer based on my knowledge.\n\nTo write a factorial program in C language, you can use the following code:\n\n```c\n#include <stdio.h>\n\nint factorial(int n) {\n    if (n == 0) {\n        return 1;\n    } else {\n        return n * factorial(n - 1);\n    }\n}\n\nint main() {\n    int num;\n    printf("Enter a number: ");\n    scanf("%d", &num);\n\n    if (num < 0) {\n        printf("Factorial is not defined for negative numbers.");\n    } else {\n        int result = factorial(num);\n        printf("Factorial of %d is %d\\n", num, result);\n    }\n\n    return 0;\n}\n```\n\nThis program calculates the factorial of a given number using recursion. The `factorial` function calls itself with decreasing values until it reaches the base case (n = 0), and then it starts returning the produc