In [86]:
import json

with open("faqs.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# data now contains the contents of faqs.json
# You can print or process it as needed
print(data)

[{'question': 'How can I create an account?', 'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process."}, {'question': 'What payment methods do you accept?', 'answer': 'We accept major credit cards, debit cards, and PayPal as payment methods for online orders.'}, {'question': 'How can I track my order?', 'answer': "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment."}, {'question': 'What is your return policy?', 'answer': 'Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions.'}, {'question': 'Can I cancel my order?', 'answer': 'You can cancel your order if it has not been shipped yet. Please contact our cus

In [87]:
len(data)

95

In [88]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Convert each FAQ entry to a Document object
docs = [Document(page_content=f"{item['question']}\n{item['answer']}") for item in data]

# Split documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
split_docs = text_splitter.split_documents(docs)

print("Total number of documents: ", len(split_docs))

Total number of documents:  95


In [89]:
docs[10]

Document(metadata={}, page_content='Do you offer gift wrapping services?\nYes, we offer gift wrapping services for an additional fee. During the checkout process, you can select the option to add gift wrapping to your order.')

In [90]:
from sentence_transformers import SentenceTransformer

# Load a free, local embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Example: embed a single string
vector = model.encode("hello, world!")
print(vector[:5])

# Example: embed a list of texts (e.g., your FAQ questions/answers)
texts = [doc.page_content for doc in docs]  # docs from your previous code
embeddings = model.encode(texts)
print(embeddings.shape)

[-0.03817719  0.0329111  -0.00545939  0.01436992 -0.04029099]
(95, 384)
(95, 384)


In [91]:
from langchain_chroma import Chroma
from sentence_transformers import SentenceTransformer

# Wrapper to use sentence-transformers with Chroma
class SentenceTransformerEmbeddings:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)
    def embed_documents(self, texts):
        return self.model.encode(texts).tolist()
    def embed_query(self, text):
        return self.model.encode([text])[0].tolist()

embeddings = SentenceTransformerEmbeddings()

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="./chroma_db"
)
print("Vectorstore persisted to ./chroma_db")

Vectorstore persisted to ./chroma_db


In [98]:
# Retrieve similar documents using the Chroma vectorstore and sentence-transformers embeddings
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# Define full_input for debugging
full_input = "What is delivery modes available?"

retrieved_docs = retriever.invoke(full_input)
print("DEBUG: Retrieved docs count:", len(retrieved_docs))
for i, doc in enumerate(retrieved_docs[:3]):
    print(f"DEBUG: Doc {i+1}: {doc.page_content[:150]}...")

query = "What is new in yolov9?"
retrieved_docs = retriever.invoke(query)

for i, doc in enumerate(retrieved_docs, 1):
    print(f"Result {i}:")
    print(doc.page_content)
    print("-" * 40)

DEBUG: Retrieved docs count: 10
DEBUG: Doc 1: What carriers do you use for shipping?
UPS, FedEx, USPS domestically; DHL internationally. Chosen for reliability and tracking capabilities....
DEBUG: Doc 2: What carriers do you use for shipping?
UPS, FedEx, USPS domestically; DHL internationally. Chosen for reliability and tracking capabilities....
DEBUG: Doc 3: What carriers do you use for shipping?
UPS, FedEx, USPS domestically; DHL internationally. Chosen for reliability and tracking capabilities....
Result 1:
What are the tiers in the loyalty program?
Bronze (0-500 points): basic earnings; Silver (501-2000): bonus points; Gold (2001+): free shipping. Tiers upgrade automatically based on annual spend.
----------------------------------------
Result 2:
What are the tiers in the loyalty program?
Bronze (0-500 points): basic earnings; Silver (501-2000): bonus points; Gold (2001+): free shipping. Tiers upgrade automatically based on annual spend.
----------------------------------------
Re

In [93]:
# Retrieve similar documents using the Chroma vectorstore and sentence-transformers embeddings
query = "Do you offer product installation support?"
results = vectorstore.similarity_search_with_score(query, k=10)

# For cosine distance, lower is more similar. Set a threshold (e.g., 0.7) for 'in context'.
threshold = 0.7

if results and results[0][1] > threshold:
    print("Sorry, I don't have enough information to answer that question.")
else:
    for i, (doc, score) in enumerate(results, 1):
        print(f"Result {i} (score={score:.2f}):")
        print(doc.page_content)
        print("-" * 40)

Result 1 (score=0.45):
Do you offer product installation support?
For select items, we provide guides and virtual support. Contact us for complex setups; third-party services may be recommended for in-person help.
----------------------------------------
Result 2 (score=0.45):
Do you offer product installation support?
For select items, we provide guides and virtual support. Contact us for complex setups; third-party services may be recommended for in-person help.
----------------------------------------
Result 3 (score=0.45):
Do you offer product installation support?
For select items, we provide guides and virtual support. Contact us for complex setups; third-party services may be recommended for in-person help.
----------------------------------------
Result 4 (score=1.22):
Is there a live chat option for support?
Yes, live chat is available on our website during business hours. It's ideal for quick queries on orders, products, or issues for immediate assistance.
-------------------

In [94]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.3, max_tokens=500)

In [95]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise.\n\n{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Create a chain that stuffs retrieved docs into the prompt and calls the LLM
stuff_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, stuff_chain)

# Example usage:
question = "What is your cgpa?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

I don't know the answer to that question. The provided context does not contain information about a CGPA.


In [96]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise.\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

# Create a chain that stuffs retrieved docs into the prompt and calls the LLM
stuff_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, stuff_chain)

# Example usage:
question = "What is your cgpa?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

I don't know the answer to that question.


In [97]:
# Create a chain that stuffs retrieved docs into the prompt and calls the LLM
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Example usage:
question = "What is delivery modes available?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

For domestic shipping, UPS, FedEx, and USPS are used, while DHL handles international deliveries. There is also an option to expedite shipping on an existing order by contacting support before shipment, which may involve rerouting via express carriers for faster delivery.
