In [None]:
!pip install -U langchain-community

In [None]:
# Colab-only: upload the dataset file from the local machine.
# The CSV-loading cell below reads a file named "dataset2.csv".
from google.colab import files
uploaded = files.upload()

In [3]:
import pandas as pd


# Load Dataset

In [None]:
# Read the Q&A dataset from the CSV file in the working directory.
df = pd.read_csv("dataset2.csv")

# Sanity check: show the first five rows.
print(df.head(n=5))

In [5]:
# Text to embed for each row: the question, a blank line, then the answer.
question_part = df["question"] + "\n\n"
df["content"] = question_part + df["answer"]

# Discard rows whose combined text is missing.
df = df.dropna(subset=["content"])

In [None]:
# Data cleaning: replace every literal "[{(/NL/)}]" marker (presumably the
# dataset's newline placeholder) with a real line break.
newline_marker_pattern = r"\[\{\(/NL/\)\}\]"
df["content"] = df["content"].str.replace(newline_marker_pattern, "\n", regex=True)

# Spot check: print the first cleaned entry.
first_entry = df["content"].iloc[0]
print(first_entry)


In [None]:
!pip install langchain chromadb langchain-chroma openai tiktoken


In [None]:
!pip install langchain-community

In [9]:
import getpass
import os

# Prompt for the OpenAI key only when the environment does not already hold a
# non-empty one; getpass is used so the key is not typed in as notebook source.
openai_key_missing = not os.environ.get("OPENAI_API_KEY")
if openai_key_missing:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

Enter API key for OpenAI: ··········


In [19]:
import time

from langchain_community.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.docstore.document import Document
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Make ChromaDB

In [None]:
# 1. Wrap each cleaned Q&A string in a LangChain Document
documents = [Document(page_content=text) for text in df["content"].tolist()]

# 2. Create embedding function
embedding_function = OpenAIEmbeddings()

# 3. Create the persistent Chroma vectorstore and fill it in batches
batch_size = 200  # Keep this conservative to avoid hitting TPM
persist_dir = "chroma_store"

# Open (or create) the on-disk store up front. This removes the special case
# for the first batch and guarantees `vectorstore` is a usable Chroma object
# (never None) even when `documents` is empty.
vectorstore = Chroma(
    persist_directory=persist_dir,
    embedding_function=embedding_function,
)

# NOTE(review): documents are added without explicit ids, so re-running this
# cell against an existing "chroma_store" presumably stores them again —
# delete the directory first if a clean rebuild is wanted.
for batch_number, start in enumerate(range(0, len(documents), batch_size), start=1):
    batch_docs = documents[start:start + batch_size]
    vectorstore.add_documents(batch_docs)
    print(f"Embedded batch {batch_number}")


In [27]:
# Load a saved Chroma DB from disk.
# The embedding function is created here as well, so this cell works on a
# fresh kernel without first re-running the embedding cell that builds the store.
embedding_function = OpenAIEmbeddings()

vectorstore = Chroma(
    persist_directory="chroma_store",
    embedding_function=embedding_function,
)

# Build the Retriever

In [28]:
# Retrieve the 2 best chunks using maximal marginal relevance (MMR).
#
# The previous "score_threshold" entry was removed: LangChain documents that
# option for search_type="similarity_score_threshold", and with "mmr" it did
# not filter anything, so it only suggested a safeguard that was not there.
# To really drop weak matches, switch to
#   search_type="similarity_score_threshold",
#   search_kwargs={"k": 2, "score_threshold": <float between 0 and 1>}
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 2},
)

# Build a QA Chain with Retrieval

In [36]:
from langchain.chains import ConversationalRetrievalChain, LLMChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory


In [41]:
# Conversation memory shared with the QA chain.
#   memory_key="chat_history" - key under which the stored history is exposed
#                               (required)
#   return_messages=True      - keep the history in chat-message format
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [55]:
# Set up the answering LLM; its tokens are streamed to stdout as they arrive.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.9,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Separate, non-streaming LLM used only to rewrite a follow-up question into a
# standalone one. Keeping it off the streaming handler prevents the rewritten
# question from being printed in the middle of the bot's reply.
question_rewriter_llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Create the QA chain.
# RetrievalQA only passes the current query to its prompt, so the attached
# memory was being recorded but never shown to the model. The conversational
# chain reads "chat_history" from `memory` and uses it to resolve follow-ups
# before retrieval.
# NOTE: the returned dict carries the reply under "answer" (RetrievalQA used
# "result"); the chain can still be called with a plain question string.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    condense_question_llm=question_rewriter_llm,
    memory=memory,
    return_source_documents=False,
)

# Build Query Rewriter

# Ask Questions!

In [56]:
# Interactive chat loop: read a question, let the chain answer, repeat until
# the user types "exit" / "quit" or ends the input.
while True:
    try:
        # Strip surrounding whitespace so " exit " still ends the chat.
        query = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input or an interrupted cell ends the chat without a traceback.
        print()
        break

    if not query:
        # Nothing was typed: don't spend an API call on an empty question.
        continue
    if query.lower() in ["exit", "quit"]:
        break

    print("Bot:", end=" ", flush=True)
    # The reply is written to stdout by the LLM's streaming callback handler,
    # so the chain's return value is intentionally not printed again.
    qa_chain(query)


You: Please summarize the pillars of Islam
Bot: The pillars of Islam consist of five core beliefs and practices: 

1. **Belief (Shahadah)**: The declaration of faith, stating that there is no god but Allah, and that Muhammad is His messenger.
2. **Prayers (Salaah)**: The performance of ritual prayers five times a day.
3. **Obligatory Charity (Zakah)**: The giving of a fixed portion of one's wealth to those in need, typically calculated as 2.5% of savings.
4. **Fasting (Sawm)**: The observance of fasting during the month of Ramadan, abstaining from food and drink from dawn until sunset.
5. **Pilgrimage (Hajj)**: The journey to the holy city of Mecca, which is required of all Muslims who are able to undertake it at least once in their lifetime.
You: Make a drawing of mosque
Bot: I'm sorry, but I can't create drawings. However, I can provide a description of a mosque if you'd like!
You: What is a mosque
Bot: I don't know.
You: Who is ALLAH?
Bot: Allah is the Arabic word for "one God". He