# Simple RAG

**Tech Stack** 
1. vector database - ChromaDB
2. sentence embedding - all-MiniLM-L6-v2
3. llm - llama3-8b


In [None]:

from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
load_dotenv()

# Chat model used by the question-answering code below, served through Groq.
# NOTE(review): presumably the API key comes from the environment populated by
# load_dotenv() above — confirm.
llm = ChatGroq(model_name="Llama3-8b-8192")

# Alternative backend through Ollama, left commented out for quick switching:
# llm = ChatOllama(
#     model = "deepseek-r1:1.5b",
#     temperature = 0,
#     num_predict = 256,
#     # other params ...
# )



def embd_load_vectordb(filepath):
    """Build a Chroma vector store from a PDF and persist it to ``./test_db``.

    The PDF is loaded and split with ``PyPDFLoader`` and embedded with the
    ``all-MiniLM-L6-v2`` sentence-transformer model.

    Args:
        filepath: Path of the PDF document to index.

    Returns:
        The ``Chroma`` store created from the document chunks.
    """
    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    pdf_loader = PyPDFLoader(filepath)
    chunks = pdf_loader.load_and_split()
    store = Chroma.from_documents(
        documents=chunks,
        embedding=embedder,
        persist_directory="./test_db",
    )
    print("Vector database created and persisted.")
    return store


def load_vectordb():
    """Open the Chroma store kept under ``./test_db``.

    Returns:
        A ``Chroma`` instance bound to the ``./test_db`` directory, using the
        ``all-MiniLM-L6-v2`` sentence-transformer model as embedding function.
    """
    return Chroma(
        embedding_function=SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2"),
        persist_directory="./test_db",
    )
    
# Open the store kept in ./test_db; this cell does not index any document itself.
vectordb = load_vectordb()



def response_generator(vectordb, query, llm):
    """Answer ``query`` with a RetrievalQA chain built over ``vectordb``.

    Args:
        vectordb: Vector store; its ``as_retriever()`` supplies the context.
        query: Question text passed to the chain.
        llm: Language model handed to ``RetrievalQA.from_chain_type``.

    Returns:
        The ``"result"`` entry of the chain output.
    """
    prompt_text = """Use the following pieces of context to answer the question at the end. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. {context} Question: {question} Helpful Answer:"""

    prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["context", "question"],
    )
    chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    # Source documents are requested from the chain, but only the answer text
    # is handed back to the caller.
    return chain.invoke(query)["result"]


# Example question answered through the RetrievalQA-based response_generator.
query = "what are the side effects of Ondansetron"
ans = response_generator(vectordb, query, llm)
print(ans)

In [None]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
load_dotenv()

In [2]:
# Chat model shared by the cells below, served through Groq.
llm = ChatGroq(model_name="Llama3-8b-8192")

# Alternative backend through Ollama, left commented out for quick switching:
# llm = ChatOllama(
#     model = "deepseek-r1:1.5b",
#     temperature = 0,
#     num_predict = 256,
#     # other params ...
# )


In [3]:
def embd_load_vectordb(filepath):
    """Index a PDF into a Chroma store persisted at ``./test_db``.

    Args:
        filepath: Path of the PDF to load and split with ``PyPDFLoader``.

    Returns:
        The ``Chroma`` store built from the split document, embedded with the
        ``all-MiniLM-L6-v2`` sentence-transformer model.
    """
    embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    split_docs = PyPDFLoader(filepath).load_and_split()
    chroma_store = Chroma.from_documents(
        split_docs,
        embedding_model,
        persist_directory="./test_db",
    )
    print("Vector database created and persisted.")
    return chroma_store


In [6]:
def load_vectordb():
    """Return a ``Chroma`` store bound to the ``./test_db`` directory.

    The store uses the ``all-MiniLM-L6-v2`` sentence-transformer model as its
    embedding function.
    """
    embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return Chroma(persist_directory="./test_db", embedding_function=embedding_model)

In [4]:
def response_generator(vectordb, query, llm):
    """Run ``query`` through a RetrievalQA chain backed by ``vectordb``.

    Args:
        vectordb: Vector store; its ``as_retriever()`` supplies the context.
        query: Question text passed to the chain.
        llm: Language model handed to ``RetrievalQA.from_chain_type``.

    Returns:
        The ``"result"`` entry of the chain output.
    """
    prompt_text = """Use the following pieces of context to answer the given question. If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    
    Question: {question}
    
    Instructions:
    1. Base your answer only on the provided context
    2. If the context doesn't contain relevant information, say "I don't have enough information to answer this question."
    
    Answer:"""

    prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["context", "question"],
    )
    retriever = vectordb.as_retriever()
    chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    # Only the answer text is returned; the source documents stay in the
    # chain output and are discarded here.
    output = chain.invoke(query)
    return output["result"]


In [None]:
# Index the PDF into ./test_db, then reopen the persisted store for querying.
# NOTE(review): embd_load_vectordb already returns the store it created, so the
# second load looks redundant, and `emed_data` is not read in the visible
# cells — confirm before simplifying.
emed_data = embd_load_vectordb("./knowledge/health_products_data.pdf")
vectordb = load_vectordb()

In [None]:
# Example question answered through the RetrievalQA-based response_generator.
query = "what are the side effects of Ondansetron"
answer = response_generator(vectordb, query, llm)
print(answer)

# Advanced RAG (Dense Passage Retrieval (DPR) Technique)

In [None]:
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import PyPDFLoader
import faiss
import numpy as np

# Load the DPR models: one encoder for questions, a separate one for passages.
query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base')
passage_encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base')

# Load and split the PDF, keeping only the text of each chunk.
documents = PyPDFLoader("./knowledge/health_products_data.pdf").load_and_split()
passages = [doc.page_content for doc in documents]  # Extract text from Document objects

# Encode passages into dense vectors (one row per passage).
passage_embeddings = passage_encoder.encode(passages, convert_to_numpy=True)

# Create the FAISS index; its dimension is taken from the embedding width.
dimension = passage_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(passage_embeddings)

# Encode the query; it is wrapped in a list so encode() receives a batch of one.
query = "which medicine could be usefull for knee pain"
query_embedding = query_encoder.encode([query], convert_to_numpy=True)

# Perform the similarity search; search() yields distances and passage ids.
k = 2  # Retrieve top-2 passages
distances, indices = index.search(query_embedding, k)

# Print results. zip() pairs each retrieved passage id with its distance
# instead of indexing both arrays by position.
print("Query:", query)
print("\nTop Relevant Passages:")
for rank, (passage_idx, distance) in enumerate(
    zip(indices[0], distances[0]), start=1
):
    if passage_idx < 0:
        # FAISS pads the result with -1 when the index holds fewer than k
        # vectors; skip those instead of wrapping around to the last passage.
        continue
    print(f"{rank}. {passages[passage_idx]} (Distance: {distance:.4f})")


In [15]:
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import PyPDFLoader
import faiss
import numpy as np

from langchain_groq import ChatGroq


In [16]:
# Chat model that receives the prompt built from the retrieved passages.
llm = ChatGroq(model_name="Llama3-8b-8192")

In [17]:
# Load the DPR models: one encoder for questions, a separate one for passages.
query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base')
passage_encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base')


In [18]:
# Load and split the PDF, keeping only the text of each chunk.
documents = PyPDFLoader("./knowledge/health_products_data.pdf").load_and_split()
passages = [doc.page_content for doc in documents]  # Extract text from Document objects

# Encode passages into dense vectors (one row per passage).
passage_embeddings = passage_encoder.encode(passages, convert_to_numpy=True)


In [19]:
# Create the FAISS index; its dimension is taken from the embedding width.
# Vectors are added in the same order as `passages`, so a returned id can be
# used directly as an index into that list.
dimension = passage_embeddings.shape[1]
vectordb = faiss.IndexFlatL2(dimension)
vectordb.add(passage_embeddings)

In [20]:
def response_generator(passages, query, llm, k=2):
    """Answer ``query`` from the ``k`` passages closest to it in the FAISS index.

    The query is embedded with the module-level ``query_encoder``, the
    module-level ``vectordb`` index is searched, and the matching passages are
    placed in the prompt sent to ``llm``.

    Args:
        passages: Passage texts, indexed by the ids returned from the search.
        query: Question text.
        llm: Chat model exposing ``invoke(prompt)``; the reply must have a
            ``content`` attribute.
        k: Number of passages to retrieve. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        The ``content`` of the model's reply.
    """
    query_embedding = query_encoder.encode([query], convert_to_numpy=True)

    # Perform similarity search; only the ids are needed, not the distances.
    _, indices = vectordb.search(query_embedding, k)

    # FAISS pads the result with -1 when the index holds fewer than k vectors.
    # Drop those ids: passages[-1] would silently return the last passage.
    hits = [passages[int(i)] for i in indices[0] if int(i) >= 0]

    # Join the passages as plain text so the prompt does not contain a Python
    # list repr (brackets, quotes, escaped newlines).
    context = "\n\n".join(hits)

    # Construct prompt
    template = f"""
    You are an intelligent assistant designed to provide accurate and concise answers based on the context provided. 
    Follow these rules strictly:
    1. Use ONLY the information provided in the context to answer the question.
    2. If the context does not contain enough information to answer the question, say "I don't know."
    3. Do not make up or assume any information outside of the context.
    4. Keep your answer concise and to the point (maximum 3 sentences).

    Context:
    {context}

    Question:
    {query}

    Helpful Answer:
    """

    # Generate response using LLM
    res = llm.invoke(template)
    return res.content



In [21]:
# Example question answered through the DPR + FAISS response_generator.
query = "which medicine is used to treat depression"
ans = response_generator(passages, query, llm)
print(ans)

Wellbutrin (bupropion), Remeron (mirtazapine), and Trazodone are antidepressant medications used to treat depression.


In [None]:
from sentence_transformers import SentenceTransformer
import faiss

from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
load_dotenv()
import numpy as np

# Chat model that receives the prompt built from the retrieved passages.
llm = ChatGroq(model_name="Llama3-8b-8192")

# Alternative backend through Ollama, left commented out for quick switching:
# llm = ChatOllama(
#     model = "deepseek-r1:1.5b",
#     temperature = 0,
#     num_predict = 256,
#     # other params ...
# )

# Load the DPR models: one encoder for questions, a separate one for passages.
query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base')
passage_encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base')

# Load and split the PDF, keeping only the text of each chunk.
documents = PyPDFLoader("./knowledge/health_products_data.pdf").load_and_split()
passages = [doc.page_content for doc in documents]  # Extract text from Document objects


# Encode passages into dense vectors (one row per passage).
passage_embeddings = passage_encoder.encode(passages, convert_to_numpy=True)

# Create the FAISS index; vectors are added in the same order as `passages`,
# so a returned id can be used directly as an index into that list.
dimension = passage_embeddings.shape[1]
vectordb = faiss.IndexFlatL2(dimension)
vectordb.add(passage_embeddings)

def response_generator(passages, query, llm, k=2):
    """Answer ``query`` from the ``k`` passages closest to it in the FAISS index.

    The query is embedded with the module-level ``query_encoder``, the
    module-level ``vectordb`` index is searched, and the matching passages are
    placed in the prompt sent to ``llm``.

    Args:
        passages: Passage texts, indexed by the ids returned from the search.
        query: Question text.
        llm: Chat model exposing ``invoke(prompt)``; the reply must have a
            ``content`` attribute.
        k: Number of passages to retrieve. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        The ``content`` of the model's reply.
    """
    query_embedding = query_encoder.encode([query], convert_to_numpy=True)

    # search() returns a (distances, indices) pair. Binding the whole pair to
    # one name made ``indices[0]`` the float distance array, which cannot be
    # used to index ``passages``; unpack it and keep only the ids.
    _, indices = vectordb.search(query_embedding, k)

    # FAISS pads the result with -1 when the index holds fewer than k vectors.
    # Drop those ids: passages[-1] would silently return the last passage.
    hits = [passages[int(i)] for i in indices[0] if int(i) >= 0]

    # Join the passages as plain text so the prompt does not contain a Python
    # list repr (brackets, quotes, escaped newlines).
    context = "\n\n".join(hits)

    template = f"""
    You are an intelligent assistant designed to provide accurate and concise answers based on the context provided. 
    Follow these rules strictly:
    1. Use ONLY the information provided in the context to answer the question.
    2. If the context does not contain enough information to answer the question, say "I don't know."
    3. Do not make up or assume any information outside of the context.
    4. Keep your answer concise and to the point (maximum 3 sentences).

    Context:
    {context}

    Question:
    {query}

    Helpful Answer:
    """
    res = llm.invoke(template)
    return res.content


# Example question answered through the DPR + FAISS response_generator.
query = "which medicine is used to treat depression"
ans= response_generator(passages, query, llm)
print(ans)

In [117]:
from sentence_transformers import SentenceTransformer
import faiss

from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain_ollama import ChatOllama
load_dotenv()
import numpy as np


In [118]:
# Chat model that receives the prompt built from the retrieved passages.
llm = ChatGroq(model_name="Llama3-8b-8192")

# Alternative backend through Ollama, left commented out for quick switching:
# llm = ChatOllama(
#     model = "deepseek-r1:1.5b",
#     temperature = 0,
#     num_predict = 256,
#     # other params ...
# )


In [119]:
# Load the DPR models: one encoder for questions, a separate one for passages.
query_encoder = SentenceTransformer('facebook-dpr-question_encoder-single-nq-base')
passage_encoder = SentenceTransformer('facebook-dpr-ctx_encoder-single-nq-base')


In [120]:
# Load and split the PDF, keeping only the text of each chunk.
documents = PyPDFLoader("./knowledge/health_products_data.pdf").load_and_split()
passages = [doc.page_content for doc in documents]  # Extract text from Document objects


In [121]:
# Encode passages into dense vectors (one row per passage).
passage_embeddings = passage_encoder.encode(passages, convert_to_numpy=True)

# Create the FAISS index; vectors are added in the same order as `passages`,
# so a returned id can be used directly as an index into that list.
dimension = passage_embeddings.shape[1]
vectordb = faiss.IndexFlatL2(dimension)
vectordb.add(passage_embeddings)

In [122]:
def response_generator(passages, query, llm, k=2):
    """Retrieve the ``k`` nearest passages for ``query`` and ask ``llm`` to answer.

    Relies on two module-level objects: ``query_encoder`` embeds the question
    and ``vectordb`` is the FAISS index that is searched.

    Args:
        passages: Passage texts, indexed by the ids returned from the search.
        query: Question text.
        llm: Chat model exposing ``invoke(prompt)``; the reply must have a
            ``content`` attribute.
        k: Number of passages to retrieve. Defaults to 2, the value that was
            previously hard-coded.

    Returns:
        The ``content`` of the model's reply.
    """
    query_embedding = query_encoder.encode([query], convert_to_numpy=True)

    # search() returns (distances, indices). The previous code kept the whole
    # pair under one name, so ``[0]`` selected the float distances and indexing
    # ``passages`` with them failed. Unpack and keep only the ids.
    _, indices = vectordb.search(query_embedding, k)

    # Ids of -1 mark missing neighbours (index smaller than k); skip them so
    # they do not wrap around to the last passage.
    retrieved = [passages[int(i)] for i in indices[0] if int(i) >= 0]

    # Plain-text context instead of a Python list repr inside the prompt.
    context = "\n\n".join(retrieved)

    template = f"""
    You are an intelligent assistant designed to provide accurate and concise answers based on the context provided. 
    Follow these rules strictly:
    1. Use ONLY the information provided in the context to answer the question.
    2. If the context does not contain enough information to answer the question, say "I don't know."
    3. Do not make up or assume any information outside of the context.
    4. Keep your answer concise and to the point (maximum 3 sentences).

    Context:
    {context}

    Question:
    {query}

    Helpful Answer:
    """
    res = llm.invoke(template)
    return res.content


In [None]:
# Example question answered through the DPR + FAISS response_generator.
query = "which medicine is used to treat depression"
ans= response_generator(passages, query, llm)
print(ans)

# Modular RAG

# Graph RAG