RAG Pipeline with MMR

In [8]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.schema import StrOutputParser
from langchain.prompts import PromptTemplate
from dotenv import  load_dotenv
import os


In [9]:
# Step 1: Prepare documents
# A tiny in-memory corpus about tea; each string becomes one entry in the
# vector store built later in the notebook.
docs = [
    # Health-related statements about green tea
    "Green tea contains antioxidants that improve health.",
    # Off-topic-but-related statement (black tea)
    "Black tea is rich in caffeine and boosts energy.",
    "Catechins in green tea help reduce cholesterol.",
    # Cultural statement, unrelated to health
    "Tea ceremonies are an important cultural tradition in Japan.",
    "Green tea may improve metabolism and aid in weight loss.",
]

In [10]:
# Step 2: Build vector store

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the OpenAI key is missing, instead
# of the opaque TypeError raised by assigning None into os.environ.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        'OPENAI_API_KEY is not set. Define it in the environment or in a .env file.'
    )

# Convert documents into embeddings
# NOTE(review): 'all-MiniLM-L6-v2' is not a BGE model, yet the BGE wrapper is
# used here; the plain HuggingFace embeddings class may be the better fit.
# Confirm before relying on retrieval quality.
embeddings = HuggingFaceBgeEmbeddings(model_name='all-MiniLM-L6-v2')

# Create FAISS index for similarity search
vectorstore = FAISS.from_texts(docs, embeddings)

# Display the store object as the cell output.
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x18102fe8490>

In [11]:
# Step 3: Configure retriever with MMR

# Use Maximal Marginal Relevance (MMR) to balance relevance and diversity.
# The keyword must be spelled `search_type`; the previous misspelling
# (`serach_type`) meant 'mmr' was never passed under the expected name.
retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs={
        'k': 3,          # number of documents returned to the caller
        'fetch_k': 10,   # candidate pool size handed to the MMR re-ranking
        'lambda_mult': 0.7  # balance relevance vs diversity
    }
)

In [12]:
# Step 4: Create prompt template
# The template is assembled from explicit lines so that source-code
# indentation does not leak into the text sent to the model, and the final
# cue is written as "Answer:" to match the "Context:" / "Question:" labels.
# Placeholders: {context} (retrieved passages) and {question} (user query).
prompt = PromptTemplate.from_template(
    'Use the following context to answer the question:\n'
    'Context: {context}\n'
    '\n'
    'Question: {question}\n'
    '\n'
    'Answer:'
)

In [13]:
# Step 4 (continued): Initialize LLM and assemble the chain
# temperature=0 is passed to the chat model constructor along with the model name.
llm = ChatOpenAI(
    model='gpt-4',
    temperature=0,
)

# RAG chain: the formatted prompt is piped into the LLM, and the LLM output
# is piped into a string output parser.
rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

In [14]:
# Step 5: Run query
query = 'What are the health benefits of green tea?'

# Retrieve documents for the query through the configured retriever.
# `invoke` is the Runnable entry point that replaces the deprecated
# `get_relevant_documents`.
retrieved_docs = retriever.invoke(query)

# Join retrieved docs into a single context string (one passage per line) so
# the prompt receives plain text rather than the repr of a Python list.
context = '\n'.join(doc.page_content for doc in retrieved_docs)

# Run the chain with the two placeholders the prompt template expects.
response = rag_chain.invoke({
    'question': query,
    'context': context
})

print(f'Final Answer: {response}')

Final Answer: Green tea contains antioxidants that improve health and may improve metabolism and aid in weight loss.
