In [1]:
# Core LangChain imports
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema import StrOutputParser

# Vector store + embeddings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

from dotenv import load_dotenv
import os


In [2]:
# Step 1: Prepare documents#

docs = [
    'Green tea contains antioxidants that improve health.',
    'Black tea is rich in caffeine and boosts energy.',
    'Catechins in green tea help reduce cholesterol.',
    'Tea ceremonies are an important cultural tradition in Japan.',
    'Green tea may improve metabolism and aid in weight loss.'
]

In [3]:
# Build Vector Store
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
vectorstore = FAISS.from_texts(docs, embeddings)

  embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')


In [4]:
# Configure Retriever (MMR)
retriever = vectorstore.as_retriever(
    search_type='mmr', # Enable MMR reranking
    search_kwargs={
        'k':3, 
        'fetch_k': 10, 
        'lambda_mult': 0.7
    }
)

Prompt for Answer Generation

In [5]:
prompt_answer = PromptTemplate.from_template(
    '''
    Use the following context to answer the question:

    Context: {context}

    Question: {question}

    Answer:
    '''
)

Prompt for Document Ranking

In [6]:
prompt_rank = PromptTemplate.from_template(
    """
    You are a helpful assistant. Your task is to rank the following documents 
    from most to least relevant to the user's question.
    
    User Question: '{question}'
    
    Documents:
    {documents}
    
    Instructions:
    - Think carefully about the relevance of each document to the user's question.
    - Return a list of document indices in ranked order, starting from the most relevant.
    
    Output format:
    Comma-separated document indices (e.g., 2, 3, 0, ...)
    """
)

LLM + Parser Chains

In [7]:
load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

llm = ChatOpenAI(model='gpt-4', temperature=0)

# Answer generation chain
rag_chain_answer = prompt_answer | llm | StrOutputParser()

# Document ranking chain
rag_chain_rank = prompt_rank | llm | StrOutputParser()

Run Query

In [8]:
query = 'What are the health benefits of green tea?'

# Retrieve docs with MMR
retrieved_docs = retriever.get_relevant_documents(query)

# Prepare context for answer chain
context = '\n'.join([doc.page_content for doc in retrieved_docs])

# Prepare documents for ranking chain
documents = [doc.page_content for doc in retrieved_docs]

# Run answer generation
answer = rag_chain_answer.invoke({
    'question': query,
    'context': context
})

# Run ranking
ranking = rag_chain_rank.invoke({
    'question': query,
    'documents': documents
})

print(f'Final Answer: {answer}\n')
print(f'\nRanked Document Indices: {ranking}\n')

  retrieved_docs = retriever.get_relevant_documents(query)


Final Answer: Green tea contains antioxidants that improve health and may improve metabolism and aid in weight loss.


Ranked Document Indices: 0, 1, 2



In [9]:
#  Display ranked documents

# Ranking result is a string of indices, e.g. "0, 2, 1"
# Parse into integers
ranked_indices = [
    int(i.strip())
    for i in ranking.split(',') 
    if i.strip().isdigit()
]

print('Ranked Documents:\n')

#filter out any invalid indices in one step
for idx in ranked_indices:
    if idx < len(documents):
        print(f'Doc {idx}: {documents[idx]}')

Ranked Documents:

Doc 0: Green tea contains antioxidants that improve health.
Doc 1: Green tea may improve metabolism and aid in weight loss.
Doc 2: Black tea is rich in caffeine and boosts energy.


In [10]:
#  Focused answer using top-ranked document

# Parse the ranking string into a list of valid indices
ranked_indices = [
    int(i.strip()) 
    for i in ranking.split(",") 
    if i.strip().isdigit() and int(i.strip()) < len(documents)
]

# If we have at least one ranked index, take the top one
if ranked_indices:
    top_idx = ranked_indices[0]
    top_doc = documents[top_idx]

    # Run the answer chain using only the top-ranked document
    focused_answer = rag_chain_answer.invoke({
        "question": query,
        "context": top_doc
    })

    print("Focused Answer (Top-Ranked Document Only):\n")
    print(focused_answer)
else:
    print("No valid ranked indices returned by the LLM.")


Focused Answer (Top-Ranked Document Only):

The context mentions that green tea improves health through its antioxidant content.


In [11]:
# -----------------------------
# Cell: Compare broad vs focused answers
# -----------------------------

# Broad answer: uses all retrieved docs
broad_answer = rag_chain_answer.invoke({
    "question": query,
    "context": context
})

# Focused answer: uses only the top-ranked doc
ranked_indices = [
    int(i.strip()) 
    for i in ranking.split(",") 
    if i.strip().isdigit() and int(i.strip()) < len(documents)
]

focused_answer = None
if ranked_indices:
    top_idx = ranked_indices[0]
    top_doc = documents[top_idx]
    focused_answer = rag_chain_answer.invoke({
        "question": query,
        "context": top_doc
    })

# Print side-by-side comparison
print("=== Broad Answer (All Retrieved Docs) ===\n")
print(broad_answer)

print("\n=== Focused Answer (Top-Ranked Doc Only) ===\n")
print(focused_answer if focused_answer else "No valid ranked indices returned.")


=== Broad Answer (All Retrieved Docs) ===

Green tea contains antioxidants that improve health and may improve metabolism and aid in weight loss.

=== Focused Answer (Top-Ranked Doc Only) ===

The context mentions that green tea improves health due to its antioxidant content. However, specific health benefits are not provided in the context.
