# **Fusion RAG**

## **1. Installation**

In [1]:
!pip install -qU langchain langchain-openai langchain-community faiss-cpu sentence-transformers langchain-groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.0/69.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m93.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m75.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import getpass
from langchain.docstore.document import Document
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS

# --- SETUP ---
os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API Key: ")

Enter your Groq API Key: ··········


## **1. CREATE A SAMPLE DOCUMENT STORE**

In [10]:
documents = [
    Document(page_content="RAG-Fusion combines generative AI with a fusion-based retrieval process to improve answer quality."),
    Document(page_content="Reciprocal Rank Fusion (RRF) is an algorithm used to combine multiple ranked lists into a single, more robust list."),
    Document(page_content="Multi-query retrieval involves generating several variations of a user's query to broaden the search scope."),
    Document(page_content="For complex questions, breaking them down into sub-queries can yield more accurate results from a vector database."),
    Document(page_content="Vector search finds documents based on semantic similarity rather than exact keyword matches."),
    Document(page_content="The final step in RAG is generation, where an LLM synthesizes an answer from the retrieved context."),
    Document(page_content="Advanced RAG techniques often involve query transformations to better match the user's intent with the stored data."),
]

## **2. USE A FREE, LOCAL EMBEDDING MODEL**

In [15]:
from langchain_community.embeddings import HuggingFaceEmbeddings

print("Loading local HuggingFace embedding model...")
model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
print("Embedding model loaded.")

Loading local HuggingFace embedding model...
Embedding model loaded.


## **3. CREATE THE VECTOR STORE (FREE)**

In [16]:
print("Creating vector store using local embeddings...")
vector_store = FAISS.from_documents(documents, embeddings)
retriever = vector_store.as_retriever()
print("Vector store created successfully.")

Creating vector store using local embeddings...
Vector store created successfully.


## **4. RAG-FUSION LOGIC WITH A FREE GENERATION MODEL**

In [17]:
# from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

# Use Groq's Llama 3 model for all text generation tasks (query generation and final answer)
llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")

query_gen_template = """
You are a helpful assistant that generates multiple search queries based on a single input query.
Generate {num_queries} search queries, one on each line, related to this input: {original_query}
"""
query_gen_prompt = ChatPromptTemplate.from_template(query_gen_template)
query_generator = (
    query_gen_prompt
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

def reciprocal_rank_fusion(retrieved_lists, k=60):
    fused_scores = {}
    for doc_list in retrieved_lists:
        for rank, doc in enumerate(doc_list):
            doc_str = doc.page_content
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            fused_scores[doc_str] += 1 / (k + rank)
    reranked_results = [
        (Document(page_content=doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

def rag_fusion_retrieval(original_query, retriever, num_queries=4):
    print(f"\nOriginal Query: {original_query}")
    generated_queries = query_generator.invoke({"original_query": original_query, "num_queries": num_queries})
    print(f"Generated Queries: {generated_queries}")
    all_retrieved_docs = [retriever.get_relevant_documents(q) for q in generated_queries]
    final_ranked_docs = reciprocal_rank_fusion(all_retrieved_docs)
    return final_ranked_docs

## **5. EXECUTION AND FINAL ANSWER GENERATION (NOW FULLY FREE)**

In [18]:
original_query = "how does RAG-Fusion work?"
fused_documents_with_scores = rag_fusion_retrieval(original_query, retriever)

print("\n--- Top Fused Documents ---")
for doc, score in fused_documents_with_scores[:4]:
    print(f"Score: {score:.4f}\tContent: {doc.page_content}")

final_context = "\n".join([doc.page_content for doc, _ in fused_documents_with_scores[:4]])
generation_prompt_template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.

Question: {question}
Context: {context}
Answer:
"""
generation_prompt = ChatPromptTemplate.from_template(generation_prompt_template)
final_chain = generation_prompt | llm | StrOutputParser()

final_answer = final_chain.invoke({
    "context": final_context,
    "question": original_query
})

print("\n--- Final Generated Answer ---")
print(final_answer)


Original Query: how does RAG-Fusion work?
Generated Queries: ['Here are 4 search queries related to "how does RAG-Fusion work?":', '', '1. "RAG-Fusion mechanism of action"', '2. "How does RAG-Fusion therapy work for cancer treatment"', '3. "RAG-Fusion gene editing technique explanation"', '4. "RAG-Fusion protein fusion mechanism and applications"', '', 'Let me know if you need anything else!']

--- Top Fused Documents ---
Score: 0.1304	Content: RAG-Fusion combines generative AI with a fusion-based retrieval process to improve answer quality.
Score: 0.1158	Content: The final step in RAG is generation, where an LLM synthesizes an answer from the retrieved context.
Score: 0.0973	Content: Advanced RAG techniques often involve query transformations to better match the user's intent with the stored data.
Score: 0.0799	Content: Reciprocal Rank Fusion (RRF) is an algorithm used to combine multiple ranked lists into a single, more robust list.

--- Final Generated Answer ---
Based on the provi