In [45]:
import os
import sys
from dotenv import load_dotenv
from langchain.docstore.document import Document
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from typing import List
from rank_bm25 import BM25Okapi
import numpy as np
from langchain.llms import Ollama 

load_dotenv()
from helper_functions import replace_t_with_space, show_context

# Set the OpenAI API key environment variable
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')


In [46]:
path = "Aka Book.pdf"

In [47]:
vector_store_path = os.path.join(os.getcwd(), "vector_stores")

# Paths to the saved FAISS vector stores

detailed_store_path = os.path.join(vector_store_path, "detailed_store")

# Initialize OpenAI embeddings (ensure your API key is set)
embeddings = OpenAIEmbeddings()

# Load the stored vector databases
detailed_store = FAISS.load_local(detailed_store_path, embeddings, allow_dangerous_deserialization=True)

In [48]:
def create_bm25_index(documents: List[Document]) -> BM25Okapi:
    """
    Create a BM25 index from the given documents.

    BM25 (Best Matching 25) is a ranking function used in information retrieval.
    It's based on the probabilistic retrieval framework and is an improvement over TF-IDF.

    Args:
    documents (List[Document]): List of documents to index.

    Returns:
    BM25Okapi: An index that can be used for BM25 scoring.
    """
    # Tokenize each document by splitting on whitespace
    # This is a simple approach and could be improved with more sophisticated tokenization
    tokenized_docs = [doc.page_content.split() for doc in documents]
    return BM25Okapi(tokenized_docs)

In [49]:
# Retrieve all documents from the detailed vector store
all_docs = detailed_store.similarity_search("", k=detailed_store.index.ntotal)
bm25 = create_bm25_index(all_docs)

In [50]:
def fusion_retrieval(vectorstore, bm25, query: str, k: int = 5, alpha: float = 0.5) -> List[Document]:
    """
    Perform fusion retrieval combining keyword-based (BM25) and vector-based search.

    Args:
    vectorstore (VectorStore): The vectorstore containing the documents.
    bm25 (BM25Okapi): Pre-computed BM25 index.
    query (str): The query string.
    k (int): The number of documents to retrieve.
    alpha (float): The weight for vector search scores (1-alpha will be the weight for BM25 scores).

    Returns:
    List[Document]: The top k documents based on the combined scores.
    """
    
    epsilon = 1e-8

    # Step 1: Get all documents from the vectorstore
    all_docs = vectorstore.similarity_search("", k=vectorstore.index.ntotal)

    # Step 2: Perform BM25 search
    bm25_scores = bm25.get_scores(query.split())

    # Step 3: Perform vector search
    vector_results = vectorstore.similarity_search_with_score(query, k=len(all_docs))
    
    # Step 4: Normalize scores
    vector_scores = np.array([score for _, score in vector_results])
    vector_scores = 1 - (vector_scores - np.min(vector_scores)) / (np.max(vector_scores) - np.min(vector_scores) + epsilon)

    bm25_scores = (bm25_scores - np.min(bm25_scores)) / (np.max(bm25_scores) -  np.min(bm25_scores) + epsilon)

    # Step 5: Combine scores
    combined_scores = alpha * vector_scores + (1 - alpha) * bm25_scores  

    # Step 6: Rank documents
    sorted_indices = np.argsort(combined_scores)[::-1]
    
    # Step 7: Return top k documents
    return [all_docs[i] for i in sorted_indices[:k]]

In [51]:
class FusionRAG:
    def __init__(self, detailed_store, bm25, k: int = 5, alpha: float = 0.5):
        """
        Initializes the FusionRAG with a detailed vector store and a BM25 index.
        
        Args:
            detailed_store: A FAISS vector store containing detailed embeddings.
            bm25: A BM25 index built from the same documents.
            k: The number of top documents to retrieve.
            alpha: The weight for the vector search scores in the fusion (1-alpha is the BM25 weight).
        """
        self.detailed_store = detailed_store
        self.bm25 = bm25
        self.k = k
        self.alpha = alpha

        # Initialize the LLM (using GPT-3.5 Turbo here)
        self.llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", max_tokens=4000)

        # Create the prompt template for answering the query based on the retrieved context.
        prompt_template = (
            "Use the following pieces of context to answer the question at the end. \n"
            "If you don't know the answer, just say that you don't know—don't try to make up an answer.\n\n"
            "{context}\n\n"
            "Question: {question}\n"
            "Answer:"
        )
        self.prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        self.llm_chain = self.prompt | self.llm

    def answer(self, query: str) -> str:
        """
        Answers a query by performing fusion retrieval on the detailed store and BM25 index,
        then uses an LLM to generate the final answer.
        
        Args:
            query (str): The query string.
            
        Returns:
            str: The LLM-generated answer.
        """
        # Retrieve documents using the fusion retrieval method.
        results = fusion_retrieval(self.detailed_store, self.bm25, query, k=self.k, alpha=self.alpha)
        
        # Optional: Print retrieved chunks for inspection.
        for chunk in results:
            page_info = chunk.metadata.get("page", "Unknown")
            print(f"Page: {page_info}")
            print(f"Content: {chunk.page_content[:100]}...")  # First 100 characters for brevity
            print("---")
        
        # Combine retrieved context into one string; fallback if none is found.
        if results:
            context = "\n".join([chunk.page_content for chunk in results])
        else:
            context = "No relevant context found."
        
        # Prepare input data and generate the answer.
        input_data = {"context": context, "question": query}
        answer = self.llm_chain.invoke(input_data)
        return answer.content

In [52]:
query = "What is the optimal roasting time for cocoa?"
fusion_rag = FusionRAG(detailed_store, bm25, k=5, alpha=0.5)
final_answer = fusion_rag.answer(query)
print("Final Answer:")
print(final_answer)

Page: 734
Content: minimum level of protection is defined by the TRIPS agreement (Agreement on 
Trade‐related Aspects o...
---
Page: 95
Content: 56   Chapter 3
Ziegleder and Oberparleiter (1996) have proposed a moisture treatment prior 
to roast...
---
Page: 734
Content: includes literary and artistic works. Intellectual property is an asset and can be 
bought, sold or ...
---
Page: 615
Content: shows schematically the light source and the line camera. An incremental 
encoder on the fifth roll ...
---
Page: 554
Content: Additionally, it is important to consider the amount of time given for rinsing 
between samples. Whe...
---
Final Answer:
The optimal roasting time for cocoa is 10-15 minutes at 40-60 °C (104-140 °F) after a moisture treatment with steam.


In [53]:

class FusionRAG_Mistral:
    def __init__(self, detailed_store, bm25, k: int = 5, alpha: float = 0.5):
        """
        Initializes the FusionRAG with a detailed vector store and a BM25 index,
        using Ollama's Mistral model.

        Args:
            detailed_store: A FAISS vector store containing detailed embeddings.
            bm25: A BM25 index built from the same documents.
            k: The number of top documents to retrieve.
            alpha: The weight for the vector search scores in the fusion (1-alpha is the BM25 weight).
        """
        self.detailed_store = detailed_store
        self.bm25 = bm25
        self.k = k
        self.alpha = alpha

        # Initialize the LLM using Ollama's Mistral model.
        # Note: max_tokens parameter is removed because it's not allowed.
        self.llm = Ollama(temperature=0, model="mistral")

        # Create the prompt template for answering the query based on the retrieved context.
        prompt_template = (
            "Use the following pieces of context to answer the question at the end. \n"
            "If you don't know the answer, just say that you don't know—don't try to make up an answer.\n\n"
            "{context}\n\n"
            "Question: {question}\n"
            "Answer:"
        )
        self.prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        self.llm_chain = self.prompt | self.llm

    def answer(self, query: str) -> str:
        """
        Answers a query by performing fusion retrieval on the detailed store and BM25 index,
        then uses the LLM to generate the final answer.

        Args:
            query (str): The query string.

        Returns:
            str: The LLM-generated answer.
        """
        # Retrieve documents using the fusion retrieval method.
        results = fusion_retrieval(self.detailed_store, self.bm25, query, k=self.k, alpha=self.alpha)
        
        # Optional: Print retrieved chunks for inspection.
        for chunk in results:
            page_info = chunk.metadata.get("page", "Unknown")
            print(f"Page: {page_info}")
            print(f"Content: {chunk.page_content[:100]}...")  # First 100 characters for brevity
            print("---")
        
        # Combine retrieved context into one string; fallback if none is found.
        if results:
            context = "\n".join([chunk.page_content for chunk in results])
        else:
            context = "No relevant context found."
        
        # Prepare input data and generate the answer.
        input_data = {"context": context, "question": query}
        answer = self.llm_chain.invoke(input_data)
        return answer  # answer is returned as a string





In [54]:
query = "What is the optimal roasting time for cocoa?"
fusion_rag_mistral = FusionRAG_Mistral(detailed_store, bm25, k=5, alpha=0.5)
final_answer = fusion_rag_mistral.answer(query)
print("Final Answer:")
print(final_answer)


Page: 734
Content: minimum level of protection is defined by the TRIPS agreement (Agreement on 
Trade‐related Aspects o...
---
Page: 95
Content: 56   Chapter 3
Ziegleder and Oberparleiter (1996) have proposed a moisture treatment prior 
to roast...
---
Page: 734
Content: includes literary and artistic works. Intellectual property is an asset and can be 
bought, sold or ...
---
Page: 615
Content: shows schematically the light source and the line camera. An incremental 
encoder on the fifth roll ...
---
Page: 554
Content: Additionally, it is important to consider the amount of time given for rinsing 
between samples. Whe...
---
Final Answer:
 The text does not provide specific information about the optimal roasting time for cocoa beans. However, it mentions that the highest roasting temperature depends upon the required roast intensity and the equipment used, and that a slow reduction in moisture content to about 3% followed by a rapid heating to the final roast temperature is the optima