# Assignment 4: ReAct with Retrieval Tools for Solving Multi-hop Questions

#NOTE : DO NOT FORGET TO ADD YOUR API KEY
#1. Setting Up the Knowledge Base
Loading Documents
The system starts by loading documents (your "knowledge base") from JSON files:

In [1]:
!pip install sentence_transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence_transformers)
 

In [2]:
import json
# -----------------------------
# 1. Dataset Loading
# -----------------------------
def load_corpus(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    docs = []
    for entry in data:
        text = entry.get("body", "").strip()
        if text:
            docs.append(text)
    return docs

def load_queries(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        queries = json.load(f)
    return queries[:15]

In [3]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


# 2. The Retrieval Tools

In [4]:
# -----------------------------
# 2. Retrieval Tool Implementations
# -----------------------------
# from rank_bm25 import BM25Okapi
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import math


class BM25Retriever:
    def __init__(self, documents):
        """
        BM25Retriever: A traditional lexical retrieval model based on BM25.
        It ranks documents based on term frequency and inverse document frequency (TF-IDF).
        Useful for keyword-based queries.
        """

        self.documents = documents
        self.term_freqs = []
        self.doc_lengths = []
        self.df = {}
        self.avgdl = 0.0

        # Preprocess documents
        for doc in documents:
            tokens = doc.split()
            self.doc_lengths.append(len(tokens))
            tf = {}
            for term in tokens:
                tf[term] = tf.get(term, 0) + 1
            self.term_freqs.append(tf)
            for term in tf:
                self.df[term] = self.df.get(term, 0) + 1

        # Calculate average document length
        self.avgdl = np.mean(self.doc_lengths) if documents else 0


    def retrieve(self, query, top_k=3):
        """
        Retrieves the top_k most relevant documents based on BM25 scoring.
        """

        scores = []
        query_terms = query.split()
        k1 = 1.2
        b = 0.75

        for idx, doc in enumerate(self.documents):
            score = 0
            doc_len = self.doc_lengths[idx]
            tf = self.term_freqs[idx]

            for term in query_terms:
                if term not in tf:
                    continue
                idf = math.log((len(self.documents) - self.df.get(term, 0) + 0.5) /
                              (self.df.get(term, 0) + 0.5) + 1)
                tf_term = tf[term]
                score += idf * (tf_term * (k1 + 1)) / (tf_term + k1 * (1 - b + b * doc_len / self.avgdl))

            scores.append((score, idx))

        scores.sort(reverse=True)
        return [self.documents[i] for _, i in scores[:top_k]]


class SemanticRetriever:
    def __init__(self, documents, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """
        SemanticRetriever: Uses transformer-based embeddings to retrieve documents based on semantic similarity.
        Embeds documents using a pre-trained model and ranks them using cosine similarity.
        """

        self.model = SentenceTransformer(model_name)
        self.doc_embeddings = self.model.encode(documents)
        self.documents = documents


    def retrieve(self, query, top_k=3):
        """
        Retrieves the top_k most relevant documents using cosine similarity.
        """

        query_embedding = self.model.encode([query])
        similarities = cosine_similarity(query_embedding, self.doc_embeddings)[0]
        indices = np.argsort(similarities)[-top_k:][::-1]
        return [self.documents[i] for i in indices]



class HybridRetriever:
    def __init__(self, documents, bm25_weight=0.5, semantic_weight=0.5, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """
        HybridRetriever: Combines BM25 and Semantic Retrieval.
        Uses a weighted combination of both scores to retrieve documents.
        """

        self.bm25 = BM25Retriever(documents)
        self.semantic = SemanticRetriever(documents, model_name)
        self.weights = [bm25_weight, semantic_weight]


    def retrieve(self, query, top_k=3):
        """
        Retrieves the top_k most relevant documents using a combination of BM25 and semantic scores.
        """

        bm25_results = self.bm25.retrieve(query, top_k*2)
        semantic_results = self.semantic.retrieve(query, top_k*2)

        combined = {}
        for i, doc in enumerate(bm25_results):
            combined[doc] = combined.get(doc, 0) + self.weights[0] * (len(bm25_results)-i)
        for i, doc in enumerate(semantic_results):
            combined[doc] = combined.get(doc, 0) + self.weights[1] * (len(semantic_results)-i)

        sorted_docs = sorted(combined.items(), key=lambda x: x[1], reverse=True)
        return [doc for doc, _ in sorted_docs[:top_k]]


In [6]:
# Assume openai>=1.0.0
from openai import OpenAI


def query_llm_api(prompt, base_url, api_key):
    """
    Generic LLM API caller.

    Parameters:
        prompt: A string prompt to send to the LLM.
        base_url: The base URL of the LLM provider's API.
        api_key: Your API key.

    Returns:
        A string response from the LLM.
    """
    # Create an OpenAI client with your deepinfra token and endpoint
    openai = OpenAI(
        api_key="add your api key here",
        base_url="https://api.deepinfra.com/v1/openai",
    )

    chat_completion = openai.chat.completions.create(
        model="Qwen/Qwen2.5-7B-Instruct",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=1024
    )
    try:
        response = chat_completion.choices[0].message.content
        if response == "":
            return "No response from LLM."
        return response
    except Exception as e:
        return f"Exception during API call: {str(e)}"

    print(response)

In [7]:
# -----------------------------
# 4. ReAct Reasoning Chain (Max 5 Tool Calls)
# -----------------------------
def react_chain(query, retrieval_tool_obj, base_url, api_key, max_steps=5):
    """
    ReAct reasoning chain that produces a final answer.

    Parameters:
        query: The query string.
        retrieval_tool_obj: An instance of one of the retrieval tool classes.
        base_url: The LLM provider base URL.
        api_key: The API key for the LLM provider.
        max_steps: Maximum number of iterations (tool calls) allowed (max 5).

    Returns:
        pred_answer: a single word or a short phrase
        retrieved_evidence: a list of retrived evidences (list(str))
    """
    context = []
    retrieved_evidence = []

    for step_count in range(max_steps):
        prompt = f"""Answer the question: {query}
        Context: {context[-3:] if context else 'None'}
        Choose one:
        Action 1: Retrieve more information
        Action 2: Give final answer"""

        response = query_llm_api(prompt, base_url, api_key).strip()
        print(response)

        if "Action 2" in response or step_count == max_steps - 1: #or"final answer" in response.lower()
            answer_prompt = f"""Answer the question based on the context.
            Question: {query}
            Context: {context}
            Answer in less than 3 words:"""
            print(answer_prompt)

            final_answer = query_llm_api(answer_prompt, base_url, api_key).split("\n")[0].strip()
            return final_answer, retrieved_evidence[:3]
        else:
            docs = retrieval_tool_obj.retrieve(query)
            retrieved_evidence.extend(docs)
            context.extend(docs)

    return response, retrieved_evidence[:3]


In [8]:
# -----------------------------
# 5. Evaluation Function with Retrieval Metrics and Exact Match
# -----------------------------
import re

def normalize_text(text):
    text = re.sub(r'\s+', ' ', text)  # Merge whitespace
    return text.lower().strip()

def evaluate_system(queries, retrieval_tool_obj, base_url, api_key,react_chain):
    """
    Evaluate system performance on a set of queries

    Calculates three key metrics:
    - Recall: Proportion of gold evidence found in retrieval results
    - MRR (Mean Reciprocal Rank): Average reciprocal of the first correct answer's rank
    - EM (Exact Match): Proportion of final answers that exactly match the gold standard
    """
    eval_results = []
    total_recall = 0
    total_mrr = 0
    total_em = 0

    for q in queries:
        # step 1: get predicted answer and retrieved evidence
        pred_answer, retrieved_evidence = react_chain(q["query"], retrieval_tool_obj, base_url, api_key)

        # step 2: prepare data for evaluation
        gold_answer = normalize_text(q["answer"])
        pred_answer = normalize_text(pred_answer)
        gold_evidence = [normalize_text(e["fact"]) for e in q["evidence_list"]]
        retrieved_evidence = [normalize_text(e) for e in retrieved_evidence]

        # step 3: metrics
        ## step 3.1: recall
        found_evidence = 0
        for gold in gold_evidence:
            if any(gold in retrieved for retrieved in retrieved_evidence):
                found_evidence += 1
        recall = found_evidence / len(gold_evidence) if gold_evidence else 0

        ## step 3.2: mrr
        mrr = 0
        for rank, retrieved in enumerate(retrieved_evidence, 1):
            if any(gold in retrieved for gold in gold_evidence):
                mrr = 1 / rank
                break

        ## step 3.3: exact match
        em = 1 if gold_answer == pred_answer else 0

        total_recall += recall
        total_mrr += mrr
        total_em += em

        # step 4: aggregate results
        result = {
            "query": q["query"],
            "gold_answer": gold_answer,
            "pred_answer": pred_answer,
            "recall": recall,
            "mrr": mrr,
            "em": em,
            # "retrieved_evidence": retrieved_evidence
        }
        eval_results.append(result)

    num_queries = len(queries)
    overall_metrics = {
        "avg_recall": total_recall / num_queries,
        "avg_mrr": total_mrr / num_queries,
        "avg_em": total_em / num_queries
    }

    return eval_results, overall_metrics

In [25]:
# Config
base_url = "https://api.deepinfra.com/v1/openai"
api_key = "add your api key here"

# Load dataset
documents = load_corpus("corpus-1.json")
queries = load_queries("/content/MultiHopRAG-1.json")


retriever = "hybrid" # choices ["bm25", "semantic", "hybrid"]
if retriever == "bm25":
    retrieval_tool = BM25Retriever(documents)
elif retriever == "semantic":
    retrieval_tool = SemanticRetriever(documents)
elif retriever == "hybrid":
    retrieval_tool = HybridRetriever(documents)
else:
    raise ValueError("Unsupported retriever type.")

# Evaluate the system on all queries using the chosen retrieval tool
eval_results, overall_metrics = evaluate_system(queries, retrieval_tool, base_url, api_key,react_chain)



Action 1: Retrieve more information

To provide a final answer, I need more specific details about the individual in question, as there have been multiple high-profile cases of cryptocurrency fraud in recent years. Without a specific name or more context, I cannot accurately identify the individual you are referring to. I will check recent news reports from The Verge and TechCrunch to find the most relevant information.

After reviewing recent news, I found that the individual you are asking about is likely William Martinez, also known as "worym." Martinez is facing criminal charges of conspiracy to commit wire fraud and wire fraud in connection with allegedly running a pump and dump scheme on the cryptocurrency market. Both The Verge and TechCrunch have reported on this case.
Action 2: Give final answer

The individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, is Sam Bankman-Fried.

Hybrid

In [26]:
overall_metrics

{'avg_recall': 0.3555555555555555,
 'avg_mrr': 0.4222222222222222,
 'avg_em': 0.2}

In [27]:
eval_results

[{'query': 'Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',
  'gold_answer': 'sam bankman-fried',
  'pred_answer': 'sam bankman-fried',
  'recall': 0.3333333333333333,
  'mrr': 1.0,
  'em': 1},
 {'query': "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?",
  'gold_answer': 'donald trump',
  'pred_answer': 'donald trump',
  'recall': 1.0,
  'mrr': 1.0,
  'em': 1},
 {'query': 'Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a pre

Semantic

In [23]:
overall_metrics

{'avg_recall': 0.2, 'avg_mrr': 0.3, 'avg_em': 0.4}

In [24]:
eval_results

[{'query': 'Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',
  'gold_answer': 'sam bankman-fried',
  'pred_answer': 'sam bankman-fried',
  'recall': 0.0,
  'mrr': 0,
  'em': 1},
 {'query': "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?",
  'gold_answer': 'donald trump',
  'pred_answer': 'donald trump',
  'recall': 1.0,
  'mrr': 1.0,
  'em': 1},
 {'query': 'Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing theory su

BM25 Overalls

In [20]:
overall_metrics

{'avg_recall': 0.45555555555555555,
 'avg_mrr': 0.5555555555555555,
 'avg_em': 0.26666666666666666}

In [21]:
eval_results

[{'query': 'Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?',
  'gold_answer': 'sam bankman-fried',
  'pred_answer': 'sam bankman-fried',
  'recall': 0.6666666666666666,
  'mrr': 1.0,
  'em': 1},
 {'query': "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?",
  'gold_answer': 'donald trump',
  'pred_answer': 'donald trump',
  'recall': 1.0,
  'mrr': 1.0,
  'em': 1},
 {'query': 'Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a pre

Document Chunking:

Breaks long documents into smaller, overlapping chunks:
* It uses a sliding window approach with a specified chunk size and overlap.

* This technique helps handle very long documents by creating more manageable pieces.

* Overlapping ensures important context isn't lost between chunks.

Multi-Model Embedding:

Allows the use of multiple embedding models:

* It can load different pre-trained sentence transformer models.

* This flexibility allows for specialized embeddings based on query types or document characteristics.

* The embed method converts text into numerical vectors (embeddings) using the specified model.

In [9]:
import json
import math
import numpy as np
import nltk
from nltk.corpus import wordnet
from sentence_transformers import SentenceTransformer, CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
import requests
from typing import List, Dict

nltk.download('wordnet')

# -----------------------------
# Enhanced Indexing Techniques
# -----------------------------

class DocumentChunker:
    """Sliding window chunking with overlap for long documents"""
    def __init__(self, chunk_size=512, overlap=0.2):
        self.chunk_size = chunk_size
        self.overlap = overlap

    def chunk(self, documents: List[str]) -> List[str]:
        chunks = []
        for doc in documents:
            words = doc.split()
            stride = int(self.chunk_size * (1 - self.overlap))
            for i in range(0, len(words), stride):
                chunk = ' '.join(words[i:i+self.chunk_size])
                if i + self.chunk_size > len(words):
                    break
                chunks.append(chunk)
        return chunks

class MultiModelEmbedder:
    """Multiple embedding models for different query types"""
    def __init__(self, models: List[str] = ["sentence-transformers/all-MiniLM-L6-v2",
                                          "sentence-transformers/multi-qa-mpnet-base-dot-v1"]):
        self.models = [SentenceTransformer(model) for model in models]

    def embed(self, text: str, model_index=0) -> np.ndarray:
        return self.models[model_index].encode(text)


[nltk_data] Downloading package wordnet to /root/nltk_data...


Query Routing:

* Categorizes queries as "keyword(BM25)", "semantic", or "hybrid"

* Uses predefined sets of indicator words to determine query type

* Helps select the most appropriate retrieval strategy

QueryRewriter:

* Uses an LLM to rephrase queries for better retrieval

QueryExpander:

* Expands queries using WordNet synonyms and LLM suggestions

* WordNet expansion finds synonyms for each query word

* LLM expansion generates query variations

* Combines both methods to create a diverse set of expanded queries

In [10]:

# -----------------------------
# Enhanced Pre-Retrieval Techniques
# -----------------------------

class QueryRoutingClassifier:
    """Classify query type to choose retrieval strategy"""
    def __init__(self):
        self.keyword_indicators = {'who', 'what', 'when', 'where', 'named entity'}
        self.semantic_indicators = {'why', 'how', 'explain', 'describe'}

    def classify(self, query: str) -> str:
        query_lower = query.lower()
        if any(indicator in query_lower for indicator in self.keyword_indicators):
            return "keyword"
        elif any(indicator in query_lower for indicator in self.semantic_indicators):
            return "semantic"
        return "hybrid"

class QueryRewriter:
    """LLM-based query reformulation"""
    def __init__(self, base_url, api_key):
        self.base_url = base_url
        self.api_key = api_key

    def rewrite(self, query: str) -> str:
        prompt = f"Rephrase this query for better document retrieval: {query}"
        return query_llm_api(prompt, self.base_url, self.api_key)

class QueryExpander:
    """Synonym expansion using WordNet and LLM suggestions"""
    def __init__(self, base_url=None, api_key=None):
        self.base_url = base_url
        self.api_key = api_key

    def expand(self, query: str) -> List[str]:
        # WordNet expansion
        synonyms = set()
        for word in query.split():
            for syn in wordnet.synsets(word):
                for lemma in syn.lemmas():
                    synonyms.add(lemma.name())

        # LLM-based expansion
        if self.base_url and self.api_key:
            prompt = f"Generate 3 search query variations for: {query}"
            variations = query_llm_api(prompt, self.base_url, self.api_key).split('\n')
            synonyms.update(variations)

        return list(synonyms)[:5]  # Return top 5 expansions

Enhanced BM25 Retriever: (Uses document chunking)

* Uses document chunking to handle long documents more effectively

* Applies the BM25 algorithm to these chunks instead of whole documents

* This can improve retrieval for specific parts of long documents

Neural Retriever: (Uses Multimodal embeddings)

An advanced version of the SemanticRetriever

* Uses multiple embedding models for different types of queries

* Stores document embeddings for quick retrieval

* Retrieval process:

 * Embeds the query using the specified model

 * Calculates cosine similarity between query and document embeddings

 * Returns top-k most similar documents

In [11]:
# -----------------------------
# Enhanced Retrieval Implementations
# -----------------------------

class EnhancedBM25Retriever(BM25Retriever):
    """BM25 with chunked documents and query expansion"""
    def __init__(self, documents, chunker=DocumentChunker()):
        self.chunker = chunker
        self.chunks = chunker.chunk(documents)
        super().__init__(self.chunks)

class NeuralRetriever(SemanticRetriever):
    """Semantic retriever with multiple embedding models"""
    def __init__(self, documents, embedder=MultiModelEmbedder()):
        self.embedder = embedder
        self.documents = documents
        self.doc_embeddings = self.embedder.embed(documents)

    def retrieve(self, query: str, top_k=3, model_index=0) -> List[str]:
        query_embedding = self.embedder.embed(query, model_index)
        similarities = cosine_similarity([query_embedding], self.doc_embeddings)[0]
        return [self.documents[i] for i in np.argsort(similarities)[-top_k:][::-1]]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Cross Encoder Reranker:

* Uses a cross-encoder model to re-rank retrieved documents

* Pairs query with each document for more accurate relevance scoring

* Sorts documents based on predicted relevance scores

Summary Augmenter:

* Generates concise summaries of retrieved documents

* Helps distill important information from longer texts

Reciprocal Rank Fuser:

* Combines results from multiple retrieval methods

* Uses reciprocal rank fusion algorithm

* Assigns higher scores to documents ranked highly across methods

* Produces a unified, re-ranked list of results

In [14]:
# -----------------------------
# Enhanced Post-Retrieval Techniques
# -----------------------------

class CrossEncoderReranker:
    """Re-rank results using cross-encoder model"""
    def __init__(self, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"):
        self.model = CrossEncoder(model_name)

    def rerank(self, query: str, documents: List[str]) -> List[str]:
        pairs = [[query, doc] for doc in documents]
        scores = self.model.predict(pairs)
        return [doc for _, doc in sorted(zip(scores, documents), reverse=True)]

class SummaryAugmenter:
    """Generate summaries of retrieved documents"""
    def __init__(self, base_url, api_key):
        self.base_url = base_url
        self.api_key = api_key

    def summarize(self, documents: List[str]) -> str:
        prompt = "Summarize these documents into key points:\n" + "\n".join(documents)
        return query_llm_api(prompt, self.base_url, self.api_key)

class ReciprocalRankFuser:
    """Combine multiple retrieval results using reciprocal rank fusion"""
    @staticmethod
    def fuse(results_list: List[List[str]], k=60) -> List[str]:
        fused_scores = {}
        for results in results_list:
            for rank, doc in enumerate(results, 1):
                fused_scores[doc] = fused_scores.get(doc, 0) + 1/(rank + k)
        return [doc for doc, _ in sorted(fused_scores.items(), key=lambda x: -x[1])]


Combines all the methods discussed above to get the results

In [25]:
# -----------------------------
# Enhanced ReAct Chain with Techniques
# -----------------------------

def enhanced_react_chain(query, retrieval_tools, base_url, api_key, max_steps=5):
    """Enhanced ReAct chain with multiple retrieval techniques"""
    context = []
    retrieved_evidence = []
    query_expander = QueryExpander(base_url, api_key)
    reranker = CrossEncoderReranker()
    summarizer = SummaryAugmenter(base_url, api_key)

    # Pre-retrieval processing
    expanded_queries = [query] + query_expander.expand(query)

    for step in range(max_steps):
        # Parallel retrieval with different techniques
        all_results = []
        for q in expanded_queries:
            all_results.extend(retrieval_tools['bm25'].retrieve(q, top_k=3))
            all_results.extend(retrieval_tools['semantic'].retrieve(q, top_k=3))

        # Post-retrieval processing
        unique_results = list(set(all_results))
        reranked = reranker.rerank(query, unique_results)[:5]
        summary = summarizer.summarize(reranked[:3])

        # Update context
        context.append(f"Step {step+1} Summary: {summary}")
        retrieved_evidence.extend(reranked)

        # Generate next action
        prompt = f"""Answer: {query}
        Context: {context}
        Should we (1) Continue retrieval or (2) Final answer?"""

        decision = query_llm_api(prompt, base_url, api_key)
        if "2" in decision or "answer" in decision.lower():
            line_break = "\n"
            answer_prompt = f"""Synthesize answer from context:
            Question: {query}
            Context: {line_break.join(context)}
            respond only with a name, 'yes,' 'no,' or 'insufficient information.' Prioritize providing a definitive answer whenever possible. Only use 'insufficient information' as a last resort when you are absolutely certain that no reasonable inference can be made
            Answer in less that 3 words:"""
            return query_llm_api(answer_prompt, base_url, api_key), retrieved_evidence

    return "Answer not found", retrieved_evidence



In [26]:
# -----------------------------
# Technique Comparison Evaluation
# -----------------------------

def compare_techniques(queries, base_url, api_key):
    """Compare performance of different technique combinations"""
    documents = load_corpus("corpus-1.json")

    # Initialize different configurations
    configs = {
        "baseline": {"chunking": False, "expansion": False, "reranking": False},
        "chunking": {"chunking": True, "expansion": False, "reranking": False},
        "full_pipeline": {"chunking": True, "expansion": True, "reranking": True}
    }

    results = {}
    for config_name, params in configs.items():
        # Preprocessing
        processed_docs = DocumentChunker().chunk(documents) if params['chunking'] else documents

        # Initialize retrievers
        bm25 = EnhancedBM25Retriever(processed_docs)
        semantic = NeuralRetriever(processed_docs)

        # Configure pipeline
        retrieval_tools = {
            'bm25': bm25,
            'semantic': semantic,
            'expander': QueryExpander(base_url, api_key) if params['expansion'] else None
        }

        # Run evaluation
        ans, metrics = evaluate_system(
            queries,
            retrieval_tools,
            base_url,
            api_key,
            react_chain=enhanced_react_chain
        )
        print(ans)
        results[config_name] = metrics

    # Print comparison
    print("Technique Comparison Results:")
    for config, metrics in results.items():
        print(f"\n{config}:")
        print(f"  Recall: {metrics['avg_recall']:.2f}")
        print(f"  MRR: {metrics['avg_mrr']:.2f}")
        print(f"  EM: {metrics['avg_em']:.2f}")

    return results

These methods takes time as the documents have to be chunked and each chunk is converted to embeddings using different models.

Further the query is expanded, similar ones are generated and multiple embeddings for the same is created using different models

Post retrieval we use the summariser and the ranking techniques on which the final answer is generated.

In [27]:
documents = load_corpus("corpus-1.json")
chunker = DocumentChunker()
processed_docs = chunker.chunk(documents)

bm25 = EnhancedBM25Retriever(processed_docs)
semantic = NeuralRetriever(processed_docs)
hybrid = HybridRetriever(processed_docs)

# Use enhanced ReAct chain
retrieval_tools = {'bm25': bm25, 'semantic': semantic}
queries = load_queries("/content/MultiHopRAG-1.json")

# Config
base_url = "https://api.deepinfra.com/v1/openai"
api_key = "pVqbCygRUXv8rNd3wotoyxVMPvLPgxiY"


# Compare different techniques
results = compare_techniques(queries, base_url, api_key)

[{'query': 'Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?', 'gold_answer': 'sam bankman-fried', 'pred_answer': 'sam bankman-fried', 'recall': 0.3333333333333333, 'mrr': 0.5, 'em': 1}, {'query': "Which individual is implicated in both inflating the value of a Manhattan apartment to a figure not yet achieved in New York City's real estate history, according to 'Fortune', and is also accused of adjusting this apartment's valuation to compensate for a loss in another asset's worth, as reported by 'The Age'?", 'gold_answer': 'donald trump', 'pred_answer': 'insufficient information', 'recall': 1.0, 'mrr': 1.0, 'em': 0}, {'query': 'Who is the figure associated with generative AI technology whose departure from OpenAI was considered shocking according to Fortune, and is also the subject of a prevailing th