# Setup

In [1]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from dotenv import load_dotenv
import os
import nest_asyncio

# Patch asyncio so event loops can be nested; the fusion retrievers further down
# are created with use_async=True and are driven from inside the notebook.
nest_asyncio.apply()

# Pull OPENAI_API_KEY from a local .env file / the process environment so the
# key never has to be written into the notebook.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Both constructors take the model identifier through the `model` keyword.
# `model_name` is not a constructor parameter of the OpenAI LLM, so passing it
# did not reliably select "gpt-4o-mini" (the class default model was used).
embed_model = OpenAIEmbedding(model="text-embedding-ada-002", api_key=OPENAI_API_KEY)
llm = OpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini", temperature=0.1)

In [2]:
from vectorstore import get_vectorstore

# NOTE(review): `vectorstore` is not referenced by any later cell visible in this
# notebook (get_index below only receives embed_model) — confirm whether this call
# is needed for its side effects or can be dropped.
vectorstore = get_vectorstore()

In [3]:
from index import get_index

index = get_index(embed_model=embed_model)

In [4]:
import json

def store_docs(docs, filename):
    """Dump retrieved nodes to ``./output/<filename>`` as indented JSON.

    Args:
        docs: Iterable of objects exposing ``text``, ``metadata`` and ``score``
            attributes. ``metadata`` and ``score`` must be JSON-serializable.
        filename: Name of the file to write, relative to ``./output``.

    Raises:
        TypeError: If a ``metadata`` or ``score`` value cannot be encoded as JSON.
        OSError: If the output directory or file cannot be created/written.
    """
    records = [
        {"text": node.text, "metadata": node.metadata, "score": node.score}
        for node in docs
    ]

    path = f"./output/{filename}"
    # A fresh checkout has no ./output directory; create it (and any sub-folders
    # implied by `filename`) instead of failing with FileNotFoundError.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Explicit encoding keeps the result independent of the platform default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)

# Retrieving

In [5]:
# Prompt template passed to QueryFusionRetriever as `query_gen_prompt`.
# It contains two format placeholders: {num_queries} (how many variations to ask
# the LLM for) and {query} (the original user question). The template ends by
# asking for one variation per line.
query_gen_prompt = """You are an AI language model assistant specializing in query expansion. Your task is to generate {num_queries} diverse versions of the given user question. These variations will be used to retrieve relevant documents from a vector database, helping to overcome limitations of distance-based similarity search.

Original question: {query}

Instructions:
1. Create {num_queries} unique variations of the original question.
2. Ensure each variation maintains the core intent of the original question.
3. Use different phrasings, synonyms, or perspectives for each variation.
4. Consider potential context or implications not explicitly stated in the original question.
5. Avoid introducing new topics or drastically changing the meaning of the question.

Please provide your {num_queries} question variations, each on a new line:
"""

In [6]:
# Shared experiment parameters, reused by every retrieval strategy below.
# top_n: passed as similarity_top_k to the retrievers and as top_n to the reranker.
top_n = 50
# num_queries: passed to QueryFusionRetriever as num_queries.
num_queries = 5
# question: the query string sent to each retriever's retrieve() call.
question = "Who is Ali?"

## Existing Implementation

In [7]:
vector_retriever = index.as_retriever(similarity_top_k=top_n)

In [8]:
from llama_index.core.retrievers import QueryFusionRetriever

# Baseline: query fusion over the plain vector retriever. The LLM and the custom
# prompt are used to generate query variations; results are merged with the
# "reciprocal_rerank" fusion mode.
retriever = QueryFusionRetriever(
    [vector_retriever],
    similarity_top_k=top_n,
    num_queries=num_queries,  # set this to 1 to disable query generation
    mode="reciprocal_rerank",
    use_async=True,  # presumably the reason nest_asyncio.apply() is called in setup
    verbose=True,  # prints the generated queries (see the cell output below)
    query_gen_prompt=query_gen_prompt,
    llm=llm,
)

In [9]:
# This generates num_queries - 1 new queries, since the original query is also included.
nodes_with_scores = retriever.retrieve(question)

Generated queries:
1. What is the identity of Ali?
2. Can you provide information about Ali?
3. Who goes by the name Ali?
4. What can you tell me about Ali?


In [11]:
store_docs(nodes_with_scores, "baseline_docs.json")

## Cross Encoder

In [13]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker. Its top_n is the same value used for the retrievers'
# similarity_top_k, so it is configured to keep as many nodes as are retrieved
# (i.e. it reorders rather than trims, assuming at most top_n nodes come in).
sentence_transformer_rerank = SentenceTransformerRerank(
    model='cross-encoder/ms-marco-MiniLM-L-6-v2',
    top_n=top_n
)

In [14]:
from llama_index.core.retrievers import BaseRetriever


class RerankingRetriever(BaseRetriever):
    """Retriever that reranks another retriever's results with a node postprocessor.

    `index.as_retriever(...)` has no postprocessor argument: an unknown
    `postprocessor=` keyword is silently dropped, so the cross-encoder was never
    applied. This wrapper applies it explicitly while still exposing the standard
    retriever interface, so it can be passed to QueryFusionRetriever.

    Args:
        base_retriever: Retriever that produces the candidate nodes.
        reranker: Node postprocessor exposing `postprocess_nodes(nodes, query_bundle=...)`.
    """

    def __init__(self, base_retriever, reranker):
        self._base_retriever = base_retriever
        self._reranker = reranker
        super().__init__()

    def _retrieve(self, query_bundle):
        """Retrieve candidates, then rescore/reorder them for the same query."""
        candidates = self._base_retriever.retrieve(query_bundle)
        return self._reranker.postprocess_nodes(candidates, query_bundle=query_bundle)

    async def _aretrieve(self, query_bundle):
        """Async variant; used when the fusion retriever runs with use_async=True."""
        candidates = await self._base_retriever.aretrieve(query_bundle)
        return self._reranker.postprocess_nodes(candidates, query_bundle=query_bundle)


cross_encoder_retriever = RerankingRetriever(
    base_retriever=index.as_retriever(similarity_top_k=top_n),
    reranker=sentence_transformer_rerank,
)

In [15]:
docs = cross_encoder_retriever.retrieve(question)

In [None]:
# NOTE(review): unexecuted scratch experiment. `postprocessors` does not appear to be
# an as_retriever argument either — confirm against the retriever docs before
# reviving this, or delete the cell.
# a = index.as_retriever(similarity_top_k=top_n, postprocessors=[sentence_transformer_rerank])
# b = a.retrieve(question)

In [16]:
len(docs)

50

In [17]:
store_docs(docs, "cross_encoder_docs.json")

## Cross Encoder with Query Fusion

In [18]:
from llama_index.core.retrievers import QueryFusionRetriever

# Query fusion on top of the cross-encoder retriever.
# NOTE(review): unlike the baseline fusion retriever earlier in this notebook, no
# `mode` is passed here, so the library's default fusion mode applies. Confirm the
# difference is intentional; otherwise the two runs differ in more than the reranker.
cross_encoder_retriever_with_query_fusion = QueryFusionRetriever(
    [cross_encoder_retriever],
    similarity_top_k=top_n,
    num_queries=num_queries,  # set this to 1 to disable query generation
    use_async=True,
    verbose=True,
    query_gen_prompt=query_gen_prompt,
    llm=llm,
)

In [19]:
docs2 = cross_encoder_retriever_with_query_fusion.retrieve(question)
len(docs2)

Generated queries:
1. What is the identity of Ali?
2. Can you provide information about Ali?
3. Who goes by the name Ali?
4. What can you tell me about Ali?


50

In [20]:
store_docs(docs2, "cross_encoder_retriever_with_query_fusion.json")