In [3]:
from retrievers.bm25_retriever import BM25Retriever
from retrievers.colbert_retriever import ColBERTRetriever
from retrievers.dense_retriever import DPRRetriever
from retrievers.hybrid_retriever import HybridRetriever

# Toy corpus that every retriever below is built over.
documents = [
    "The sky is blue.",
    "The sky is pink",
    "The sun is bright.",
    "The grass is green.",
]

# Construct one retriever of each kind over the same corpus.
bm25_retriever = BM25Retriever(documents)
dense_retriever = DPRRetriever(documents)
colbert_retriever = ColBERTRetriever(documents)
hybrid_retriever = HybridRetriever(documents)


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

In [4]:
# The question sent to every retriever.
query = "What is the color of the sky?"

# How many documents to request from each retriever.
top_k = 2

# Run the same query through each retriever; the result names are kept
# because the reranker cell further down reads them.
bm25_retrieved_docs = bm25_retriever.retrieve(query, top_k=top_k)
dpr_retrieved_docs = dense_retriever.retrieve(query, top_k=top_k)
colbert_retrieved_docs = colbert_retriever.retrieve(query, top_k=top_k)
hybrid_retrieved_docs = hybrid_retriever.retrieve(query, top_k=top_k)

# Print each result list under its own heading, in retrieval order.
retrieval_report = (
    ("Top-k retrieved documents (BM25):", bm25_retrieved_docs),
    ("Top-k retrieved documents (DPR):", dpr_retrieved_docs),
    ("Top-k retrieved documents (ColBERT):", colbert_retrieved_docs),
    ("Top-k retrieved documents (Hybrid):", hybrid_retrieved_docs),
)
for heading, hits in retrieval_report:
    print(heading, hits)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Top-k retrieved documents (BM25): ['The sky is pink', 'The sky is blue.']
Top-k retrieved documents (DPR): ['The sky is blue.', 'The sky is pink']
Top-k retrieved documents (ColBERT): ['The sky is blue.', 'The sky is pink']
Top-k retrieved documents (Hybrid): ['The sky is blue.', 'The sky is pink']


In [None]:
# Apply rerankers to the retrieved documents
from rerankers.cross_encoder_reranker import CrossEncoderReranker
from rerankers.embedding_reranker import EmbeddingReranker
from rerankers.none_reranker import NoReranker

# Instantiate one reranker of each kind with default constructor arguments.
cross_encoder_reranker = CrossEncoderReranker()
embedding_reranker = EmbeddingReranker()
no_reranker = NoReranker()

print("\n--- Applying Rerankers ---")

# One report section per retriever: the heading to print and the
# retrieved documents that section reranks.
retrieval_sections = (
    ("\nBM25 Results:", bm25_retrieved_docs),
    ("\nDPR Results:", dpr_retrieved_docs),
    ("\nColBERT Results:", colbert_retrieved_docs),
    ("\nHybrid Results:", hybrid_retrieved_docs),
)

# Rerankers in the order they are applied within every section.
labelled_rerankers = (
    ("  + No Reranker:", no_reranker),
    ("  + Embedding Reranker:", embedding_reranker),
    ("  + Cross-Encoder Reranker:", cross_encoder_reranker),
)

# Every retriever's output is passed through every reranker with the
# same query, and each reranked list is printed under its section.
for section_heading, retrieved_docs in retrieval_sections:
    print(section_heading)
    for reranker_label, reranker in labelled_rerankers:
        print(reranker_label, reranker.rerank(query, retrieved_docs))