In [None]:
from beir import util, LoggingHandler
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
import logging, json
import os
# Export a flag into this process's environment for downstream libraries.
# NOTE(review): confirm which library actually reads TQDM_AUTO_NO_TQDM; tqdm's
# documented environment overrides use the TQDM_<PARAM> form (e.g. TQDM_DISABLE).
os.environ.update({"TQDM_AUTO_NO_TQDM": "True"})



In [None]:
# Configure the root logger: timestamped messages at INFO and above, emitted
# through the LoggingHandler imported from beir.
_log_handlers = [LoggingHandler()]
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
    handlers=_log_handlers,
)


In [None]:
# Name of the BEIR dataset to evaluate on; it is interpolated into the archive URL.
dataset = "scifact"
dataset_url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"

# The helper receives the archive URL and "datasets" (presumably the local
# output directory) and returns the path handed to the loader below.
data_path = util.download_and_unzip(dataset_url, "datasets")

# Load the test split as three objects: corpus, queries and relevance judgements.
loader = GenericDataLoader(data_folder=data_path)
corpus, queries, qrels = loader.load(split="test")

In [None]:
# 3) Initialize retrievers
# Project-local retriever wrappers, imported together ahead of construction.
from retrieval.splade import SpladeRetriever
from retrieval.dense import DenseRetriever

#   a) SPLADE sparse retriever, constructed with no arguments
sparse_model = SpladeRetriever()
#   b) Contriever dense retriever, constructed with no arguments
dense_model = DenseRetriever()

In [None]:
# 4) Build an evaluator
# Upstream BEIR's EvaluateRetrieval accepts (retriever, k_values, score_function).
# It has no `metrics`, `batch_size` or `keep_documents` keywords, so passing them
# would raise TypeError. Metrics are selected through the cutoff list instead:
# k_values=[10, 100] yields NDCG@10 and Recall@100, with MAP at both cutoffs.
# NOTE(review): no retriever is attached here; upstream `retrieve()` delegates to
# `retriever.search(...)`, so confirm how the SPLADE/Contriever wrappers are
# meant to plug in. A batch size, if needed, belongs on the retriever itself.
evaluator = EvaluateRetrieval(k_values=[10, 100])

In [None]:
# 5) Run retrieval under different settings
# Each run hands the evaluator the corpus, the queries and one retriever callable.
# NOTE(review): upstream BEIR's `retrieve(corpus, queries, **kwargs)` does not take
# a scoring callable as a third positional argument — confirm this call shape
# against the installed evaluator and the project-local retriever classes.
logging.info("Running SPLADE only...")
results_sparse = evaluator.retrieve(
    corpus,
    queries,
    sparse_model.splade_embed,
)

logging.info("Running Contriever only...")
results_dense = evaluator.retrieve(
    corpus,
    queries,
    dense_model.dense_embed,
)

In [None]:
# 6) Hybrid fusion function
def hybrid_search_func(query, corpus_embeddings, passages, alpha=0.5):
    """Fuse sparse and dense relevance scores for one query by linear interpolation.

    Args:
        query: Query forwarded unchanged to both retrievers' ``get_scores``.
        corpus_embeddings: Unused; kept so existing call sites keep working.
        passages: Candidates forwarded unchanged to both retrievers' ``get_scores``.
        alpha: Weight applied to the sparse score; the dense score is weighted
            ``1 - alpha``. Defaults to 0.5 (the previously hard-coded value).

    Returns:
        dict mapping every passage id scored by either retriever to
        ``alpha * sparse + (1 - alpha) * dense``. An id that one retriever did
        not score contributes 0.0 for that retriever instead of raising KeyError
        (sparse-only id) or being silently dropped (dense-only id).
    """
    # Assumes both models return dict {pid: score} — TODO confirm against the
    # project-local retriever classes.
    sparse_scores = sparse_model.get_scores(query, passages)
    dense_scores = dense_model.get_scores(query, passages)
    dense_weight = 1 - alpha

    # NOTE(review): raw scores are combined without normalization; if the two
    # retrievers score on different scales, alpha will not balance them evenly.
    fused = {
        pid: alpha * sparse_score + dense_weight * dense_scores.get(pid, 0.0)
        for pid, sparse_score in sparse_scores.items()
    }
    # Keep ids that only the dense retriever scored (sparse contribution is 0).
    for pid, dense_score in dense_scores.items():
        if pid not in fused:
            fused[pid] = dense_weight * dense_score
    return fused

logging.info("Running Hybrid (α=0.5)...")
# Build the candidate list once; previously it was rebuilt from the corpus on
# every invocation of the lambda.
# NOTE(review): these are corpus ids, not passage texts — confirm that is what
# `get_scores` expects for `passages`. Upstream BEIR's `retrieve()` also does not
# take a callable as a third positional argument; confirm this call shape.
candidate_ids = list(corpus.keys())
results_hybrid = evaluator.retrieve(
    corpus,
    queries,
    lambda q: hybrid_search_func(q, None, candidate_ids),
)


In [None]:
# 7) Evaluate
# Upstream BEIR's `evaluate(qrels, results, k_values)` has no default for
# `k_values`, so the two-argument form raises TypeError. Each call returns a
# tuple of four dicts: (NDCG@k, MAP@k, Recall@k, P@k).
EVAL_K_VALUES = [10, 100]  # cutoffs behind the NDCG@10 / Recall@100 targets

logging.info("Evaluating SPLADE...")
metrics_sparse = evaluator.evaluate(qrels, results_sparse, EVAL_K_VALUES)

logging.info("Evaluating Contriever...")
metrics_dense = evaluator.evaluate(qrels, results_dense, EVAL_K_VALUES)

logging.info("Evaluating Hybrid...")
metrics_hybrid = evaluator.evaluate(qrels, results_hybrid, EVAL_K_VALUES)

In [None]:

# 8) Print summary
# One line per run: a fixed-width label followed by the metrics as indented JSON.
for _label, _metrics in (
    ("Sparse Metrics:", metrics_sparse),
    ("Dense  Metrics:", metrics_dense),
    ("Hybrid Metrics:", metrics_hybrid),
):
    print(_label, json.dumps(_metrics, indent=2))