## Dense + Sparse: Hybrid Retrieval
This notebook fuses the ranked results of dense and sparse retrieval to improve retrieval performance.

## Load Dataset

In [1]:
from dataset_loader import DatasetLoader

# Cranfield is only used here to smoke-test the functions below; the real
# evaluation is meant to run on MS MARCO, HotpotQA and possibly Climate-FEVER.
DATASET_NAME = "cranfield"

loader = DatasetLoader(DATASET_NAME)
docs, queries, qrels = loader.get_all()
loader.print_info()

# Template for loading several datasets at once for the full evaluation
# (kept disabled while testing on Cranfield):
#
# nameset = ["beir/msmarco/dev", "beir/hotpotqa/dev", "beir/climate-fever/dev"]
#
# # Dictionary to hold datasets, keyed by dataset name
# datasets = {}
#
# for name in nameset:
#     loader = DatasetLoader(name)
#     docs, queries, qrels = loader.get_all()
#     datasets[name] = {
#         "docs": docs,
#         "queries": queries,
#         "qrels": qrels
#     }
#     loader.print_info()


DATASET: cranfield
DOCS (1400): ('1', 'experimental investigation of the aerodynamics of a\nwing in a slipstream .\n  an experimental study of a wing in a propeller slipstream was\nmade in order to determine the spanwise distribution of the lift\nincrease due to slipstream at different angles of attack of the wing\nand at different free stream to slipstream velocity ratios .  the\nresults were intended in part as an evaluation basis for different\ntheoretical treatments of this problem .\n  the comparative span loading curves, together with\nsupporting evidence, showed that a substantial part of the lift increment\nproduced by the slipstream was due to a /destalling/ or\nboundary-layer-control effect .  the integrated remaining lift\nincrement, after subtracting this destalling lift, was found to agree\nwell with a potential flow theory .\n  an empirical evaluation of the destalling effects was made for\nthe specific configuration of the experiment .') 

QUERIES (225): ('1', 'what simi

In [2]:
# Materialise the IDs and the texts of the documents and of the queries
# as four separate lists.
doc_ids = [*docs.keys()]
doc_texts = [*docs.values()]
query_ids = [*queries.keys()]
query_texts = [*queries.values()]

## Load Results from Dense and Sparse Retrieval
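Results should follow this format (each query ID maps to a dictionary of document IDs and their retrieval scores, which is the JSON layout `ranx.Run.from_file` reads):
```
{
    "query_id": {
        "doc_id": 0.9
    }
}
```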

In [3]:
from ranx import Run

# Read the precomputed rankings of both retrievers from their JSON run files.
dense_run, sparse_run = (
    Run.from_file("dense_run.json"),
    Run.from_file("bm25_run.json"),  # BM25 is used as the sparse ranker instead of TF-IDF
)

## Apply Multiple Fusion Strategies + Evaluate

In [16]:
from ranx import fuse, evaluate
import time

def _timed_fuse(label, method, params=None):
    """Fuse the sparse and dense runs with one strategy and time the fusion.

    Args:
        label: Text placed in front of the printed timing line.
        method: Fusion method name passed to ``ranx.fuse`` (e.g. "rrf", "wsum").
        params: Optional parameter dict for the method (e.g. wsum weights).

    Returns:
        A ``(fused_run, seconds_per_query)`` tuple.
    """
    # perf_counter() is a monotonic, high-resolution clock; time.time() is the
    # wall clock and is too coarse/unstable for sub-millisecond intervals.
    start = time.perf_counter()
    fused = fuse([sparse_run, dense_run], method=method, params=params)
    elapsed = time.perf_counter() - start

    # Only the fuse() call is timed, so this is the fusion cost per query and
    # does not include the dense/sparse retrieval that produced the input runs.
    n_queries = len(fused.run)
    per_query = elapsed / n_queries if n_queries else 0.0  # avoid ZeroDivisionError on an empty run
    print(f"{label} Retrieval time per query: {per_query:.6f} seconds")
    return fused, per_query


# NOTE(review): the first fuse() call may include one-time warm-up cost inside
# ranx, which would inflate the RRF timing relative to the others - confirm.

# Reciprocal rank fusion
rff_run, rff_retrieval_time = _timed_fuse("RRF", "rrf")

# Weighted sum; weights are given in [sparse, dense] order
wsum_run, wsum_retrieval_time = _timed_fuse(
    "WSUM 50-50", "wsum", {"weights": [0.5, 0.5]}
)
wsum2_run, wsum2_retrieval_time = _timed_fuse(
    "WSUM2 40-60", "wsum", {"weights": [0.4, 0.6]}
)
wsum3_run, wsum3_retrieval_time = _timed_fuse(
    "WSUM3 60-40", "wsum", {"weights": [0.6, 0.4]}
)


RRF Retrieval time per query: 0.000034 seconds
WSUM 50-50 Retrieval time per query: 0.000024 seconds
WSUM2 40-60 Retrieval time per query: 0.000020 seconds
WSUM3 60-40 Retrieval time per query: 0.000021 seconds


In [17]:
# Score every fused run against the relevance judgments using mean
# reciprocal rank (MRR), in the same order the runs were produced.
mrr_rff, mrr_wsum, mrr_wsum2, mrr_wsum3 = (
    evaluate(qrels, fused_run, metrics=["mrr"])
    for fused_run in (rff_run, wsum_run, wsum2_run, wsum3_run)
)

# One line per fusion strategy, four decimal places each.
print(
    f"RRF MRR: {mrr_rff:.4f}",
    f"WSUM 50/50 MRR: {mrr_wsum:.4f}",
    f"WSUM2 40/60 MRR: {mrr_wsum2:.4f}",
    f"WSUM3 60/40 MRR: {mrr_wsum3:.4f}",
    sep="\n",
)

RRF MRR: 0.5162
WSUM 50/50 MRR: 0.5058
WSUM2 40/60 MRR: 0.4915
WSUM3 60/40 MRR: 0.5043
