In [1]:
import os
from google.colab import drive
drive.mount('/content/drive')
os.chdir("/content/drive/MyDrive/rag-benchmarks")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# import os
# os.chdir('..')
print(os.getcwd())
print(os.listdir())

/content/drive/MyDrive/rag-benchmarks
['notebooks', 'uv.lock', 'pyproject.toml', 'rag', 'scripts', 'data', '.venv', 'requirements.txt', 'outputs']


In [3]:
!pip install -r requirements.txt



In [4]:
import torch

def get_best_device():
    if torch.cuda.is_available():
        print("Found CUDA GPU.")
    elif torch.backends.mps.is_available():
        print("Found Apple Silicon GPU.")
    else:
        print("No GPU found. Using 'cpu'.")

print(os.getcwd())
get_best_device()


/content/drive/MyDrive/rag-benchmarks
Found CUDA GPU.


In [5]:
# fetched from run_all_combinations.py with scope limited to nfcorpus dataset
import subprocess
import sys
import json
import pandas as pd
import os

# Define retrieval and rerank combos
COMBINATIONS = [
    # Sparse retrieval
    {"retrieval": "tfidf", "rerank": None, "desc": "TFIDF + no reranking (BASELINE)"},
    {"retrieval": "tfidf", "rerank": "bi_encoder", "desc": "TFIDF + bi encoder rerank"},
    {"retrieval": "tfidf", "rerank": "cross_encoder", "desc": "TFIDF + cross encoder rerank"},
    {"retrieval": "bm25", "rerank": None, "desc": "BM25 + No reranking"},
    {"retrieval": "bm25", "rerank": "bi_encoder", "desc": "BM25 + bi encoder rerank"},
    {"retrieval": "bm25", "rerank": "cross_encoder", "desc": "BM25 + cross encoder rerank"},
    # Dense retrieval
    {"retrieval": "dense", "rerank": None, "desc": "Dense + no rerank"},
    {"retrieval": "dense", "rerank": "bi_encoder", "desc": "Dense + bi encoder"},
    {"retrieval": "dense", "rerank": "cross_encoder", "desc": "Dense + cross encoder"},
    # Hybrid retrieval (RRF)
    {"retrieval": "hybrid", "rerank": None, "desc": "Hybrid + no rerank"},
    {"retrieval": "hybrid", "rerank": "bi_encoder", "desc": "Hybrid + bi encoder"},
    {"retrieval": "hybrid", "rerank": "cross_encoder", "desc": "Hybrid + cross encoder"},
]

dataset_to_run = "nfcorpus"
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
all_results = []

print(f"=== Running experiments for dataset: {dataset_to_run} ===")
for combo in COMBINATIONS:
    print(f"--- {combo['desc']} ---")
    try:
        reranker_name = combo["rerank"].split('_')[0] if combo["rerank"] else "none"
        output_filename = f"{dataset_to_run}_{combo['retrieval']}_{reranker_name}.json"
        output_path = os.path.join(output_dir, output_filename)

        command = [
            "python",
            "scripts/run_experiment.py",
            "--dataset",
            dataset_to_run,
            "--retriever",
            combo["retrieval"],
            "--reranker",
            reranker_name,
            "--output",
            output_path,
            "--top_k",
            "100",
            "--device",
            "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu",
        ]

        # Use ! to run the command in the notebook
        # This is equivalent to subprocess.run but shows output live
        !{' '.join(command)}

        with open(output_path, 'r') as f:
            results = json.load(f)

        results['dataset'] = dataset_to_run
        results['retriever'] = combo['retrieval']
        results['reranker'] = combo['rerank'] if combo['rerank'] else "none"
        all_results.append(results)

    except Exception as e:
        print(f"An unexpected error occurred: {e}")

if all_results:
    results_df = pd.DataFrame(all_results)
    results_csv_path = f'{dataset_to_run}_results.csv'
    results_df.to_csv(results_csv_path, index=False)
    print(f"All experiments complete. Results saved to {results_csv_path}")
    display(results_df)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
STEP[bi-reranker]: Similarity stats -> min=-0.1264, max=0.5304, mean=0.2627
STEP[bi-reranker]: Top-100 doc_ids=['MED-4818', 'MED-3307', 'MED-1979', 'MED-1491', 'MED-5110', 'MED-4144', 'MED-4746', 'MED-2991', 'MED-1859', 'MED-1610', 'MED-4491', 'MED-4145', 'MED-3288', 'MED-3494', 'MED-3319', 'MED-4976', 'MED-4814', 'MED-4493', 'MED-1802', 'MED-732', 'MED-4758', 'MED-5235', 'MED-4053', 'MED-4812', 'MED-3000', 'MED-2340', 'MED-1977', 'MED-5326', 'MED-4357', 'MED-2671', 'MED-847', 'MED-2367', 'MED-1801', 'MED-2352', 'MED-4138', 'MED-2341', 'MED-1983', 'MED-4057', 'MED-4593', 'MED-1601', 'MED-3171', 'MED-4797', 'MED-4363', 'MED-2678', 'MED-4134', 'MED-2215', 'MED-3653', 'MED-2182', 'MED-5188', 'MED-4141', 'MED-4844', 'MED-4139', 'MED-4482', 'MED-708', 'MED-4436', 'MED-4484', 'MED-4129', 'MED-3175', 'MED-5191', 'MED-5197', 'MED-4485', 'MED-1407', 'MED-1540', 'MED-3891', 'MED-4474', 'MED-4803', 'MED-335', 'MED-4594', 'MED-4963',

Unnamed: 0,ndcg,map,recall,precision,dataset,retriever,reranker
0,"{'NDCG@1': 0.37152, 'NDCG@3': 0.34085, 'NDCG@5...","{'MAP@1': 0.05412, 'MAP@3': 0.08613, 'MAP@5': ...","{'Recall@1': 0.05412, 'Recall@3': 0.09459, 'Re...","{'P@1': 0.387, 'P@3': 0.31992, 'P@5': 0.27059,...",nfcorpus,tfidf,none
1,"{'NDCG@1': 0.41486, 'NDCG@3': 0.37843, 'NDCG@5...","{'MAP@1': 0.05301, 'MAP@3': 0.09002, 'MAP@5': ...","{'Recall@1': 0.05301, 'Recall@3': 0.10111, 'Re...","{'P@1': 0.43034, 'P@3': 0.36017, 'P@5': 0.3046...",nfcorpus,tfidf,bi_encoder
2,"{'NDCG@1': 0.47833, 'NDCG@3': 0.41476, 'NDCG@5...","{'MAP@1': 0.06642, 'MAP@3': 0.10646, 'MAP@5': ...","{'Recall@1': 0.06642, 'Recall@3': 0.11335, 'Re...","{'P@1': 0.49536, 'P@3': 0.3839, 'P@5': 0.31827...",nfcorpus,tfidf,cross_encoder
3,"{'NDCG@1': 0.37616, 'NDCG@3': 0.32527, 'NDCG@5...","{'MAP@1': 0.0443, 'MAP@3': 0.07567, 'MAP@5': 0...","{'Recall@1': 0.0443, 'Recall@3': 0.08293, 'Rec...","{'P@1': 0.39009, 'P@3': 0.30444, 'P@5': 0.2551...",nfcorpus,bm25,none
4,"{'NDCG@1': 0.40712, 'NDCG@3': 0.36272, 'NDCG@5...","{'MAP@1': 0.04795, 'MAP@3': 0.07997, 'MAP@5': ...","{'Recall@1': 0.04795, 'Recall@3': 0.09001, 'Re...","{'P@1': 0.42415, 'P@3': 0.34675, 'P@5': 0.2904...",nfcorpus,bm25,bi_encoder
5,"{'NDCG@1': 0.45666, 'NDCG@3': 0.39084, 'NDCG@5...","{'MAP@1': 0.05565, 'MAP@3': 0.0929, 'MAP@5': 0...","{'Recall@1': 0.05565, 'Recall@3': 0.09944, 'Re...","{'P@1': 0.47368, 'P@3': 0.36636, 'P@5': 0.3027...",nfcorpus,bm25,cross_encoder
6,"{'NDCG@1': 0.39474, 'NDCG@3': 0.36355, 'NDCG@5...","{'MAP@1': 0.04323, 'MAP@3': 0.07722, 'MAP@5': ...","{'Recall@1': 0.04323, 'Recall@3': 0.09053, 'Re...","{'P@1': 0.41486, 'P@3': 0.34881, 'P@5': 0.3027...",nfcorpus,dense,none
7,"{'NDCG@1': 0.40402, 'NDCG@3': 0.37892, 'NDCG@5...","{'MAP@1': 0.04958, 'MAP@3': 0.08488, 'MAP@5': ...","{'Recall@1': 0.04958, 'Recall@3': 0.09822, 'Re...","{'P@1': 0.41796, 'P@3': 0.36429, 'P@5': 0.3232...",nfcorpus,dense,bi_encoder
8,"{'NDCG@1': 0.47678, 'NDCG@3': 0.41676, 'NDCG@5...","{'MAP@1': 0.06173, 'MAP@3': 0.10222, 'MAP@5': ...","{'Recall@1': 0.06173, 'Recall@3': 0.10882, 'Re...","{'P@1': 0.49536, 'P@3': 0.38803, 'P@5': 0.3176...",nfcorpus,dense,cross_encoder
9,"{'NDCG@1': 0.41486, 'NDCG@3': 0.36, 'NDCG@5': ...","{'MAP@1': 0.04683, 'MAP@3': 0.08203, 'MAP@5': ...","{'Recall@1': 0.04683, 'Recall@3': 0.09332, 'Re...","{'P@1': 0.43653, 'P@3': 0.34056, 'P@5': 0.2854...",nfcorpus,hybrid,none
