In [1]:
# Colab setup: attach Google Drive and work from the project checkout stored on it.
import os
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

# Later cells use paths relative to this directory
# (requirements.txt, scripts/run_experiment.py, outputs/).
project_root = "/content/drive/MyDrive/rag-benchmarks"
os.chdir(project_root)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Sanity check: show the current working directory and what it contains.
current_dir = os.getcwd()
print(current_dir)
print(os.listdir(current_dir))

/content/drive/MyDrive/rag-benchmarks
['notebooks', 'uv.lock', 'pyproject.toml', 'rag', 'scripts', 'data', '.venv', 'requirements.txt', 'outputs', 'nfcorpus_results.csv']


In [3]:
!pip install -r requirements.txt



In [4]:
import torch

def get_best_device():
    """Report and return the best available torch device.

    Preference order is CUDA, then Apple Silicon (MPS), then CPU. A message
    naming the choice is printed in every case.

    Returns:
        str: "cuda", "mps" or "cpu".
    """
    if torch.cuda.is_available():
        print("Found CUDA GPU.")
        return "cuda"

    # torch.backends.mps is not present in every torch build; treat a missing
    # attribute as "MPS not available" instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        print("Found Apple Silicon GPU.")
        return "mps"

    print("No GPU found. Using 'cpu'.")
    return "cpu"

# Confirm the working directory and report which accelerator will be used.
print(os.getcwd())
get_best_device()


/content/drive/MyDrive/rag-benchmarks
Found CUDA GPU.


In [5]:
# Adapted from run_all_combinations.py, with the scope limited to the scifact dataset.
#
# For every retrieval / rerank pairing this cell runs scripts/run_experiment.py as a
# child process, loads the JSON file it writes, tags the metrics with the pairing and
# finally saves all collected rows to <dataset>_results.csv.
import subprocess
import sys
import json
import pandas as pd
import os
import torch  # imported here as well so the cell does not rely on an earlier cell

# Define retrieval and rerank combos ("rerank": None means no reranking stage).
COMBINATIONS = [
    # Sparse retrieval
    {"retrieval": "tfidf", "rerank": None, "desc": "TFIDF + no reranking (BASELINE)"},
    {"retrieval": "tfidf", "rerank": "bi_encoder", "desc": "TFIDF + bi encoder rerank"},
    {"retrieval": "tfidf", "rerank": "cross_encoder", "desc": "TFIDF + cross encoder rerank"},
    {"retrieval": "bm25", "rerank": None, "desc": "BM25 + No reranking"},
    {"retrieval": "bm25", "rerank": "bi_encoder", "desc": "BM25 + bi encoder rerank"},
    {"retrieval": "bm25", "rerank": "cross_encoder", "desc": "BM25 + cross encoder rerank"},
    # Dense retrieval
    {"retrieval": "dense", "rerank": None, "desc": "Dense + no rerank"},
    {"retrieval": "dense", "rerank": "bi_encoder", "desc": "Dense + bi encoder"},
    {"retrieval": "dense", "rerank": "cross_encoder", "desc": "Dense + cross encoder"},
    # Hybrid retrieval (RRF)
    {"retrieval": "hybrid", "rerank": None, "desc": "Hybrid + no rerank"},
    {"retrieval": "hybrid", "rerank": "bi_encoder", "desc": "Hybrid + bi encoder"},
    {"retrieval": "hybrid", "rerank": "cross_encoder", "desc": "Hybrid + cross encoder"},
]

dataset_to_run = "scifact"
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
all_results = []
failed_runs = []  # descriptions of the combinations that produced no results

# The device does not change between runs, so pick it once instead of per iteration.
# torch.backends.mps is looked up defensively because not every torch build has it.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = "cuda"
elif mps_backend is not None and mps_backend.is_available():
    device = "mps"
else:
    device = "cpu"

# Ask a Python child process not to buffer stdout, so that its log lines reach the
# notebook while the experiment is still running.
child_env = dict(os.environ, PYTHONUNBUFFERED="1")

print(f"=== Running experiments for dataset: {dataset_to_run} ===")
for combo in COMBINATIONS:
    print(f"--- {combo['desc']} ---")
    try:
        # "bi_encoder" -> "bi", "cross_encoder" -> "cross", None -> "none".
        # NOTE(review): presumably these are the names run_experiment.py accepts
        # for --reranker; confirm against the script.
        reranker_name = combo["rerank"].split('_')[0] if combo["rerank"] else "none"
        output_filename = f"{dataset_to_run}_{combo['retrieval']}_{reranker_name}.json"
        output_path = os.path.join(output_dir, output_filename)

        # Argument list (no shell), so values containing spaces are passed intact.
        # sys.executable runs the script with the same interpreter as the kernel.
        command = [
            sys.executable,
            "scripts/run_experiment.py",
            "--dataset",
            dataset_to_run,
            "--retriever",
            combo["retrieval"],
            "--reranker",
            reranker_name,
            "--output",
            output_path,
            "--top_k",
            "100",
            "--device",
            device,
        ]

        # Stream the child's combined stdout/stderr into the cell line by line.
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",
            bufsize=1,
            env=child_env,
        ) as process:
            for line in process.stdout:
                print(line, end="")

        # A failed run must not fall through to reading the output file: a file left
        # behind by an earlier run would otherwise be reported as this run's result.
        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, command)

        with open(output_path, 'r', encoding='utf-8') as f:
            results = json.load(f)

        # Tag the metrics with the configuration that produced them.
        results['dataset'] = dataset_to_run
        results['retriever'] = combo['retrieval']
        results['reranker'] = combo['rerank'] if combo['rerank'] else "none"
        all_results.append(results)

    except Exception as e:
        # Best effort: one failing combination should not abort the remaining ones,
        # but it is recorded so the summary below does not hide it.
        print(f"An unexpected error occurred: {e}")
        failed_runs.append(combo['desc'])

if failed_runs:
    print(f"{len(failed_runs)} of {len(COMBINATIONS)} combinations failed: {failed_runs}")

if all_results:
    results_df = pd.DataFrame(all_results)
    results_csv_path = f'{dataset_to_run}_results.csv'
    results_df.to_csv(results_csv_path, index=False)
    print(f"All experiments complete. Results saved to {results_csv_path}")
    display(results_df)
else:
    print("No results were collected; nothing was written.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
STEP[bi-reranker]: Top-100 doc_ids=['19675911', '13481731', '20945963', '33533307', '40817021', '31889025', '24443043', '26341063', '409280', '25301182', '7640792', '25355575', '12206390', '5698494', '27466734', '29253460', '6085365', '25451374', '12561083', '37029185', '24408040', '4445629', '8509018', '4506414', '17914395', '16760369', '1456068', '13230773', '23983289', '75636923', '35724562', '13726379', '43629704', '12810152', '24163770', '44624045', '10209731', '2138767', '1287809', '74137632', '6070278', '34139429', '35495268', '12794099', '19185192', '24586989', '5993745', '11880289', '43483151', '8780599', '7419612', '39368721', '12102963', '16422880', '20526907', '22420524', '27240667', '52175065', '32534305', '54490092', '37065914', '40164383', '6993046', '26121646', '6270720', '2774906', '5939172', '12779444', '2787558', '18997216', '27166444', '13831558', '15512462', '11246427', '27460509', '9822397', '7641593

Unnamed: 0,ndcg,map,recall,precision,dataset,retriever,reranker
0,"{'NDCG@1': 0.45333, 'NDCG@3': 0.52834, 'NDCG@5...","{'MAP@1': 0.43011, 'MAP@3': 0.50222, 'MAP@5': ...","{'Recall@1': 0.43011, 'Recall@3': 0.57939, 'Re...","{'P@1': 0.45333, 'P@3': 0.21, 'P@5': 0.13867, ...",scifact,tfidf,none
1,"{'NDCG@1': 0.53667, 'NDCG@3': 0.6136, 'NDCG@5'...","{'MAP@1': 0.51789, 'MAP@3': 0.58712, 'MAP@5': ...","{'Recall@1': 0.51789, 'Recall@3': 0.66339, 'Re...","{'P@1': 0.53667, 'P@3': 0.24111, 'P@5': 0.1626...",scifact,tfidf,bi_encoder
2,"{'NDCG@1': 0.58, 'NDCG@3': 0.64523, 'NDCG@5': ...","{'MAP@1': 0.55361, 'MAP@3': 0.61846, 'MAP@5': ...","{'Recall@1': 0.55361, 'Recall@3': 0.69328, 'Re...","{'P@1': 0.58, 'P@3': 0.25, 'P@5': 0.16333, 'P@...",scifact,tfidf,cross_encoder
3,"{'NDCG@1': 0.43667, 'NDCG@3': 0.51543, 'NDCG@5...","{'MAP@1': 0.42333, 'MAP@3': 0.49134, 'MAP@5': ...","{'Recall@1': 0.42333, 'Recall@3': 0.56944, 'Re...","{'P@1': 0.43667, 'P@3': 0.20333, 'P@5': 0.1346...",scifact,bm25,none
4,"{'NDCG@1': 0.51, 'NDCG@3': 0.57967, 'NDCG@5': ...","{'MAP@1': 0.49122, 'MAP@3': 0.55495, 'MAP@5': ...","{'Recall@1': 0.49122, 'Recall@3': 0.62689, 'Re...","{'P@1': 0.51, 'P@3': 0.22556, 'P@5': 0.15533, ...",scifact,bm25,bi_encoder
5,"{'NDCG@1': 0.55667, 'NDCG@3': 0.61903, 'NDCG@5...","{'MAP@1': 0.53361, 'MAP@3': 0.59402, 'MAP@5': ...","{'Recall@1': 0.53361, 'Recall@3': 0.66328, 'Re...","{'P@1': 0.55667, 'P@3': 0.23889, 'P@5': 0.1533...",scifact,bm25,cross_encoder
6,"{'NDCG@1': 0.50333, 'NDCG@3': 0.59673, 'NDCG@5...","{'MAP@1': 0.48233, 'MAP@3': 0.56564, 'MAP@5': ...","{'Recall@1': 0.48233, 'Recall@3': 0.66033, 'Re...","{'P@1': 0.50333, 'P@3': 0.23778, 'P@5': 0.164,...",scifact,dense,none
7,"{'NDCG@1': 0.53333, 'NDCG@3': 0.60348, 'NDCG@5...","{'MAP@1': 0.51456, 'MAP@3': 0.57768, 'MAP@5': ...","{'Recall@1': 0.51456, 'Recall@3': 0.65006, 'Re...","{'P@1': 0.53333, 'P@3': 0.23556, 'P@5': 0.1586...",scifact,dense,bi_encoder
8,"{'NDCG@1': 0.57667, 'NDCG@3': 0.64161, 'NDCG@5...","{'MAP@1': 0.55028, 'MAP@3': 0.61507, 'MAP@5': ...","{'Recall@1': 0.55028, 'Recall@3': 0.68728, 'Re...","{'P@1': 0.57667, 'P@3': 0.24889, 'P@5': 0.164,...",scifact,dense,cross_encoder
9,"{'NDCG@1': 0.52667, 'NDCG@3': 0.59089, 'NDCG@5...","{'MAP@1': 0.50483, 'MAP@3': 0.56821, 'MAP@5': ...","{'Recall@1': 0.50483, 'Recall@3': 0.63344, 'Re...","{'P@1': 0.52667, 'P@3': 0.22778, 'P@5': 0.1493...",scifact,hybrid,none
