In [1]:
%pip install -q python-terrier pyterrier_pisa 
%pip install -q git+https://github.com/naver/splade.git git+https://github.com/cmacdonald/pyt_splade.git

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pyterrier as pt
import pandas as pd
import pyt_splade
import torch

In [2]:
ds_topics = pt.get_dataset('irds:msmarco-passage/trec-dl-2019/judged')
ds_text = pt.get_dataset('irds:msmarco-passage')
topics = ds_topics.get_topics()

Java started (triggered by _pt_tokeniser) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]


In [3]:
import os
# Set environment variable for offline mode
os.environ['TRANSFORMERS_OFFLINE'] = '1'

# Ensure the local model path exists
local_model_path="/home/killdollar/btp/Information-Retrieval/splade-model"
if not os.path.exists(local_model_path):
	raise FileNotFoundError(f"The path {local_model_path} does not exist. Please ensure the model is downloaded.")

# Initialize SPLADE with explicit parameters# Force CPU usage
# Add this code at the beginning of your notebook (after imports)
import warnings
import torch
from contextlib import contextmanager

# Suppress specific warnings about CUDA autocast deprecation
warnings.filterwarnings("ignore", message=".*torch.cuda.amp.autocast.*")
warnings.filterwarnings("ignore", message=".*User provided device_type of 'cuda', but CUDA is not available.*")

# Create a silent replacement for torch.cuda.amp.autocast
@contextmanager
def silent_autocast():
    """Silent replacement for torch.cuda.amp.autocast that does nothing"""
    yield

# Monkey patch torch.cuda.amp.autocast to suppress warnings
torch.cuda.amp.autocast = silent_autocast

# Continue using CPU settings
os.environ['CUDA_VISIBLE_DEVICES'] = ''  # Hide all GPUs
torch.set_num_threads(4)  # Limit CPU threads to
# Initialize SPLADE with explicit CPU parameters
splade = pyt_splade.Splade(
    model=local_model_path,
    device="cpu",  # Force CPU
    max_length=128  # Reduce token length to save memory
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
indexer = pt.IterDictIndexer('./indices/msmarco-passage')
index = indexer.path
bm25 = pt.terrier.Retriever(index, wmodel="BM25", num_results=100)
splade_reranker = splade.scorer()

In [5]:
def get_text_for_docnos(docnos, dataset):
    """Efficiently get text for specific document IDs"""
    doc_texts = {}
    
    # Convert all docnos to the format we'll try
    docno_variants = {}
    for docno in docnos:
        str_docno = str(docno)
        docno_variants[str_docno] = docno
        # Also try integer version if it's numeric
        if str_docno.isdigit():
            docno_variants[int(str_docno)] = docno
    
    # Only iterate through corpus until we find all needed documents
    needed_ids = set(docno_variants.keys())
    found_count = 0
    
    corpus_iter = dataset.get_corpus_iter()
    for doc in corpus_iter:
        doc_id = doc['docno']
        
        # Check if this document is one we need
        if doc_id in needed_ids:
            original_docno = docno_variants[doc_id]
            doc_texts[original_docno] = doc['text']
            found_count += 1
            
            # Stop early if we found all documents we need
            if found_count >= len(docnos):
                break
    
    # Fill in missing documents
    for docno in docnos:
        if docno not in doc_texts:
            doc_texts[docno] = "Document text not available"
    
    return doc_texts

In [6]:
import gc

# Replace your current query processing loop with this optimized version
all_results = []

for idx, row in topics.iterrows():
    query_id = row['qid']
    query_text = row['query']
    
    print(f"Processing query {idx+1}/{len(topics)}: {query_text}")
    
    try:
        # Step 1: Get initial results from BM25
        initial_results = bm25.search(query_text)
        
        # Step 2: Process in smaller batches to avoid memory issues
        batch_size = 20  # Process 20 documents at a time
        all_reranked = []
        
        for start_idx in range(0, len(initial_results), batch_size):
            # Take a batch
            batch = initial_results.iloc[start_idx:start_idx+batch_size].copy()
            
            # Get text for this batch only
            docnos = batch['docno'].tolist()
            doc_texts = get_text_for_docnos(docnos, ds_text)
            batch['text'] = batch['docno'].map(doc_texts)
            batch['query'] = query_text
            
            # Apply SPLADE scoring to this batch
            reranked_batch = splade_reranker.transform(batch)
            all_reranked.append(reranked_batch)
            
            # Clean memory after each batch
            gc.collect()
            
        # Combine all batches
        if all_reranked:
            reranked_results = pd.concat(all_reranked)
            # Sort by SPLADE score and take top 10
            top_results = reranked_results.sort_values('score', ascending=False).head(10)
            print(f"SPLADE reranking successful")
        else:
            # Fallback to BM25 if all batches failed
            top_results = initial_results.head(10)
            top_results['text'] = top_results['docno'].map(lambda d: get_text_for_docnos([d], ds_text)[d])
            print(f"Falling back to BM25 results")
    
    except Exception as e:
        print(f"Error in SPLADE processing: {e}")
        # Fallback to BM25
        top_results = initial_results.head(10)
        top_results['text'] = top_results['docno'].map(lambda d: get_text_for_docnos([d], ds_text)[d])
        print(f"Falling back to BM25 results due to error")
    
    # Add query information to results
    top_results['qid'] = query_id
    top_results['query'] = query_text
    top_results['docid'] = top_results['docno']
    
    # Ensure rank is 0-based
    if 'rank' not in top_results.columns:
        top_results['rank'] = range(len(top_results))
    
    # Reorder columns to match MonoT5 CSV structure
    cols = ['qid', 'docid', 'docno', 'query', 'text', 'score', 'rank']
    top_results = top_results[cols]
    
    # Add to collection
    all_results.append(top_results)
    
    # Show progress for first query
    if idx == 0:
        print("\nSample results for first query:")
        print(top_results[['docno', 'score', 'text']].head(3))
    
    # Clean memory after each query
    gc.collect()

Processing query 1/43: do goldfish grow


msmarco-passage documents:  93%|█████████▎| 8182166/8841823 [00:58<00:04, 139749.35it/s]
msmarco-passage documents:  94%|█████████▎| 8273019/8841823 [00:26<00:01, 310200.82it/s]
msmarco-passage documents:  95%|█████████▍| 8393766/8841823 [00:26<00:01, 318283.12it/s]
msmarco-passage documents: 100%|█████████▉| 8803136/8841823 [00:27<00:00, 316701.78it/s]
msmarco-passage documents:  93%|█████████▎| 8266017/8841823 [00:26<00:01, 311359.52it/s]


SPLADE reranking successful

Sample results for first query:
      docno      score                                               text
26  8182159  27.926720  'Goldfish usually grow with their environment ...
10  1960257  27.720867  Goldfish Only Grow to the Size of Their Enclos...
24  2928707  27.577404  Goldfish Only Grow to the Size of Their Enclos...
Processing query 2/43: what is wifi vs bluetooth


msmarco-passage documents:  92%|█████████▏| 8160523/8841823 [00:27<00:02, 302238.00it/s]
msmarco-passage documents:  94%|█████████▍| 8301971/8841823 [00:25<00:01, 321145.43it/s]
msmarco-passage documents:  94%|█████████▍| 8301973/8841823 [00:24<00:01, 334876.15it/s]
msmarco-passage documents:  83%|████████▎ | 7369955/8841823 [00:22<00:04, 334793.13it/s]
msmarco-passage documents:  86%|████████▋ | 7637132/8841823 [00:22<00:03, 333207.59it/s]


SPLADE reranking successful
Processing query 3/43: why did the us volunterilay enter ww1


msmarco-passage documents:  69%|██████▉   | 6093907/8841823 [00:18<00:08, 329114.77it/s]
msmarco-passage documents:  99%|█████████▉| 8767916/8841823 [00:26<00:00, 331315.06it/s]
msmarco-passage documents:  93%|█████████▎| 8251701/8841823 [00:25<00:01, 327781.23it/s]
msmarco-passage documents:  92%|█████████▏| 8093226/8841823 [00:24<00:02, 330535.46it/s]
msmarco-passage documents:  92%|█████████▏| 8093228/8841823 [00:24<00:02, 333358.02it/s]


SPLADE reranking successful
Processing query 4/43: definition declaratory judgment


msmarco-passage documents:  97%|█████████▋| 8612910/8841823 [00:26<00:00, 327565.49it/s]
msmarco-passage documents:  97%|█████████▋| 8612904/8841823 [00:24<00:00, 346961.37it/s]
msmarco-passage documents:  97%|█████████▋| 8612905/8841823 [00:24<00:00, 348180.97it/s]
msmarco-passage documents:  88%|████████▊ | 7777480/8841823 [00:22<00:03, 347778.84it/s]
msmarco-passage documents:  90%|█████████ | 7997887/8841823 [00:22<00:02, 348365.08it/s]


SPLADE reranking successful
Processing query 5/43: right pelvic pain causes


msmarco-passage documents:  99%|█████████▉| 8737053/8841823 [00:25<00:00, 343556.85it/s]
msmarco-passage documents:  97%|█████████▋| 8594276/8841823 [00:24<00:00, 346385.01it/s]
msmarco-passage documents:  97%|█████████▋| 8594277/8841823 [00:24<00:00, 349075.02it/s]
msmarco-passage documents:  95%|█████████▍| 8376910/8841823 [00:24<00:01, 347992.83it/s]
msmarco-passage documents:  97%|█████████▋| 8620356/8841823 [00:24<00:00, 349833.48it/s]


SPLADE reranking successful
Processing query 6/43: what are the social determinants of health


msmarco-passage documents:  95%|█████████▌| 8441642/8841823 [00:26<00:01, 314806.91it/s]
msmarco-passage documents: 100%|█████████▉| 8833192/8841823 [00:26<00:00, 330359.95it/s]
msmarco-passage documents: 100%|█████████▉| 8833199/8841823 [00:26<00:00, 331647.14it/s]
msmarco-passage documents:  98%|█████████▊| 8690218/8841823 [00:25<00:00, 339198.79it/s]
msmarco-passage documents:  96%|█████████▌| 8444727/8841823 [00:24<00:01, 338608.24it/s]


SPLADE reranking successful
Processing query 7/43: does legionella pneumophila cause pneumonia


msmarco-passage documents:  99%|█████████▊| 8723467/8841823 [00:26<00:00, 328650.72it/s]
msmarco-passage documents:  99%|█████████▉| 8731623/8841823 [00:25<00:00, 340099.21it/s]
msmarco-passage documents:  98%|█████████▊| 8639627/8841823 [00:25<00:00, 341157.06it/s]
msmarco-passage documents:  99%|█████████▉| 8731624/8841823 [00:26<00:00, 329793.54it/s]
msmarco-passage documents:  99%|█████████▉| 8743646/8841823 [00:25<00:00, 337361.82it/s]


SPLADE reranking successful
Processing query 8/43: how is the weather in jamaica


msmarco-passage documents:  93%|█████████▎| 8255705/8841823 [00:24<00:01, 335716.67it/s]
msmarco-passage documents: 100%|█████████▉| 8839920/8841823 [00:25<00:00, 348116.79it/s]
msmarco-passage documents:  95%|█████████▌| 8415744/8841823 [00:23<00:01, 351345.78it/s]
msmarco-passage documents:  96%|█████████▌| 8484849/8841823 [00:24<00:01, 348786.94it/s]
msmarco-passage documents:  96%|█████████▌| 8484844/8841823 [00:24<00:01, 349401.57it/s]


SPLADE reranking successful
Processing query 9/43: types of dysarthria from cerebral palsy


msmarco-passage documents:  97%|█████████▋| 8617275/8841823 [00:24<00:00, 347365.67it/s]
msmarco-passage documents:  94%|█████████▍| 8306449/8841823 [00:23<00:01, 346610.22it/s]
msmarco-passage documents:  98%|█████████▊| 8644844/8841823 [00:24<00:00, 354056.88it/s]
msmarco-passage documents:  81%|████████  | 7121927/8841823 [00:20<00:04, 354050.19it/s]
msmarco-passage documents:  88%|████████▊ | 7748146/8841823 [00:24<00:03, 322681.08it/s]


SPLADE reranking successful
Processing query 10/43: who is robert gray


msmarco-passage documents: 100%|█████████▉| 8820474/8841823 [00:25<00:00, 340828.47it/s]
msmarco-passage documents:  99%|█████████▉| 8760866/8841823 [00:26<00:00, 335405.71it/s]
msmarco-passage documents:  99%|█████████▉| 8780803/8841823 [00:25<00:00, 346451.14it/s]
msmarco-passage documents:  99%|█████████▉| 8760871/8841823 [00:25<00:00, 338472.01it/s]
msmarco-passage documents:  99%|█████████▉| 8760865/8841823 [00:25<00:00, 344038.33it/s]


SPLADE reranking successful
Processing query 11/43: what types of food can you cook sous vide


msmarco-passage documents:  93%|█████████▎| 8178998/8841823 [00:24<00:01, 337017.12it/s]
msmarco-passage documents:  96%|█████████▌| 8455301/8841823 [00:24<00:01, 341227.16it/s]
msmarco-passage documents:  95%|█████████▌| 8402974/8841823 [00:24<00:01, 343249.23it/s]
msmarco-passage documents:  96%|█████████▌| 8455306/8841823 [00:24<00:01, 344132.76it/s]
msmarco-passage documents:  96%|█████████▌| 8455304/8841823 [00:24<00:01, 342217.50it/s]


SPLADE reranking successful
Processing query 12/43: how long is life cycle of flea


msmarco-passage documents:  75%|███████▌  | 6641238/8841823 [00:19<00:06, 341761.55it/s]
msmarco-passage documents: 100%|█████████▉| 8801660/8841823 [00:25<00:00, 341766.51it/s]
msmarco-passage documents:  98%|█████████▊| 8623794/8841823 [00:25<00:00, 337347.89it/s]
msmarco-passage documents:  91%|█████████ | 8008355/8841823 [00:23<00:02, 339280.41it/s]
msmarco-passage documents:  96%|█████████▋| 8531391/8841823 [00:24<00:00, 341613.20it/s]


SPLADE reranking successful
Processing query 13/43: what can contour plowing reduce


msmarco-passage documents:  91%|█████████ | 8052624/8841823 [00:23<00:02, 338282.07it/s]
msmarco-passage documents:  93%|█████████▎| 8235730/8841823 [00:23<00:01, 344976.63it/s]
msmarco-passage documents: 100%|█████████▉| 8832484/8841823 [00:26<00:00, 335339.88it/s]
msmarco-passage documents:  94%|█████████▎| 8288705/8841823 [00:25<00:01, 325438.01it/s]
msmarco-passage documents:  99%|█████████▉| 8747694/8841823 [00:26<00:00, 325798.07it/s]


SPLADE reranking successful
Processing query 14/43: when was the salvation army founded


msmarco-passage documents: 100%|█████████▉| 8811426/8841823 [00:26<00:00, 335087.08it/s]
msmarco-passage documents:  99%|█████████▉| 8785370/8841823 [00:26<00:00, 326690.11it/s]
msmarco-passage documents:  99%|█████████▉| 8785373/8841823 [00:26<00:00, 330602.64it/s]
msmarco-passage documents: 100%|█████████▉| 8811422/8841823 [00:27<00:00, 323318.26it/s]
msmarco-passage documents:  99%|█████████▉| 8785374/8841823 [00:27<00:00, 323261.36it/s]


SPLADE reranking successful
Processing query 15/43: what is a active margin


msmarco-passage documents:  96%|█████████▌| 8446504/8841823 [00:27<00:01, 307305.39it/s]
msmarco-passage documents:  96%|█████████▌| 8446506/8841823 [00:26<00:01, 324174.95it/s]
msmarco-passage documents:  99%|█████████▊| 8714087/8841823 [00:27<00:00, 322199.54it/s]
msmarco-passage documents:  98%|█████████▊| 8676838/8841823 [00:26<00:00, 322244.16it/s]
msmarco-passage documents:  99%|█████████▊| 8709223/8841823 [00:27<00:00, 317155.99it/s]


SPLADE reranking successful
Processing query 16/43: difference between rn and bsn


msmarco-passage documents:  94%|█████████▎| 8283531/8841823 [00:26<00:01, 318505.76it/s]
msmarco-passage documents:  94%|█████████▎| 8283532/8841823 [00:25<00:01, 322222.81it/s]
msmarco-passage documents:  99%|█████████▊| 8730430/8841823 [00:26<00:00, 333997.27it/s]
msmarco-passage documents:  91%|█████████▏| 8080460/8841823 [00:25<00:02, 314383.30it/s]
msmarco-passage documents:  97%|█████████▋| 8618876/8841823 [00:26<00:00, 324600.10it/s]


SPLADE reranking successful
Processing query 17/43: medicare s definition of mechanical ventilation


msmarco-passage documents:  97%|█████████▋| 8540468/8841823 [00:26<00:00, 327783.34it/s]
msmarco-passage documents:  96%|█████████▌| 8485132/8841823 [00:25<00:01, 331125.26it/s]
msmarco-passage documents:  96%|█████████▌| 8485136/8841823 [00:25<00:01, 331618.87it/s]
msmarco-passage documents:  99%|█████████▉| 8787234/8841823 [00:24<00:00, 353069.82it/s]
msmarco-passage documents:  91%|█████████ | 8025740/8841823 [00:24<00:02, 326536.83it/s]


SPLADE reranking successful
Processing query 18/43: how to find the midsegment of a trapezoid


msmarco-passage documents:  69%|██████▉   | 6108721/8841823 [00:18<00:08, 333427.67it/s]
msmarco-passage documents:  89%|████████▉ | 7857634/8841823 [00:23<00:02, 333276.66it/s]
msmarco-passage documents:  99%|█████████▉| 8748954/8841823 [00:24<00:00, 350067.02it/s]
msmarco-passage documents:  82%|████████▏ | 7277971/8841823 [00:20<00:04, 347594.31it/s]
msmarco-passage documents:  91%|█████████ | 8058875/8841823 [00:23<00:02, 342982.62it/s]


SPLADE reranking successful
Processing query 19/43: what is an aml surveillance analyst


msmarco-passage documents:  94%|█████████▍| 8345724/8841823 [00:24<00:01, 334711.71it/s]
msmarco-passage documents:  95%|█████████▍| 8388343/8841823 [00:23<00:01, 352181.56it/s]
msmarco-passage documents: 100%|█████████▉| 8828147/8841823 [00:30<00:00, 292920.56it/s]
msmarco-passage documents:  96%|█████████▌| 8495250/8841823 [00:26<00:01, 319898.33it/s]
msmarco-passage documents:  98%|█████████▊| 8706770/8841823 [00:26<00:00, 327184.26it/s]


SPLADE reranking successful
Processing query 20/43: what is the daily life of thai people


msmarco-passage documents:  92%|█████████▏| 8139258/8841823 [00:25<00:02, 315265.18it/s]
msmarco-passage documents:  98%|█████████▊| 8629184/8841823 [00:26<00:00, 330319.82it/s]
msmarco-passage documents:  99%|█████████▊| 8725077/8841823 [00:27<00:00, 320019.84it/s]
msmarco-passage documents:  97%|█████████▋| 8555308/8841823 [00:26<00:00, 322202.44it/s]
msmarco-passage documents:  95%|█████████▌| 8442177/8841823 [00:26<00:01, 317407.66it/s]


SPLADE reranking successful
Processing query 21/43: definition of a sigmet


msmarco-passage documents:  99%|█████████▊| 8710814/8841823 [00:27<00:00, 317933.25it/s]
msmarco-passage documents:  94%|█████████▍| 8305155/8841823 [00:26<00:01, 317998.75it/s]
msmarco-passage documents:  19%|█▉        | 1721919/8841823 [00:05<00:22, 319772.51it/s]
msmarco-passage documents:  55%|█████▍    | 4843649/8841823 [00:14<00:12, 324200.37it/s]
msmarco-passage documents:  91%|█████████▏| 8083366/8841823 [00:24<00:02, 329352.96it/s]


SPLADE reranking successful
Processing query 22/43: cost of interior concrete flooring


msmarco-passage documents:  96%|█████████▌| 8495099/8841823 [00:24<00:00, 348164.02it/s]
msmarco-passage documents:  96%|█████████▋| 8516149/8841823 [00:25<00:00, 336005.51it/s]
msmarco-passage documents:  92%|█████████▏| 8138536/8841823 [00:24<00:02, 331094.40it/s]
msmarco-passage documents:  97%|█████████▋| 8546679/8841823 [00:25<00:00, 331130.80it/s]
msmarco-passage documents:  97%|█████████▋| 8546159/8841823 [00:25<00:00, 332088.77it/s]


SPLADE reranking successful
Processing query 23/43: what is the most popular food in switzerland


msmarco-passage documents:  98%|█████████▊| 8683095/8841823 [00:26<00:00, 326146.39it/s]
msmarco-passage documents:  96%|█████████▌| 8469913/8841823 [00:25<00:01, 330168.59it/s]
msmarco-passage documents:  99%|█████████▉| 8742336/8841823 [00:26<00:00, 331195.27it/s]
msmarco-passage documents:  95%|█████████▌| 8400009/8841823 [00:25<00:01, 326845.72it/s]
msmarco-passage documents:  95%|█████████▌| 8400012/8841823 [00:24<00:01, 338732.96it/s]


SPLADE reranking successful
Processing query 24/43: how are some sharks warm blooded


msmarco-passage documents:  94%|█████████▎| 8273763/8841823 [00:25<00:01, 327961.49it/s]
msmarco-passage documents:  98%|█████████▊| 8622992/8841823 [00:26<00:00, 330525.29it/s]
msmarco-passage documents:  98%|█████████▊| 8622991/8841823 [00:26<00:00, 328565.22it/s]
msmarco-passage documents:  98%|█████████▊| 8622993/8841823 [00:26<00:00, 331197.96it/s]
msmarco-passage documents:  98%|█████████▊| 8622996/8841823 [00:26<00:00, 326907.69it/s]


SPLADE reranking successful
Processing query 25/43: what is durable medical equipment consist of


msmarco-passage documents:  99%|█████████▉| 8754404/8841823 [00:26<00:00, 327802.80it/s]
msmarco-passage documents:  99%|█████████▉| 8754400/8841823 [00:26<00:00, 325222.58it/s]
msmarco-passage documents:  99%|█████████▉| 8754398/8841823 [00:26<00:00, 329495.28it/s]
msmarco-passage documents:  99%|█████████▉| 8754397/8841823 [00:26<00:00, 325655.45it/s]
msmarco-passage documents:  99%|█████████▉| 8754403/8841823 [00:26<00:00, 333612.59it/s]


SPLADE reranking successful
Processing query 26/43: exons definition biology


msmarco-passage documents:  99%|█████████▉| 8794308/8841823 [00:26<00:00, 327318.11it/s]
msmarco-passage documents:  97%|█████████▋| 8577215/8841823 [00:25<00:00, 331138.86it/s]
msmarco-passage documents:  99%|█████████▉| 8794303/8841823 [00:26<00:00, 332078.47it/s]
msmarco-passage documents:  99%|█████████▉| 8767240/8841823 [00:26<00:00, 331743.75it/s]
msmarco-passage documents:  90%|█████████ | 8001295/8841823 [00:24<00:02, 326989.29it/s]


SPLADE reranking successful
Processing query 27/43: define visceral


msmarco-passage documents: 100%|█████████▉| 8802281/8841823 [00:26<00:00, 333026.47it/s]
msmarco-passage documents: 100%|█████████▉| 8811080/8841823 [00:28<00:00, 305551.97it/s]
msmarco-passage documents: 100%|█████████▉| 8799163/8841823 [00:27<00:00, 314955.77it/s]
msmarco-passage documents:  87%|████████▋ | 7685385/8841823 [00:24<00:03, 318609.89it/s]
msmarco-passage documents:  93%|█████████▎| 8214500/8841823 [00:25<00:01, 317274.28it/s]


SPLADE reranking successful
Processing query 28/43: tracheids are part of


msmarco-passage documents:  96%|█████████▌| 8492305/8841823 [00:27<00:01, 311113.57it/s]
msmarco-passage documents:  94%|█████████▍| 8355483/8841823 [00:26<00:01, 316069.46it/s]
msmarco-passage documents: 100%|█████████▉| 8802909/8841823 [00:27<00:00, 314731.13it/s]
msmarco-passage documents:  95%|█████████▌| 8406413/8841823 [00:26<00:01, 317015.91it/s]
msmarco-passage documents: 100%|█████████▉| 8828525/8841823 [00:27<00:00, 316538.88it/s]


SPLADE reranking successful
Processing query 29/43: rsa definition key


msmarco-passage documents:  96%|█████████▌| 8485139/8841823 [00:27<00:01, 309513.03it/s]
msmarco-passage documents:  76%|███████▋  | 6742119/8841823 [00:21<00:06, 317001.96it/s]
msmarco-passage documents:  73%|███████▎  | 6412650/8841823 [00:18<00:07, 340567.74it/s]
msmarco-passage documents:  99%|█████████▊| 8709994/8841823 [00:26<00:00, 329728.87it/s]
msmarco-passage documents:  96%|█████████▌| 8485141/8841823 [00:25<00:01, 334907.09it/s]


SPLADE reranking successful
Processing query 30/43: who formed the commonwealth of independent states


msmarco-passage documents:  93%|█████████▎| 8220089/8841823 [00:26<00:02, 310673.44it/s]
msmarco-passage documents:  94%|█████████▍| 8293805/8841823 [00:26<00:01, 314954.05it/s]
msmarco-passage documents:  87%|████████▋ | 7720187/8841823 [00:24<00:03, 319519.57it/s]
msmarco-passage documents:  99%|█████████▉| 8792133/8841823 [00:27<00:00, 319437.23it/s]
msmarco-passage documents:  95%|█████████▌| 8435739/8841823 [00:26<00:01, 316404.32it/s]


SPLADE reranking successful
Processing query 31/43: causes of left ventricular hypertrophy


msmarco-passage documents:  98%|█████████▊| 8683181/8841823 [00:28<00:00, 307545.03it/s]
msmarco-passage documents:  98%|█████████▊| 8664069/8841823 [00:27<00:00, 315843.59it/s]
msmarco-passage documents:  98%|█████████▊| 8683178/8841823 [00:27<00:00, 315530.69it/s]
msmarco-passage documents:  99%|█████████▉| 8738510/8841823 [00:26<00:00, 323981.40it/s]
msmarco-passage documents:  98%|█████████▊| 8683185/8841823 [00:27<00:00, 313799.60it/s]


SPLADE reranking successful
Processing query 32/43: lps laws definition


msmarco-passage documents:  97%|█████████▋| 8536118/8841823 [00:27<00:00, 308875.86it/s]
msmarco-passage documents:  99%|█████████▉| 8769481/8841823 [00:28<00:00, 311671.61it/s]
msmarco-passage documents:  98%|█████████▊| 8687371/8841823 [00:27<00:00, 317622.28it/s]
msmarco-passage documents:  99%|█████████▉| 8769489/8841823 [00:27<00:00, 323706.19it/s]
msmarco-passage documents:  97%|█████████▋| 8585198/8841823 [00:27<00:00, 311410.65it/s]


SPLADE reranking successful
Processing query 33/43: what are the three percenters


msmarco-passage documents:  86%|████████▌ | 7573692/8841823 [00:24<00:04, 311105.00it/s]
msmarco-passage documents:  88%|████████▊ | 7824992/8841823 [00:24<00:03, 319547.14it/s]
msmarco-passage documents:  82%|████████▏ | 7229501/8841823 [00:21<00:04, 343313.09it/s]
msmarco-passage documents:  99%|█████████▉| 8749913/8841823 [00:25<00:00, 340786.92it/s]
msmarco-passage documents:  75%|███████▌  | 6658794/8841823 [00:21<00:06, 314500.71it/s]


SPLADE reranking successful
Processing query 34/43: causes of military suicide


msmarco-passage documents: 100%|█████████▉| 8819116/8841823 [00:28<00:00, 308639.67it/s]
msmarco-passage documents:  96%|█████████▌| 8496921/8841823 [00:25<00:01, 329905.50it/s]
msmarco-passage documents:  96%|█████████▌| 8496922/8841823 [00:26<00:01, 325862.15it/s]
msmarco-passage documents:  96%|█████████▌| 8496336/8841823 [00:26<00:01, 319450.22it/s]
msmarco-passage documents:  94%|█████████▎| 8276051/8841823 [00:25<00:01, 320797.38it/s]


SPLADE reranking successful
Processing query 35/43: what is theraderm used for


msmarco-passage documents:  98%|█████████▊| 8651776/8841823 [00:27<00:00, 312444.15it/s]


SPLADE reranking successful
Processing query 36/43: what is famvir prescribed for


msmarco-passage documents:  92%|█████████▏| 8117095/8841823 [00:24<00:02, 337284.14it/s]
msmarco-passage documents:  94%|█████████▍| 8327369/8841823 [00:26<00:01, 315484.21it/s]
msmarco-passage documents:  98%|█████████▊| 8665820/8841823 [00:26<00:00, 321049.83it/s]
msmarco-passage documents:  98%|█████████▊| 8640143/8841823 [00:27<00:00, 317245.05it/s]
msmarco-passage documents:  83%|████████▎ | 7319987/8841823 [00:23<00:04, 313973.57it/s]


SPLADE reranking successful
Processing query 37/43: anthropological definition of environment


msmarco-passage documents:  98%|█████████▊| 8635981/8841823 [00:26<00:00, 330531.62it/s]
msmarco-passage documents:  97%|█████████▋| 8612877/8841823 [00:25<00:00, 331389.35it/s]
msmarco-passage documents: 100%|█████████▉| 8798990/8841823 [00:26<00:00, 328569.97it/s]
msmarco-passage documents:  99%|█████████▉| 8734157/8841823 [00:26<00:00, 328686.03it/s]
msmarco-passage documents:  99%|█████████▉| 8754859/8841823 [00:27<00:00, 312837.24it/s]


SPLADE reranking successful
Processing query 38/43: axon terminals or synaptic knob definition


msmarco-passage documents:  98%|█████████▊| 8641107/8841823 [00:27<00:00, 309621.80it/s]
msmarco-passage documents:  95%|█████████▌| 8418679/8841823 [00:26<00:01, 322145.78it/s]
msmarco-passage documents:  98%|█████████▊| 8641104/8841823 [00:27<00:00, 317091.25it/s]
msmarco-passage documents:  97%|█████████▋| 8619129/8841823 [00:27<00:00, 315420.03it/s]
msmarco-passage documents:  93%|█████████▎| 8216702/8841823 [00:26<00:01, 315719.28it/s]


SPLADE reranking successful
Processing query 39/43: is cdg airport in main paris


msmarco-passage documents:  95%|█████████▌| 8433858/8841823 [00:27<00:01, 311063.62it/s]
msmarco-passage documents:  86%|████████▌ | 7589975/8841823 [00:24<00:04, 312643.64it/s]
msmarco-passage documents:  95%|█████████▌| 8433854/8841823 [00:26<00:01, 320020.69it/s]
msmarco-passage documents:  97%|█████████▋| 8610271/8841823 [00:27<00:00, 316448.92it/s]
msmarco-passage documents:  87%|████████▋ | 7728831/8841823 [00:24<00:03, 313657.95it/s]


SPLADE reranking successful
Processing query 40/43: example of monotonic function


msmarco-passage documents:  99%|█████████▉| 8757181/8841823 [00:27<00:00, 317393.27it/s]
msmarco-passage documents:  99%|█████████▉| 8757182/8841823 [00:26<00:00, 327312.46it/s]
msmarco-passage documents:  97%|█████████▋| 8598792/8841823 [00:27<00:00, 315305.13it/s]
msmarco-passage documents:  99%|█████████▉| 8757184/8841823 [00:27<00:00, 314879.70it/s]
msmarco-passage documents:  99%|█████████▉| 8757183/8841823 [00:27<00:00, 313033.54it/s]


SPLADE reranking successful
Processing query 41/43: what is physical description of spruce


msmarco-passage documents:  92%|█████████▏| 8162984/8841823 [00:26<00:02, 309545.35it/s]
msmarco-passage documents:  92%|█████████▏| 8133335/8841823 [00:25<00:02, 316305.72it/s]
msmarco-passage documents:  89%|████████▉ | 7901000/8841823 [00:24<00:02, 317226.03it/s]
msmarco-passage documents:  96%|█████████▌| 8503450/8841823 [00:26<00:01, 316122.71it/s]
msmarco-passage documents:  88%|████████▊ | 7766971/8841823 [00:24<00:03, 317701.52it/s]


SPLADE reranking successful
Processing query 42/43: hydrogen is a liquid below what temperature


msmarco-passage documents:  89%|████████▉ | 7911557/8841823 [00:25<00:02, 312251.48it/s]
msmarco-passage documents:  98%|█████████▊| 8633911/8841823 [00:26<00:00, 321207.37it/s]
msmarco-passage documents:  97%|█████████▋| 8588219/8841823 [00:27<00:00, 317201.87it/s]
msmarco-passage documents:  99%|█████████▊| 8710041/8841823 [00:28<00:00, 301553.63it/s]
msmarco-passage documents:  99%|█████████▊| 8712732/8841823 [00:27<00:00, 312371.07it/s]


SPLADE reranking successful
Processing query 43/43: difference between a mcdouble and a double cheeseburger


msmarco-passage documents:  95%|█████████▌| 8434626/8841823 [00:27<00:01, 307982.60it/s]
msmarco-passage documents:  99%|█████████▉| 8795494/8841823 [00:28<00:00, 312328.80it/s]
msmarco-passage documents:  99%|█████████▉| 8795488/8841823 [00:28<00:00, 311381.58it/s]
msmarco-passage documents:  99%|█████████▉| 8795495/8841823 [00:28<00:00, 313478.79it/s]
msmarco-passage documents:  99%|█████████▉| 8795491/8841823 [00:27<00:00, 323218.53it/s]


SPLADE reranking successful


In [7]:
# Cell to save the SPLADE results to CSV
import pandas as pd

# Combine all results into a single DataFrame
combined_results = pd.concat(all_results) if all_results else pd.DataFrame()

# Save results to CSV
combined_results.to_csv('splade_results.csv', index=False)

print(f"\nRetrieved {len(combined_results)} documents across {len(topics)} queries")
print(f"Results saved to splade_results.csv")

# Show summary of results
print("\nTop documents for first few queries:")
print(combined_results.groupby('qid').head(1)[['qid', 'docno', 'score']].head(5))

# Print sample of CSV structure
print("\nSample of saved CSV structure:")
print(combined_results.head(3))


Retrieved 425 documents across 43 queries
Results saved to splade_results.csv

Top documents for first few queries:
        qid    docno      score
26   156493  8182159  27.926720
5   1110199  8160519  23.507820
8   1063750  4337526  21.525167
13   130510  8612903  24.929392
40   489204  1778458  19.598661

Sample of saved CSV structure:
       qid    docid    docno             query  \
26  156493  8182159  8182159  do goldfish grow   
10  156493  1960257  1960257  do goldfish grow   
24  156493  2928707  2928707  do goldfish grow   

                                                 text      score  rank  
26  'Goldfish usually grow with their environment ...  27.926720     0  
10  Goldfish Only Grow to the Size of Their Enclos...  27.720867     0  
24  Goldfish Only Grow to the Size of Their Enclos...  27.577404     1  
