In [8]:
%pip install --no-cache-dir torch

Collecting torch
  Downloading torch-2.8.0-cp310-cp310-manylinux_2_28_x86_64.whl (888.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m888.0/888.0 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:21[0m
Collecting nvidia-cufft-cu12==11.3.3.83
  Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (193.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.1/193.1 MB[0m [31m999.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:07[0m
[?25hCollecting nvidia-cusolver-cu12==11.7.3.90
  Downloading nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl (267.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m267.5/267.5 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:03[0m
[?25hCollecting nvidia-cusparse-cu12==12.5.8.93
  Downloading nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (288.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [9]:
%pip install -U sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-5.1.0-py3-none-any.whl (483 kB)
Collecting transformers<5.0.0,>=4.41.0
  Using cached transformers-4.55.2-py3-none-any.whl (11.3 MB)
Collecting scikit-learn
  Using cached scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)
Collecting huggingface-hub>=0.20.0
  Using cached huggingface_hub-0.34.4-py3-none-any.whl (561 kB)
Collecting tokenizers<0.22,>=0.21
  Using cached tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
Installing collected packages: scikit-learn, huggingface-hub, tokenizers, transformers, sentence-transformers
Successfully installed huggingface-hub-0.34.4 scikit-learn-1.7.1 sentence-transformers-5.1.0 tokenizers-0.21.4 transformers-4.55.2
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import pandas as pd
import pyterrier as pt
import numpy as np
from sentence_transformers import CrossEncoder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
indexer = pt.IterDictIndexer('./indices/msmarco-passage')
index = indexer.path
bm25 = pt.terrier.Retriever(index, wmodel="BM25", num_results=100)

Java started (triggered by TerrierIndexer.__init__) and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]


In [3]:
ds_topics = pt.get_dataset('irds:msmarco-passage/trec-dl-2019/judged')
ds_text = pt.get_dataset('irds:msmarco-passage')
topics = ds_topics.get_topics()


In [4]:
def get_text_for_docnos(docnos, dataset):
    """Efficiently get text for specific document IDs"""
    doc_texts = {}
    
    # Convert all docnos to the format we'll try
    docno_variants = {}
    for docno in docnos:
        str_docno = str(docno)
        docno_variants[str_docno] = docno
        # Also try integer version if it's numeric
        if str_docno.isdigit():
            docno_variants[int(str_docno)] = docno
    
    # Only iterate through corpus until we find all needed documents
    needed_ids = set(docno_variants.keys())
    found_count = 0
    
    corpus_iter = dataset.get_corpus_iter()
    for doc in corpus_iter:
        doc_id = doc['docno']
        
        # Check if this document is one we need
        if doc_id in needed_ids:
            original_docno = docno_variants[doc_id]
            doc_texts[original_docno] = doc['text']
            found_count += 1
            
            # Stop early if we found all documents we need
            if found_count >= len(docnos):
                break
    
    # Fill in missing documents
    for docno in docnos:
        if docno not in doc_texts:
            doc_texts[docno] = "Document text not available"
    
    return doc_texts


In [5]:
import torch
try:
    print("Trying to load ColBERT model from HuggingFace...")
    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    
    # Try to load ColBERT model
    model_name = "colbert-ir/colbertv2.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    
    def _colbert_apply(df):
        """Apply ColBERT scoring to query-document pairs"""
        pairs = list(zip(df['query'].values, df['text'].values))
        scores = []
        
        for query, doc in pairs:
            inputs = tokenizer(query, doc, return_tensors="pt", truncation=True, max_length=512)
            with torch.no_grad():
                outputs = model(**inputs)
                
            # Fix: Handle multi-element logits correctly
            # For binary classification models, use the second logit (positive class)
            if outputs.logits.size(1) > 1:
                # Use the second element (positive class score)
                score = outputs.logits[0, 1].item()
            else:
                # Use the single score if only one dimension
                score = outputs.logits.item()
                
            scores.append(score)
        
        return np.array(scores)
    
    # Create the PyTerrier transformer for reranking
    reranker = pt.apply.doc_score(_colbert_apply, batch_size=32)
    model_name_for_output = "colbert"
    print("Successfully loaded ColBERT model")
    
except Exception as e:
    print(f"Failed to load ColBERT: {e}")
    print("Falling back to Sentence Transformers MiniLM model...")
    # Rest of your fallback code remains the same

Trying to load ColBERT model from HuggingFace...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at colbert-ir/colbertv2.0 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Successfully loaded ColBERT model


In [6]:
def add_text_efficiently(df):
    """Add text to dataframe for only the documents in this batch"""
    df = df.copy()
    docnos = df['docno'].tolist()
    doc_texts = get_text_for_docnos(docnos, ds_text)
    df['text'] = df['docno'].map(doc_texts)
    return df

In [7]:
if reranker is not None:
    # BM25 -> Add Text -> Reranking
    retrieval_pipeline = bm25 >> pt.apply.generic(add_text_efficiently) >> reranker
else:
    # BM25 -> Add Text (no reranking)
    retrieval_pipeline = bm25 >> pt.apply.generic(add_text_efficiently)


In [8]:
all_results = []

for idx, row in topics.iterrows():
    query_id = row['qid']
    query_text = row['query']
    
    print(f"Processing query {idx+1}/{len(topics)}: {query_text}")
    
    # Get results from the pipeline
    results = retrieval_pipeline.search(query_text)
    
    # Take top 10 results
    top_results = results.head(10)
    
    # Add query information to results
    top_results['qid'] = query_id
    top_results['query'] = query_text
    
    # Add to collection
    all_results.append(top_results)
    
    # Show sample results for first query
    if idx == 0:
        print("\nSample results for first query:")
        print(top_results[['docno', 'score', 'text']].head(3))

# Step 8: Combine all results
combined_results = pd.concat(all_results) if all_results else pd.DataFrame()

# Step 9: Save results to CSV
output_file = f'{model_name_for_output}_results.csv'
combined_results.to_csv(output_file, index=False)

print(f"\nRetrieved {len(combined_results)} documents across {len(topics)} queries")
print(f"Results saved to {output_file}")

# Step 10: Show summary of results
print("\nTop documents for first few queries:")
print(combined_results.groupby('qid').head(1)[['qid', 'docno', 'score']].head(5))

Processing query 1/43: do goldfish grow


msmarco-passage documents: 100%|█████████▉| 8803136/8841823 [00:30<00:00, 285437.34it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text



Sample results for first query:
      docno     score                                               text
83  3024301  0.189813  Family Zoo: Growing up in Tasmania, her family...
98  3180514  0.079862  Koi Carp Worlds Largest Goldfish Bowl Goldfish...
71  2612491  0.070616  Goldfish growth rates can be massive for their...
Processing query 2/43: what is wifi vs bluetooth


msmarco-passage documents:  94%|█████████▍| 8301973/8841823 [00:26<00:01, 308702.46it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 3/43: why did the us volunterilay enter ww1


msmarco-passage documents:  99%|█████████▉| 8767916/8841823 [00:30<00:00, 290616.84it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 4/43: definition declaratory judgment


msmarco-passage documents:  97%|█████████▋| 8612910/8841823 [00:27<00:00, 316478.93it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 5/43: right pelvic pain causes


msmarco-passage documents:  99%|█████████▉| 8737053/8841823 [00:27<00:00, 314027.57it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 6/43: what are the social determinants of health


msmarco-passage documents: 100%|█████████▉| 8833199/8841823 [00:28<00:00, 307551.55it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 7/43: does legionella pneumophila cause pneumonia


msmarco-passage documents:  99%|█████████▉| 8743646/8841823 [00:27<00:00, 313626.19it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 8/43: how is the weather in jamaica


msmarco-passage documents: 100%|█████████▉| 8839920/8841823 [00:27<00:00, 319116.72it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 9/43: types of dysarthria from cerebral palsy


msmarco-passage documents:  98%|█████████▊| 8644844/8841823 [00:27<00:00, 313103.43it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 10/43: who is robert gray


msmarco-passage documents: 100%|█████████▉| 8820474/8841823 [00:27<00:00, 315708.68it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 11/43: what types of food can you cook sous vide


msmarco-passage documents:  96%|█████████▌| 8455306/8841823 [00:25<00:01, 326454.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 12/43: how long is life cycle of flea


msmarco-passage documents: 100%|█████████▉| 8801660/8841823 [00:27<00:00, 323711.69it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 13/43: what can contour plowing reduce


msmarco-passage documents: 100%|█████████▉| 8832484/8841823 [00:27<00:00, 320462.87it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 14/43: when was the salvation army founded


msmarco-passage documents: 100%|█████████▉| 8811426/8841823 [00:27<00:00, 322606.55it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 15/43: what is a active margin


msmarco-passage documents:  99%|█████████▊| 8714087/8841823 [00:26<00:00, 322897.18it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 16/43: difference between rn and bsn


msmarco-passage documents:  99%|█████████▊| 8730430/8841823 [00:27<00:00, 322038.26it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 17/43: medicare s definition of mechanical ventilation


msmarco-passage documents:  99%|█████████▉| 8787234/8841823 [00:27<00:00, 322428.20it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 18/43: how to find the midsegment of a trapezoid


msmarco-passage documents:  99%|█████████▉| 8748954/8841823 [00:27<00:00, 322321.06it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 19/43: what is an aml surveillance analyst


msmarco-passage documents: 100%|█████████▉| 8828147/8841823 [00:27<00:00, 321987.60it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 20/43: what is the daily life of thai people


msmarco-passage documents:  99%|█████████▊| 8725077/8841823 [00:27<00:00, 321420.83it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 21/43: definition of a sigmet


msmarco-passage documents:  99%|█████████▊| 8710814/8841823 [00:29<00:00, 296577.70it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 22/43: cost of interior concrete flooring


msmarco-passage documents:  97%|█████████▋| 8546679/8841823 [00:27<00:00, 315636.89it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 23/43: what is the most popular food in switzerland


msmarco-passage documents:  99%|█████████▉| 8742336/8841823 [00:27<00:00, 319413.81it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 24/43: how are some sharks warm blooded


msmarco-passage documents:  98%|█████████▊| 8622996/8841823 [00:27<00:00, 315282.50it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 25/43: what is durable medical equipment consist of


msmarco-passage documents:  99%|█████████▉| 8754404/8841823 [00:28<00:00, 312352.06it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 26/43: exons definition biology


msmarco-passage documents:  99%|█████████▉| 8794308/8841823 [00:27<00:00, 317299.40it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 27/43: define visceral


msmarco-passage documents: 100%|█████████▉| 8811080/8841823 [00:27<00:00, 318723.83it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 28/43: tracheids are part of


msmarco-passage documents: 100%|█████████▉| 8828525/8841823 [00:28<00:00, 314273.07it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 29/43: rsa definition key


msmarco-passage documents:  99%|█████████▊| 8709994/8841823 [00:27<00:00, 316887.66it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 30/43: who formed the commonwealth of independent states


msmarco-passage documents:  99%|█████████▉| 8792133/8841823 [00:28<00:00, 311744.52it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 31/43: causes of left ventricular hypertrophy


msmarco-passage documents:  99%|█████████▉| 8738510/8841823 [00:28<00:00, 311097.05it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 32/43: lps laws definition


msmarco-passage documents:  99%|█████████▉| 8769489/8841823 [00:27<00:00, 318405.15it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 33/43: what are the three percenters


msmarco-passage documents:  99%|█████████▉| 8749913/8841823 [00:27<00:00, 316304.54it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 34/43: causes of military suicide


msmarco-passage documents: 100%|█████████▉| 8819116/8841823 [00:28<00:00, 311947.35it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 35/43: what is theraderm used for


msmarco-passage documents:  98%|█████████▊| 8651776/8841823 [00:27<00:00, 314664.69it/s]


Processing query 36/43: what is famvir prescribed for


msmarco-passage documents:  98%|█████████▊| 8665820/8841823 [00:27<00:00, 313735.66it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 37/43: anthropological definition of environment


msmarco-passage documents: 100%|█████████▉| 8798990/8841823 [00:28<00:00, 305022.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 38/43: axon terminals or synaptic knob definition


msmarco-passage documents:  98%|█████████▊| 8641107/8841823 [00:38<00:00, 226115.88it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 39/43: is cdg airport in main paris


msmarco-passage documents:  97%|█████████▋| 8610271/8841823 [00:31<00:00, 272497.98it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 40/43: example of monotonic function


msmarco-passage documents:  99%|█████████▉| 8757184/8841823 [00:28<00:00, 302505.54it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 41/43: what is physical description of spruce


msmarco-passage documents:  96%|█████████▌| 8503450/8841823 [00:28<00:01, 302855.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 42/43: hydrogen is a liquid below what temperature


msmarco-passage documents:  99%|█████████▊| 8712732/8841823 [00:29<00:00, 298622.94it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text


Processing query 43/43: difference between a mcdouble and a double cheeseburger


msmarco-passage documents:  99%|█████████▉| 8795495/8841823 [00:29<00:00, 301062.21it/s]



Retrieved 425 documents across 43 queries
Results saved to colbert_results.csv

Top documents for first few queries:
        qid    docno     score
83   156493  3024301  0.189813
38  1110199   735472  0.121643
80  1063750  4047881  0.050373
41   130510  5685710  0.104719
52   489204  6461840  0.131342


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['qid'] = query_id
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_results['query'] = query_text
