In [1]:
!pip install python-terrier
!pip install transformers

import torch
import pyterrier as pt
import requests
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from pathlib import Path



In [2]:
pt.init()

Java started and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]
java is now started automatically with default settings. To force initialisation early, run:
pt.java.init() # optional, forces java initialisation
  pt.init()


In [5]:
from pathlib import Path

# Load BM25 Index using PyTerrier
DATASET_NAME = "irds:beir/webis-touche2020"  # Options: "irds:trec-robust-2004", "irds:beir/webis-touche2020", "irds:beir/dbpedia-entity"

# Load Dataset
dataset = pt.datasets.get_dataset(DATASET_NAME)
queries = dataset.get_topics()
qrels = dataset.get_qrels()

There are multiple query fields available: ('text', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


beir/webis-touche2020 documents:   0%|          | 0/382545 [00:00<?, ?it/s]

  bm25 = pt.BatchRetrieve(index, wmodel="BM25")


In [None]:
index = pt.index.IterDictIndexer(
    str(Path.cwd()),
    meta={
        "docno": 39,
        "text": 131072,
    },
    type=pt.index.IndexingType.MEMORY,
).index(dataset.get_corpus_iter())
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

In [6]:
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [7]:

def generate_reformulated_query(query, instructions):
    reformulated_queries = []
    for instruction in instructions:
        prompt = f"{instruction}: {query}"
        inputs = tokenizer(prompt, return_tensors="pt")
        outputs = model.generate(**inputs, max_length=64, num_return_sequences=1)
        reformulated_query = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # *** Add this line to remove any potentially problematic colons: ***
        reformulated_query = reformulated_query.replace("?", "").replace('"', '').replace("'", "")
        print(reformulated_query)

        reformulated_queries.append(reformulated_query)
    return reformulated_queries

In [8]:
# Step 2: Retrieve Documents with BM25
def retrieve_documents(query, k=10):
    """Retrieve top-K documents using BM25 in PyTerrier."""
    results = bm25.search(query, k)
    return results[["docno", "score"]].values.tolist()

In [25]:
import itertools
import pandas as pd

def evaluate_combinations(paraphrases, k=10):
    """Evaluate retrieval performance for all combinations of paraphrases."""

    results_list = []

    # Generate all subsets of paraphrases
    all_paraphrase_subsets = [subset for L in range(1, len(paraphrases) + 1) for subset in itertools.combinations(paraphrases, L)]

    for subset in all_paraphrase_subsets[:1]:
        subset_name = " | ".join(subset)  # For display
        print(f"\n🔹 Evaluating Combination: {subset_name}")

        for _, row in queries.iterrows():
            query = row["text"]
            qid = row["qid"]

            # Generate reformulated queries using the current subset of paraphrases
            reformed_queries = generate_reformulated_query(query, subset)

            results = []
            for rq in reformed_queries:
                results.extend(retrieve_documents(rq, k))

            # Aggregate scores
            aggregated_results = {}
            for docid, score in results:
                aggregated_results[docid] = aggregated_results.get(docid, 0) + score

            sorted_results = sorted(aggregated_results.items(), key=lambda x: x[1], reverse=True)[:k]

            # Store results for evaluation
            for rank, (docid, score) in enumerate(sorted_results):
                results_list.append({"qid": qid, "docno": docid, "rank": rank + 1, "score": score, "subset": subset_name})

    # Convert results to DataFrame
    results_df = pd.DataFrame(results_list)

    # Compute evaluation metrics for each subset
    print("\n📊 Evaluation Results Per Combination:")
    for subset in all_paraphrase_subsets:
        subset_name = " | ".join(subset)
        subset_df = results_df[results_df["subset"] == subset_name]
        eval_metrics = pt.Utils.evaluate(subset_df, qrels, metrics=["map", "recip_rank", "ndcg_cut_10"])
        print(f"\n🔹 Results for '{subset_name}': {eval_metrics}")

In [23]:
# Manually input your paraphrased instructions here
paraphrases = [
    "Improve the search effectiveness by suggesting expansion terms for the query",
    "Recommend expansion terms for the query to improve search results",
    "Improve the search effectiveness by suggesting useful expansion terms for the query",
    "Maximize search utility by suggesting relevant expansion phrases for the query",
    "Enhance search efficiency by proposing valuable terms to expand the query",
    "Elevate search performance by recommending relevant expansion phrases for the query",
    "Boost the search accuracy by providing helpful expansion terms to enrich the query",
    "Increase the search efficacy by offering beneficial expansion keywords for the query",
    "Optimize search results by suggesting meaningful expansion terms to enhance the query",
    "Enhance search outcomes by recommending beneficial expansion terms to supplement the query"
]

In [26]:
evaluate_combinations(paraphrases)


🔹 Evaluating Combination: Improve the search effectiveness by suggesting expansion terms for the query
Should teachers get tenure
Is vaping with e-cigarettes safe
Should insider trading be prohibited
Should corporal punishment be used in schools
Should social security be privatized
Is a college education worth it
Should felons who have completed their sentence be allowed to vote
Should abortion be legal
Should students have to wear school uniforms
Should any vaccines be required for children
Should performance-enhancing drugs be accepted in sports
Should birth control pills be available over the counter
Can alternative energy replace fossil fuels
Is sexual orientation determined at birth
Should animals be used for scientific or commercial testing
Should prescription drugs be advertised directly to consumers
Should recreational marijuana be legal
Should churches remain tax-exempt
Should gay marriage be legal
Is drinking milk healthy for humans
Is human activity primarily responsible fo

  eval_metrics = pt.Utils.evaluate(subset_df, qrels, metrics=["map", "recip_rank", "ndcg_cut_10", "P_10"])


ValueError: No results for evaluation