In [13]:
# Imports
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt

In [14]:
# Create a REST client to the TIRA platform for retrieving the pre-indexed data.
ensure_pyterrier_is_loaded()
tira = Client()

In [15]:
# The dataset: the union of the IR Anthology and the ACL Anthology
# This line creates an IRDSDataset object and registers it under the name provided as an argument.
pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')
qrels = pt_dataset.get_qrels()


In [16]:
# Some baselines that were executed in TIRA
bm25_baseline = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 (tira-ir-starter-pyterrier)', pt_dataset)
sparse_cross_encoder = tira.pt.from_submission('ir-benchmarks/fschlatt/sparse-cross-encoder-4-512', pt_dataset)
rank_zephyr = tira.pt.from_submission('workshop-on-open-web-search/fschlatt/rank-zephyr', pt_dataset)

In [17]:
run_file_path = '../runs/run1.txt'

try:

    bm25_improved = pt.io.read_results('../runs/run.txt')
    print("Run file loaded successfully.")

except Exception as e:
    print(f"Error loading run file: {e}")

print(bm25_improved.head())

dataset_topics = pt_dataset.get_topics()
run_topic_ids = set(bm25_improved['qid'].unique())
dataset_topic_ids = set(dataset_topics['qid'].unique())

missing_topics = run_topic_ids - dataset_topic_ids
if missing_topics:
    print(f"Warning: The following topic IDs are in the run file but not in the dataset: {missing_topics}")
    
# Ensure document IDs in the run file match those in the qrels
qrels = pt_dataset.get_qrels()
run_doc_ids = set(bm25_improved['docno'].unique())
qrels_doc_ids = set(qrels['docno'].unique())

missing_docs = run_doc_ids - qrels_doc_ids
if missing_docs:
    print(f"Warning: The following document IDs are in the run file but not in the qrels: {missing_docs}")    


bm25_improved = pt.io.read_results('../runs/run.txt')

# Define query field to use
query_field = 'text'  # Adjust based on your specific query field

# Evaluate models
eval_metrics = ["ndcg_cut.10", "recip_rank", "recall_100"]
evaluation = pt.Experiment(
    [bm25_improved, bm25_baseline, sparse_cross_encoder, rank_zephyr],
    pt_dataset.get_topics(),
    pt_dataset.get_qrels(),
    ["ndcg_cut.10", "recip_rank", "recall_100"],
    names=["BM 25 (Improved)", "BM 25 (Baseline)", "Sparse Cross Encoder", "RankZephyr"]
)

print(evaluation)

Run file loaded successfully.
    qid      docno  rank      score          name
0  8293     8293_1     1  37.018181  doc_T5_Query
1  8293  3990173_5     2  33.864796  doc_T5_Query
2  8293   557905_2     3  33.511957  doc_T5_Query
3  8293  1687026_0     4  33.511957  doc_T5_Query
4  8293  2307305_9     5  30.003736  doc_T5_Query
There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
                   name  ndcg_cut.10  recip_rank  recall_100
0      BM 25 (Improved)     0.000000    0.000000    0.000000
1      BM 25 (Baseline)     0.374041    0.579877    0.601333
2  Sparse Cross Encoder     0.366460    0.612980    0.601333
3            RankZephyr     0.347070    0.568413    0.601333
