# Step 1: Import Libraries

In [2]:
# Install necessary libraries
!pip install python-terrier

[0m

In [3]:
# Import necessary libraries
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
from tira.rest_api_client import Client

import nltk

import pandas as pd
import pyterrier as pt
from pyterrier.measures import *
import math

In [4]:
# Create a REST client to the TIRA platform for retrieving the pre-indexed data.
ensure_pyterrier_is_loaded()
tira = Client()

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [5]:
# Initialize PyTerrier
if not pt.started():
    pt.init()

In [6]:
# The dataset: the union of the IR Anthology and the ACL Anthology
# This line creates an IRDSDataset object and registers it under the name provided as an argument.
dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')

# A (pre-built) PyTerrier index loaded from TIRA
index = tira.pt.index('ir-lab-sose-2024/tira-ir-starter/Index (tira-ir-starter-pyterrier)', dataset)

# Step 2: Data Loading

In [7]:
# Display some example data
topics = dataset.get_topics()
qrels = dataset.get_qrels()


There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


In [8]:
print(topics.head())
print(qrels.head())

  qid                                      text  \
0   1  retrieval system improving effectiveness   
1   2  machine learning language identification   
2   3             social media detect self-harm   
3   4             stemming for arabic languages   
4   5            audio based animal recognition   

                                      title  \
0  retrieval system improving effectiveness   
1  machine learning language identification   
2             social media detect self-harm   
3             stemming for arabic languages   
4            audio based animal recognition   

                                      query  \
0  retrieval system improving effectiveness   
1  machine learning language identification   
2             social media detect self harm   
3             stemming for arabic languages   
4            audio based animal recognition   

                                         description  \
0  What papers focus on improving the effectivene...   
1  What papers 

In [9]:
# Step 3: Data Preprocessing
# Here we would include steps to clean and preprocess the text, such as removing stop words, stemming, etc.
# For this example, we'll assume the data is already preprocessed.

# Step 4: Information Retrieval Models

In [10]:
# BM25
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

In [11]:
# TF-IDF
tfidf = pt.BatchRetrieve(index, wmodel="TF_IDF")

# Step 5: Evaluation

In [12]:
# Evaluate BM25
bm25_results = bm25.transform(topics)
bm25_metrics = pt.Experiment(
    [bm25],
    topics,
    qrels,
    eval_metrics=["map", "ndcg", "recip_rank"]
)

In [13]:
# Evaluate TF-IDF
tfidf_results = tfidf.transform(topics)
tfidf_metrics = pt.Experiment(
    [tfidf],
    topics,
    qrels,
    eval_metrics=["map", "ndcg", "recip_rank"]
)

# Step 6: Conclusion

In [14]:
run = bm25_results
persist_and_normalize_run(run, system_name='bm25-baseline', default_output='../runs')

run = tfidf_results
persist_and_normalize_run(run, system_name='tfidf-baseline', default_output='../runs')

The run file is normalized outside the TIRA sandbox, I will store it at "../runs".
Done. run file is stored under "../runs/run.txt".
The run file is normalized outside the TIRA sandbox, I will store it at "../runs".
Done. run file is stored under "../runs/run.txt".


In [15]:
# Summarize findings
print("BM25 Metrics:", bm25_metrics)
print("TF-IDF Metrics:", tfidf_metrics)

BM25 Metrics:        name       map      ndcg  recip_rank
0  BR(BM25)  0.262311  0.549461    0.579877
TF-IDF Metrics:          name       map      ndcg  recip_rank
0  BR(TF_IDF)  0.264204  0.554081    0.582987


# Step 7 : Benchmarks

### bm25

In [16]:
# Local test of BM25
bm25_experiment = pt.Experiment(
    [bm25],
    topics,
    qrels,
    eval_metrics=['P_1000', 'map', 'recip_rank'],
    names=['BM25'],
    baseline=0
)

In [17]:
# Display BM25 results
print("BM25 Experiment Results")
print(bm25_experiment)

BM25 Experiment Results
   name       map  recip_rank    P_1000 map + map - map p-value recip_rank +  \
0  BM25  0.262311    0.579877  0.016191  None  None        None         None   

  recip_rank - recip_rank p-value P_1000 + P_1000 - P_1000 p-value  
0         None               None     None     None           None  


### TF-IDF

In [18]:
# Local test of TF-IDF
tfidf_experiment = pt.Experiment(
    [tfidf],
    topics,
    qrels,
    eval_metrics=['P_1000', 'map', 'recip_rank'],
    names=['TF-IDF'],
    baseline=0
)

In [19]:
# Display TF-IDF results
print("TF-IDF Experiment Results")
print(tfidf_experiment)

TF-IDF Experiment Results
     name       map  recip_rank    P_1000 map + map - map p-value  \
0  TF-IDF  0.264204    0.582987  0.016426  None  None        None   

  recip_rank + recip_rank - recip_rank p-value P_1000 + P_1000 -  \
0         None         None               None     None     None   

  P_1000 p-value  
0           None  
