In [None]:
#Loading Dataset:
!pip install python-terrier

Collecting python-terrier
  Downloading python-terrier-0.9.2.tar.gz (104 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/104.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.4/104.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting wget (from python-terrier)
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyjnius>=1.4.2 (from python-terrier)
  Downloading pyjnius-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting matchpy (from python-terrier)
  Downloading matchpy-0.5.5-py3-none-any.whl (69 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.6/69.6 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Collecting depreca

In [None]:
import pyterrier as pt

# Initialise PyTerrier only if this kernel has not already done so, which
# keeps the cell safe to re-run.
if not pt.started():
    pt.init()

terrier-assemblies 5.7 jar-with-dependencies not found, downloading to /root/.pyterrier...
Done
terrier-python-helper 0.0.7 jar not found, downloading to /root/.pyterrier...
Done


PyTerrier 0.9.2 has loaded Terrier 5.7 (built by craigm on 2022-11-10 18:30) and terrier-helper 0.0.7



In [None]:
# Handle on the "trec-deep-learning-docs" dataset; the corpus, topics and
# qrels used in the following cells are all obtained through it.
# Note: despite the name, `df` is a PyTerrier dataset object, not a DataFrame.
df = pt.datasets.get_dataset("trec-deep-learning-docs")

In [None]:
# Build the Terrier index for the corpus, or reuse it when it is already on disk.
import os

fields = ["title", "body"]
index_path = "./index_with_fields"
indexer_with_fields = pt.TRECCollectionIndexer(index_path)

# Set the properties on Terrier's global properties
# NOTE(review): the BM25F and PL2F rows in the results table at the end of this
# notebook are close to zero, which suggests the "title"/"body" tags configured
# here may not exist inside the corpus <DOC> elements - confirm against the
# corpus markup before relying on the field-based models.
field_list = ",".join(fields)
pt.ApplicationSetup.setProperty("indexer.meta.forward.keys", field_list)
pt.ApplicationSetup.setProperty("indexer.meta.reverse.keys", field_list)
pt.ApplicationSetup.setProperty("FieldTags.process", field_list)
pt.ApplicationSetup.setProperty("TrecDocTags.doctag", "DOC")
pt.ApplicationSetup.setProperty("TrecDocTags.idtag", "DOCNO")
pt.ApplicationSetup.setProperty("TrecDocTags.skip", "DOCHDR")

# The indexer is created without overwrite=True, so calling index() on a
# directory that already holds an index raises instead of rebuilding. Reusing
# the existing index makes the cell re-runnable and avoids re-indexing the
# multi-gigabyte corpus. Delete the index directory to force a rebuild.
index_properties = os.path.join(index_path, "data.properties")
if os.path.isfile(index_properties):
    indexref_with_fields = index_properties
else:
    # Now index with the fields
    indexref_with_fields = indexer_with_fields.index(df.get_corpus())

index_with_fields = pt.IndexFactory.of(indexref_with_fields)

Downloading msmarco_document corpus to /root/.pyterrier/corpora/msmarco_document/corpus


msmarco-docs.trec.gz:   0%|          | 0.00/7.92G [00:00<?, ?iB/s]

In [None]:
# Queries ("topics") and relevance judgments ("qrels") of the "test" variant
topics = df.get_topics(variant="test")
qrels = df.get_qrels(variant="test")

Downloading msmarco_document topics to /root/.pyterrier/corpora/msmarco_document/msmarco-test2019-queries.tsv.gz


msmarco-test2019-queries.tsv.gz:   0%|          | 0.00/4.18k [00:00<?, ?iB/s]

Downloading msmarco_document qrels to /root/.pyterrier/corpora/msmarco_document/2019qrels-docs.txt


2019qrels-docs.txt:   0%|          | 0.00/331k [00:00<?, ?iB/s]

In [None]:
# One retriever per weighting model, all backed by the same index.
# Each pair is (label used in the result tables, Terrier weighting-model name).
models = {
    label: pt.BatchRetrieve(index_with_fields, wmodel=wmodel)
    for label, wmodel in [
        ("TF-IDF", "TF_IDF"),
        ("BM25", "BM25"),
        ("BM25F", "BM25F"),
        ("PL2", "PL2"),
        ("PL2F", "PL2F"),
        ("DPH", "DPH"),
    ]
}

In [None]:
# Metrics computed for every weighting model
eval_metrics = ["map_cut_10", "map_cut_100", "ndcg_cut_10", "ndcg_cut_100"]

# Run every retriever over the topics; keyed by model label
results = {
    label: retriever.transform(topics)
    for label, retriever in models.items()
}

# Score each model's run against the qrels; keyed by model label
evaluator = {
    label: pt.Utils.evaluate(run, qrels, eval_metrics)
    for label, run in results.items()
}

In [None]:
# Define function for calculating MRR@10 and MRR@100
def calculate_reciprocal_rank(results_dict, qrels, cutoffs=(10, 100)):
    """Compute the reciprocal-rank metric at several rank cutoffs per model.

    Each model's results are truncated to the top ``k`` rows of every topic
    and then scored with ``pt.Utils.evaluate`` using the ``recip_rank`` metric.

    Args:
        results_dict: Mapping of model name to a results DataFrame. Each frame
            must have a ``qid`` column; when a ``rank`` column is present it is
            used to order the rows of each topic before truncation.
        qrels: Relevance judgments, passed through to ``pt.Utils.evaluate``.
        cutoffs: Rank cutoffs to evaluate. The default reproduces the original
            MRR@10 / MRR@100 pair.

    Returns:
        dict: ``{model_name: {"MRR@<k>": value, ...}}`` with one entry per
        cutoff, in the order given by ``cutoffs``.
    """
    metrics = ["recip_rank"]
    mrr_eval = {}

    for model_name, results in results_dict.items():
        # head(k) keeps the first k rows of each group, so the rows must be in
        # rank order for the truncation to really be "top k".
        if "rank" in results.columns:
            results = results.sort_values(["qid", "rank"])

        # Group on the topic id rather than the query text: two topics with the
        # same text would otherwise be merged and truncated as a single group.
        per_topic = results.groupby("qid", sort=False)

        mrr_eval[model_name] = {
            f"MRR@{k}": pt.Utils.evaluate(
                per_topic.head(k), qrels, metrics=metrics
            )["recip_rank"]
            for k in cutoffs
        }

    return mrr_eval

# Reciprocal-rank scores for every model's run, keyed by model name
mrr_eval = calculate_reciprocal_rank(results_dict=results, qrels=qrels)

In [None]:
# Display the results
import pandas as pd

# Display label for each metric key requested from the evaluator.
# Renaming by key (instead of assigning the column list positionally) means a
# change in the order of the evaluated metrics can never attach a label to the
# wrong column; an unexpected key simply keeps its raw name.
metric_labels = {
    "map_cut_10": "MAP@10",
    "map_cut_100": "MAP@100",
    "ndcg_cut_10": "nDCG@10",
    "ndcg_cut_100": "nDCG@100",
}

# One row per weighting model, one column per metric
df_eval = pd.DataFrame.from_dict(evaluator, orient="index").rename(columns=metric_labels)  # MAP and nDCG
df_reciprocal_rank = pd.DataFrame.from_dict(mrr_eval, orient="index")  # MRR

# Merge the two DataFrames for displaying all results side by side
merged_df = pd.concat([df_reciprocal_rank, df_eval], axis=1)
print(merged_df)

          MRR@10   MRR@100    MAP@10   MAP@100   nDCG@10  nDCG@100
TF-IDF  0.864987  0.866044  0.113309  0.244909  0.524194  0.503096
BM25    0.874123  0.874123  0.114305  0.248334  0.538802  0.512328
BM25F   0.023256  0.023256  0.023256  0.023259  0.018011  0.018193
PL2     0.841085  0.842746  0.112509  0.248535  0.542381  0.518715
PL2F    0.023256  0.024211  0.023256  0.023259  0.018011  0.018197
DPH     0.907946  0.907946  0.115437  0.249372  0.543129  0.515971
