In [10]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [11]:
# Start/initialize PyTerrier.
from pyterrier import started, init

# Only initialise when PyTerrier reports that it has not been started yet,
# so that re-running this cell does not call init() a second time.
if not started():
    # NOTE(review): tqdm="auto" presumably selects the progress-bar flavour
    # used by PyTerrier -- confirm against the PyTerrier docs.
    init(tqdm="auto")

In [12]:
from pyterrier.datasets import get_dataset, Dataset

# Load the datasets through PyTerrier's "irds:" dataset identifiers.
dataset_name = "msmarco-passage"
# Base dataset: its corpus iterator feeds the indexer further down.
dataset: Dataset = get_dataset(f"irds:{dataset_name}")
# Judged TREC DL 2020 subset: provides the topics and qrels for the evaluation.
dataset_test: Dataset = get_dataset(f"irds:{dataset_name}/trec-dl-2020/judged")

In [13]:
from pathlib import Path

# Root folder (relative to the working directory) for everything this
# notebook caches on disk.
cache_dir = Path("cache/")
# The index is stored under <cache_dir>/indices/<first "/"-separated
# segment of the dataset name>.
index_dir = cache_dir.joinpath("indices", dataset_name.partition("/")[0])

In [14]:
from pyterrier.index import IterDictIndexer

# Build the index only when no finished index is present.
# Checking for the directory alone is not enough: an interrupted indexing
# run leaves the directory behind, and the incomplete index would then never
# be rebuilt. PyTerrier's own examples test for the `data.properties` file
# to decide whether an index already exists, so do the same here.
if not (index_dir / "data.properties").exists():
    indexer = IterDictIndexer(str(index_dir.absolute()))
    # Index the "text" field of every document yielded by the corpus iterator.
    indexer.index(
        dataset.get_corpus_iter(),
        fields=["text"]
    )

In [15]:
from pyterrier.batchretrieve import BatchRetrieve

# First-stage retrieval: BM25 weighting model on top of the on-disk index.
# This serves as the baseline system in the evaluation.
bm25 = BatchRetrieve(
    str(index_dir.absolute()),
    wmodel="BM25",
)

In [16]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, REG_f, ANTI_REG_f, ASPECT_REG, ASPECT_REG_f, ORIG, VoteAxiom
)

# Combined axiom used for re-ranking: a vote over all axioms listed below
# (minimum_votes=0.5), wrapped with `~` and combined with ORIG() via `|`.
# NOTE(review): the operator semantics are defined by ir_axioms -- presumably
# `~` normalises the preference and `|` falls back to ORIG() when the vote is
# undecided; confirm against the ir_axioms documentation.
# NOTE(review): the group labels below are inferred from the axiom names.
axiom = ~VoteAxiom(
    [
        # Argumentativeness-related axioms.
        ArgUC(), QTArg(), QTPArg(), aSL(),
        # Length-normalisation / lower-bound axioms.
        LNC1(), TF_LNC(), LB1(),
        # Proximity axioms.
        PROX1(), PROX2(), PROX3(), PROX4(), PROX5(),
        # Regularisation axioms.
        REG(), REG_f(), ANTI_REG(), ANTI_REG_f(), ASPECT_REG(), ASPECT_REG_f(),
        # Query-aspect axioms.
        AND(), LEN_AND(), M_AND(), LEN_M_AND(), DIV(), LEN_DIV(),
        # Retrieval-score axioms.
        RS_TF(), RS_TF_IDF(), RS_BM25(), RS_PL2(), RS_QL(),
        # Term-frequency axioms.
        TFC1(), TFC3(), M_TDC(), LEN_M_TDC(),
        # Semantic term-matching axioms.
        STMC1(), STMC1_f(), STMC2(), STMC2_f(),
    ],
    minimum_votes=0.5,
) | ORIG()

In [17]:
from ir_axioms.modules.pivot import MiddlePivotSelection
from ir_axioms.backend.pyterrier.transformers import KwikSortReranker

# Re-ranking pipeline: feed `bm25 % 20` into the axiomatic KwikSort re-ranker.
# NOTE(review): per PyTerrier's operator overloads, `% 20` should cut the BM25
# ranking off at rank 20 and `>>` should chain the two stages -- confirm.
# The parentheses only spell out the precedence Python already applies.
kwiksort = (bm25 % 20) >> KwikSortReranker(
    axiom=axiom,
    pivot_selection=MiddlePivotSelection(),
    index=index_dir,
    dataset=dataset_name,
    cache_dir=cache_dir,
    verbose=True,
)

In [18]:
from pyterrier.pipelines import Experiment
from ir_measures import nDCG, MAP, RR

# Evaluate the BM25 baseline against the KwikSort pipeline on the judged test
# topics, reporting nDCG@10, RR and MAP.
# NOTE(review): `kwiksort ^ bm25` presumably appends the remaining BM25
# results below the re-ranked ones (PyTerrier's `^` operator) -- confirm.
# The result table is sorted best-first by nDCG@10. The sort is chained
# instead of using `inplace=True`, so the cell yields a fresh, already sorted
# frame and never mutates an object in place.
experiment = Experiment(
    [bm25, kwiksort ^ bm25],
    dataset_test.get_topics(),
    dataset_test.get_qrels(),
    [nDCG @ 10, RR, MAP],
    ["BM25", "KwikSort"],
    verbose=True,
).sort_values(by="nDCG@10", ascending=False)

pt.Experiment:   0%|          | 0/2 [00:00<?, ?system/s]

Reranking query axiomatically:   0%|          | 0/54 [00:00<?, ?query/s]

In [19]:
# Show the evaluation table (one row per system) as this cell's output.
experiment

Unnamed: 0,name,nDCG@10,RR,AP
0,BM25,0.493627,0.802359,0.358724
1,KwikSort,0.491858,0.802102,0.358587
