In [13]:
from sys import modules

IN_COLAB = 'google.colab' in modules
if IN_COLAB:
    !pip install -q ir_axioms[examples] python-terrier

In [14]:
# Start/initialize PyTerrier.
# The `started()` guard keeps this cell safe to re-run: `init` is only called
# when PyTerrier reports that it has not been started yet.
from pyterrier import started, init

if not started():
    # NOTE(review): tqdm="auto" presumably selects the notebook-aware progress
    # bar flavour -- confirm against the PyTerrier `init` documentation.
    init(tqdm="auto")

In [15]:
# --- Which TREC runs to analyse -------------------------------------------
# Both values become path components of the run directory
# (".../trec{edition}/{track}") and part of the exported CSV file name.
track = "deep.passages"
edition = 29

# --- Which corpus the runs were produced on --------------------------------
# ir_datasets identifier; also loaded through PyTerrier with an "irds:" prefix.
dataset_name = "msmarco-passage/trec-dl-2020/judged"
# Document field that is indexed and handed to the experiment as contents accessor.
contents_field = "text"

# --- Experiment size --------------------------------------------------------
# Passed to the experiment as `depth` and recorded in the CSV file name.
# NOTE(review): presumably the ranking cutoff per query -- confirm in ir_axioms.
depth = 20

In [16]:
from pyterrier.datasets import get_dataset
from ir_datasets import load

# Two handles on the same dataset identifier are needed further down:
# the raw ir_datasets object ...
ir_dataset = load(dataset_name)
# ... and the PyTerrier dataset, addressed through the "irds:" prefix, which
# provides the corpus iterator, topics and qrels used by later cells.
dataset = get_dataset(f"irds:{dataset_name}")

In [17]:
from pathlib import Path

# Root for everything this notebook caches locally (relative to the CWD).
cache_dir = Path("cache/")
# One index directory per corpus: the first component of the dataset
# identifier (the part before the first "/") names the corpus.
index_dir = cache_dir / "indices" / dataset_name.split("/")[0]
# NOTE(review): absolute, site-specific storage path; the notebook only runs
# where this mount is available.
result_dir = Path(
    "/mnt/ceph/storage/data-in-progress/data-research/"
    "web-search/web-search-trec/trec-system-runs"
) / f"trec{edition}" / track
# `iterdir()` yields entries in arbitrary order, so sort them to keep the
# order of systems (and of everything derived from it) reproducible between
# runs. Non-file entries (e.g. stray sub-directories) are skipped because
# every entry is later parsed as a run file.
result_files = sorted(
    entry for entry in result_dir.iterdir() if entry.is_file()
)

In [19]:
from pyterrier.index import IterDictIndexer

# Build the index only once: an existing index directory is taken as
# "already indexed" and the (expensive) indexing step is skipped.
index_missing = not index_dir.exists()
if index_missing:
    # The indexer is given the absolute index location as a plain string.
    indexer = IterDictIndexer(str(index_dir.absolute()))
    # Index only the configured contents field of each corpus document.
    corpus_iter = dataset.get_corpus_iter()
    indexer.index(corpus_iter, fields=[contents_field])

In [18]:
from pyterrier.io import read_results
from pyterrier import Transformer
from tqdm.auto import tqdm

# Load every run file and wrap it as a PyTerrier transformer, one per system,
# in the same order as `result_files`.
results = [
    Transformer.from_df(read_results(result_file))
    for result_file in tqdm(result_files, desc="Load results")
]

# Derive a display name per system from its file name: the stem with the
# leading "input." removed. Only the leading prefix is stripped; a plain
# `str.replace` would also delete any later "input." inside the run name.
run_file_prefix = "input."
results_names = [
    result_file.stem[len(run_file_prefix):]
    if result_file.stem.startswith(run_file_prefix)
    else result_file.stem
    for result_file in result_files
]

In [20]:
from ir_axioms.axiom import (
    ArgUC, QTArg, QTPArg, aSL, PROX1, PROX2, PROX3, PROX4, PROX5, TFC1, TFC3, RS_TF, RS_TF_IDF, RS_BM25, RS_PL2, RS_QL,
    AND, LEN_AND, M_AND, LEN_M_AND, DIV, LEN_DIV, M_TDC, LEN_M_TDC, STMC1, STMC1_f, STMC2, STMC2_f, LNC1, TF_LNC, LB1,
    REG, ANTI_REG, ASPECT_REG, REG_f, ANTI_REG_f, ASPECT_REG_f
)

# Axiom classes in the order in which they are handed to the experiment.
axiom_classes = (
    # Argumentativeness axioms.
    ArgUC,  # Very slow due to network access.
    QTArg,  # Very slow due to network access.
    QTPArg,  # Very slow due to network access.
    aSL,
    # Length normalization / lower bounding.
    LNC1,
    TF_LNC,
    LB1,
    # Query term proximity.
    PROX1,
    PROX2,
    PROX3,
    PROX4,
    PROX5,
    # Regularization.
    REG,
    REG_f,
    ANTI_REG,
    ANTI_REG_f,
    ASPECT_REG,
    ASPECT_REG_f,
    # Query aspects.
    AND,
    LEN_AND,
    M_AND,
    LEN_M_AND,
    DIV,
    LEN_DIV,
    # Retrieval score based.
    RS_TF,
    RS_TF_IDF,
    RS_BM25,
    RS_PL2,
    RS_QL,
    # Term frequency.
    TFC1,
    TFC3,
    M_TDC,
    LEN_M_TDC,
    # Semantic term matching.
    STMC1,  # Rather slow due to many similarity calculations.
    STMC1_f,  # Rather slow due to many similarity calculations.
    STMC2,
    STMC2_f,
)

# Instantiate each axiom with its defaults and apply `~` to it.
# NOTE(review): `~` presumably wraps the axiom in a caching layer -- confirm
# against the ir_axioms documentation.
axioms = [~axiom_class() for axiom_class in axiom_classes]
# The object produced by `~` exposes the underlying axiom as `.axiom`; its
# `name` is used as the label for that axiom.
axiom_names = [wrapped.axiom.name for wrapped in axioms]

In [21]:
from ir_axioms.backend.pyterrier.experiment import AxiomaticExperiment

# Assemble the axiomatic experiment from the pieces prepared in earlier cells.
experiment = AxiomaticExperiment(
    # What is being analysed: the loaded runs plus the dataset's topics/qrels.
    retrieval_systems=results,
    topics=dataset.get_topics(),
    qrels=dataset.get_qrels(),
    filter_by_qrels=False,
    filter_by_topics=False,
    depth=depth,
    # Which axioms to apply, and the label to use for each of them.
    axioms=axioms,
    axiom_names=axiom_names,
    # Where document statistics and contents come from.
    index=index_dir,
    dataset=ir_dataset,
    contents_accessor=contents_field,
    # Runtime behaviour: local cache location and progress reporting.
    cache_dir=cache_dir,
    verbose=True,
)

In [22]:
# Long-running cell: in the recorded run, reading `experiment.preferences`
# started progress bars over 37 systems and 43 queries and was stopped with a
# KeyboardInterrupt. If the access is interrupted, `preferences` is not
# assigned and the export cell that follows cannot run.
preferences = experiment.preferences

Computing system axiomatic preferences:   0%|          | 0/37 [00:00<?, ?system/s]

Computing query axiom preferences:   0%|          | 0/43 [00:00<?, ?query/s]

KeyboardInterrupt: 

In [None]:
# Export the computed preferences to a CSV file in the current working
# directory; edition, track and depth are encoded in the file name.
preferences_csv_name = f"trec-{edition}-{track}-preferences-all-axioms-depth-{depth}.csv"
preferences.to_csv(preferences_csv_name)