In [2]:
!pip install ranx

[0m

In [3]:
from tira.third_party_integrations import ensure_pyterrier_is_loaded, normalize_run, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt
import pandas as pd
from tqdm import tqdm
from statistics import mean
from ranx import fuse, Run

In [4]:
# Start PyTerrier up front so the `pt.get_dataset(...)` calls in the next cell can run.
ensure_pyterrier_is_loaded()
# TIRA REST API client; later cells load submitted runs through
# `tira.pd.from_retriever_submission(...)`.
tira = Client()


PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [5]:
# lag name -> (TIRA dataset id, PyTerrier/ir_datasets dataset id)
_LAG_DATASET_IDS = {
    'lag1': (
        'longeval-2023-01-20240423-training',
        'irds:ir-lab-padua-2024/longeval-2023-01-20240426-training',
    ),
    'lag6': (
        'longeval-2023-06-20240418-training',
        'irds:ir-lab-padua-2024/longeval-2023-06-20240422-training',
    ),
    'lag8': (
        'longeval-2023-08-20240418-training',
        'irds:ir-lab-padua-2024/longeval-2023-08-20240422-training',
    ),
}

# Per-lag configuration: the id used to fetch runs from TIRA and the matching
# PyTerrier dataset object.
LONGEVAL_LAGS = {
    lag_name: {
        'tira_id': tira_dataset_id,
        'pt_dataset': pt.get_dataset(pt_dataset_id),
    }
    for lag_name, (tira_dataset_id, pt_dataset_id) in _LAG_DATASET_IDS.items()
}

In [6]:
def to_ranx_dict(run, qrels = None):
    """Convert a run DataFrame into a ranx ``Run``.

    Args:
        run: DataFrame with the columns ``qid``, ``docno`` and ``score``
            (one row per retrieved document).
        qrels: Optional object exposing ``keys()``. Every key that does not
            occur in ``run`` is added with an empty ranking, so the result
            contains an entry for each of those keys.

    Returns:
        ``Run`` built from the nested mapping ``{qid: {docno: score}}``. When a
        (qid, docno) pair occurs more than once, the last score wins.
    """
    ret = {}

    # Walk the three columns in lockstep instead of DataFrame.iterrows():
    # iterrows() creates a Series per row (slow on large runs) and upcasts the
    # row to one common dtype, which can turn integer ids into floats.
    if len(run) > 0:
        for qid, docno, score in zip(run['qid'], run['docno'], run['score']):
            ret.setdefault(qid, {})[docno] = score

    if qrels is not None:
        for key in qrels.keys():
            ret.setdefault(key, {})

    return Run(ret)

def from_ranx(run):
    """Flatten a ranx-style run into a DataFrame.

    Args:
        run: Mapping-like run supporting ``keys()`` and ``run[qid]``, where
            ``run[qid]`` maps each docno to its score.

    Returns:
        DataFrame with the columns ``qid``, ``docno`` and ``score``, one row per
        (query, document) pair. Queries without documents contribute no rows.
    """
    ret = []

    for qid in run.keys():
        # Fetch the ranking of this query once instead of once per document.
        ranking = run[qid]
        for docno in ranking.keys():
            ret.append({
                'qid': qid,
                'docno': docno,
                'score': ranking[docno]
            })

    # Explicit columns keep the schema stable even when the run has no rows.
    return pd.DataFrame(ret, columns=['qid', 'docno', 'score'])


In [7]:
from ranx import fuse, optimize_fusion, Qrels


def load_reranker_runs(tira_id, qrels):
    """Load the four re-ranker submissions for one TIRA dataset as ranx runs.

    Args:
        tira_id: TIRA dataset id the submissions are fetched for.
        qrels: Passed on to ``to_ranx_dict`` so that each run also gets an
            (empty) entry for every key in ``qrels``.

    Returns:
        List of four runs in the fixed order BM25, Sparse Cross-Encoder,
        ColBERT, RankZephyr. The fusion weights are positional, so this order
        must be the same for weight optimisation and for fusion.
    """
    print("BM25")
    bm25 = tira.pd.from_retriever_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', tira_id)
    print("Sparse Cross-Encoder")
    sparse_cross_encoder = tira.pd.from_retriever_submission('ir-benchmarks/fschlatt/sparse-cross-encoder-4-512', tira_id)
    print("ColBERT")
    colbert = tira.pd.from_retriever_submission('ir-benchmarks/tira-ir-starter/ColBERT Re-Rank (tira-ir-starter-pyterrier)', tira_id)
    print("RankZephyr")
    # Only the RankZephyr submission is passed through normalize_run before use.
    rank_zephyr = normalize_run(tira.pd.from_retriever_submission('workshop-on-open-web-search/fschlatt/rank-zephyr', tira_id), "rank-zephyr")

    return [to_ranx_dict(frame, qrels) for frame in (bm25, sparse_cross_encoder, colbert, rank_zephyr)]


# Relevance judgments of the lag1 dataset; the fusion weights are tuned on them.
qrels = Qrels.from_ir_datasets("ir-lab-padua-2024/longeval-2023-01-20240426-training")

run3, run4, run5, run6 = load_reranker_runs(LONGEVAL_LAGS["lag1"]['tira_id'], qrels)

# Grid-search the weights of the weighted sum on lag1, maximising nDCG@10.
best_params = optimize_fusion(
    qrels=qrels,
    runs=[run3, run4, run5, run6],
    norm="min-max",
    method="wsum",
    metric="ndcg@10", 
    show_progress = True,
    step=0.1, # -> best_params = (0.1, 0.1, 0.1, 0.7)
    # step=0.05, # -> best_params = (0.10, 0.10, 0.10, 0.70)
)
print(best_params)

# Apply the lag1-tuned weights to every lag and write one run file per lag.
system = "galapagos-tortoise-wsum"
for lag in tqdm(LONGEVAL_LAGS.keys()):
    # NOTE(review): `qrels` are the lag1 judgments, so the runs of every lag are
    # padded with lag1 query ids. Kept unchanged; confirm this is intended.
    run3, run4, run5, run6 = load_reranker_runs(LONGEVAL_LAGS[lag]['tira_id'], qrels)

    print("fuse")
    fused_run = fuse(
        runs=[run3, run4, run5, run6],
        norm="min-max",
        method="wsum",   
        params=best_params,
    )
    df = from_ranx(fused_run)
    # Space-separated run file without header or index, named "<system>.<lag>".
    normalize_run(df, system).to_csv(f'{system}.{lag}', sep=" ", header=False, index=False)

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr


Output()

{'weights': (0.1, 0.1, 0.1, 0.7)}


  0%|          | 0/3 [00:00<?, ?it/s]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


 33%|███▎      | 1/3 [02:47<05:34, 167.49s/it]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


 67%|██████▋   | 2/3 [04:22<02:05, 125.04s/it]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


100%|██████████| 3/3 [09:16<00:00, 185.62s/it]


In [8]:
# Export the unfused RankZephyr run of every lag under its own system name.
system = "galapagos-tortoise-rank-zephyr"
for lag, lag_config in tqdm(LONGEVAL_LAGS.items()):
    rank_zephyr_run = tira.pd.from_retriever_submission('workshop-on-open-web-search/fschlatt/rank-zephyr', lag_config['tira_id'])
    normalized_run = normalize_run(rank_zephyr_run, system)
    # Space-separated run file without header or index, named "<system>.<lag>".
    normalized_run.to_csv(f'{system}.{lag}', sep=" ", header=False, index=False)

100%|██████████| 3/3 [00:12<00:00,  4.23s/it]
