In [1]:
!pip install ranx

[0m

In [2]:
from statistics import mean
from typing import Callable, Sequence

from tira.third_party_integrations import ensure_pyterrier_is_loaded, normalize_run, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt
import pandas as pd
from tqdm import tqdm
from ranx import fuse, Run

In [3]:
# Initialise PyTerrier through the TIRA helper before the first `pt.*` call
# (the dataset lookups in the configuration cell below rely on it).
ensure_pyterrier_is_loaded()
# TIRA REST client; the later cells use `tira.pd.from_retriever_submission`
# to obtain the run files of submitted approaches.
tira = Client()


PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [4]:
# LongEval snapshots ("lags") processed by this notebook. For every lag:
#   'tira_id'    - dataset id passed to `tira.pd.from_retriever_submission`
#   'pt_dataset' - PyTerrier dataset handle, resolved when this cell runs
#                  (not read by any of the cells visible in this notebook).
# NOTE(review): the date stamp in each 'tira_id' differs from the one in the
# matching ir_datasets id (e.g. 20240423 vs. 20240426) - presumably two
# exports of the same snapshot; confirm they describe the same data.
LONGEVAL_LAGS = {
    'lag1': {
        'tira_id': 'longeval-2023-01-20240423-training',
        'pt_dataset': pt.get_dataset('irds:ir-lab-padua-2024/longeval-2023-01-20240426-training'),
    },
    'lag6': {
        'tira_id': 'longeval-2023-06-20240418-training',
        'pt_dataset': pt.get_dataset('irds:ir-lab-padua-2024/longeval-2023-06-20240422-training'),
    },
    'lag8': {
        'tira_id': 'longeval-2023-08-20240418-training',
        'pt_dataset': pt.get_dataset('irds:ir-lab-padua-2024/longeval-2023-08-20240422-training'),
    }
}

In [18]:
def to_ranx_dict(run, qrels = None):
    """Convert a run DataFrame into a ranx ``Run``.

    Args:
        run: DataFrame with the columns ``qid``, ``docno`` and ``score``, one
            row per retrieved document. If a (qid, docno) pair occurs more
            than once, the score of the last row wins.
        qrels: Optional object exposing ``keys()`` that yields query ids
            (e.g. ranx ``Qrels`` or a plain dict). Every id without a row in
            ``run`` is added with an empty ranking, so that runs padded with
            the same ``qrels`` cover the same queries.

    Returns:
        A ranx ``Run`` built from the nested ``{qid: {docno: score}}`` mapping.
    """
    ret = {}
    # Walk the three columns in lockstep; DataFrame.iterrows() would build a
    # Series per row, which is slow and may upcast the values.
    for qid, docno, score in zip(run['qid'], run['docno'], run['score']):
        ret.setdefault(qid, {})[docno] = score

    if qrels is not None:
        # Pad with empty rankings for queries the run did not answer.
        for key in qrels.keys():
            ret.setdefault(key, {})

    return Run(ret)

def from_ranx(run):
    """Flatten a ``{qid: {docno: score}}`` run into a DataFrame.

    Args:
        run: Mapping-like run (e.g. the result of ``ranx.fuse``) that offers
            ``keys()`` and item access on both nesting levels.

    Returns:
        DataFrame with one row per (qid, docno) pair and the columns
        ``qid``, ``docno`` and ``score``; queries without documents
        contribute no rows.
    """
    records = [
        {
            'qid': query_id,
            'docno': doc_id,
            'score': run[query_id][doc_id]
        }
        for query_id in run.keys()
        for doc_id in run[query_id].keys()
    ]
    return pd.DataFrame(records)


In [9]:
from statistics import mean
from typing import Sequence

def kmaxavg(k: int) -> Callable[[Sequence[float]], float]:
    """Build an aggregation that averages the ``k`` largest values.

    Args:
        k: Number of top values to average; must be at least 1.

    Returns:
        A function that maps a sequence of scores to the mean of its ``k``
        largest entries. Sequences shorter than ``k`` are averaged in full;
        an empty sequence makes ``statistics.mean`` raise ``StatisticsError``.

    Raises:
        ValueError: If ``k`` is smaller than 1. Without this check a negative
            ``k`` would silently drop the *smallest* values instead of
            keeping the largest ones.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    def _kmaxavg(a: Sequence[float]) -> float:
        # Sort descending so that the slice keeps the k best scores.
        return mean(sorted(a, reverse=True)[:k])
    return _kmaxavg

In [22]:
from ranx import fuse, optimize_fusion, Qrels

def get_df_with_sparse_cross_encoder_passage_scores(dataset_id, aggregation, system_name,
                                                    approach='ir-benchmarks/fschlatt/abstract-metaball'):
    """Aggregate the scores of a TIRA submission into one score per document.

    The ids of the submission are cut at the markers ``___split`` (qid) and
    ``___p`` (docno); all rows that share the resulting (qid, docno) pair are
    combined with ``aggregation``.
    NOTE(review): presumably these suffixes identify query splits and
    document passages - confirm against the submission's format.

    Args:
        dataset_id: TIRA dataset id the submission was run on.
        aggregation: Callable that reduces a list of scores to a single
            score, e.g. ``max`` or ``kmaxavg(3)``.
        system_name: Tag stored in the ``system`` column and handed to
            ``normalize_run``.
        approach: TIRA approach whose run is loaded. Defaults to the
            submission that was previously hard-coded.

    Returns:
        The DataFrame produced by ``normalize_run`` for the aggregated run.
    """
    predictions = tira.pd.from_retriever_submission(approach, dataset_id)

    # qid -> docno -> all scores recorded for that pair
    passage_scores = {}
    # Column-wise iteration avoids the per-row Series that iterrows() builds.
    for raw_qid, raw_docno, score in zip(predictions['qid'], predictions['docno'], predictions['score']):
        qid = raw_qid.split('___split')[0]
        docno = raw_docno.split('___p')[0]
        passage_scores.setdefault(qid, {}).setdefault(docno, []).append(score)

    ret_df = [
        {
            'qid': qid,
            'docno': docno,
            'score': aggregation(scores),
            'system': system_name
        }
        for qid, docs in passage_scores.items()
        for docno, scores in docs.items()
    ]
    return normalize_run(pd.DataFrame(ret_df), system_name)

RANK_ZEPHYR_APPROACH = 'workshop-on-open-web-search/fschlatt/rank-zephyr'


def _load_frames_to_fuse(dataset_id, aggregation, system_name):
    """Load the two run DataFrames that are fused for one dataset.

    Returns:
        Tuple ``(df_1, df_2)``: the aggregated sparse cross-encoder run and
        the rank-zephyr run, both passed through ``normalize_run``.
    """
    print("df1")
    sparse_cross_encoder_df = get_df_with_sparse_cross_encoder_passage_scores(dataset_id, aggregation, system_name)

    print("df2")
    rank_zephyr_df = tira.pd.from_retriever_submission(RANK_ZEPHYR_APPROACH, dataset_id)
    rank_zephyr_df = normalize_run(rank_zephyr_df, system_name)

    return sparse_cross_encoder_df, rank_zephyr_df


# The relevance judgments do not depend on the system, so load them once.
qrels = Qrels.from_ir_datasets("ir-lab-padua-2024/longeval-2023-01-20240426-training")

for system, agg in tqdm((
    ('sparse-cross-encoder-max-score', max),
    ('sparse-cross-encoder-3-max-avg-score', kmaxavg(3)),
)):
    # Step 1: tune the fusion weights on lag1, the dataset the qrels belong to.
    df_1, df_2 = _load_frames_to_fuse(LONGEVAL_LAGS["lag1"]['tira_id'], agg, system)

    # Padding with the qrels gives every judged query an entry in both runs.
    run1 = to_ranx_dict(df_1, qrels)
    run2 = to_ranx_dict(df_2, qrels)

    # Weighted sum over min-max normalised scores. (Reciprocal rank fusion,
    # method="rrf", is the alternative that was tried here before.)
    best_params = optimize_fusion(
        qrels=qrels,
        runs=[run1, run2],
        norm="min-max",
        method="wsum",
        metric="ndcg@10",
        show_progress = True,
    )
    print(best_params)

    # Step 2: apply the tuned weights to every lag and write one run file each.
    for lag in tqdm(LONGEVAL_LAGS.keys()):
        df_1, df_2 = _load_frames_to_fuse(LONGEVAL_LAGS[lag]['tira_id'], agg, system)

        # NOTE(review): the recorded execution of this cell aborted inside this
        # loop with a numba TypeError while the runs were built without any
        # padding. Presumably the two runs did not cover the same queries;
        # padding both to the union of their query ids makes them aligned.
        # Confirm that this resolves the failure.
        shared_qids = dict.fromkeys(set(df_1['qid']) | set(df_2['qid']))
        run1 = to_ranx_dict(df_1, shared_qids)
        run2 = to_ranx_dict(df_2, shared_qids)

        print("fuse")
        df = fuse(
            runs=[run1, run2],
            norm="min-max",
            method="wsum",
            params=best_params,
        )
        df = from_ranx(df)
        normalize_run(df, system).to_csv(f'{system}.{lag}', sep=" ", header=False, index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

df1
df2


Output()

{'weights': (0.0, 1.0)}




df1
df2
fuse




df1
df2
fuse




df1
df2
fuse


 67%|██████▋   | 2/3 [02:00<01:00, 60.37s/it]
  0%|          | 0/2 [02:13<?, ?it/s]


TypeError: can't unbox a <class 'numba.typed.typeddict.Dict'> as a <NULL>

In [28]:
from ranx import fuse, optimize_fusion, Qrels

qrels = Qrels.from_ir_datasets("ir-lab-padua-2024/longeval-2023-01-20240426-training")

# Runs fused in this cell, in the order of the tuned weights:
# (progress label, TIRA approach, tag for normalize_run or None to skip it).
WSUM_APPROACHES = (
    ("BM25", 'ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', None),
    ("Sparse Cross-Encoder", 'ir-benchmarks/fschlatt/sparse-cross-encoder-4-512', None),
    ("ColBERT", 'ir-benchmarks/tira-ir-starter/ColBERT Re-Rank (tira-ir-starter-pyterrier)', None),
    ("RankZephyr", 'workshop-on-open-web-search/fschlatt/rank-zephyr', "rank-zephyr"),
)

# Name of the fused system; also the prefix of the written run files.
system = "galapagos-tortoise-wsum"


def _load_wsum_frames(dataset_id):
    """Load the run DataFrames of all ``WSUM_APPROACHES`` for one dataset.

    Only approaches with a tag are passed through ``normalize_run``; the
    others are used exactly as TIRA returns them.

    Returns:
        List of DataFrames in the order of ``WSUM_APPROACHES``.
    """
    frames = []
    for label, approach, normalize_as in WSUM_APPROACHES:
        print(label)
        frame = tira.pd.from_retriever_submission(approach, dataset_id)
        if normalize_as is not None:
            frame = normalize_run(frame, normalize_as)
        frames.append(frame)
    return frames


# Step 1: tune the fusion weights on lag1, the dataset the qrels belong to.
df_3, df_4, df_5, df_6 = _load_wsum_frames(LONGEVAL_LAGS["lag1"]['tira_id'])
run3, run4, run5, run6 = (to_ranx_dict(frame, qrels) for frame in (df_3, df_4, df_5, df_6))

best_params = optimize_fusion(
    qrels=qrels,
    runs=[run3, run4, run5, run6],
    norm="min-max",
    method="wsum",
    metric="ndcg@10",
    show_progress = True,
    # step=0.1, # -> best_params = (0.1, 0.1, 0.1, 0.7)
    step=0.05, # -> best_params = (0.10, 0.10, 0.10, 0.70)
)
print(best_params)

# Step 2: apply the tuned weights to every lag and write one run file each.
for lag in tqdm(LONGEVAL_LAGS.keys()):
    df_3, df_4, df_5, df_6 = _load_wsum_frames(LONGEVAL_LAGS[lag]['tira_id'])

    # The lag1 qrels only serve as padding here, so that all four runs
    # contain an entry for the same set of judged queries.
    run3, run4, run5, run6 = (to_ranx_dict(frame, qrels) for frame in (df_3, df_4, df_5, df_6))

    print("fuse")
    df = fuse(
        runs=[run3, run4, run5, run6],
        norm="min-max",
        method="wsum",
        params=best_params,
    )
    df = from_ranx(df)
    normalize_run(df, system).to_csv(f'{system}.{lag}', sep=" ", header=False, index=False)

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr


Output()

{'weights': (0.1, 0.1, 0.1, 0.7)}


  0%|          | 0/3 [00:00<?, ?it/s]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


 33%|███▎      | 1/3 [00:48<01:36, 48.41s/it]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


 67%|██████▋   | 2/3 [01:10<00:32, 32.66s/it]

BM25
Sparse Cross-Encoder
ColBERT
RankZephyr
fuse


100%|██████████| 3/3 [03:00<00:00, 60.29s/it]
