In [1]:
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run, ir_datasets
from tira.rest_api_client import Client
ensure_pyterrier_is_loaded()
import pandas as pd
import pyterrier as pt

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
# Create a REST client to the TIRA platform for retrieving the pre-indexed data.
ensure_pyterrier_is_loaded()
tira = Client()

In [50]:
# The dataset: the union of the IR Anthology and the ACL Anthology
# This line creates an IRDSDataset object and registers it under the name provided as an argument.
dataset = 'ir-lab-sose-2024/ir-acl-anthology-20240504-training'
pt_dataset = pt.get_dataset('irds:' + dataset)

index = tira.pt.index('ir-lab-sose-2024/tira-ir-starter/Index (tira-ir-starter-pyterrier)', pt_dataset)
print("Files in IR-ACL corpus: %s " % len(list(pt_dataset.get_corpus_iter())))


# We are using Pisa Index to index the dataset
#index = PisaIndex('./index', overwrite=True)
#index.index(pt_dataset.get_corpus_iter())

# get all topics of training dataset
#topics = pt_dataset.get_topics()
#topics

ir-lab-sose-2024/ir-acl-anthology-20240504-training documents: 100%|██████████| 126958/126958 [00:02<00:00, 55395.31it/s]

Files in IR-ACL corpus: 126958 





# We now do Query expansion in order to improve retrieval effectiveness 
Query expansion generally improves recall, by adding more terms to the query, it broadens the search scope, potentially retrieving more relevant documents. While it could also have a slight negative effect on precision since query expansion might also introdoces irrelevant results. Therefore, we use ndcg_cut_10 and recall_1000 as the evalutation metric.

In [47]:
bm25 = pt.BatchRetrieve(index, wmodel="BM25", controls={"bm25.b" : 0.1, "bm25.k_1": 2.5})

js_kls = pt.BatchRetrieve(index, wmodel="Js_KLs")

pt.Experiment([bm25, js_kls], pt_dataset.get_topics(), pt_dataset.get_qrels(), eval_metrics=['ndcg_cut_10', 'recall_1000'], names=['BM25', 'Js_KLs'])

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,name,ndcg_cut_10,recall_1000
0,BM25,0.462726,0.841521
1,Js_KLs,0.424393,0.842299


In [48]:
if not pt.started():
    pt.init(boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"])

bo1_pipe = bm25 >> pt.rewrite.Bo1QueryExpansion(index) >> bm25

bo1_pipe1 = js_kls >> pt.rewrite.Bo1QueryExpansion(index) >> js_kls

bm251 = pt.BatchRetrieve(index, wmodel="BM25", controls={"bm25.b" : 0.7, "bm25.k_1": 1.1, "qe":"on", "qemodel" : "Bo1"})


pt.Experiment([bm25, bo1_pipe, js_kls, bo1_pipe1, bm251], pt_dataset.get_topics(), pt_dataset.get_qrels(), eval_metrics=['ndcg_cut_10', 'recall_1000'], names=['BM25', 'BM25 >> Bo1 >> BM25','JS_KLS', 'JS_KLS >> Bo1 >> JS_KLS', 'BM251'])

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,name,ndcg_cut_10,recall_1000
0,BM25,0.462726,0.841521
1,BM25 >> Bo1 >> BM25,0.425518,0.82961
2,JS_KLS,0.424393,0.842299
3,JS_KLS >> Bo1 >> JS_KLS,0.428744,0.850608
4,BM251,0.401907,0.834609


In [6]:
kl_pipe = bm25 >> pt.rewrite.KLQueryExpansion(index) >> bm25

pt.Experiment([bm25, kl_pipe], pt_dataset.get_topics(), pt_dataset.get_qrels(), eval_metrics=['ndcg_cut_5', 'recall_1000'], names=['BM25', 'BM25 >> KL >> BM25'])

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,name,ndcg_cut_5,recall_1000
0,BM25,0.39365,0.825376
1,BM25 >> KL >> BM25,0.383947,0.831915


In [7]:
if not pt.started():
    pt.init(boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"])

rm3_pipe = bm25 >> pt.rewrite.RM3(index) >> bm25

pt.Experiment([bm25, rm3_pipe], pt_dataset.get_topics(), pt_dataset.get_qrels(), eval_metrics=['ndcg_cut_5', 'recall_1000'], names=['BM25', 'BM25 >> RM3 >> BM25'])

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
09:10:28.129 [main] WARN org.terrier.querying.RM1 - Did not identify any usable candidate expansion terms from docid 125137 among 5 possibilities
09:10:28.478 [main] WARN org.terrier.querying.RM1 - Did not identify any usable candidate expansion terms from docid 116910 among 4 possibilities


Unnamed: 0,name,ndcg_cut_5,recall_1000
0,BM25,0.39365,0.825376
1,BM25 >> RM3 >> BM25,0.341725,0.825062


# Query expansion with LLM´s

In [8]:
# Load the expansions

# llm expansions with gpt
gpt_cot = tira.pt.transform_queries('workshop-on-open-web-search/tu-dresden-03/qe-gpt3.5-cot', dataset, prefix='llm_expansion_')
gpt_sq_fs = tira.pt.transform_queries('workshop-on-open-web-search/tu-dresden-03/qe-gpt3.5-sq-fs', dataset, prefix='llm_expansion_')
gpt_sq_zs = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-gpt3.5-sq-zs', dataset, prefix='llm_expansion_')

# llm expansions with llama
llama_cot = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-llama-cot', dataset, prefix='llm_expansion_')
llama_sq_fs = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-llama-sq-fs', dataset, prefix='llm_expansion_')
llama_sq_zs = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-llama-sq-zs', dataset, prefix='llm_expansion_')

# llm expansions with flan-ul2
flan_cot = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-flan-ul2-cot', dataset, prefix='llm_expansion_')
flan_sq_fs = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-flan-ul2-sq-fs', dataset, prefix='llm_expansion_')
flan_sq_zs = tira.pt.transform_queries('ir-benchmarks/tu-dresden-03/qe-flan-ul2-sq-zs', dataset, prefix='llm_expansion_')

Download: 22.6kiB [00:00, 1.07MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/workshop-on-open-web-search/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 3.42kiB [00:00, 13.9MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/workshop-on-open-web-search/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 7.72kiB [00:00, 33.3MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 20.7kiB [00:00, 1.07MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 3.67kiB [00:00, 10.2MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 10.0kiB [00:00, 36.1MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 6.11kiB [00:00, 19.8MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 3.60kiB [00:00, 19.1MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03


Download: 3.63kiB [00:00, 18.0MiB/s]

Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/ir-acl-anthology-20240504-training/tu-dresden-03





In [10]:
topics = pt_dataset.get_topics('query')

gpt_cot(topics.head(3))
gpt_cot(topics.head(3)).iloc[0].to_dict()


{'qid': '1',
 'query': 'retrieval system improving effectiveness',
 'llm_expansion_query': 'Improving the effectiveness of a retrieval system involves enhancing its ability to accurately and efficiently retrieve relevant information in response to user queries. This can be achieved through various strategies such as:\n\n1. **Improved indexing**: By ensuring that all relevant content is properly indexed, including metadata and keywords, the retrieval system can more easily locate and retrieve relevant information.\n\n2. **Enhanced search algorithms**: Implementing advanced search algorithms can help the system better understand user queries and match them with relevant content, improving the accuracy of search results.\n\n3. **User feedback**: Incorporating user feedback mechanisms can help the system learn from user interactions and improve its performance over time by adjusting search results based on user preferences.\n\n4. **Personalization**: Tailoring search results to individual 

In [11]:
tokeniser = pt.autoclass("org.terrier.indexing.tokenisation.Tokeniser").getTokeniser()

def pt_tokenize(text):
    return ' '.join(tokeniser.getTokens(text))

def expand_query(topic):
  ret = ' '.join([topic['query'], topic['query'], topic['query'],  topic['query'],  topic['query'], topic['llm_expansion_query']])

  # apply the tokenization
  return pt_tokenize(ret)

# we wrap this into an pyterrier transformer
# Documentation: https://pyterrier.readthedocs.io/en/latest/apply.html
pt_expand_query = pt.apply.query(expand_query)

In [16]:
# Now we can look into some expansion

(gpt_cot >> pt_expand_query)(topics.head(3))

Unnamed: 0,qid,query_0,llm_expansion_query,query
0,1,retrieval system improving effectiveness,Improving the effectiveness of a retrieval sys...,retrieval system improving effectiveness retri...
1,2,machine learning language identification,Machine learning language identification is a ...,machine learning language identification machi...
2,3,social media detect self harm,Social media platforms have implemented variou...,social media detect self harm social media det...


In [17]:
(gpt_cot >> pt_expand_query)(topics.head(3)).iloc[0].to_dict()


{'qid': '1',
 'query_0': 'retrieval system improving effectiveness',
 'llm_expansion_query': 'Improving the effectiveness of a retrieval system involves enhancing its ability to accurately and efficiently retrieve relevant information in response to user queries. This can be achieved through various strategies such as:\n\n1. **Improved indexing**: By ensuring that all relevant content is properly indexed, including metadata and keywords, the retrieval system can more easily locate and retrieve relevant information.\n\n2. **Enhanced search algorithms**: Implementing advanced search algorithms can help the system better understand user queries and match them with relevant content, improving the accuracy of search results.\n\n3. **User feedback**: Incorporating user feedback mechanisms can help the system learn from user interactions and improve its performance over time by adjusting search results based on user preferences.\n\n4. **Personalization**: Tailoring search results to individua

In [18]:
pipeline_gpt_cot = (gpt_cot >> pt_expand_query) >> bm25
pipeline_gpt_sq_fs = (gpt_sq_fs >> pt_expand_query) >> bm25
pipeline_gpt_sq_zs = (gpt_sq_zs >> pt_expand_query) >> bm25

pipeline_llama_cot = (llama_cot >> pt_expand_query) >> bm25
pipeline_llama_sq_fs = (llama_sq_fs >> pt_expand_query) >> bm25
pipeline_llama_sq_zs = (llama_sq_zs >> pt_expand_query) >> bm25

pipeline_flan_cot = (flan_cot >> pt_expand_query) >> bm25
pipeline_flan_sq_fs = (flan_sq_fs >> pt_expand_query) >> bm25
pipeline_flan_sq_zs = (flan_sq_zs >> pt_expand_query) >> bm25

In [36]:
pt.Experiment(
    [bm25, rm3_pipe, kl_pipe, bo1_pipe, pipeline_gpt_cot, pipeline_gpt_sq_fs, pipeline_gpt_sq_zs, pipeline_llama_cot, pipeline_llama_sq_fs, pipeline_llama_sq_zs, pipeline_flan_cot, pipeline_flan_sq_fs, pipeline_flan_sq_zs, ],
    names=['BM25', 'BM25+RM3', 'BM25+KL','BM25+BO1', 'BM25+GPT-COT', 'BM25+GPT-SQ-FS', 'BM25+GPT-SQ-ZS', 'BM25+Llama-COT', 'BM25+Llama-SQ-FS', 'BM25+Llama-SQ-ZS', 'BM25+Flan-COT', 'BM25+Flan-SQ-FS', 'BM25+Flan-SQ-ZS'],
    topics=topics,
    qrels=pt_dataset.get_qrels(),
    eval_metrics=['ndcg_cut_5','recall_1000'],
    verbose=True,
)

pt.Experiment:   8%|▊         | 1/13 [00:01<00:17,  1.48s/system]

10:29:38.173 [main] WARN org.terrier.querying.RM1 - Did not identify any usable candidate expansion terms from docid 125137 among 5 possibilities
10:29:38.336 [main] WARN org.terrier.querying.RM1 - Did not identify any usable candidate expansion terms from docid 116910 among 4 possibilities


pt.Experiment: 100%|██████████| 13/13 [00:32<00:00,  2.51s/system]


Unnamed: 0,name,ndcg_cut_5,recall_1000
0,BM25,0.419737,0.830769
1,BM25+RM3,0.341725,0.82472
2,BM25+KL,0.383947,0.831915
3,BM25+BO1,0.425813,0.834609
4,BM25+GPT-COT,0.320591,0.825698
5,BM25+GPT-SQ-FS,0.398788,0.83566
6,BM25+GPT-SQ-ZS,0.397821,0.829955
7,BM25+Llama-COT,0.285551,0.783448
8,BM25+Llama-SQ-FS,0.380488,0.837392
9,BM25+Llama-SQ-ZS,0.377338,0.839125


# Running gridsearch on BM25 to find the best configuration

In [41]:
def run_bm25_grid_search_run(index, output_dir, queries):
    """
        defaults: http://terrier.org/docs/current/javadoc/org/terrier/matching/models/BM25.html
        k_1 = 1.2d, k_3 = 8d, b = 0.75d
        We do not tune parameter k_3, as this parameter only impacts queries with reduntant terms.
    """
    for b in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
        for k_1 in [0.5, 1.0, 1.5, 2.0, 2.5]:
            system = f'bm25-b={b}-k_1={k_1}'
            configuration = {"bm25.b" : b, "bm25.k_1": k_1}
            run_output_dir = output_dir + '/' + system
            !rm -Rf {run_output_dir}
            !mkdir -p {run_output_dir}
            print(f'Run {system}')
            BM25 = pt.BatchRetrieve(index, wmodel="BM25", controls=configuration, verbose=True)
            run = BM25(queries)
            persist_and_normalize_run(run, system, run_output_dir)

In [42]:
dataset_tira = ir_datasets.load(dataset)
queries = pt.io.read_topics(ir_datasets.topics_file(dataset), format='trecxml')

queries.head(3)

Unnamed: 0,qid,query
0,1,retrieval system improving effectiveness
1,2,machine learning language identification
2,3,social media detect self harm


In [43]:
run_bm25_grid_search_run(index, 'grid-search/training', queries)

Run bm25-b=0.1-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 43.71q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.1-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.1-k_1=0.5/run.txt".
Run bm25-b=0.1-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.68q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.1-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.1-k_1=1.0/run.txt".
Run bm25-b=0.1-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 53.47q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.1-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.1-k_1=1.5/run.txt".
Run bm25-b=0.1-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 49.64q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.1-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.1-k_1=2.0/run.txt".
Run bm25-b=0.1-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.65q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.1-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.1-k_1=2.5/run.txt".
Run bm25-b=0.2-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 46.85q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.2-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.2-k_1=0.5/run.txt".
Run bm25-b=0.2-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.25q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.2-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.2-k_1=1.0/run.txt".
Run bm25-b=0.2-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 54.33q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.2-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.2-k_1=1.5/run.txt".
Run bm25-b=0.2-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.01q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.2-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.2-k_1=2.0/run.txt".
Run bm25-b=0.2-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 55.83q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.2-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.2-k_1=2.5/run.txt".
Run bm25-b=0.3-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.79q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.3-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.3-k_1=0.5/run.txt".
Run bm25-b=0.3-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.98q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.3-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.3-k_1=1.0/run.txt".
Run bm25-b=0.3-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 52.49q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.3-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.3-k_1=1.5/run.txt".
Run bm25-b=0.3-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.15q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.3-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.3-k_1=2.0/run.txt".
Run bm25-b=0.3-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 52.76q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.3-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.3-k_1=2.5/run.txt".
Run bm25-b=0.4-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 45.93q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.4-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.4-k_1=0.5/run.txt".
Run bm25-b=0.4-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 53.15q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.4-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.4-k_1=1.0/run.txt".
Run bm25-b=0.4-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 46.97q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.4-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.4-k_1=1.5/run.txt".
Run bm25-b=0.4-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 53.90q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.4-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.4-k_1=2.0/run.txt".
Run bm25-b=0.4-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 59.09q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.4-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.4-k_1=2.5/run.txt".
Run bm25-b=0.5-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.06q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.5-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.5-k_1=0.5/run.txt".
Run bm25-b=0.5-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 58.31q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.5-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.5-k_1=1.0/run.txt".
Run bm25-b=0.5-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 46.67q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.5-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.5-k_1=1.5/run.txt".
Run bm25-b=0.5-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 46.39q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.5-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.5-k_1=2.0/run.txt".
Run bm25-b=0.5-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 55.52q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.5-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.5-k_1=2.5/run.txt".
Run bm25-b=0.6-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.52q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.6-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.6-k_1=0.5/run.txt".
Run bm25-b=0.6-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 54.66q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.6-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.6-k_1=1.0/run.txt".
Run bm25-b=0.6-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 45.33q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.6-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.6-k_1=1.5/run.txt".
Run bm25-b=0.6-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 46.49q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.6-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.6-k_1=2.0/run.txt".
Run bm25-b=0.6-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.53q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.6-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.6-k_1=2.5/run.txt".
Run bm25-b=0.7-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 54.16q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.7-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.7-k_1=0.5/run.txt".
Run bm25-b=0.7-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 58.13q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.7-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.7-k_1=1.0/run.txt".
Run bm25-b=0.7-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 43.85q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.7-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.7-k_1=1.5/run.txt".
Run bm25-b=0.7-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 48.63q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.7-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.7-k_1=2.0/run.txt".
Run bm25-b=0.7-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 59.05q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.7-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.7-k_1=2.5/run.txt".
Run bm25-b=0.8-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 50.51q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.8-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.8-k_1=0.5/run.txt".
Run bm25-b=0.8-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 47.43q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.8-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.8-k_1=1.0/run.txt".
Run bm25-b=0.8-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 56.23q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.8-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.8-k_1=1.5/run.txt".
Run bm25-b=0.8-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.68q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.8-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.8-k_1=2.0/run.txt".
Run bm25-b=0.8-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 52.12q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.8-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.8-k_1=2.5/run.txt".
Run bm25-b=0.9-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 47.90q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.9-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=0.9-k_1=0.5/run.txt".
Run bm25-b=0.9-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 52.28q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.9-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=0.9-k_1=1.0/run.txt".
Run bm25-b=0.9-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 57.62q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.9-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=0.9-k_1=1.5/run.txt".
Run bm25-b=0.9-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 43.35q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.9-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=0.9-k_1=2.0/run.txt".
Run bm25-b=0.9-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 45.93q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=0.9-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=0.9-k_1=2.5/run.txt".
Run bm25-b=1.0-k_1=0.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 56.17q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=1.0-k_1=0.5".
Done. run file is stored under "grid-search/training/bm25-b=1.0-k_1=0.5/run.txt".
Run bm25-b=1.0-k_1=1.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 49.85q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=1.0-k_1=1.0".
Done. run file is stored under "grid-search/training/bm25-b=1.0-k_1=1.0/run.txt".
Run bm25-b=1.0-k_1=1.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 51.50q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=1.0-k_1=1.5".
Done. run file is stored under "grid-search/training/bm25-b=1.0-k_1=1.5/run.txt".
Run bm25-b=1.0-k_1=2.0


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 45.40q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=1.0-k_1=2.0".
Done. run file is stored under "grid-search/training/bm25-b=1.0-k_1=2.0/run.txt".
Run bm25-b=1.0-k_1=2.5


BR(BM25): 100%|██████████| 68/68 [00:01<00:00, 54.45q/s]


The run file is normalized outside the TIRA sandbox, I will store it at "grid-search/training/bm25-b=1.0-k_1=2.5".
Done. run file is stored under "grid-search/training/bm25-b=1.0-k_1=2.5/run.txt".


# Part 2: Evaluate all Configurations of the Grid Search

In [44]:
from trectools import TrecRun, TrecQrel, TrecEval
from tira.rest_api_client import Client
from glob import glob
import pandas as pd
tira = Client()

def load_qrels(dataset):
    return TrecQrel(tira.download_dataset('ir-lab-sose-2024', dataset, truth_dataset=True) + '/qrels.txt')

training_qrels = load_qrels('ir-acl-anthology-20240504-training')

In [45]:
def evaluate_run(run_dir, qrels):
    run = TrecRun(run_dir + '/run.txt')
    trec_eval = TrecEval(run, qrels)

    return {
        'run': run.get_runid(),
        'nDCG@10': trec_eval.get_ndcg(depth=10),
        'nDCG@10 (unjudgedRemoved)': trec_eval.get_ndcg(depth=10, removeUnjudged=True),
        'MAP': trec_eval.get_map(depth=10),
        'MRR': trec_eval.get_reciprocal_rank(),
        'P@10': trec_eval.get_precision(depth=10)
    }

In [46]:
df = []
for r in glob('grid-search/training/bm25*'):
    df += [evaluate_run(r, training_qrels)]
df = pd.DataFrame(df)
df.sort_values('nDCG@10', ascending=False)

  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first()

Unnamed: 0,run,nDCG@10,nDCG@10 (unjudgedRemoved),MAP,MRR,P@10
7,bm25-b=0.1-k_1=2.5,0.462726,0.641795,0.17972,0.655873,0.423529
26,bm25-b=0.1-k_1=2.0,0.461514,0.648075,0.180537,0.655465,0.422059
32,bm25-b=0.3-k_1=2.0,0.45902,0.663377,0.183024,0.650911,0.413235
39,bm25-b=0.2-k_1=1.5,0.457576,0.658266,0.182425,0.649682,0.414706
45,bm25-b=0.3-k_1=1.5,0.45753,0.667294,0.184446,0.659185,0.410294
11,bm25-b=0.1-k_1=1.5,0.456357,0.650468,0.180736,0.658769,0.413235
25,bm25-b=0.2-k_1=2.5,0.455446,0.653018,0.179566,0.643231,0.411765
57,bm25-b=0.2-k_1=2.0,0.455056,0.657161,0.17949,0.637603,0.414706
49,bm25-b=0.3-k_1=2.5,0.452102,0.662991,0.178927,0.628244,0.408824
50,bm25-b=0.2-k_1=1.0,0.451857,0.65339,0.182458,0.670712,0.4
