In [18]:
# Imports
from tira.third_party_integrations import ensure_pyterrier_is_loaded, persist_and_normalize_run
from tira.rest_api_client import Client
import pyterrier as pt
import pandas as pd

In [19]:
# Create a REST client to the TIRA platform for retrieving the pre-indexed data.
ensure_pyterrier_is_loaded()
tira = Client()

In [20]:
# The dataset: the union of the IR Anthology and the ACL Anthology
# This line creates an IRDSDataset object and registers it under the name provided as an argument.
pt_dataset = pt.get_dataset('irds:ir-lab-sose-2024/ir-acl-anthology-20240504-training')
pt_dataset.get_topics('query').head(4)

Unnamed: 0,qid,query
0,1,retrieval system improving effectiveness
1,2,machine learning language identification
2,3,social media detect self harm
3,4,stemming for arabic languages


In [21]:
# This assumes we have execited the ../baseline-retrieval-system/baseline-retrieval-system.ipynb notebook before.
bm25 = pt.io.read_results('../runs/runbm25.txt')
ngrams = pt.io.read_results('../runs/runngram.txt')
pt.Experiment(
    [bm25, ngrams],
    pt_dataset.get_topics(),
    pt_dataset.get_qrels(),
    ["ndcg_cut.10", "recip_rank", "recall_1000", "map"],
    names=["BM25", "Ngrams"]
)

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,name,ndcg_cut.10,recip_rank,recall_1000,map
0,BM25,0.374041,0.579877,0.825376,0.262311
1,Ngrams,0.352402,0.567611,0.717923,0.226362


In [22]:
# This assumes we have execited the ../baseline-retrieval-system/baseline-retrieval-system.ipynb notebook before.
bm25 = pt.io.read_results('./runs/bm25.txt')
#ngrams = pt.io.read_results('../runs/runngram.txt')
full = pt.io.read_results('./runs/fullrun/run.txt')
res_dict = pt.Experiment(
    [bm25, full],
    pt_dataset.get_topics(),
    pt_dataset.get_qrels(),
    ["ndcg_cut.10", "recip_rank", "recall_1000", "map"],
    names=["BM25", "Full"],
    baseline=0,
    #perquery = True,
    save_dir = "./",
    save_mode="overwrite",
    dataframe = True
)
print(res_dict)

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
   name       map  recip_rank  recall_1000  ndcg_cut.10  map +  map -  \
0  BM25  0.262311    0.579877     0.825376     0.374041    NaN    NaN   
1  Full  0.180295    0.514831     0.707547     0.289385   21.0   46.0   

   map p-value  recip_rank +  recip_rank -  recip_rank p-value  recall_1000 +  \
0          NaN           NaN           NaN                 NaN            NaN   
1     0.000124          20.0          27.0            0.249336            7.0   

   recall_1000 -  recall_1000 p-value  ndcg_cut.10 +  ndcg_cut.10 -  \
0            NaN                  NaN            NaN            NaN   
1           42.0             0.000005           25.0           36.0   

   ndcg_cut.10 p-value  
0                  NaN  
1             0.008987  


GET A LIST OF QUERIES WHERE WE HAVE BETTER MAP THAN BM25

In [23]:
import pandas as pd
df = pd.DataFrame(res_dict)
print(df)
# Filter criteria
filter_criteria = (
    (df['measure'] == 'map') &
    (df['name'].isin(['Full', 'BM25']))
)

# Apply the filter
filtered_df = df[filter_criteria]

# Group by 'qid' and filter groups that have both 'NGRAMS' and 'BM25' entries
groups = filtered_df.groupby('qid')
valid_pairs = []
valid_qids = []
for qid, group in groups:
    if len(group) == 2 and set(group['name']) == {'Full', 'BM25'}:
        ngrams_row = group[group['name'] == 'Full'].iloc[0]
        bm25_row = group[group['name'] == 'BM25'].iloc[0]
        
        if ngrams_row['value'] > bm25_row['value']:
            valid_pairs.append((ngrams_row, bm25_row))
            valid_qids.append(qid)
# Print the valid pairs
for ngrams_row, bm25_row in valid_pairs:
    print(f"Pair found for qid {ngrams_row['qid']}:")
    print(f"Full: {ngrams_row}")
    print(f"BM25: {bm25_row}")
    print(valid_qids)


   name       map  recip_rank  recall_1000  ndcg_cut.10  map +  map -  \
0  BM25  0.262311    0.579877     0.825376     0.374041    NaN    NaN   
1  Full  0.180295    0.514831     0.707547     0.289385   21.0   46.0   

   map p-value  recip_rank +  recip_rank -  recip_rank p-value  recall_1000 +  \
0          NaN           NaN           NaN                 NaN            NaN   
1     0.000124          20.0          27.0            0.249336            7.0   

   recall_1000 -  recall_1000 p-value  ndcg_cut.10 +  ndcg_cut.10 -  \
0            NaN                  NaN            NaN            NaN   
1           42.0             0.000005           25.0           36.0   

   ndcg_cut.10 p-value  
0                  NaN  
1             0.008987  


KeyError: 'measure'

In [None]:
import pandas as pd

#print(pt_dataset.get_topics())
#valid_qids_normal = ['10', '18', '20', '28', '29', '3', '31', '33', '39', '4', '49', '51', '56', '62', '65', '68']
df = pd.DataFrame(pt_dataset.get_topics())
#print(df)

filtered_df = df[df['qid'].isin(valid_qids)]
# Printing the filtered DataFrame
print(filtered_df)

There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
   qid                                               text  \
1    2           machine learning language identification   
2    3                      social media detect self-harm   
5    6              Comparison different retrieval models   
8    9                          Pseudo-relevance feedback   
9   10  How to represent natural conversations in word...   
12  13                             at least three authors   
18  20           Crawling websites using machine learning   
20  22                      Search engine caching effects   
22  24                       Limitations machine learning   
23  25                          medicine related research   
24  26                        Natural Language Processing   
27  29  information retrieval on different language so...   
29  31   risks of informatio

GET ALL QUERIES WHERE WE HAVE BETTER RECALL THAN BM25

In [None]:
import pandas as pd
df = pd.DataFrame(res_dict)
print(df)
# Filter criteria
filter_criteria = (
    (df['measure'] == 'recall_1000') &
    (df['name'].isin(['Full', 'BM25']))
)

# Apply the filter
filtered_df = df[filter_criteria]

# Group by 'qid' and filter groups that have both 'NGRAMS' and 'BM25' entries
groups = filtered_df.groupby('qid')
valid_pairs = []
valid_qids = []
for qid, group in groups:
    if len(group) == 2 and set(group['name']) == {'Full', 'BM25'}:
        ngrams_row = group[group['name'] == 'Full'].iloc[0]
        bm25_row = group[group['name'] == 'BM25'].iloc[0]
        
        if ngrams_row['value'] > bm25_row['value']:
            valid_pairs.append((ngrams_row, bm25_row))
            valid_qids.append(qid)
# Print the valid pairs
for ngrams_row, bm25_row in valid_pairs:
    print(f"Pair found for qid {ngrams_row['qid']}:")
    print(f"Full: {ngrams_row}")
    print(f"BM25: {bm25_row}")
    print(valid_qids)


     name qid      measure     value
0    BM25   1          map  0.404455
1    BM25   1   recip_rank  1.000000
2    BM25   1  recall_1000  0.966667
3    BM25   1  ndcg_cut.10  0.835780
36   BM25  10          map  0.001188
..    ...  ..          ...       ...
303  Full   8  ndcg_cut.10  0.000000
304  Full   9          map  0.160744
305  Full   9   recip_rank  0.166667
306  Full   9  recall_1000  1.000000
307  Full   9  ndcg_cut.10  0.154481

[544 rows x 4 columns]
Pair found for qid 16:
Full: name              Full
qid                 16
measure    recall_1000
value         0.842105
Name: 334, dtype: object
BM25: name              BM25
qid                 16
measure    recall_1000
value         0.684211
Name: 62, dtype: object
['16', '24', '29', '5', '52', '56', '65']
Pair found for qid 24:
Full: name              Full
qid                 24
measure    recall_1000
value              1.0
Name: 366, dtype: object
BM25: name              BM25
qid                 24
measure    recall_1000
v