# IR Lab Tutorial (Research Oriented): Query Performance Prediction

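This tutorial introduces the concept of query performance prediction (QPP): estimating how well a retrieval system will answer a query without using relevance judgments. We compare the scores of several QPP methods against the actual per-query effectiveness of a BM25 retrieval system.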

## Preparation: Install dependencies

In [None]:
# This is only needed in Google Colab, in a dev container, everything should be installed already
!pip3 install python-terrier tira

In [2]:
import pyterrier as pt
from tira.third_party_integrations import ensure_pyterrier_is_loaded
from tira.rest_api_client import Client

# Presumably initialises PyTerrier so that `pt` is usable in the cells below -- confirm in the TIRA docs.
ensure_pyterrier_is_loaded()
# TIRA client; the cells below use `tira.pt` to obtain the QPP predictions and the BM25 submission.
tira = Client()

## Our Scenario

We want to build a search engine to support web developers working with CSS.

Our search engine has the following 3 documents:

In [7]:
def qpp_correlation_to_ground_truth(bm25, qpp, dataset, eval_metrics, predictors=(
        'max-idf', 'avg-idf', 'scq', 'max-scq', 'avg-scq', 'var', 'max-var', 'avg-var', 'wig+10', 'nqc+100', 'smv+100')):
    """Correlate QPP predictor scores with the per-query effectiveness of a retrieval system.

    The system is evaluated per query with ``pt.Experiment``; the predictions
    returned by ``qpp(topics)`` are joined to the evaluation on ``qid``; for
    every predictor the Pearson, Kendall and Spearman correlation between the
    ``value`` column of the evaluation and the predictor column is computed.

    Args:
        bm25: Retrieval system passed to ``pt.Experiment`` (reported as 'BM25').
        qpp: Callable that takes the topics and returns a DataFrame with a
            ``qid`` column and one column per QPP predictor.
        dataset: Object providing ``get_topics()`` and ``get_qrels()``.
        eval_metrics: List of evaluation measures forwarded to ``pt.Experiment``.
        predictors: Names of the predictor columns to correlate. Defaults to
            the eleven predictors used throughout this notebook.

    Returns:
        A DataFrame with the columns 'QPP Method', 'Pearson Correlation',
        'Kendall' and 'Spearman', sorted by descending Pearson correlation.
        If the evaluation contains more than one measure, every measure is
        correlated separately and an additional 'Measure' column identifies
        it (pooling the values of different measures into one correlation
        would be meaningless).

    Raises:
        KeyError: If a requested predictor is not a column of the predictions.
    """
    import pandas as pd

    topics = dataset.get_topics()
    df_eval = pt.Experiment([bm25], topics=topics, qrels=dataset.get_qrels(), eval_metrics=eval_metrics, perquery=True, names=['BM25'])
    df_predictions = qpp(topics)
    df_joined = pd.merge(df_eval, df_predictions, on=['qid'])

    predictors = list(predictors)
    missing = [p for p in predictors if p not in df_joined.columns]
    if missing:
        # Fail early with the offending names instead of an opaque pandas indexing error.
        raise KeyError(f'QPP predictor column(s) missing from the predictions: {missing}')

    # Only split by measure when there really are several; the single-measure
    # case operates on the untouched join so its result is unchanged.
    measures = list(df_joined['measure'].unique()) if 'measure' in df_joined.columns else []
    if len(measures) > 1:
        groups = [(m, df_joined[df_joined['measure'] == m]) for m in measures]
    else:
        groups = [(None, df_joined)]

    correlation_methods = (('Pearson Correlation', 'pearson'), ('Kendall', 'kendall'), ('Spearman', 'spearman'))

    ret = []
    for measure, df_measure in groups:
        for q in predictors:
            pair = df_measure[['value', q]]
            row = {'QPP Method': q}
            if measure is not None:
                row['Measure'] = measure
            for label, method in correlation_methods:
                row[label] = pair.corr(method=method).at['value', q]
            ret.append(row)

    columns = ['QPP Method'] + (['Measure'] if len(measures) > 1 else []) + [label for label, _ in correlation_methods]

    # Explicit columns keep the sort working even when no predictor was requested.
    return pd.DataFrame(ret, columns=columns).sort_values('Pearson Correlation', ascending=False)


In [8]:
# Dataset: TREC Robust 2004, loaded through PyTerrier's dataset API.
dataset = pt.get_dataset("irds:disks45/nocr/trec-robust-2004")
# QPP predictions of the TIRA submission 'ir-benchmarks/qpptk/all-predictors' for this dataset.
qpp = tira.pt.transform_queries('ir-benchmarks/qpptk/all-predictors', dataset)
# BM25 submission from TIRA; its per-query effectiveness serves as the ground truth.
bm25 = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', dataset)

# Correlate every QPP predictor with the per-query 'ndcg_cut_10' of BM25; the table is the cell output.
qpp_correlation_to_ground_truth(bm25, qpp, dataset, ['ndcg_cut_10'])

There are multiple query fields available: ('title', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


  warn(f'{backfill_count} topic(s) not found in qrels. Scores for these topics are given as NaN and should not contribute to averages.')


Unnamed: 0,QPP Method,Pearson Correlation,Kendall,Spearman
9,nqc+100,0.411153,0.317898,0.451018
10,smv+100,0.391963,0.302243,0.429123
8,wig+10,0.34266,0.240992,0.345846
7,avg-var,0.302321,0.256582,0.36949
6,max-var,0.27616,0.233053,0.338973
3,max-scq,0.264181,0.221495,0.323365
1,avg-idf,0.248902,0.168261,0.24334
4,avg-scq,0.220149,0.157302,0.229591
0,max-idf,0.214187,0.175756,0.25593
5,var,0.163394,0.163238,0.235878


In [12]:
# Dataset: MS MARCO passage, TREC Deep Learning 2019 (judged subset hosted in ir-benchmarks).
dataset = pt.get_dataset("irds:ir-benchmarks/msmarco-passage-trec-dl-2019-judged-20230107-training")
# Here the TIRA dataset id is passed as a string instead of the dataset object.
qpp = tira.pt.transform_queries('ir-benchmarks/qpptk/all-predictors', 'msmarco-passage-trec-dl-2019-judged-20230107-training')
# BM25 submission from TIRA for the same dataset id; its per-query effectiveness serves as the ground truth.
bm25 = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', 'msmarco-passage-trec-dl-2019-judged-20230107-training')

# Correlate every QPP predictor with the per-query 'ndcg_cut_10' of BM25; the table is the cell output.
qpp_correlation_to_ground_truth(bm25, qpp, dataset, ['ndcg_cut_10'])

Download: 199kiB [00:00, 1.94MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_datasets/ir-benchmarks/msmarco-passage-trec-dl-2019-judged-20230107-training/
There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,QPP Method,Pearson Correlation,Kendall,Spearman
6,max-var,0.466222,0.291485,0.418178
9,nqc+100,0.373691,0.260822,0.370728
8,wig+10,0.371033,0.229745,0.328437
0,max-idf,0.370412,0.231163,0.322145
10,smv+100,0.363132,0.300778,0.432806
1,avg-idf,0.328585,0.207548,0.314164
7,avg-var,0.325339,0.18979,0.296492
3,max-scq,0.197687,0.134762,0.20342
5,var,0.17275,0.067703,0.134124
2,scq,0.025106,0.027747,0.038213


In [14]:
# Dataset: MS MARCO passage, TREC Deep Learning 2020 (judged subset hosted in ir-benchmarks).
dataset = pt.get_dataset("irds:ir-benchmarks/msmarco-passage-trec-dl-2020-judged-20230107-training")
# Here the TIRA dataset id is passed as a string instead of the dataset object.
qpp = tira.pt.transform_queries('ir-benchmarks/qpptk/all-predictors', 'msmarco-passage-trec-dl-2020-judged-20230107-training')
# BM25 submission from TIRA for the same dataset id; its per-query effectiveness serves as the ground truth.
bm25 = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', 'msmarco-passage-trec-dl-2020-judged-20230107-training')

# Correlate every QPP predictor with the per-query 'ndcg_cut_10' of BM25; the table is the cell output.
qpp_correlation_to_ground_truth(bm25, qpp, dataset, ['ndcg_cut_10'])

Download from the Incubator: https://files.webis.de/data-in-production/data-research/tira-zenodo-dump-preparation/query-processors-in-progress/qpptk-all-predictors-trec-recent.zip
	This is only used for last spot checks before archival to Zenodo.


Download: 100%|██████████| 223k/223k [00:00<00:00, 2.41MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/msmarco-passage-trec-dl-2020-judged-20230107-training/qpptk


Download: 642kiB [00:00, 4.76MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/msmarco-passage-trec-dl-2020-judged-20230107-training/tira-ir-starter


Download: 235kiB [00:00, 2.30MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_datasets/ir-benchmarks/msmarco-passage-trec-dl-2020-judged-20230107-training/
There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.


Unnamed: 0,QPP Method,Pearson Correlation,Kendall,Spearman
8,wig+10,0.474703,0.313177,0.446778
7,avg-var,0.213855,0.128626,0.225047
1,avg-idf,0.203385,0.132821,0.181974
6,max-var,0.201037,0.103597,0.182545
4,avg-scq,0.154104,0.104858,0.173512
9,nqc+100,0.149108,0.131423,0.192456
10,smv+100,0.105101,0.090877,0.13589
0,max-idf,0.074536,0.068116,0.111758
5,var,0.067241,0.055925,0.101241
2,scq,0.019864,0.019574,0.026949


In [16]:
# Dataset: args.me corpus with the Touché 2020 Task 1 topics.
dataset = pt.get_dataset("irds:argsme/2020-04-01/touche-2020-task-1")
# QPP predictions of the TIRA submission 'ir-benchmarks/qpptk/all-predictors' for this dataset.
qpp = tira.pt.transform_queries('ir-benchmarks/qpptk/all-predictors', dataset)
# BM25 submission from TIRA; its per-query effectiveness serves as the ground truth.
bm25 = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', dataset)

# Correlate every QPP predictor with the per-query 'ndcg_cut_10' of BM25; the table is the cell output.
qpp_correlation_to_ground_truth(bm25, qpp, dataset, ['ndcg_cut_10'])

There are multiple query fields available: ('title', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.
/root/.ir_datasets/touche/2020/task-1/qrels.qrels


Unnamed: 0,QPP Method,Pearson Correlation,Kendall,Spearman
1,avg-idf,0.396869,0.291332,0.433549
10,smv+100,0.303901,0.159763,0.226448
3,max-scq,0.261866,0.145881,0.20432
4,avg-scq,0.226422,0.178558,0.250134
9,nqc+100,0.213075,0.113628,0.166977
8,wig+10,0.172826,0.113628,0.169223
0,max-idf,0.134577,0.139077,0.174775
6,max-var,0.048378,0.051513,0.084354
2,scq,0.016478,0.009398,0.03701
7,avg-var,0.0011,0.041863,0.05702


In [18]:
# Dataset: args.me corpus with the Touché 2021 Task 1 topics.
dataset = pt.get_dataset("irds:argsme/2020-04-01/touche-2021-task-1")
# QPP predictions of the TIRA submission 'ir-benchmarks/qpptk/all-predictors' for this dataset.
qpp = tira.pt.transform_queries('ir-benchmarks/qpptk/all-predictors', dataset)
# BM25 submission from TIRA; its per-query effectiveness serves as the ground truth.
bm25 = tira.pt.from_submission('ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)', dataset)

# Correlate every QPP predictor with the per-query 'ndcg_cut_10' of BM25; the table is the cell output.
qpp_correlation_to_ground_truth(bm25, qpp, dataset, ['ndcg_cut_10'])

Download from the Incubator: https://files.webis.de/data-in-production/data-research/tira-zenodo-dump-preparation/query-processors-in-progress/qpptk-all-predictors-clef-labs.zip
	This is only used for last spot checks before archival to Zenodo.


Download: 100%|██████████| 969k/969k [00:00<00:00, 6.39MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/argsme-touche-2021-task-1-20230209-training/qpptk


Download: 1.03MiB [00:00, 1.16MiB/s]
[INFO] [starting] opening zip file


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/argsme-touche-2021-task-1-20230209-training/tira-ir-starter


[INFO] [starting] https://zenodo.org/record/6798216/files/topics-task-1-only-titles-2021.zip
[INFO] [finished] https://zenodo.org/record/6798216/files/topics-task-1-only-titles-2021.zip: [00:00] [1.35kB] [4.40MB/s]
[INFO] [finished] opening zip file [344ms]                                                            
[INFO] [starting] https://zenodo.org/record/6798216/files/touche-task1-51-100-relevance.qrels
[INFO] [finished] https://zenodo.org/record/6798216/files/touche-task1-51-100-relevance.qrels: [00:00] [100kB] [1.02MB/s]
[INFO] [starting] https://zenodo.org/record/6798216/files/touche-task1-51-100-quality.qrels            
[INFO] [finished] https://zenodo.org/record/6798216/files/touche-task1-51-100-quality.qrels: [00:00] [99.7kB] [828kB/s]
                                                                                                     

There are multiple qrel fields available: ['relevance', 'quality']. Defaulting to "relevance", but to use a different one, supply variant


Unnamed: 0,QPP Method,Pearson Correlation,Kendall,Spearman
6,max-var,0.270399,0.111834,0.171912
8,wig+10,0.263305,0.226122,0.345882
4,avg-scq,0.237657,0.16898,0.259256
3,max-scq,0.227286,0.161212,0.248397
7,avg-var,0.219205,0.128163,0.199232
9,nqc+100,0.152029,0.067755,0.104346
10,smv+100,0.145271,0.062857,0.084658
5,var,0.088366,0.044898,0.064106
2,scq,0.000595,-0.041633,-0.060168
1,avg-idf,-0.01702,-0.023673,-0.038271
