# NIR 2022 - Lab 6: Neural Re-Ranking with Transformers

In [None]:
import pyterrier as pt
import os

# Start PyTerrier's Java backend. PyTerrier expects init() to be called only
# once per process, so guard the call to keep this cell safe to re-run.
if not pt.started():
    pt.init(
        mem=8000,
        version="snapshot",
        tqdm="notebook",
        boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"],
    )

In [None]:
# Open the index (replace the placeholder with your own index location)
# and show its collection statistics as a quick sanity check.
index = pt.IndexFactory.of("your index")
collection_stats = index.getCollectionStatistics()
print(collection_stats.toString())


In [None]:
# loading your train and test dataset
import pandas as pd
# Read every column as text first so ids are not parsed as numbers...
test_query = pd.read_csv("your query path", dtype=str)
# ...then turn only the relevance label of the qrels into an integer.
test_qrel = pd.read_csv("your qrel path", dtype=str).astype({"label": "int32"})

In [None]:
# load search model
# First-stage retriever: BM25 weighting model over the index loaded above.
BM25 = pt.BatchRetrieve(index, wmodel="BM25")

In [None]:
# Evaluate the BM25 baseline on the test queries against the qrels.
result = pt.Experiment(
    [BM25],
    test_query,
    test_qrel,
    eval_metrics=["map", "ndcg_cut_20"],
    names=["BM25"],
)
result

## reranking

In [None]:
# reranking
from sentence_transformers import SentenceTransformer, util

# Bi-encoder demo: the query and the document are embedded independently
# and then compared.
model = SentenceTransformer('msmarco-distilbert-base-v3')

# model.encode() turns a text into its embedding
demo_query = 'How big is London'
demo_passage = 'London has 9,787,426 inhabitants at the 2011 census'
query_embedding = model.encode(demo_query)
doc_embedding = model.encode(demo_passage)

# Cosine similarity between the query and document representations
cos_sim = util.pytorch_cos_sim(query_embedding, doc_embedding)
print("Cosine-Similarity:", cos_sim)


### cross encoder

In [None]:
from sentence_transformers import CrossEncoder
# Cross-encoder demo: each (query, paragraph) pair is passed to the model
# together and predict() returns one score per pair.
# NOTE: this rebinds `model`, replacing the bi-encoder loaded earlier.
model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
example_pairs = [('Query1', 'Paragraph1'), ('Query2', 'Paragraph2')]
scores = model.predict(example_pairs)
scores

In [None]:
# load runs:
import collections
from tqdm import tqdm
def load_run(path):
    """Load a six-column run file into per-query lists of candidate doc ids.

    Every non-blank line must hold six whitespace-separated fields; the
    first (query id), third (doc id) and fourth (rank) are used and the
    others are ignored. Blank lines are skipped.

    Args:
        path: Path of the run file to read.

    Returns:
        A ``collections.OrderedDict`` mapping each query id (in order of
        first appearance in the file) to its doc ids sorted by ascending
        rank.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields or
            its rank field is not an integer.
    """
    print('Loading run...')
    run = collections.OrderedDict()
    with open(path) as f:
        for line in tqdm(f):
            # split() without an argument accepts tabs or repeated spaces
            # and drops the trailing newline.
            fields = line.split()
            if not fields:
                continue
            query_id, _, doc_title, rank, _, _ = fields
            run.setdefault(query_id, []).append((doc_title, int(rank)))

    # Sort candidate docs by rank. The sort must happen in place:
    # sorted() returns a new list and would leave the file order untouched.
    sorted_run = collections.OrderedDict()
    for query_id, doc_titles_ranks in run.items():
        doc_titles_ranks.sort(key=lambda pair: pair[1])
        sorted_run[query_id] = [doc_title for doc_title, _ in doc_titles_ranks]

    return sorted_run

# Candidate documents per query ({query_id: [doc ids]}) that will be
# re-ranked below; replace the placeholder with the path of your run file.
run = load_run("your run result")

In [None]:
# load corpus
import jsonlines

# Map every document id ("_id") to its body text ("text"). Other fields of
# a row are not needed here and are ignored.
corpus = {}
with jsonlines.open("ir_course_dataset/corpus.jsonl", mode="r") as reader:
    for row in tqdm(reader):
        corpus[row["_id"]] = row["text"]

print("corpus num", len(corpus))

In [None]:
# Pipeline
import spacy
# Blank English pipeline whose only component is the sentence splitter
nlp = spacy.blank("en")
nlp.add_pipe('sentencizer')

# Sliding-window settings, both counted in sentences: each segment holds up
# to `max_length` sentences and a new segment starts every `stride` sentences.
max_length = 10
stride = 5

# Query id -> query text lookup
queries = dict(zip(test_query['qid'].to_list(), test_query['query'].to_list()))

# Bookkeeping counters updated while segmenting
n_segments = n_docs = n_doc_ids_not_found = 0

# Lines of the re-ranked run, and the name used for its output file
cross_run = []
model_name = "cross"

# For every query in the run, cut each candidate document into overlapping
# sentence windows ("segments") and collect them in `passages`. The scoring,
# sorting and saving steps are still TODO placeholders to be filled in.
for query_id, doc_ids in tqdm(run.items(), total=len(run)):

    print(f'{query_id}: Converting to segments...')
    # Raises KeyError if the run contains a query id that is not in test_query.
    query_text = queries[query_id]
    # [doc_id, segment] pairs of the current query only (reset per query)
    passages = []
    for doc_id in doc_ids:
        if doc_id not in corpus:
            # Count and skip run entries whose document is not in the corpus.
            n_doc_ids_not_found += 1
            continue
        n_docs += 1
        doc_text = corpus[doc_id]
        # Only the first 10000 characters of a document are sentence-split.
        doc = nlp(doc_text[:10000])
        sentences = [str(sent).strip() for sent in doc.sents]
        # Windows of up to max_length sentences, starting every stride sentences.
        for i in range(0, len(sentences), stride):
            segment = ' '.join(sentences[i:i + max_length])
            passages.append([doc_id, segment])
            n_segments += 1
            if i + max_length >= len(sentences):
                # This window already reached the last sentence; any later
                # window would be fully contained in it, so stop here.
                break

    print(f'{query_id}: Reranking...')


    # get the score
    # todo: score every (query_text, segment) pair collected in `passages`
    # (presumably with the cross-encoder `model.predict` shown earlier - confirm)

    # Sort the scores in decreasing order
    # todo: a document can contribute several segments, so its segment scores
    # need to be combined into one score per doc_id before sorting
    # (how to combine them is not specified here - check the lab instructions)

    # Save the results in TREC format
    # todo: append one string per ranked document to `cross_run`; the strings
    # are written to disk one per line after this loop

    # Store ranking on disk in TREC format
    
# Write the collected run lines to ir_course_run/<model_name>.run, one per
# line. The output directory is created first so that a fresh checkout does
# not fail with FileNotFoundError.
run_dir = "ir_course_run"
os.makedirs(run_dir, exist_ok=True)
with open(f"{run_dir}/{model_name}.run", "w") as f:
    f.writelines(f"{run_line}\n" for run_line in cross_run)


## evaluation using pytrec_eval

In [None]:
# test runs

import pytrec_eval
# Load the re-ranked run that was written to disk above
with open("ir_course_run/cross.run", 'r') as f_run:
    tf_run = pytrec_eval.parse_run(f_run)

# Build the nested {qid: {docno: label}} mapping passed to the evaluator.
# Columns are read by name, so the result does not depend on the column
# order of the qrels file or on extra columns being present.
qrels_dict = {}
for qid, docno, label in zip(
    test_qrel["qid"], test_qrel["docno"], test_qrel["label"]
):
    # int() turns the int32 label into a plain Python int
    qrels_dict.setdefault(qid, {})[docno] = int(label)

metrics = {"map", "ndcg_cut_5", "ndcg_cut_10", "ndcg_cut_20"}
evaluator = pytrec_eval.RelevanceEvaluator(qrels_dict, metrics)

# Per-query scores of the run: {query id: {metric: value}}
tf_evals = evaluator.evaluate(tf_run)

In [None]:
# Collect the per-query values of every metric
tf_metric2vals = {m: [] for m in metrics}
for per_query_scores in tf_evals.values():
    for m, val in per_query_scores.items():
        tf_metric2vals[m].append(val)

# Compute average across topics. `metrics` is a set, so iterate it sorted to
# print the measures in the same order on every run.
for m in sorted(metrics):
    print(m, '\t', pytrec_eval.compute_aggregated_measure(m, tf_metric2vals[m]))