In [None]:
from tqdm import tqdm

import numpy as np

import pandas as pd

from beir import util
from beir.datasets.data_loader import GenericDataLoader

from sklearn.feature_extraction.text import TfidfVectorizer

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

In [None]:
def precision_at_k(predicted, true, k):
    """Return the fraction of the top-``k`` predictions that are relevant.

    Args:
        predicted: Ranked sequence of predicted ids, best first.
        true: Container of relevant ids; only membership (``in``) is used.
        k: Cutoff rank. The denominator is always ``k``, even when fewer than
            ``k`` predictions are supplied.

    Returns:
        ``hits / k`` as a float, or ``0.0`` when ``k`` is not positive.
    """
    # A negative k would slice from the end of the list and divide by a
    # negative number, so every non-positive cutoff is treated like k == 0.
    if k <= 0:
        return 0.0
    hits = sum(1 for idx in predicted[:k] if idx in true)
    return hits / k


def recall_at_k(predicted, true, k):
    """Return the fraction of relevant ids found in the top-``k`` predictions.

    Args:
        predicted: Ranked sequence of predicted ids, best first.
        true: Sized container of relevant ids (``len`` and ``in`` are used).
        k: Cutoff rank.

    Returns:
        ``hits / len(true)`` as a float, or ``0.0`` when ``true`` is empty or
        ``k`` is not positive.
    """
    # A negative k would make ``predicted[:k]`` mean "all but the last |k|
    # items" and report a non-zero recall, so non-positive cutoffs
    # short-circuit to 0.0 (the value k == 0 already produced).
    if not true or k <= 0:
        return 0.0
    hits = sum(1 for idx in predicted[:k] if idx in true)
    return hits / len(true)


def average_precision(predicted, true):
    """Compute average precision of one ranked list against its relevant ids.

    Walks ``predicted`` in rank order; at each position holding a relevant id,
    the precision at that position is added to a running sum. The sum is
    divided by ``len(true)``, so relevant ids that never appear in
    ``predicted`` lower the score.

    Args:
        predicted: Ranked sequence of predicted ids, best first.
        true: Sized container of relevant ids.

    Returns:
        The average precision as a float; ``0.0`` when ``true`` is empty.
    """
    if not true:
        return 0.0

    precision_sum = 0.0
    relevant_seen = 0
    for rank, doc_id in enumerate(predicted, start=1):
        if doc_id not in true:
            continue
        relevant_seen += 1
        precision_sum += relevant_seen / rank

    return precision_sum / len(true)


def mean_average_precision(predictions_truths):
    """Return the mean of ``average_precision`` over a batch of queries.

    Args:
        predictions_truths: Iterable of ``(predicted, true)`` pairs, one per
            query, in the form accepted by ``average_precision``.

    Returns:
        The arithmetic mean of the per-query average precisions, or ``0.0``
        when the batch is empty.
    """
    scores = [average_precision(pred, true) for pred, true in predictions_truths]
    # An empty batch used to raise ZeroDivisionError; report 0.0 instead, as
    # the per-query metrics do for empty ground truth.
    if not scores:
        return 0.0
    return sum(scores) / len(scores)


def mean_reciprocal_rank(predictions_truths):
    """Return the mean reciprocal rank of the first relevant hit per query.

    For each ``(predicted, true)`` pair the reciprocal of the 1-based rank of
    the first id in ``predicted`` that is also in ``true`` is accumulated; a
    query with no relevant id in its predictions contributes 0.

    Args:
        predictions_truths: Iterable of ``(predicted, true)`` pairs. It does
            not need to support ``len``; queries are counted while iterating.

    Returns:
        The mean reciprocal rank as a float, or ``0.0`` for an empty batch.
    """
    total_rr = 0.0
    num_queries = 0
    for pred, true in predictions_truths:
        num_queries += 1
        for rank, idx in enumerate(pred, start=1):
            if idx in true:
                total_rr += 1 / rank
                # Only the first relevant hit counts towards the score.
                break
    # Guard against ZeroDivisionError on an empty batch.
    if num_queries == 0:
        return 0.0
    return total_rr / num_queries


def test_retrieval(predictions_ground_truths, k):
    """Print per-query and aggregate retrieval metrics.

    For every ``(predicted, true)`` pair, one line is printed with precision
    and recall at cutoff ``k`` plus the average precision of the full
    prediction list. Afterwards MAP and MRR over all pairs are printed; those
    two (and the per-query AP) are computed without ``k``.

    Args:
        predictions_ground_truths: Collection of ``(predicted, true)`` pairs,
            one per query. It is traversed once for the per-query lines and
            again by each aggregate metric.
        k: Cutoff used for the P@k and R@k columns.

    Returns:
        None. Results are written to stdout only.
    """
    for query_number, (pred, true) in enumerate(predictions_ground_truths, start=1):
        precision = precision_at_k(pred, true, k)
        recall = recall_at_k(pred, true, k)
        avg_precision = average_precision(pred, true)
        print(
            f"Query {query_number} - P@{k}: {precision:.2f}, "
            f"R@{k}: {recall:.2f}, AP: {avg_precision:.2f}"
        )

    # Aggregates are computed after the per-query report, in the same order
    # as they are printed.
    map_score = mean_average_precision(predictions_ground_truths)
    mrr_score = mean_reciprocal_rank(predictions_ground_truths)

    print(f"\nMAP: {map_score:.2f}")
    print(f"MRR: {mrr_score:.2f}")

In [None]:
# Fetch and unpack the BEIR archive for the selected dataset, then load its
# test split.
dataset = "fiqa"
data_path = "./data/beir_data"

url = f"https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{dataset}.zip"
out_dir = util.download_and_unzip(url, data_path)

corpus, queries, qrels = GenericDataLoader(out_dir).load(split="test")

# Keep ids and texts as parallel lists so that a positional index into the
# texts can be mapped back to the corresponding id.
corpus_keys = list(corpus.keys())
corpus_texts = [corpus[doc_id]["text"] for doc_id in corpus_keys]

# Each value of `queries` is used directly as the query text.
query_keys = list(queries.keys())
query_texts = [queries[query_id] for query_id in query_keys]

In [None]:
## TF-IDF

# Number of documents retrieved per query; also the largest evaluation cutoff.
k = 15

vectorizer = TfidfVectorizer()

# NOTE(review): vocabulary and IDF weights are fit on corpus *and* query texts
# together — confirm this is intended for the evaluation.
vectorizer.fit(corpus_texts + query_texts)
corpus_tf_idf = vectorizer.transform(corpus_texts)
query_tf_idf = vectorizer.transform(query_texts)

# TfidfVectorizer L2-normalises every row by default, so the dot product of a
# document row and a query row is their cosine similarity.
# Resulting shape: (len(corpus_texts), len(query_texts)).
cosine_similarities = corpus_tf_idf @ query_tf_idf.T

predictions = []
for cosine_similarity in cosine_similarities.T:
    # Densify one query's scores at a time. `ravel()` always yields a 1-D
    # vector (unlike `squeeze()`, which collapses a single-document corpus to
    # a 0-d array that cannot be sliced). Sort ascending, reverse, keep top k.
    top_k_indices = cosine_similarity.toarray().ravel().argsort()[::-1][:k].tolist()
    predictions.append(top_k_indices)

# Map corpus row indices back to document ids, keyed by query id.
corpus_query_matches = {
    query_keys[query_idx]: [corpus_keys[idx] for idx in prediction]
    for query_idx, prediction in enumerate(predictions)
}

# Pair each ranked id list with the document ids listed for that query in
# `qrels`. The loop variable is deliberately not called `corpus_keys`, to
# avoid shadowing the module-level list of all corpus ids.
predictions_ground_truths = [
    (retrieved_keys, list(qrels[query_key].keys()))
    for query_key, retrieved_keys in corpus_query_matches.items()
]

# Report metrics at several cutoffs; `cutoff` is separate from `k` so the
# retrieval depth defined above is not overwritten.
for cutoff in (3, 5, 10, 15):
    test_retrieval(predictions_ground_truths, k=cutoff)