### Retrieval Evaluation

In [12]:
import pandas as pd
from typing import List, Dict, Callable
from qdrant_client import models

In [2]:
# Load the ground-truth dataset from CSV (path is relative to the notebook's
# working directory). Later cells read the 'question' and 'id' fields of each row.
ground_truth_df = pd.read_csv('ground_truth.csv')

In [24]:
# Define retrieval function
# NOTE(review): `models` is already imported in the first cell; only
# QdrantClient is new here.
from qdrant_client import models, QdrantClient

# Client for the Qdrant server expected at localhost:6333.
client = QdrantClient("http://localhost:6333")
# The return value is discarded (this is not the cell's last expression);
# presumably this acts as a connectivity check — TODO confirm.
client.get_collections()

def multi_stage_search(
    query: str,
    limit: int = 5,
    prefetch_multiplier: int = 50,
) -> list[models.ScoredPoint]:
    """Run a two-stage search over the "met-museum-euro-artworks" collection.

    Stage 1 (prefetch) embeds ``query`` with
    ``jinaai/jina-embeddings-v2-small-en`` and fetches candidates using the
    "jina-small" named vector. Stage 2 issues the main query with a
    ``Qdrant/bm25`` document of the same text against the "bm25" named vector;
    with a prefetch present, Qdrant runs the main query over the prefetched
    candidates.

    Args:
        query: Free-text search query.
        limit: Number of points to return.
        prefetch_multiplier: The prefetch stage fetches
            ``prefetch_multiplier * limit`` candidates. Defaults to 50, the
            value that was previously hard-coded.

    Returns:
        The ``points`` of the query response, with payloads included.
    """
    results = client.query_points(
        collection_name="met-museum-euro-artworks",
        prefetch=[
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model="jinaai/jina-embeddings-v2-small-en",
                ),
                using="jina-small",
                # Prefetch many more candidates than we finally return
                # (prefetch_multiplier x limit) so the second stage has a
                # meaningful pool to rerank.
                limit=(prefetch_multiplier * limit),
            ),
        ],
        query=models.Document(
            text=query,
            model="Qdrant/bm25",
        ),
        using="bm25",
        limit=limit,
        with_payload=True,
    )

    return results.points

In [6]:
# Convert the DataFrame to a list of per-row dicts (orient='records').
# NOTE: despite the "_dict" suffix this is a list, one dict per CSV row.
ground_truth_dict = ground_truth_df.to_dict(orient='records')

In [10]:
# Quick check that multi_stage_search is defined in the current kernel.
type(multi_stage_search)

function

In [8]:
# Smoke test: run the retriever on the first five ground-truth questions.
# The returned points are discarded; this only exercises the calls.
sample_records = ground_truth_dict[:5]
for gt_question in sample_records:
    question_text = gt_question['question']
    multi_stage_search(question_text, limit=5)

points=[ScoredPoint(id=437399, version=0, score=13.682985, payload={'artwork_text': "Man with a Magnifying Glass by Rembrandt (Rembrandt van Rijn). The description of the artwork is: 'This portrait most likely depicts the Amsterdam auctioneer Pieter Haringh, who once handled the sale of a famous portrait by the Italian Renaissance master Raphael that served Rembrandt as a source of inspiration. The sitter may have used the magnifying glass in his hand to evaluate paintings and other luxury goods circulating on the busy Amsterdam art market. Like his wife in the pendant portrait also on view here, the sitter wears a form of fancy dress that has little to do with Dutch clothing worn at the time.'. The source/origin of the artwork is Bequest of Benjamin Altma, the Metropolitan Museum of Art acquired the artwork in 1913. The medium for the painting is Oil on canvas, and the dimensions are 36 x 29 1/4 in. (91.4 x 74.3 cm). The artwork is presented at gallery 616, located on the map here htt

In [18]:
class Retrieval_Metrics:
    """Evaluate a retrieval function against a list of ground-truth items.

    Each ground-truth item is a dict providing a 'question' (the query text)
    and an 'id' (compared against the ``id`` of each retrieved point). For
    every item the retrieval function is called once and two per-query scores
    are computed: hit rate at k and reciprocal rank. ``evaluate`` averages them.
    """

    def __init__(
        self,
        ground_truth_lst: List[Dict],
        retrieval_function: Callable[[str, int], List["models.ScoredPoint"]],
        k: int,
    ):
        """Store the evaluation inputs.

        Args:
            ground_truth_lst: Items with 'question' and 'id' keys.
            retrieval_function: Called as ``retrieval_function(query, k)``;
                must return a ranked list of objects exposing an ``id``.
            k: Number of results requested per query and default cutoff for
                the hit-rate metric.
        """
        # Annotations referencing ``models`` are quoted so that defining the
        # class does not require qdrant_client to be in scope.
        self.retrieval_function = retrieval_function
        self.ground_truth_lst = ground_truth_lst
        self.k = k

    def hit_rate_at_k(self, retrieved_points: List["models.ScoredPoint"], k: int, question_id: int):
        """Return 1.0 if ``question_id`` is among the first ``k`` points, else 0.0.

        Args:
            retrieved_points: Ranked retrieval results (best first).
            k: Cutoff rank. ``None`` falls back to ``self.k``. An explicit
                ``0`` is honoured (no points considered, so the result is 0.0)
                rather than being treated as "missing".
            question_id: The id expected among the retrieved points.
        """
        # Use an explicit None check: ``k or self.k`` would silently replace
        # a legitimate cutoff of 0 with self.k.
        cutoff = self.k if k is None else k

        top_k_retrieved_ids = {point.id for point in retrieved_points[:cutoff]}
        return 1.0 if question_id in top_k_retrieved_ids else 0.0

    def mean_reciprocal_rank(self, retrieved_points: List["models.ScoredPoint"], question_id: int):
        """Return the reciprocal rank of ``question_id`` for a single query.

        Despite the name, this is the per-query reciprocal rank (1 / position
        of the first matching point, 1-based); the mean is taken in
        ``evaluate``. Returns 0.0 when no retrieved point matches. All of
        ``retrieved_points`` is scanned; no cutoff is applied here.
        """
        for rank, point in enumerate(retrieved_points, start=1):
            if question_id == point.id:
                return 1.0 / rank

        return 0.0

    def evaluate(self) -> Dict[str, float]:
        """Run every ground-truth query and average the per-query metrics.

        Returns:
            A dict with keys ``'hit_rate@<k>'``, ``'mrr'`` and
            ``'num_queries'``. With no ground-truth items both metrics are
            reported as 0.0 and ``'num_queries'`` is 0.
        """
        hit_rates = []
        reciprocal_ranks = []

        for item in self.ground_truth_lst:
            query = item['question']
            q_id = item['id']

            # One retrieval call per ground-truth item, requesting k results.
            retrieved_points = self.retrieval_function(query, self.k)

            hit_rates.append(self.hit_rate_at_k(retrieved_points, self.k, q_id))
            reciprocal_ranks.append(self.mean_reciprocal_rank(retrieved_points, q_id))

        n = len(hit_rates)
        # Guard the division so an empty ground-truth list yields zeros.
        return {
            f'hit_rate@{self.k}': sum(hit_rates) / n if n else 0.0,
            'mrr': sum(reciprocal_ranks) / n if n else 0.0,
            'num_queries': n,
        }




In [25]:
# Build the evaluator: every ground-truth record is scored against the
# multi-stage search, requesting (and scoring) the top 5 results per query.
retrieval_evaluator = Retrieval_Metrics(
    ground_truth_lst=ground_truth_dict,
    retrieval_function=multi_stage_search,
    k=5
)

In [26]:
# Run the evaluation; this issues one retrieval call per ground-truth record
# and displays the averaged metrics.
retrieval_evaluator.evaluate()

{'hit_rate@5': 0.3721881390593047,
 'mrr': 0.2866734832992502,
 'num_queries': 489}

In [None]:
# Define retrieval metric functions
def hit_rate(relevance_total):
    cnt = 0 