In [None]:
import os

# Point the Hugging Face and Torch cache locations at one folder under the
# current working directory. This is done before the torch /
# sentence_transformers imports that follow, presumably so the settings are
# already in place when those libraries load.
PATH = f"{os.getcwd()}/.cache/huggingface"
os.environ.update(
    {
        "HF_HOME": PATH,
        "HF_DATASETS_CACHE": PATH,
        "TORCH_HOME": PATH,
    }
)

import torch
from sentence_transformers import SentenceTransformer
from tqdm.notebook import tqdm
import pandas as pd
import math
from tqdm.notebook import tqdm
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [14]:
def compute_mrr(anchor_vecs, positive_vecs, negative_vecs_list):
    """
    Compute the Mean Reciprocal Rank (MRR) of each positive among its candidates.

    For every anchor, the positive and that anchor's negatives are scored by
    cosine similarity against the anchor. The positive's 1-based rank among
    those candidates is inverted, and the reciprocal ranks are averaged.

    anchor_vecs: list of anchor vectors
    positive_vecs: list of positive vectors (one for each anchor)
    negative_vecs_list: negatives per anchor. Each entry may be a list of
        vectors, a 2-D array (one negative per row), a single 1-D vector
        (exactly one negative), or empty (no negatives).

    Returns the mean reciprocal rank as a float.

    Raises ValueError if no anchors are supplied, or if an anchor's negatives
    do not have the same dimensionality as its positive.
    """
    reciprocal_ranks = []

    for anchor, positive, negatives in zip(anchor_vecs, positive_vecs, negative_vecs_list):
        anchor_row = np.asarray(anchor).reshape(1, -1)
        positive_row = np.asarray(positive).reshape(1, -1)

        # Build the candidate matrix explicitly (positive first, then negatives).
        # `[positive] + negatives` is only list concatenation when `negatives`
        # is a Python list; when it is an ndarray, NumPy broadcasts an
        # element-wise sum instead, so the vectors being scored are sums rather
        # than the real candidates (a single 1-D negative always yields rank 1).
        negative_rows = np.asarray(negatives)
        if negative_rows.size == 0:
            candidates = positive_row
        else:
            negative_rows = np.atleast_2d(negative_rows)
            if negative_rows.ndim != 2 or negative_rows.shape[1] != positive_row.shape[1]:
                raise ValueError(
                    "negatives must be vectors with the same dimensionality as the positive; "
                    f"got shape {negative_rows.shape} for a positive of size {positive_row.shape[1]}"
                )
            candidates = np.vstack([positive_row, negative_rows])

        sims = cosine_similarity(anchor_row, candidates)[0]

        # The positive sits at index 0. Its 1-based rank is one more than the
        # number of negatives scoring at least as high, so ties are resolved
        # against the positive (deterministically, without a full sort).
        rank = 1 + int(np.count_nonzero(sims[1:] >= sims[0]))

        reciprocal_ranks.append(1 / rank)

    if not reciprocal_ranks:
        raise ValueError("compute_mrr requires at least one (anchor, positive, negatives) triple")

    return sum(reciprocal_ranks) / len(reciprocal_ranks)

In [15]:
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

# Load the sentence-embedding model stored in the local directory onto DEVICE.
model = SentenceTransformer(
    "./trained_models/all_mpnet_base_v2",
    device=DEVICE,
)

In [18]:
# Read the test set from disk.
test_df = pd.read_csv("./datasets/test.csv")

# Report (rows, columns).
print(test_df.shape)

# Preview ten randomly drawn rows (a different draw on every run, since no
# random_state is fixed).
test_df.sample(n=10).head(n=10)

(34349, 4)


Unnamed: 0,category,anchor,positive,negative
16912,Earphone,Name: Edifier P180 Plus 3.5mm Earphone\nPrice:...,ভালো ঝসাউন্ড কডয়ালিটির ডফোন,high capactiy NAS dehice
16532,Patch Cord,Name: R&M R196168 24AWG CAT6 U/UTP 4P LSZH 1 M...,R&M Ru96168 tatch cord,TEV PROI-I এর দাম কত?
14836,Mirrorless Camera,Name: Nikon Zf 24.5MP Mirrorless Camera(Only B...,Camera for detailed landscape and portrait pho...,সনি ব্রভিয়া 55 ইঞ্চি টিভি
28098,Power Strip,Name: Deli ET403 Universal Hole Extension Cord...,Deli ET403 Universal Hole Extension Cord 2 Pin...,নেটওয়ার্কিং তার
22922,Headphone,Name: Jabra Evolve 30 MS DUO Dual Ear USB & 3....,Jarba Evve 30 MS DUO - Bck Ediiton,কম্পিউর কন্ট্রোলারৈর জন্য ডভিাইস
3420,Headphone,Name: Anker Soundcore Space One Foldable Over-...,ভালো মানের হেডফোন খুঁজছি,নতুন অ্যান্টিভাইরাস খুঁজছি
14901,CPU Cooler,Name: DeepCool AK620 DIGITAL WH RGB CPU Cooler...,CPU এর তাপমাত্রা কমানোর ডিভাইস,টিভিতে ফুটেজ দেখার ব্যবস্থা
6964,Casing,Name: NZXT H5 Flow 2024 Compact Mid-tower Airf...,নতুন কম্পটার তৈরি করার জন্য কেস,বড় সইাজের ওয়াৌশিং মেশ
7705,IP Camera,Name: Uniview IPC2122LE-ADF40KMC-WL 2MP ColorH...,network camera with night vision,best picture quality tv
22445,RAM (Desktop),Name: PNY XLR8 8GB RGB DDR4 3200MHz White Desk...,PNY XLR8 8GB DDR4 3200MHz,ডিস্ট্রিক্টারের জন্য নির্ভরযোগ্য ফোন ব্যবস্থা


In [None]:
# One output list per text column; each ends up with one embedding per row of
# test_df, in row order.
anchor_embedding, positive_embedding, negative_embedding = [], [], []

num_samples = len(test_df)
batch_size = 200
# Number of slices the loop below walks through (not referenced by the loop itself).
num_batch = math.ceil(num_samples / batch_size)

# (column name, list receiving that column's embeddings), in encoding order.
_column_targets = (
    ("anchor", anchor_embedding),
    ("positive", positive_embedding),
    ("negative", negative_embedding),
)

# Walk the frame in slices of `batch_size` rows and encode each text column of
# the slice in turn.
for offset in tqdm(range(0, num_samples, batch_size)):
    chunk = test_df.iloc[offset : offset + batch_size]

    for column, target in _column_targets:
        # extend() consumes the encoded batch item by item, so each row's
        # embedding becomes its own list entry.
        target.extend(model.encode(chunk[column].tolist()))

In [None]:
# compute_mrr documents `negative_vecs_list` as a list of negative vectors per
# anchor, but negative_embedding holds exactly one vector per anchor. Passing
# the bare vectors makes `[positive] + negatives` a NumPy element-wise sum
# rather than a list concatenation, which leaves a single candidate and forces
# every rank to 1. Wrap each negative in its own one-element list instead.
negatives_per_anchor = [[negative] for negative in negative_embedding]

mrr = compute_mrr(anchor_embedding, positive_embedding, negatives_per_anchor)
print(f"MRR: {mrr:.4f}")