# Retrieval evaluation: MRR, Precision@k, and Recall@k

In [128]:
import pandas as pd
import chromadb
import numpy as np
from chromadb.config import Settings
from src.vectorstorage import EmbeddingVectorStorage
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from tqdm.auto import tqdm

In [129]:
# Evaluation set; its `question` and `top_score_id` columns are read by the
# metric functions defined further down in this notebook.
df_eval = pd.read_csv('eval_test_df.csv')

# Where the persisted Chroma store lives and which collection is evaluated
storage_path = 'data/chroma'
collection_name = "bge_m3_embed_RecursiveCharacterTextSplitter"

# Open the on-disk store with default settings and look up the collection by name
settings = Settings()
client = chromadb.PersistentClient(path=storage_path, settings=settings)
collection = client.get_collection(collection_name)


In [130]:
# Display the evaluation set loaded from eval_test_df.csv
df_eval

Unnamed: 0.1,Unnamed: 0,example_id,question_id,question,relevant_text,answer,article_url,top_score,top_score_id
0,0,1,1,What is the innovation behind Leclanché's new ...,Leclanché said it has developed an environment...,Leclanché's innovation is using a water-based ...,https://www.sgvoice.net/strategy/technology/23...,96,d4e62c94318b4b2d09a1f9519b9ba65b
1,1,2,2,What is the EU’s Green Deal Industrial Plan?,The Green Deal Industrial Plan is a bid by the...,The EU’s Green Deal Industrial Plan aims to en...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83
2,2,3,2,What is the EU’s Green Deal Industrial Plan?,The European counterpart to the US Inflation R...,The EU’s Green Deal Industrial Plan aims to en...,https://www.pv-magazine.com/2023/02/02/europea...,98,80181cd176814323bb67524614a2f11e
3,3,4,3,What are the four focus areas of the EU's Gree...,The new plan is fundamentally focused on four ...,The four focus areas of the EU's Green Deal In...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83
4,4,5,4,When did the cooperation between GM and Honda ...,What caught our eye was a new hookup between G...,July 2013,https://cleantechnica.com/2023/05/08/general-m...,100,4411ffe0103fa4d5d9f00addb04806dd
5,5,6,5,Did Colgate-Palmolive enter into PPA agreement...,"Scout Clean Energy, a Colorado-based renewable...",yes,https://solarindustrymag.com/scout-and-colgate...,99,2c10445abbf64fb300a2f97a24a5022a
6,6,7,6,What is the status of ZeroAvia's hydrogen fuel...,"In December, the US startup ZeroAvia announced...",ZeroAvia's hydrogen fuel cell electric aircraf...,https://cleantechnica.com/2023/01/02/the-wait-...,99,d560562b33ae4ee8a314b6cc1a045ed8
7,7,8,7,"What is the ""Danger Season""?",As spring turns to summer and the days warm up...,"The ""Danger Season"" is the period in the North...",https://cleantechnica.com/2023/05/15/what-does...,100,1dff007a49a8bef2920d0db78df34178
8,8,9,8,Is Mississipi an anti-ESG state?,Mississippi is among two dozen or so states in...,yes,https://cleantechnica.com/2023/05/15/mississip...,99,6b8fb712bcb9febe268e3679851e725e
9,9,10,9,Can you hang solar panels on garden fences?,Scaling down from the farm to the garden level...,yes,https://cleantechnica.com/2023/05/18/solar-pan...,99,f85ef61c7e872e59a17fd310ccc811fe


In [131]:
# Read the API token from the local secrets file. A matching line starts with
# `api_token` and carries the value after the first '='; if several lines match,
# the last one wins.
token = None  # reset first so a stale value from an earlier run is never reused
with open('secrets.txt', 'r') as f:
    for line in f:
        if line.startswith('api_token'):
            # Split on the FIRST '=' only: the token itself may contain '=' characters.
            _, separator, value = line.partition('=')
            if separator:
                token = value.strip()

if not token:
    raise ValueError("No non-empty 'api_token=<value>' entry found in secrets.txt")
                     
# Embedding client. `model` is given as a URL rather than a hub model id.
# NOTE(review): presumably a self-hosted inference endpoint serving bge-m3 — confirm.
# `normalize_embeddings=True` is forwarded through `model_kwargs`.
bge_m3_embed = HuggingFaceEndpointEmbeddings(
    model='http://100.67.185.22:8080',
    huggingfacehub_api_token=token,
    model_kwargs={"normalize_embeddings": True}
)

# Project wrapper from src.vectorstorage; it receives the same collection name and
# storage path that were used for the Chroma client earlier in the notebook.
bge_m3_vectordb = EmbeddingVectorStorage(
    method_of_embedding=bge_m3_embed,
    group=collection_name,
    path_persistent=storage_path,
)

In [132]:
# Sanity checks on the vector store before running the evaluation.
# NOTE(review): test_heartbeat presumably pings the Chroma backend — confirm in src.vectorstorage.
print(bge_m3_vectordb.test_heartbeat())
# Should print False: the metrics below need a populated collection to retrieve from.
print(bge_m3_vectordb.collection_is_empty())

1732040348754623000
False


In [None]:
def compute_mrr_with_actual_retrieval(eval_df, vector_db, k=20, verbose=True):
    """
    Mean Reciprocal Rank (MRR) of live retrieval results over an evaluation set.

    For each row, `row['question']` is passed to
    `vector_db.search_similar_w_scores(query, k=k)`. The `origin_doc_id` metadata of
    every returned hit is compared with `row['top_score_id']`; the row's reciprocal
    rank is 1 / (1-based position of the first matching hit), or 0 if none matches.

    Args:
        eval_df: DataFrame with 'question' and 'top_score_id' columns.
        vector_db: object exposing `search_similar_w_scores(query, k=...)`; the first
            element of each result must provide `.metadata['origin_doc_id']`.
        k: number of results requested per query.
        verbose: when False the progress bar is disabled.

    Returns:
        (eval_df, mrr): the SAME DataFrame object that was passed in, with an 'rr'
        column added in place, and the mean of the reciprocal ranks.
    """
    def reciprocal_rank(record):
        # Retrieve first, then read the ground-truth id of this row
        hits = vector_db.search_similar_w_scores(record['question'], k=k)
        hit_ids = [hit[0].metadata['origin_doc_id'] for hit in hits]
        target_id = record['top_score_id']
        if target_id not in hit_ids:
            return 0  # ground-truth document is not among the top-k hits
        return 1 / (hit_ids.index(target_id) + 1)

    progress = tqdm(eval_df.iterrows(), desc="Computing MRR", disable=not verbose, total=len(eval_df))
    reciprocal_ranks = [reciprocal_rank(record) for _, record in progress]

    # Store per-query values on the caller's frame, then aggregate
    eval_df['rr'] = reciprocal_ranks
    return eval_df, np.mean(reciprocal_ranks)

# The function adds an 'rr' column to df_eval in place and returns that same frame
# together with the MRR, so updated_eval_df and df_eval are one object.
updated_eval_df, mrr = compute_mrr_with_actual_retrieval(df_eval, bge_m3_vectordb)
print(f"Mean Reciprocal Rank (MRR): {mrr}")

Computing MRR: 100%|██████████| 23/23 [00:00<00:00, 40.14it/s]

Mean Reciprocal Rank (MRR): 0.6354382332643201





In [135]:
def compute_precision_for_ks_add_columns(eval_df, vector_db, ks=(1, 2, 5, 10), verbose=True):
    """
    Compute precision at multiple values of k for each query and add results as columns in the DataFrame.

    For each row, `max(ks)` results are retrieved once with
    `vector_db.search_similar_w_scores(query, max(ks))`. For every cutoff k,
    precision@k is the number of results among the first k whose
    `origin_doc_id` metadata equals `row['top_score_id']`, divided by k.

    Args:
        eval_df: DataFrame with 'question' and 'top_score_id' columns.
        vector_db: object exposing `search_similar_w_scores(query, k)`; the first
            element of each result must provide `.metadata['origin_doc_id']`.
        ks: iterable of cutoffs; one 'precision@{k}' column is written per cutoff.
        verbose: when False the progress bar is disabled.

    Returns:
        The SAME DataFrame object that was passed in, with the 'precision@{k}'
        columns added (or overwritten) in place.

    Raises:
        ValueError: if `ks` is empty.
    """
    ks = list(ks)
    if not ks:
        raise ValueError("ks must contain at least one cutoff value")
    max_k = max(ks)  # a single retrieval of max(ks) results covers every cutoff

    # Values are collected per cutoff in row order and assigned positionally at the
    # end. Writing cell-by-cell through the index label would overwrite every row
    # sharing that label when the index is not unique.
    precision_by_k = {k: [] for k in ks}

    query_truth_pairs = zip(eval_df['question'], eval_df['top_score_id'])
    for query, ground_truth_id in tqdm(query_truth_pairs, desc="Computing Precision for multiple k", disable=not verbose, total=len(eval_df)):
        retrieved_docs = vector_db.search_similar_w_scores(query, max_k)
        retrieved_doc_ids = [doc[0].metadata['origin_doc_id'] for doc in retrieved_docs]

        for k, values in precision_by_k.items():
            relevant_docs = sum(1 for doc_id in retrieved_doc_ids[:k] if doc_id == ground_truth_id)
            values.append(relevant_docs / k)

    for k, values in precision_by_k.items():
        # Explicit float dtype keeps the column numeric even for an empty frame
        eval_df[f'precision@{k}'] = np.asarray(values, dtype=float)

    return eval_df

# df_eval is modified in place and returned, so updated_eval_df refers to the same frame.
updated_eval_df = compute_precision_for_ks_add_columns(df_eval, bge_m3_vectordb)
updated_eval_df.head()

Computing Precision for multiple k: 100%|██████████| 23/23 [00:00<00:00, 34.85it/s]


Unnamed: 0.1,Unnamed: 0,example_id,question_id,question,relevant_text,answer,article_url,top_score,top_score_id,rr,precision@1,precision@2,precision@5,precision@10
0,0,1,1,What is the innovation behind Leclanché's new ...,Leclanché said it has developed an environment...,Leclanché's innovation is using a water-based ...,https://www.sgvoice.net/strategy/technology/23...,96,d4e62c94318b4b2d09a1f9519b9ba65b,1.0,1.0,1.0,0.4,0.2
1,1,2,2,What is the EU’s Green Deal Industrial Plan?,The Green Deal Industrial Plan is a bid by the...,The EU’s Green Deal Industrial Plan aims to en...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83,1.0,1.0,0.5,0.2,0.3
2,2,3,2,What is the EU’s Green Deal Industrial Plan?,The European counterpart to the US Inflation R...,The EU’s Green Deal Industrial Plan aims to en...,https://www.pv-magazine.com/2023/02/02/europea...,98,80181cd176814323bb67524614a2f11e,0.333333,0.0,0.0,0.2,0.1
3,3,4,3,What are the four focus areas of the EU's Gree...,The new plan is fundamentally focused on four ...,The four focus areas of the EU's Green Deal In...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83,1.0,1.0,1.0,0.4,0.3
4,4,5,4,When did the cooperation between GM and Honda ...,What caught our eye was a new hookup between G...,July 2013,https://cleantechnica.com/2023/05/08/general-m...,100,4411ffe0103fa4d5d9f00addb04806dd,1.0,1.0,0.5,0.2,0.2


In [137]:
def compute_recall_for_ks_add_columns(eval_df, vector_db, ks=(1, 2, 5, 10), verbose=True):
    """
    Compute recall at multiple values of k for each query and add results as columns in the DataFrame.

    For each row, `max(ks)` results are retrieved once with
    `vector_db.search_similar_w_scores(query, max(ks))`. Exactly one relevant
    document (`row['top_score_id']`) is assumed per query, so recall@k is 1.0 when
    that id appears among the `origin_doc_id` values of the first k results and
    0.0 otherwise.

    Args:
        eval_df: DataFrame with 'question' and 'top_score_id' columns.
        vector_db: object exposing `search_similar_w_scores(query, k)`; the first
            element of each result must provide `.metadata['origin_doc_id']`.
        ks: iterable of cutoffs; one 'recall@{k}' column is written per cutoff.
        verbose: when False the progress bar is disabled.

    Returns:
        The SAME DataFrame object that was passed in, with the 'recall@{k}'
        columns added (or overwritten) in place.

    Raises:
        ValueError: if `ks` is empty.
    """
    ks = list(ks)
    if not ks:
        raise ValueError("ks must contain at least one cutoff value")
    max_k = max(ks)  # a single retrieval of max(ks) results covers every cutoff

    total_relevant = 1  # only one relevant document is assumed per query

    # Values are collected per cutoff in row order and assigned positionally at the
    # end. Writing cell-by-cell through the index label would overwrite every row
    # sharing that label when the index is not unique.
    recall_by_k = {k: [] for k in ks}

    query_truth_pairs = zip(eval_df['question'], eval_df['top_score_id'])
    for query, ground_truth_id in tqdm(query_truth_pairs, desc="Computing Recall for multiple k", disable=not verbose, total=len(eval_df)):
        retrieved_docs = vector_db.search_similar_w_scores(query, max_k)
        retrieved_doc_ids = [doc[0].metadata['origin_doc_id'] for doc in retrieved_docs]

        for k, values in recall_by_k.items():
            is_relevant_retrieved = int(ground_truth_id in retrieved_doc_ids[:k])
            values.append(is_relevant_retrieved / total_relevant)

    for k, values in recall_by_k.items():
        # Explicit float dtype keeps the column numeric even for an empty frame
        eval_df[f'recall@{k}'] = np.asarray(values, dtype=float)

    return eval_df

# Adds the recall@k columns to df_eval in place. The function returns its input, so
# updated_eval_df is the same object and keeps the columns added by earlier cells.
updated_eval_df = compute_recall_for_ks_add_columns(df_eval, bge_m3_vectordb)

Computing Recall for multiple k: 100%|██████████| 23/23 [00:00<00:00, 43.59it/s]


In [138]:
# Display the evaluation frame with the per-query metric columns added by the cells above
updated_eval_df

Unnamed: 0.1,Unnamed: 0,example_id,question_id,question,relevant_text,answer,article_url,top_score,top_score_id,rr,precision@1,precision@2,precision@5,precision@10,recall@1,recall@2,recall@5,recall@10
0,0,1,1,What is the innovation behind Leclanché's new ...,Leclanché said it has developed an environment...,Leclanché's innovation is using a water-based ...,https://www.sgvoice.net/strategy/technology/23...,96,d4e62c94318b4b2d09a1f9519b9ba65b,1.0,1.0,1.0,0.4,0.2,1.0,1.0,1.0,1.0
1,1,2,2,What is the EU’s Green Deal Industrial Plan?,The Green Deal Industrial Plan is a bid by the...,The EU’s Green Deal Industrial Plan aims to en...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83,1.0,1.0,0.5,0.2,0.3,1.0,1.0,1.0,1.0
2,2,3,2,What is the EU’s Green Deal Industrial Plan?,The European counterpart to the US Inflation R...,The EU’s Green Deal Industrial Plan aims to en...,https://www.pv-magazine.com/2023/02/02/europea...,98,80181cd176814323bb67524614a2f11e,0.333333,0.0,0.0,0.2,0.1,0.0,0.0,1.0,1.0
3,3,4,3,What are the four focus areas of the EU's Gree...,The new plan is fundamentally focused on four ...,The four focus areas of the EU's Green Deal In...,https://www.sgvoice.net/policy/25396/eu-seeks-...,100,0ff0fa30dbc4a6cedb659fa0d221ec83,1.0,1.0,1.0,0.4,0.3,1.0,1.0,1.0,1.0
4,4,5,4,When did the cooperation between GM and Honda ...,What caught our eye was a new hookup between G...,July 2013,https://cleantechnica.com/2023/05/08/general-m...,100,4411ffe0103fa4d5d9f00addb04806dd,1.0,1.0,0.5,0.2,0.2,1.0,1.0,1.0,1.0
5,5,6,5,Did Colgate-Palmolive enter into PPA agreement...,"Scout Clean Energy, a Colorado-based renewable...",yes,https://solarindustrymag.com/scout-and-colgate...,99,2c10445abbf64fb300a2f97a24a5022a,0.25,0.0,0.0,0.2,0.1,0.0,0.0,1.0,1.0
6,6,7,6,What is the status of ZeroAvia's hydrogen fuel...,"In December, the US startup ZeroAvia announced...",ZeroAvia's hydrogen fuel cell electric aircraf...,https://cleantechnica.com/2023/01/02/the-wait-...,99,d560562b33ae4ee8a314b6cc1a045ed8,0.5,0.0,0.5,0.4,0.3,0.0,1.0,1.0,1.0
7,7,8,7,"What is the ""Danger Season""?",As spring turns to summer and the days warm up...,"The ""Danger Season"" is the period in the North...",https://cleantechnica.com/2023/05/15/what-does...,100,1dff007a49a8bef2920d0db78df34178,1.0,1.0,1.0,0.4,0.2,1.0,1.0,1.0,1.0
8,8,9,8,Is Mississipi an anti-ESG state?,Mississippi is among two dozen or so states in...,yes,https://cleantechnica.com/2023/05/15/mississip...,99,6b8fb712bcb9febe268e3679851e725e,1.0,1.0,1.0,0.6,0.3,1.0,1.0,1.0,1.0
9,9,10,9,Can you hang solar panels on garden fences?,Scaling down from the farm to the garden level...,yes,https://cleantechnica.com/2023/05/18/solar-pan...,99,f85ef61c7e872e59a17fd310ccc811fe,1.0,1.0,1.0,0.4,0.2,1.0,1.0,1.0,1.0
