
# Evaluation notebook

This notebook evaluates the different RAG retrievers built for Task 1.

Metrics to evaluate:

Performance: (Score with GPT4)

Computation time: (milliseconds [ms])



In [None]:
#Install the required packages
! pip install langchain_community==0.0.13
! pip install sentence_transformers
! pip install torchvision
! pip install plotly
! pip install nbformat


In [8]:
#Required imports
from components.retrievers.Faiss_retriever import Faiss_retrieve
from components.retrievers.qdrant_retriever import Qdrant_retrieve
from components.retrievers.Weviate_retriever import Weviate_retrieve_keyword, Weviate_retrieve_vector, Weviate_retrieve_hybrid
from components.retrievers.Native_retriever import Native_retrieve
from components.retrievers.Native_retriever_Tfidf import Tfidf_retrieve
from components.retrievers.Native_retriever_BM25 import bm25_retrieve
from components.retrievers.Native_random import Random_retrieve
from components.retrievers.Azure_retriever import Azure_retrieve_vector, Azure_retrieve_keyword, Azure_retrieve_hybrid
from components.llm_call.gpt4_llm import call_llm_gpt4
import time
import pandas as pd
import gc
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
#GPU turn on if available
import torch
# Choose the compute device in order of preference: Apple MPS, then CUDA, then CPU.
# torch.cuda.is_available() is only consulted when MPS is not available.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Sentence-Transformers model placed on the selected device; it is the object passed
# as `embeddings` to the Native and Azure vector/hybrid retrievers in this notebook.
embeddings = SentenceTransformer('all-MiniLM-L6-v2', device=device)

directory_data = 'data/'   #Path to the pdf source

# LangChain wrapper around the same model name; it is passed to the Faiss retriever.
embeddings_Faiss = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Test queries used to evaluate every retriever (one query per line).
list_queries = [
    'Hi, do you have cola?',
    'Hi I want to have a Fire Zinger Stacker without sauce and a cola',
    'Give me a Veggie Tender, medium, with salad',
    'Give me an orange chocolate milkshake, medium',
    'Give me the gluten free burger options',
    'How many calories does the Colonel have?',
    'Can I get a Whopper?',
]

#Prompt for evaluation
def build_prompt(question,  context):
    """Build the grading prompt sent to the evaluator LLM.

    The prompt asks whether ``context`` contains information about the products
    mentioned in ``question`` and instructs the model to always answer 1 or 0.

    Args:
        question: The user query being evaluated.
        context: The retrieved context to be judged against the query.

    Returns:
        str: The fully formatted prompt text.
    """
    # Each entry is one line of the prompt; whitespace-only entries are kept so the
    # resulting text matches the original layout exactly.
    segments = [
        " ",
        "    Your task is to determine if the given context contains the information about the products mentioned in the question.",
        "",
        f"    Question: {question}",
        "",
        f"    Context:  {context}",
        "",
        "    ",
        "    Always output 1 or 0.",
        "    1 = contains information about the products mentioned in the question",
        "    0 = not contains information about the products mentioned in the question",
        "    ",
        "    ",
    ]
    return "\n".join(segments)

In [9]:
# Results table: starts with a single 'query' column, one row per test query.
df = pd.DataFrame({'query': list_queries})

# Value handed to every retriever call as `k`
# (presumably the number of matches to return - confirm against the retrievers).
k = 3

#Functions to retrieve and time the retrieval for each method
def _score_and_cleanup(query, context, elapsed_seconds):
    """Score a retrieved context with the evaluator LLM and release cached memory.

    Args:
        query: The query that was sent to the retriever.
        context: Whatever the retriever returned for ``query``.
        elapsed_seconds: Duration of the retrieval call, in seconds.

    Returns:
        tuple: ``(context, elapsed_ms, result)`` where ``elapsed_ms`` is the
        retrieval time in milliseconds and ``result`` is 1 or 0.
    """
    #Evaluating the context
    prompt = build_prompt(query, context)
    # Tolerate surrounding whitespace/newlines in the reply; anything that is still
    # not exactly '1' or '0' is counted as a miss (0).
    verdict = str(call_llm_gpt4(prompt)).strip()
    result = int(verdict) if verdict in ('1', '0') else 0

    #Cleaning cache
    torch.cuda.empty_cache()
    # then collect the garbage
    gc.collect()
    # Only touch the MPS cache when an MPS backend actually exists; the notebook
    # also supports CUDA/CPU machines, where this call is expected to fail.
    if torch.backends.mps.is_available() and hasattr(torch, "mps"):
        torch.mps.empty_cache()

    return context, elapsed_seconds * 1000, result


def retrieve_and_time_Faiss(retriever,path_db, embeddings, k, query, path_to_json):
    """Time a Faiss-style retriever call and score its output.

    Args:
        retriever: Callable invoked as ``retriever(path_db, embeddings, k, query, path_to_json)``.
        path_db: Forwarded to ``retriever`` as its first argument.
        embeddings: Embedding object forwarded to ``retriever``.
        k: Forwarded to ``retriever``.
        query: The query text; also used to build the evaluation prompt.
        path_to_json: Forwarded to ``retriever`` as its last argument.

    Returns:
        tuple: ``(context, elapsed_ms, result)``; only the retriever call is timed.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring intervals.
    start = time.perf_counter()
    context = retriever(path_db,embeddings, k, query, path_to_json)
    elapsed = time.perf_counter() - start
    return _score_and_cleanup(query, context, elapsed)


def retrieve_and_time_vector(retriever, embeddings, k, query):
    """Time a retriever invoked as ``retriever(embeddings, k, query)`` and score its output.

    Args:
        retriever: Callable invoked as ``retriever(embeddings, k, query)``.
        embeddings: Embedding object forwarded to ``retriever``.
        k: Forwarded to ``retriever``.
        query: The query text; also used to build the evaluation prompt.

    Returns:
        tuple: ``(context, elapsed_ms, result)``; only the retriever call is timed.
    """
    start = time.perf_counter()
    context = retriever(embeddings, k, query)
    elapsed = time.perf_counter() - start
    return _score_and_cleanup(query, context, elapsed)


def retrieve_and_time_keyword(retriever, k, query):
    """Time a retriever invoked as ``retriever(k, query)`` and score its output.

    Args:
        retriever: Callable invoked as ``retriever(k, query)``.
        k: Forwarded to ``retriever``.
        query: The query text; also used to build the evaluation prompt.

    Returns:
        tuple: ``(context, elapsed_ms, result)``; only the retriever call is timed.
    """
    start = time.perf_counter()
    context = retriever( k, query)
    elapsed = time.perf_counter() - start
    return _score_and_cleanup(query, context, elapsed)



# Make retrievals and measure time (store in a DataFrame)
# Each entry maps a column prefix to a one-argument callable that runs a single
# query through one retriever and returns (context, time_ms, result).
retrieval_runs = {
    'Faiss': lambda q: retrieve_and_time_Faiss(
        Faiss_retrieve, 'components/db_builds/db_Faiss', embeddings_Faiss, k, q, 'data/KFC_enriched.json'
    ),
    'Qdrant': lambda q: retrieve_and_time_keyword(Qdrant_retrieve, k, q),
    'Weviate_vector': lambda q: retrieve_and_time_keyword(Weviate_retrieve_vector, k, q),
    'Weviate_keyword': lambda q: retrieve_and_time_keyword(Weviate_retrieve_keyword, k, q),
    'Weviate_hybrid': lambda q: retrieve_and_time_keyword(Weviate_retrieve_hybrid, k, q),
    'Native_vector': lambda q: retrieve_and_time_vector(Native_retrieve, embeddings, k, q),
    'Tfidf': lambda q: retrieve_and_time_keyword(Tfidf_retrieve, k, q),
    'bm25': lambda q: retrieve_and_time_keyword(bm25_retrieve, k, q),
    'Azure_vector': lambda q: retrieve_and_time_vector(Azure_retrieve_vector, embeddings, k, q),
    'Azure_keyword': lambda q: retrieve_and_time_keyword(Azure_retrieve_keyword, k, q),
    'Azure_hybrid': lambda q: retrieve_and_time_vector(Azure_retrieve_hybrid, embeddings, k, q),
    'Random': lambda q: retrieve_and_time_keyword(Random_retrieve, k, q),
}

# Every run adds three columns to df: <prefix>_context, <prefix>_time, <prefix>_result.
# The runs execute in insertion order, one retriever at a time over all queries.
for prefix, run_query in retrieval_runs.items():
    df[[f'{prefix}_context', f'{prefix}_time', f'{prefix}_result']] = df['query'].apply(
        lambda q: pd.Series(run_query(q))
    )



In [10]:
#Display the DF with the results
# Lift pandas' display limits so the full retrieved contexts are visible.
# set_option accepts several (option, value) pairs in a single call.
pd.set_option(
    'display.max_colwidth', None,
    'display.max_columns', None,
    'display.max_rows', None,
)
df.head(3)

Unnamed: 0,query,Faiss_context,Faiss_time,Faiss_result,Qdrant_context,Qdrant_time,Qdrant_result,Weviate_vector_context,Weviate_vector_time,Weviate_vector_result,Weviate_keyword_context,Weviate_keyword_time,Weviate_keyword_result,Weviate_hybrid_context,Weviate_hybrid_time,Weviate_hybrid_result,Native_vector_context,Native_vector_time,Native_vector_result,Tfidf_context,Tfidf_time,Tfidf_result,bm25_context,bm25_time,bm25_result,Azure_vector_context,Azure_vector_time,Azure_vector_result,Azure_keyword_context,Azure_keyword_time,Azure_keyword_result,Azure_hybrid_context,Azure_hybrid_time,Azure_hybrid_result,Random_context,Random_time,Random_result
0,"Hi, do you have cola?","{Match 0: product: pepsi, content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}, category: drinks , Price: 2.8, Available: False, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} }, {Match 1: product: guava, content: {}, category: drinks , Price: 15, Available: False, Keywords: {'keywords': 'guava, fruit, drink, juice, tropical'} }, {Match 2: product: tea, content: {}, category: drinks , Price: 15, Available: False, Keywords: {'keywords': 'tea, drink, hot beverage, caffeine, herbal'} },",32.880783,1,"{Match 0: product: pepsi, content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}, category: drinks, Price: 2.8, Available: False, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} }, {Match 1: product: sourcy, content: {}, category: drinks, Price: 15, Available: False, Keywords: {'keywords': 'water, drink, sourcy, mineral water, non-alcoholic'} }, {Match 2: product: coffee, content: {}, category: drinks, Price: 15, Available: False, Keywords: {'keywords': 'coffee, beverage, caffeine, espresso, hot drink'} },",444.898844,1,"{Match 0: {'available': False, 'category': 'drinks', 'content': ""{'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}"", 'item_id': 'd2', 'keywords': ""{'keywords': 'soda, beverage, cola, carbonated drink, pepsi'}"", 'price': '2.8', 'product': 'pepsi'} }, {Match 1: {'available': False, 'category': 'drinks', 'content': '{}', 'item_id': 'd12', 'keywords': ""{'keywords': 'soda, drink, citrus, refreshment, sisi'}"", 'price': '15', 'product': 'sisi'} }, {Match 2: {'available': False, 'category': 'drinks', 'content': '{}', 'item_id': 'd13', 'keywords': ""{'keywords': 'soda, drink, soft drink, fernandes, non-alcoholic'}"", 'price': '15', 'product': 'fernandes'} },",561.834097,1,"{Match 0: {'available': False, 'category': 'drinks', 'content': ""{'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 
'allergens': ['dairy']}"", 'item_id': 'd2', 'keywords': ""{'keywords': 'soda, beverage, cola, carbonated drink, pepsi'}"", 'price': '2.8', 'product': 'pepsi'} },",27.16279,1,"{Match 0: {'available': False, 'category': 'drinks', 'content': ""{'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}"", 'item_id': 'd2', 'keywords': ""{'keywords': 'soda, beverage, cola, carbonated drink, pepsi'}"", 'price': '2.8', 'product': 'pepsi'} }, {Match 1: {'available': False, 'category': 'drinks', 'content': '{}', 'item_id': 'd12', 'keywords': ""{'keywords': 'soda, drink, citrus, refreshment, sisi'}"", 'price': '15', 'product': 'sisi'} }, {Match 2: {'available': False, 'category': 'drinks', 'content': '{}', 'item_id': 'd13', 'keywords': ""{'keywords': 'soda, drink, soft drink, fernandes, non-alcoholic'}"", 'price': '15', 'product': 'fernandes'} },",437.011003,1,"{Match 8: Product: pepsi; Content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; Category: drinks;, Price: 2.8 , Available: false, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} } {Match 13: Product: guava; Content: {}; Category: drinks;, Price: 15 , Available: false, Keywords: {'keywords': 'guava, fruit, drink, juice, tropical'} } {Match 16: Product: espresso; Content: {}; Category: drinks;, Price: 15 , Available: false, Keywords: {'keywords': 'coffee, hot drink, caffeine, espresso, beverage'} }",155.07412,1,"{Match 8: Product: pepsi; Content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; Category: drinks;, Price: 2.8 , Available: false, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} } {Match 30: Product: colonel stacker; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 66, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'chicken, food, burger, stacker, no allergens'} } {Match 32: Product: veggie tender; Content: {'kcal': 150, 'fat': 
100, 'protein': 10, 'itemid': 71, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'vegetarian, food, burger, plant-based, no meat'} }",36.714077,1,"{Match 1: Product: pepsi; Content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; Category: drinks; Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'}; Price: 2.8; Available: false; }, {Match 2: Product: family bucket; Content: [['hot wings', 12], ['original piece', 4], ['crispy tenders', 6], ['filet bites', 9], ['fries', 4], {'from': 'side dishes', 'choose': 4}]; Category: menus; Keywords: ; Price: 30; Available: true; }, {Match 3: Product: zinger burger; Content: {'kcal': 451, 'fat': 25, 'protein': 28, 'itemid': 1, 'allergens': ['wheat', 'soy']}; Category: burgers; Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'}; Price: 4.5; Available: false; },",2.004862,1,"{Match 0: product: pepsi; content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; category: drinks, Price: 2.8, Available: false, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} },",561.373949,1,"{Match 0: product: pepsi; content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; category: drinks, Price: 2.8, Available: false, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} },",408.878088,1,"{Match 0: product: pepsi; content: {'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 9, 'allergens': ['dairy']}; category: drinks, Price: 2.8, Available: false, Keywords: {'keywords': 'soda, beverage, cola, carbonated drink, pepsi'} },",475.938797,1,"{Match 60: Product: variety box meal; Content: [['hot wings', 3], ['original piece', 1], ['crispy tenders', 2], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 2}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 86: Product: family bucket; Content: [['hot wings', 12], ['original 
piece', 4], ['crispy tenders', 6], ['filet bites', 9], ['fries', 4], {'from': 'side dishes', 'choose': 4}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 44: Product: ice cream; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 95, 'allergens': ['']}; Category: desserts;, Price: 15 , Available: false, Keywords: {'keywords': 'sweet, dessert, cold, ice cream, treat'} }",36.532879,0
1,Hi I want to have a Fire Zinger Stacker without sauce and a cola,"{Match 0: product: fire zinger stacker meal, content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}], category: menus , Price: 30, Available: True, Keywords: }, {Match 1: product: fire zinger stacker, content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}, category: burgers , Price: 15, Available: False, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} }, {Match 2: product: fire zinger burger meal, content: [['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}], category: menus , Price: 30, Available: True, Keywords: },",25.997162,1,"{Match 0: product: fire zinger stacker meal, content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}], category: menus, Price: 30, Available: True, Keywords: }, {Match 1: product: fire zinger burger meal, content: [['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}], category: menus, Price: 30, Available: True, Keywords: }, {Match 2: product: fire zinger stacker, content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}, category: burgers, Price: 15, Available: False, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} },",116.695881,1,"{Match 0: {'available': True, 'category': 'menus', 'content': ""[['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm43', 'keywords': 'zinger, meal, side dish, large drink, fire stacker', 'price': '30', 'product': 'fire zinger stacker meal'} }, {Match 1: {'available': False, 'category': 'burgers', 'content': ""{'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}"", 'item_id': 'b8', 'keywords': ""{'keywords': 'spicy, food, chicken, burger, zinger'}"", 'price': '15', 'product': 'fire zinger stacker'} 
}, {Match 2: {'available': True, 'category': 'menus', 'content': ""[['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}]"", 'item_id': 'm4', 'keywords': 'burger, meal, side dish, drink, fire zinger', 'price': '30', 'product': 'fire zinger burger meal'} },",389.599085,1,"{Match 0: {'available': True, 'category': 'menus', 'content': ""[['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm43', 'keywords': 'zinger, meal, side dish, large drink, fire stacker', 'price': '30', 'product': 'fire zinger stacker meal'} }, {Match 1: {'available': False, 'category': 'sauces', 'content': '{}', 'item_id': 's1', 'keywords': ""{'keywords': 'apple, condiment, sauce, dessert, apple sauce'}"", 'price': '15', 'product': 'apple sauce'} }, {Match 2: {'available': True, 'category': 'menus', 'content': ""[['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}]"", 'item_id': 'm4', 'keywords': 'burger, meal, side dish, drink, fire zinger', 'price': '30', 'product': 'fire zinger burger meal'} },",22.738934,1,"{Match 0: {'available': False, 'category': 'burgers', 'content': ""{'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}"", 'item_id': 'b8', 'keywords': ""{'keywords': 'spicy, food, chicken, burger, zinger'}"", 'price': '15', 'product': 'fire zinger stacker'} }, {Match 1: {'available': False, 'category': 'burgers', 'content': ""{'kcal': 100, 'fat': 100, 'protein': 10, 'itemid': 65, 'allergens': ['']}"", 'item_id': 'b9', 'keywords': ""{'keywords': 'spicy, food, chicken, burger, zinger'}"", 'price': '15', 'product': 'fire zinger'} }, {Match 2: {'available': True, 'category': 'menus', 'content': ""[['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm43', 'keywords': 'zinger, meal, side dish, large drink, fire stacker', 'price': '30', 'product': 'fire zinger stacker meal'} 
},",388.784885,1,"{Match 89: Product: fire zinger stacker meal; Content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 28: Product: fire zinger stacker; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} } {Match 50: Product: fire zinger burger meal; Content: [['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}]; Category: menus;, Price: 30 , Available: true, Keywords: }",49.728155,1,"{Match 89: Product: fire zinger stacker meal; Content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 38: Product: apple sauce; Content: {}; Category: sauces;, Price: 15 , Available: false, Keywords: {'keywords': 'apple, condiment, sauce, dessert, apple sauce'} } {Match 28: Product: fire zinger stacker; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} }",37.504911,1,"{Match 1: Product: fire zinger stacker meal; Content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus; Keywords: ; Price: 30; Available: true; }, {Match 2: Product: fire zinger stacker; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}; Category: burgers; Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'}; Price: 15; Available: false; }, {Match 3: Product: apple sauce; Content: {}; Category: sauces; Keywords: {'keywords': 'apple, condiment, sauce, dessert, apple sauce'}; Price: 15; Available: false; },",0.658274,1,"{Match 0: product: fire zinger 
stacker meal; content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; category: menus, Price: 30, Available: true, Keywords: },",483.728886,1,"{Match 0: product: apple sauce; content: {}; category: sauces, Price: 15, Available: false, Keywords: {'keywords': 'apple, condiment, sauce, dessert, apple sauce'} }, {Match 1: product: fire zinger stacker; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}; category: burgers, Price: 15, Available: false, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} }, {Match 2: product: fire zinger stacker meal; content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; category: menus, Price: 30, Available: true, Keywords: },",436.015844,1,"{Match 0: product: fire zinger stacker meal; content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; category: menus, Price: 30, Available: true, Keywords: }, {Match 1: product: apple sauce; content: {}; category: sauces, Price: 15, Available: false, Keywords: {'keywords': 'apple, condiment, sauce, dessert, apple sauce'} }, {Match 2: product: fire zinger stacker; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 64, 'allergens': ['']}; category: burgers, Price: 15, Available: false, Keywords: {'keywords': 'spicy, food, chicken, burger, zinger'} },",448.989868,1,"{Match 33: Product: filet bites; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 68, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'beef, food, burger, meat, bite-sized'} } {Match 89: Product: fire zinger stacker meal; Content: [['fire zinger stacker', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 64: Product: twister meal; Content: [['crispy tenders', 2], {'from': 'side 
dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: }",36.245108,1
2,"Give me a Veggie Tender, medium, with salad","{Match 0: product: snack box 2 veggie tenders, content: [['veggie tenders', 2], ['fries', 1]], category: menus , Price: 30, Available: True, Keywords: }, {Match 1: product: veggie tenders, content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 78, 'allergens': ['']}, category: vegetarian , Price: 15, Available: False, Keywords: {'keywords': 'vegetable, food, vegan, tenders, vegetarian'} }, {Match 2: product: veggie tender, content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 71, 'allergens': ['']}, category: burgers , Price: 15, Available: False, Keywords: {'keywords': 'vegetarian, food, burger, plant-based, no meat'} },",14.442921,1,"{Match 0: product: veggie tenders, content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 78, 'allergens': ['']}, category: vegetarian, Price: 15, Available: False, Keywords: {'keywords': 'vegetable, food, vegan, tenders, vegetarian'} }, {Match 1: product: 4 veggie tender meal, content: [['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}], category: menus, Price: 30, Available: True, Keywords: }, {Match 2: product: snack box 2 veggie tenders, content: [['veggie tenders', 2], ['fries', 1]], category: menus, Price: 30, Available: True, Keywords: },",112.814665,1,"{Match 0: {'available': True, 'category': 'menus', 'content': ""[['veggie tenders', 5], {'from': 'side dishes', 'choose': 2}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm19', 'keywords': 'veggie, meal, tenders, side dishes, large drink', 'price': '30', 'product': 'veggie box meal'} }, {Match 1: {'available': True, 'category': 'menus', 'content': ""[['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}]"", 'item_id': 'm20', 'keywords': 'veggie, meal, drink, side dish, tenders', 'price': '30', 'product': '4 veggie tender meal'} }, {Match 2: {'available': True, 'category': 'menus', 'content': ""[['veggie tenders', 2], {'from': 
'drinks', 'size': 'l'}, {'from': 'sauces', 'choose': 1}, {'from': 'side dishes', 'choose': 1}]"", 'item_id': 'm37', 'keywords': 'veggie, kids meal, healthy, tenders, side dish', 'price': '30', 'product': 'veggie kids meal'} },",448.349953,1,"{Match 0: {'available': True, 'category': 'menus', 'content': ""[['hot wings', 6], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm15', 'keywords': 'veggie, meal, hot wings, side dishes, large drink', 'price': '30', 'product': '4 veggie tender meal'} }, {Match 1: {'available': False, 'category': 'burgers', 'content': ""{'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 71, 'allergens': ['']}"", 'item_id': 'b12', 'keywords': ""{'keywords': 'vegetarian, food, burger, plant-based, no meat'}"", 'price': '15', 'product': 'veggie tender'} }, {Match 2: {'available': False, 'category': 'side dishes', 'content': ""{'kcal': 170, 'fat': 10, 'protein': 2, 'itemid': 7, 'allergens': ['dairy']}"", 'item_id': 'sd1', 'keywords': ""{'keywords': 'cabbage, salad, side dish, coleslaw, vegetable'}"", 'price': '1.7', 'product': 'coleslaw'} },",27.770042,1,"{Match 0: {'available': True, 'category': 'menus', 'content': ""[['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}]"", 'item_id': 'm20', 'keywords': 'veggie, meal, drink, side dish, tenders', 'price': '30', 'product': '4 veggie tender meal'} }, {Match 1: {'available': False, 'category': 'burgers', 'content': ""{'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 71, 'allergens': ['']}"", 'item_id': 'b12', 'keywords': ""{'keywords': 'vegetarian, food, burger, plant-based, no meat'}"", 'price': '15', 'product': 'veggie tender'} }, {Match 2: {'available': True, 'category': 'menus', 'content': ""[['veggie tenders', 5], {'from': 'side dishes', 'choose': 2}, {'from': 'drinks', 'size': 'l'}]"", 'item_id': 'm19', 'keywords': 'veggie, meal, tenders, side dishes, large drink', 'price': '30', 'product': 'veggie box meal'} 
},",341.645002,1,"{Match 46: Product: veggie tenders; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 78, 'allergens': ['']}; Category: vegetarian;, Price: 15 , Available: false, Keywords: {'keywords': 'vegetable, food, vegan, tenders, vegetarian'} } {Match 78: Product: snack box 2 veggie tenders; Content: [['veggie tenders', 2], ['fries', 1]]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 32: Product: veggie tender; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 71, 'allergens': ['']}; Category: burgers;, Price: 15 , Available: false, Keywords: {'keywords': 'vegetarian, food, burger, plant-based, no meat'} }",49.252987,1,"{Match 66: Product: 4 veggie tender meal; Content: [['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 74: Product: veggie boxmeal; Content: [['veggie tender', 5], ['pepsi', 1], {'from': 'side dishes', 'choose': 2}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 61: Product: 4 veggie tender meal; Content: [['hot wings', 6], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: }",36.555052,1,"{Match 1: Product: veggie tender; Content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 71, 'allergens': ['']}; Category: burgers; Keywords: {'keywords': 'vegetarian, food, burger, plant-based, no meat'}; Price: 15; Available: false; }, {Match 2: Product: 4 veggie tender meal; Content: [['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}]; Category: menus; Keywords: ; Price: 30; Available: true; }, {Match 3: Product: 4 veggie tender meal; Content: [['hot wings', 6], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus; Keywords: ; Price: 30; Available: true; },",0.597,1,"{Match 0: product: veggie 
tenders; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 78, 'allergens': ['']}; category: vegetarian, Price: 15, Available: false, Keywords: {'keywords': 'vegetable, food, vegan, tenders, vegetarian'} },",445.387602,1,"{Match 0: product: tender chicken; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 81, 'allergens': ['']}; category: chicken, Price: 15, Available: false, Keywords: {'keywords': 'poultry, food, chicken, tender, no allergens'} }, {Match 1: product: veggie; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 72, 'allergens': ['']}; category: vegetarian, Price: 15, Available: false, Keywords: {'keywords': 'vegetable, healthy, vegan, plant-based, veggie'} }, {Match 2: product: 4 veggie tender meal; content: [['veggie tenders', 4], {'from': 'drinks', 'size': 'l'}, {'from': 'side dishes', 'choose': 1}]; category: menus, Price: 30, Available: true, Keywords: },",416.768074,1,"{Match 0: product: veggie tenders; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 78, 'allergens': ['']}; category: vegetarian, Price: 15, Available: false, Keywords: {'keywords': 'vegetable, food, vegan, tenders, vegetarian'} }, {Match 1: product: tender chicken; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 81, 'allergens': ['']}; category: chicken, Price: 15, Available: false, Keywords: {'keywords': 'poultry, food, chicken, tender, no allergens'} }, {Match 2: product: veggie; content: {'kcal': 150, 'fat': 100, 'protein': 10, 'itemid': 72, 'allergens': ['']}; category: vegetarian, Price: 15, Available: false, Keywords: {'keywords': 'vegetable, healthy, vegan, plant-based, veggie'} },",456.149101,1,"{Match 50: Product: fire zinger burger meal; Content: [['fire zinger', 1], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'm'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 61: Product: 4 veggie tender meal; Content: [['hot wings', 6], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 
'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: } {Match 88: Product: veggie kids meal; Content: [['veggie tenders', 2], {'from': 'side dishes', 'choose': 1}, {'from': 'drinks', 'size': 'l'}]; Category: menus;, Price: 30 , Available: true, Keywords: }",35.035133,1


In [11]:
# Print the results
# (printed label, df column prefix) for every evaluated retriever, in report order.
summary_rows = [
    ('Faiss_retrieve_vector ', 'Faiss'),
    ('Qdrant_retrieve_vector ', 'Qdrant'),
    ('Weviate_retrieve_vector ', 'Weviate_vector'),
    ('Weviate_retrieve_keyword ', 'Weviate_keyword'),
    ('Weviate_retrieve_hybrid ', 'Weviate_hybrid'),
    ('Native_retrieve_vector ', 'Native_vector'),
    ('Tfidf_retrieve_keyword ', 'Tfidf'),
    ('bm25_retrieve_keyword ', 'bm25'),
    ('Azure_retrieve_vector ', 'Azure_vector'),
    ('Azure_retrieve_keyword ', 'Azure_keyword'),
    ('Azure_retrieve_hybrid ', 'Azure_hybrid'),
    ('Random_retrieve ', 'Random'),
]

# One line per retriever: mean retrieval time (ms) and mean 0/1 evaluation result.
for label, prefix in summary_rows:
    print(label, 'Avg Time: ' , df[f'{prefix}_time'].mean(), 'Avg Result: ', df[f'{prefix}_result'].mean())



Faiss_retrieve_vector  Avg Time:  17.373289380754745 Avg Result:  0.5714285714285714
Qdrant_retrieve_vector  Avg Time:  165.34764426095145 Avg Result:  0.7142857142857143
Weviate_retrieve_vector  Avg Time:  417.3260075705392 Avg Result:  0.5714285714285714
Weviate_retrieve_keyword  Avg Time:  27.139527457101003 Avg Result:  0.5714285714285714
Weviate_retrieve_hybrid  Avg Time:  387.44265692574635 Avg Result:  0.5714285714285714
Native_retrieve_vector  Avg Time:  64.38820702689034 Avg Result:  0.5714285714285714
Tfidf_retrieve_keyword  Avg Time:  38.03965023585728 Avg Result:  0.5714285714285714
bm25_retrieve_keyword  Avg Time:  0.7999965122767857 Avg Result:  0.5714285714285714
Azure_retrieve_vector  Avg Time:  468.494176864624 Avg Result:  0.5714285714285714
Azure_retrieve_keyword  Avg Time:  427.03962326049805 Avg Result:  0.5714285714285714
Azure_retrieve_hybrid  Avg Time:  457.8538281576974 Avg Result:  0.5714285714285714
Random_retrieve  Avg Time:  36.07174328395298 Avg Result:  0

In [12]:
#Visualize the results
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

# (method label shown in the plot, df column prefix, method type) per retriever.
method_specs = [
    ('Faiss', 'Faiss', 'Vector'),
    ('Qdrant', 'Qdrant', 'Vector'),
    ('Weviate_vector', 'Weviate_vector', 'Vector'),
    ('Weviate_keyword', 'Weviate_keyword', 'Keyword'),
    ('Weviate_hybrid', 'Weviate_hybrid', 'Hybrid'),
    ('Native_vector', 'Native_vector', 'Vector'),
    ('Tfidf_keyword', 'Tfidf', 'Keyword'),
    ('bm25_keyword', 'bm25', 'Keyword'),
    ('Azure_vector', 'Azure_vector', 'Vector'),
    ('Azure_keyword', 'Azure_keyword', 'Keyword'),
    ('Azure_hybrid', 'Azure_hybrid', 'Hybrid'),
    ('Random (Reference)', 'Random', 'Random'),
]

# Per-method averages taken from the per-query columns of df.
avg_df = pd.DataFrame({
    'Method': [label for label, _, _ in method_specs],
    'Avg_Time': [df[f'{prefix}_time'].mean() for _, prefix, _ in method_specs],
    'Avg_Result': [df[f'{prefix}_result'].mean() for _, prefix, _ in method_specs],
    'Method_Type': [method_type for _, _, method_type in method_specs],
})

# Plot using Plotly Express
fig = px.scatter(
    avg_df,
    x='Avg_Time',
    y='Avg_Result',
    color='Method_Type',
    symbol='Method',
    labels={'Avg_Time': 'Average Time (milliseconds)', 'Avg_Result': 'Average Result'},
    title='Average Time vs Average Result by Method Type',
)

# Add red vertical dashed line at 50 milliseconds, spanning slightly beyond the
# range of the plotted average results.
REFERENCE_TIME_MS = 50
reference_line = go.layout.Shape(
    type="line",
    x0=REFERENCE_TIME_MS,
    x1=REFERENCE_TIME_MS,
    y0=avg_df['Avg_Result'].min() - 0.1,
    y1=avg_df['Avg_Result'].max() + 0.1,
    line=dict(color="red", dash="dash"),
)
fig.add_shape(reference_line)

# Show the plot
fig.show()


# Conclusions


- The local implementation of BM25 (keyword search) is the fastest retrieval method, with an average processing time of 0.80 ms, while its performance is acceptable compared to the other retrieval methods. If required, this approach can be scaled by hosting the .pkl files.

- Weaviate keyword search is the fastest cloud-based retriever, with an average retrieval time of 27.14 ms; this approach is easier to scale up and has acceptable performance.

- Faiss and my native implementation of vector search using Sentence Transformers are the fastest vector-based retrievers; however, for this use case, performance is the same for the keyword-based and vector-based approaches.

- The preliminary enrichment of the dataset with keywords and product dictionaries was a key factor in bringing the performance of keyword search level with that of vector search.

# Next steps:

- Validate the performance of the selected tool with real human labeling and stakeholders.


