In [1]:
import pandas as pd
from tqdm.auto import tqdm

from meal_mentor.ingest import ingest_data, load_index
from meal_mentor.ingest import load_index


In [2]:
def minsearch_search(query, boost=None, num_results=10):
    """Run ``query`` against the notebook-level ``index``.

    Relies on a global named ``index`` having been created (by the index cell)
    before this function is called.

    Args:
        query: Query value forwarded to ``index.search`` as ``query``.
        boost: Optional mapping forwarded as ``boost_dict``. ``None`` (the
            default) is replaced by a new empty dict.
        num_results: Value forwarded as ``num_results``. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        Whatever ``index.search`` returns.
    """
    # Build the empty dict per call instead of using a mutable default argument.
    if boost is None:
        boost = {}

    return index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=num_results,
    )

In [3]:
def hit_rate(relevance_total):
    """Return the fraction of queries whose result list contains a hit.

    Args:
        relevance_total: One sequence per query; an entry equal to ``True``
            marks a result that matched the expected document.

    Returns:
        Number of lines containing ``True`` divided by the number of lines,
        or ``0.0`` when ``relevance_total`` is empty.
    """
    # Nothing was evaluated: report 0.0 instead of raising ZeroDivisionError.
    if len(relevance_total) == 0:
        return 0.0

    hits = sum(1 for line in relevance_total if True in line)
    return hits / len(relevance_total)


def mrr(relevance_total):
    """Return the mean reciprocal rank (MRR) over all queries.

    Each query contributes ``1 / rank`` of its FIRST matching result (ranks are
    1-based) or ``0`` when it has no match, so the score stays within ``[0, 1]``
    even if the expected document shows up more than once in a result list.

    Args:
        relevance_total: One sequence per query, ordered by rank; an entry
            equal to ``True`` marks a result that matched the expected document.

    Returns:
        The average reciprocal rank as a float, or ``0.0`` when
        ``relevance_total`` is empty.
    """
    # Nothing was evaluated: report 0.0 instead of raising ZeroDivisionError.
    if len(relevance_total) == 0:
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            # Equality (not identity) mirrors the `True in line` test in hit_rate.
            if is_relevant == True:  # noqa: E712
                total_score += 1 / rank
                # Only the first hit counts; later duplicates must not add to the score.
                break

    return total_score / len(relevance_total)

In [4]:
def evaluate(ground_truth, search_function):
    """Score ``search_function`` against ``ground_truth`` with hit rate and MRR.

    Args:
        ground_truth: Iterable of records. Each record's ``'id'`` is the id of
            the document the search is expected to return.
        search_function: Callable that receives one ground-truth record and
            returns an iterable of results, each exposing an ``'id'`` key.

    Returns:
        ``{'hit_rate': ..., 'mrr': ...}`` computed over every ground-truth
        record. Both values are ``0.0`` when ``ground_truth`` is empty.
    """
    relevance_total = []

    for q in tqdm(ground_truth):
        try:
            # Compare ids as strings so 7 and '7' count as the same document.
            expected_id = str(q['id'])
            results = search_function(q)
            relevance = [str(d['id']) == expected_id for d in results]
        except Exception as e:
            print(f"Error processing query: {q} with exception: {e}")
            # A failed query is recorded as a miss. Dropping it would shrink the
            # denominator and silently inflate both metrics.
            relevance = []
        relevance_total.append(relevance)

    # Empty ground truth: avoid dividing by zero inside the metric helpers.
    if not relevance_total:
        return {'hit_rate': 0.0, 'mrr': 0.0}

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

## Evaluate retrieval using MinSearch and the ground truth data generated with gpt-4o-mini

In [5]:
# Load the ground-truth questions for this section (the gpt-4o-mini file) and
# preview the first rows.
df_question = pd.read_csv('../data/ground-truth-retrieval_4o_mini.csv')
df_question.head()

Unnamed: 0,id,question
0,0,Can you provide the nutritional breakdown for ...
1,0,What makes this Potato Latkes recipe suitable ...
2,0,Are there any specific kosher dietary guidelin...
3,0,How can I adjust the ingredients in the Potato...
4,0,What type of cuisine do the Potato Latkes belo...


In [6]:
# One dict per CSV row. evaluate() reads 'id' from each record, and the search
# lambda passed to it reads 'question'.
ground_truth = df_question.to_dict(orient='records')
ground_truth[0]

{'id': 0,
 'question': 'Can you provide the nutritional breakdown for the Potato Latkes recipe, including protein, carbs, and fat content?'}

In [9]:
# Load the recipe documents that will be indexed, then show the first one.
# NOTE(review): ingest_data's return type is not visible here; presumably a
# list of dicts (documents[0] displays as a dict). Confirm against
# meal_mentor.ingest.
documents = ingest_data(file_path='../data/data.csv')
documents[0]

   id diet_type                                  recipe_name cuisine_type  \
0   0      dash  Potato Latkes Made Simple: A Twitter Recipe       kosher   
1   1      dash                             Avocado Dressing     american   
2   2      dash                                  Bread Salad     american   
3   3      dash                             Ultimat Sparkler        world   
4   4      dash                                  Yogurt Pops     american   

   protein(g)  carbs(g)  fat(g)  
0       31.55    110.84  118.28  
1        4.18     18.97  118.42  
2       44.09    153.84   86.03  
3        0.32     16.32    0.08  
4        9.07     34.05    7.90  


{'id': 0,
 'diet_type': 'dash',
 'recipe_name': 'Potato Latkes Made Simple: A Twitter Recipe',
 'cuisine_type': 'kosher',
 'protein(g)': 31.55,
 'carbs(g)': 110.84,
 'fat(g)': 118.28}

In [10]:
# Fields passed to load_index as text fields.
# NOTE(review): 'id' and the numeric columns ('protein(g)', 'carbs(g)',
# 'fat(g)') are listed as text fields; presumably load_index converts them to
# strings. Confirm against meal_mentor.ingest.
text_fields = [
    'id',
    'recipe_name',
    'cuisine_type',
    'diet_type',
    'protein(g)',
    'carbs(g)',
    'fat(g)',
]
# 'id' is also passed as the only keyword field.
keyword_fields = ['id']
# Binds the global `index` that minsearch_search() reads.
index = load_index(
    documents=documents,
    text_fields=text_fields,
    keyword_fields=keyword_fields
)

In [11]:
# Score retrieval on the current ground_truth: each record's question text is
# the query, and minsearch_search runs with its default (empty) boost.
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/1000 [00:00<?, ?it/s]

{'hit_rate': 0.898, 'mrr': 0.85794126984127}

Results: {'hit_rate': 0.898, 'mrr': 0.85794126984127}

## Evaluate retrieval using MinSearch and the ground truth data generated with gpt-4o

In [15]:
# Load the ground-truth questions for this section (the gpt-4o file) and
# preview the first rows. This rebinds df_question, replacing the gpt-4o-mini
# frame loaded earlier in the notebook.
df_question = pd.read_csv('../data/ground-truth-retrieval_4o.csv')
df_question.head()

Unnamed: 0,id,question
0,0,What is the protein content in the Potato Latk...
1,0,Can you tell me how many grams of carbohydrate...
2,0,I need to know the fat content in the Potato L...
3,0,What type of cuisine is the Potato Latkes Made...
4,0,Is the Potato Latkes Made Simple recipe suitab...


In [16]:
# One dict per CSV row. This rebinds ground_truth, so every evaluate() call
# from here on scores against the gpt-4o question set.
ground_truth = df_question.to_dict(orient='records')
ground_truth[0]

{'id': 0,
 'question': 'What is the protein content in the Potato Latkes Made Simple recipe?'}

In [17]:
# Reload the recipe documents and show the first one.
# NOTE(review): same call and same file as the earlier ingest cell; the
# documents do not depend on which ground-truth file is loaded, so this
# re-run looks redundant. Confirm before removing.
documents = ingest_data(file_path='../data/data.csv')
documents[0]

{'id': 0,
 'diet_type': 'dash',
 'recipe_name': 'Potato Latkes Made Simple: A Twitter Recipe',
 'cuisine_type': 'kosher',
 'protein(g)': 31.55,
 'carbs(g)': 110.84,
 'fat(g)': 118.28}

In [18]:
# Same field lists and load_index call as the earlier index cell.
# NOTE(review): appears to rebuild an identical index; candidate for reuse
# instead of copy-paste. Confirm load_index has no other side effects.
text_fields = [
    'id',
    'recipe_name',
    'cuisine_type',
    'diet_type',
    'protein(g)',
    'carbs(g)',
    'fat(g)',
]
# 'id' is also passed as the only keyword field.
keyword_fields = ['id']
# Rebinds the global `index` that minsearch_search() reads.
index = load_index(
    documents=documents,
    text_fields=text_fields,
    keyword_fields=keyword_fields
)

In [19]:
# Score retrieval on the gpt-4o ground truth: each record's question text is
# the query, and minsearch_search runs with its default (empty) boost.
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/1000 [00:00<?, ?it/s]

{'hit_rate': 0.947, 'mrr': 0.9229761904761906}

Results: {'hit_rate': 0.947, 'mrr': 0.9229761904761906}