In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!pip install -U sentence-transformers

from sentence_transformers import SentenceTransformer, util

Collecting sentence-transformers
  Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)
Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-2.7.0


In [2]:
!pip install unidecode

Collecting unidecode
  Downloading Unidecode-1.3.8-py3-none-any.whl.metadata (13 kB)
Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.8


In [4]:
!pip install nltk

Collecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl.metadata (2.8 kB)
Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: nltk
Successfully installed nltk-3.8.1


In [5]:
import re
import ast
import numpy as np
import pandas as pd
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import unidecode
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity

import nltk
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to /home/jupyter/nltk_data...
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/jupyter/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

# Recipe parsing code

In [None]:
class RecipeCSVParser:
    def __init__(self, csv_f_path):
        # Class initializer takes in path to csv file

        # Load the model once when the instance of the class is created
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))

        self.df = pd.read_csv(csv_f_path)

        self.measures_and_words_to_remove = [
        'teaspoon', 't', 'tsp.', 'tablespoon', 'T', 'tsp', 'tbsp', 'cut', 'rinsed', 'drained',
        'skinned', 'thin', 'g', 'good', 'quality', 'sturdy', 'warm', 'cold', 'hot', 'grated',
        'cup', 'cups', 'ounce', 'ounces', 'oz', 'pound', 'pounds', 'lb', 'lbs', 'kg', 'mg',
        'liter', 'liters', 'milliliter', 'milliliters', 'quart', 'quarts', 'gallon', 'gallons',
        'pinch', 'pinches', 'dash', 'dashes', 'slice', 'slices', 'gram', 'grams', 'large', 'small',
        'medium', 'whole', 'chopped', 'sliced', 'diced', 'fresh', 'dried', 'ground', 'crushed',
        'minced', 'optional', 'finely', 'coarsely', 'about', 'approximately', 'more', 'less', 'divided',
        'for', 'serving', 'freshly', 'to taste', 'needed', 'packed', 'sifted', 'level', 'heaping',
        'heaped', 'rounded', 'sprig', 'sprigs', 'stalk', 'stalks', 'bunch', 'bunches', 'piece', 'pieces',
        'clove', 'cloves', 'leaf', 'leaves', 'and', 'or', 'with', 'without', 'if', 'then', 'else',
        'when', 'plus', 'minus', 'new', 'inch', 'inches', 'ft', 'in', 'in.', 'feet', 'fl', 'ml', 'thinly', 'l', 'liters', 'liter',
        'halved', 'lengthwise', 'separated', 'serve', 'steamed', 'washed', 'cooked', 'boiled', 'tablespoon', 'uncooked', 'removed', 'shredded', 'frozen', 'garnish', 'bit',
        'crosswise', 'strip', 'thin', 'thinly', 'skinless'
        ]


    def ingredient_parser(self, ingredients):
       # Function to parse ingredients from original .csv file

        if isinstance(ingredients, str):
            ingredients = ast.literal_eval(ingredients)

        ingred_list = []
        for i in ingredients:
            i = i.lower()
            i = re.sub(r'[\.,;:\-\–\—\!\?]', '', i)
            words = i.split()
            words = [word for word in words if word not in self.measures_and_words_to_remove and word not in self.stop_words]
            words = [unidecode.unidecode(self.lemmatizer.lemmatize(word)) for word in words if word.isalpha()]
            if words:
                ingred_list.append(' '.join(words))

        return ', '.join(ingred_list)

    def create_ingredients_sentence(self, ingredients):

      # Function to create sentence from input ingredients string

        if ingredients == '' or pd.isna(ingredients):
            return None
        sorted_ingredients = sorted(ingredients.split(', '))
        sorted_ingredients_sentence = ", ".join(sorted_ingredients)
        return f"A recipe containing {sorted_ingredients_sentence}"

    def embed_sentences(self, sentences):
      embedding = self.model.encode(sentences, show_progress_bar=True)
      return embedding


In [None]:
csv_f_path = '/content/drive/MyDrive/Applied CV Project/Recipes/Epicurious_Processed_Recipes_With_Embedding_sentence_alphabetical.csv'
recipeCSVParser = RecipeCSVParser(csv_f_path)

In [None]:
recipeCSVParser.df['Cleaned_Ingredients'] = recipeCSVParser.df['Ingredients'].apply(recipeCSVParser.ingredient_parser)

In [None]:
recipeCSVParser.df['ingredients_sentence'] = recipeCSVParser.df['Cleaned_Ingredients'].apply(recipeCSVParser.create_ingredients_sentence)

In [None]:
embeddings = recipeCSVParser.embed_sentences(recipeCSVParser.df['ingredients_sentence'].values)

Batches:   0%|          | 0/422 [00:00<?, ?it/s]

In [None]:
recipeCSVParser.df['embedding_sentence'] = list(zip(embeddings))
recipeCSVParser.df['embedding_sentence'] = recipeCSVParser.df['embedding_sentence'].apply(lambda x: x[0])

In [None]:
recipeCSVParser.df.drop('embedding',axis=1)

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients,ingredients_sentence,embedding_sentence
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,"chicken, kosher salt, acorn squash, sage, rose...","A recipe containing acorn squash, allpurpose f...","[-0.017373329, 0.030207174, 0.025743747, 0.049..."
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,"egg white, potato, teaspoon kosher salt, black...","A recipe containing black pepper, egg white, p...","[-0.09964901, -0.011831253, -0.020506151, 0.00..."
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,"evaporated milk, milk, garlic powder, onion po...","A recipe containing black pepper, elbow macaro...","[-0.08682483, 0.005830869, 0.021211958, 0.0320..."
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,"round italian loaf cube, tablespoon olive oil,...","A recipe containing celery rib, egg lightly be...","[-0.05839272, 0.008410925, 0.061784226, 0.0176..."
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,"dark brown sugar, water, bourbon, lemon juice,...","A recipe containing bourbon, dark brown sugar,...","[-0.07590774, -0.0077568544, 0.059338614, 0.05..."
...,...,...,...,...,...,...,...,...
13496,13496,Brownie Pudding Cake,"['1 cup all-purpose flour', '2/3 cup unsweeten...",Preheat the oven to 350°F. Into a bowl sift to...,brownie-pudding-cake-14408,"allpurpose flour, unsweetened cocoa powder, do...",A recipe containing accompaniment coffee ice c...,"[-0.013627266, -0.055409785, 0.10973016, 0.097..."
13497,13497,Israeli Couscous with Roasted Butternut Squash...,"['1 preserved lemon', '1 1/2 pound butternut s...",Preheat oven to 475°F.\nHalve lemons and scoop...,israeli-couscous-with-roasted-butternut-squash...,"preserved lemon, butternut squash peeled seede...",A recipe containing butternut squash peeled se...,"[-0.001726016, 0.009676224, 0.013495842, 0.059..."
13498,13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,['Leftover katsuo bushi (dried bonito flakes) ...,"If using katsuo bushi flakes from package, moi...",rice-with-soy-glazed-bonito-flakes-and-sesame-...,leftover katsuo bushi bonito making dashi kats...,"A recipe containing japanese shortgrain rice, ...","[-0.08239394, 0.040297136, 0.061719555, 0.0388..."
13499,13499,Spanakopita,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,"stick unsalted butter, baby spinach, feta crum...","A recipe containing baby spinach, feta crumble...","[-0.056342967, -0.036117602, -0.015733968, 0.0..."


In [None]:
recipeCSVParser.df.to_csv('/content/drive/MyDrive/Applied CV Project/Recipes/Epicurious_Processed_Recipes_With_Embedding_alphabetical.csv', index=False)

# Recipe Matching Code

In [6]:
class RecipeSearcher:
  """
    A class for searching recipes based on ingredient similarity and brute force matching.

    This class loads a pre-trained sentence transformer model and a CSV file containing recipes.
    It provides methods for searching recipes that best match a given set of ingredients
    using both cosine similarity and brute force matching.

    Attributes:
        model (SentenceTransformer): Pre-trained model for embedding sentences.
        lemmatizer (WordNetLemmatizer): Lemmatizer for processing text.
        stop_words (set): Set of stop words to be ignored.
        df (DataFrame): DataFrame containing recipe data.
  """

  def __init__(self, csv_f_path:str):
    """
    Initialize the RecipeSearcher class by loading the sentence transformer model,
    lemmatizer, stop words, and the CSV file containing recipe data.

    Args:
        csv_f_path (str): Path to the CSV file containing recipe data.
    """

    # Load the model once when the instance of the class is created
    self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    self.lemmatizer = WordNetLemmatizer()
    self.stop_words = set(stopwords.words('english'))

    self.df = pd.read_csv(csv_f_path)

  def create_ingredients_sentence(self, ingredients:str) -> str:
    """
    Create a descriptive sentence from a list of ingredients.
    This method sorts the given ingredients and formats them into a single sentence.

    Args: ingredients (str): Comma-separated string of ingredients.

    Returns: str: Formatted sentence describing the ingredients.
    """

    if ingredients == '' or pd.isna(ingredients):
      return None
    sorted_ingredients = sorted(ingredients.split(', '))
    sorted_ingredients_sentence = ", ".join(sorted_ingredients)
    return f"A recipe containing {sorted_ingredients_sentence}"

  def recipe_search(self, query:str, N:int):
    """
    Hybrid recipe matching algorithm
    1) Sentence similarity
    2) Brute force matching

    This method takes advantage of sentence similarity to downscope results
    before performing more effective brute force matching.

    Returned results are based on average of 1) cosine similarity score and 2) number of matches


    Args:
      query (str): A sentence describing the query ingredients.
      N (int): The number of top results to return.

    Returns:
      DataFrame: A DataFrame containing the top N matching recipes.
    """

    query_embedding = self.model.encode(query)

    df = self.df.copy()


    df['embedding_sentence'] = df['embedding_sentence'].apply(lambda x: np.fromstring(x.strip('[]'), sep=' '))
    df_embeddings = np.vstack(df['embedding_sentence'].values)

    cos_similarities = cosine_similarity(df_embeddings, query_embedding.reshape(1, -1)).flatten() # compute cosein similarity

    df['cos_similarity'] = cos_similarities.astype(float)

    sorted_indices = np.argsort(cos_similarities)[::-1]

    top_N_indices = sorted_indices[:20 * N]
    top_recipes_df = df.iloc[top_N_indices]

    # Brute force matching
    top_recipes_df['overlap_score'] = top_recipes_df['ingredients_sentence'].apply(
        lambda x: self.calculate_overlap(x, query))

    top_recipes_df = top_recipes_df.sort_values(by='overlap_score', ascending=False)

    top_recipes_df['final_score'] = (top_recipes_df['cos_similarity'] + top_recipes_df['overlap_score']) / 2

    top_recipes_df = top_recipes_df.sort_values(by='final_score', ascending=False)

    return top_recipes_df.iloc[:N]

  def calculate_overlap(self, recipe_ingredients: str, query_ingredients: str) -> int:
    """
    Manual matching

    Args:
      recipe_ingredients (str): Comma-separated string of recipe ingredients.
      query_ingredients (str): Comma-separated string of query ingredients.

    Returns:
      int: The overlap score between the recipe and the query.
    """
    recipe_ingredients_list = [ingredient.lower() for ingredient in recipe_ingredients.split(", ")]
    query_ingredients_list = [ingredient.lower() for ingredient in query_ingredients.split(", ")]
    overlap_score = 0
    for query_ingredient in query_ingredients_list:
        for recipe_ingredient in recipe_ingredients_list:
            if query_ingredient in recipe_ingredient:
                overlap_score += 1
                break
    return overlap_score


In [8]:
!pwd

/home/jupyter/Recipes and Matching


In [9]:
csv_f_path = 'Epicurious_Processed_Recipes_With_Embedding_alphabetical.csv'
recipeMatcher = RecipeSearcher(csv_f_path)

In [10]:
recipeMatcher.df

Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,embedding,Cleaned_Ingredients,ingredients_sentence,embedding_sentence
0,0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,[-2.10687723e-02 1.19345924e-02 1.94131434e-...,"chicken, kosher salt, acorn squash, sage, rose...","A recipe containing acorn squash, allpurpose f...",[-1.73733290e-02 3.02071739e-02 2.57437471e-...
1,1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,[-3.01422533e-02 -3.48264985e-02 -2.97063421e-...,"egg white, potato, teaspoon kosher salt, black...","A recipe containing black pepper, egg white, p...",[-9.96490121e-02 -1.18312528e-02 -2.05061510e-...
2,2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,[-4.03071009e-02 -1.87259371e-04 4.95206825e-...,"evaporated milk, milk, garlic powder, onion po...","A recipe containing black pepper, elbow macaro...",[-8.68248269e-02 5.83086908e-03 2.12119576e-...
3,3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,[-3.84613238e-02 7.89814219e-02 5.50255477e-...,"round italian loaf cube, tablespoon olive oil,...","A recipe containing celery rib, egg lightly be...",[-5.83927184e-02 8.41092505e-03 6.17842264e-...
4,4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,[-4.46295664e-02 -2.29880735e-02 3.37989144e-...,"dark brown sugar, water, bourbon, lemon juice,...","A recipe containing bourbon, dark brown sugar,...",[-7.59077370e-02 -7.75685441e-03 5.93386143e-...
...,...,...,...,...,...,...,...,...,...
13496,13496,Brownie Pudding Cake,"['1 cup all-purpose flour', '2/3 cup unsweeten...",Preheat the oven to 350°F. Into a bowl sift to...,brownie-pudding-cake-14408,[ 1.40010496e-03 -6.29682243e-02 7.26151466e-...,"allpurpose flour, unsweetened cocoa powder, do...",A recipe containing accompaniment coffee ice c...,[-1.36272656e-02 -5.54097854e-02 1.09730162e-...
13497,13497,Israeli Couscous with Roasted Butternut Squash...,"['1 preserved lemon', '1 1/2 pound butternut s...",Preheat oven to 475°F.\nHalve lemons and scoop...,israeli-couscous-with-roasted-butternut-squash...,[ 5.01855602e-03 -5.16437460e-04 2.73376834e-...,"preserved lemon, butternut squash peeled seede...",A recipe containing butternut squash peeled se...,[-1.72601605e-03 9.67622362e-03 1.34958420e-...
13498,13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,['Leftover katsuo bushi (dried bonito flakes) ...,"If using katsuo bushi flakes from package, moi...",rice-with-soy-glazed-bonito-flakes-and-sesame-...,[-1.01301380e-01 2.18883585e-02 4.91159149e-...,leftover katsuo bushi bonito making dashi kats...,"A recipe containing japanese shortgrain rice, ...",[-8.23939368e-02 4.02971357e-02 6.17195554e-...
13499,13499,Spanakopita,['1 stick (1/2 cup) plus 1 tablespoon unsalted...,Melt 1 tablespoon butter in a 12-inch heavy sk...,spanakopita-107344,[-3.02052200e-02 -4.98416200e-02 2.66024079e-...,"stick unsalted butter, baby spinach, feta crum...","A recipe containing baby spinach, feta crumble...",[-5.63429669e-02 -3.61176021e-02 -1.57339685e-...


# Example Queries

In [None]:
query = 'milk, strawberry, egg, flour'

ordered_query_sentence = recipeMatcher.create_ingredients_sentence(query)
print(ordered_query_sentence)

N = 10

recipeMatcher.recipe_search(ordered_query_sentence, N)

A recipe containing egg, flour, milk, strawberry


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_recipes_df['overlap_score'] = top_recipes_df['ingredients_sentence'].apply(


Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,embedding,Cleaned_Ingredients,ingredients_sentence,embedding_sentence,cos_similarity,overlap_score,final_score
10853,10853,Puffed Pancake with Strawberries,"['1 pound small strawberries, hulled, sliced (...",Preheat oven to 450°F. Stir strawberries and 2...,puffed-pancake-with-strawberries-238536,[-2.95778345e-02 -3.94599997e-02 8.13689083e-...,"strawberry hulled, tablespoon powdered sugar a...","A recipe containing egg room temperature, lemo...","[-0.067988582, -0.0284373276, 0.0812725127, 0....",0.759889,4,2.379944
4061,4061,Easy Crepes,"['2 large eggs', '1 cup flour', '3/4 cup milk'...",Place all the ingredients in a blender and pur...,easy-crepes-51262490,[-1.80271436e-02 -6.59274459e-02 5.24571761e-...,"egg, flour, milk, water, vanilla, tablespoon b...","A recipe containing egg, flour, milk, tablespo...","[-0.0505162589, -0.0500176176, 0.0879107788, 0...",0.78319,3,1.891595
7528,7528,Bitter Orange Crème Brûlée,"['3 large eggs, separated', '1/2 cup/100 g sug...",Preheat the oven to 350°F/180°C/gas 4. Line a ...,bitter-orange-creme-brulee-361549,[-3.51325460e-02 -4.63775545e-02 5.01832478e-...,"egg, sugar, vanilla, flour, sugar, heavy doubl...","A recipe containing egg, egg yolk, flour, heav...","[-0.0639387146, -0.0160927344, 0.0803429708, 0...",0.777748,3,1.888874
10934,10934,Hazelnut Gâteau Breton,"['1 1/4 cups vanilla sugar , divided', '1/2 cu...",Position rack in center of oven and preheat to...,hazelnut-gateau-breton-238257,[-3.87507640e-02 -9.80922487e-03 6.61807433e-...,"vanilla sugar, hazelnut lightly toasted husked...","A recipe containing egg yolk, egg yolk beaten ...","[-0.0535757877, -0.00967997685, 0.109198391, 0...",0.745095,3,1.872547
12439,12439,Triple-Chocolate Cookie and Strawberry Ice Cre...,"['1 1/2 cups chopped fresh strawberries', '1/3...","Combine strawberries, 1/3 cup sugar, and corn ...",triple-chocolate-cookie-and-strawberry-ice-cre...,[-9.72344168e-03 -8.85731131e-02 5.55280261e-...,"strawberry, sugar, tablespoon light corn syrup...","A recipe containing egg yolk, heavy whipping c...","[-0.0282111149, -0.0739245489, 0.089554958, 0....",0.739559,3,1.86978
7241,7241,Swedish Pancakes with Raspberries,"['1 large egg yolk', '2 tablespoons Splenda or...","In a medium bowl, whisk the egg yolk, Splenda,...",swedish-pancakes-with-raspberries-363382,[ 9.26869549e-03 -3.72918732e-02 2.07381397e-...,"egg yolk, tablespoon splenda sugar substitute,...","A recipe containing egg white, egg yolk, nonfa...","[-0.0374721214, -0.0337930731, 0.0648884252, 0...",0.738739,3,1.869369
10003,10003,Oven Crespella with Nutella Sauce,"['3/4 cup all-purpose flour', '2 large eggs', ...",Put a 12-inch ovenproof heavy skillet in middl...,oven-crespella-with-nutella-sauce-241993,[ 2.96434131e-03 -6.57012388e-02 5.92883788e-...,"allpurpose flour, egg, granulated sugar, pure ...","A recipe containing accompaniment strawberry, ...","[-0.0226937085, -0.0529852323, 0.0705456808, 0...",0.730596,3,1.865298
12228,12228,Ginger-Pumpkin Soufflé,"['1 1/2 cups unsweetened soymilk, not low- or ...",Preheat oven to 375°F. Butter 8 6-ounce rameki...,ginger-pumpkin-souffle-233086,[-5.63070504e-03 -6.81316033e-02 2.06718855e-...,"unsweetened soymilk low nofat, peeled ginger, ...","A recipe containing egg white, egg yolk, peele...","[-0.0588756055, -0.0274128728, 0.0461090803, 0...",0.729185,3,1.864592
2461,2461,Overnight Waffles,"['2 cups flour', '1 tablespoon sugar', '2 teas...",The night before you want to serve the waffles...,overnight-waffles,[-5.69426380e-02 -6.72455058e-02 4.05529812e-...,"flour, sugar, teaspoon instant yeast, salt, mi...","A recipe containing egg, flour, milk lukewarm,...","[-0.0996342525, -0.0380949304, 0.074484773, 0....",0.719121,3,1.85956
9834,9834,Strawberry Shortcakes,"['4 pints strawberries, lightly rinsed, hulled...",1. Preheat the oven to 400°F. Grease a baking ...,strawberry-shortcakes-242601,[-6.28045248e-03 -5.83770797e-02 6.40467778e-...,"pint strawberry lightly hulled, lemon juice, s...","A recipe containing heavy cream, heavy cream, ...","[-0.0126830963, -0.0341164023, 0.0959895104, 0...",0.718233,3,1.859117


In [None]:
query = 'chicken, soysauce, rice'

ordered_query_sentence = recipeMatcher.create_ingredients_sentence(query)
print(ordered_query_sentence)

N = 10

recipeMatcher.recipe_search(ordered_query_sentence, N)

A recipe containing chicken, rice, soysauce


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_recipes_df['overlap_score'] = top_recipes_df['ingredients_sentence'].apply(


Unnamed: 0.1,Unnamed: 0,Title,Ingredients,Instructions,Image_Name,embedding,Cleaned_Ingredients,ingredients_sentence,embedding_sentence,cos_similarity,overlap_score,final_score
2264,2264,Soy Sauce Chicken,"['1 (3 1/4-lb/1.5-kg) whole chicken', '1 table...",Air-dry the chicken in the refrigerator for 30...,soy-sauce-chicken,[-7.26777241e-02 -7.06458539e-02 7.33513758e-...,"chicken, dark soy sauce, vegetable oil, light ...","A recipe containing chicken, dark soy sauce, g...","[-0.121591516, -0.0739155784, 0.0658811331, 0....",0.773583,2,1.386791
12620,12620,Chicken and Cilantro Bites,"['1/4 cup soy sauce', '1/4 cup rice vinegar (n...",Stir together soy and vinegar in a bowl for di...,chicken-and-cilantro-bites-231978,[-8.29583257e-02 -2.17033289e-02 6.55900314e-...,"soy sauce, rice vinegar, chicken breast, egg, ...","A recipe containing chicken breast, cilantro, ...","[-0.111978084, -0.0310924295, 0.0655422881, 0....",0.730682,2,1.365341
9914,9914,Steamed Chicken with Xiao Fan's Special Sauce,"['1 chicken (about 3 1/2 pounds), halved lengt...",Fit a large steamer rack inside a large wide p...,steamed-chicken-with-xiao-fans-special-sauce-2...,[-6.47412241e-02 1.88572239e-03 4.66672890e-...,"chicken, tablespoon matchstick peeled ginger, ...","A recipe containing chicken, cilantro, garlic,...","[-0.119347192, -0.0215065405, 0.0427221842, 0....",0.699711,2,1.349856
8948,8948,Avgolemono,"['4 cups chicken broth', '1/4 cup uncooked orz...","1. In a large saucepan, bring the broth to a b...",avgolemono-352269,[-6.49949312e-02 -2.97271721e-02 7.16230599e-...,"chicken broth, orzo rice, salt pepper taste, e...","A recipe containing chicken broth, egg, orzo r...","[-0.106704876, -0.00603844272, 0.033757735, 0....",0.699561,2,1.34978
1682,1682,Fragrant Green Chicken Curry,"['1 cup (200g) white rice', '1 1/2 cups (375ml...","Place the rice, water, and salt in a medium sa...",fragrant-thai-green-chicken-curry-donna-hay,[-5.82663249e-03 -1.18859550e-02 5.93260638e-...,"white rice, water, sea salt flake, x quantity ...","A recipe containing chicken stock, chicken thi...","[-0.0464756563, -0.0281256549, 0.0592962243, 0...",0.679669,2,1.339834
8776,8776,Spicy Orange Chicken Stir-Fry,['1 1/2 cups jasmine rice or long-grain white ...,Cook rice according to package directions. Cov...,spicy-orange-chicken-stir-fry-353398,[-4.88636084e-02 -3.09629515e-02 5.71517423e-...,"jasmine rice longgrain white rice, orange juic...","A recipe containing chicken cutlet strip, corn...","[-0.100183293, -0.0731616095, 0.0517476499, -0...",0.669713,2,1.334856
8614,8614,Chilled and Dilled Avgolemono Soup,"['4 cups chicken stock, or 3 cups reduced-sodi...",Simmer stock and rice in a heavy medium saucep...,chilled-and-dilled-avgolemono-soup-354232,[-2.23163348e-02 4.35884707e-02 7.07301944e-...,chicken stock reducedsodium chicken broth wate...,A recipe containing chicken stock reducedsodiu...,"[-0.0640702993, 0.0123624913, 0.0647469461, 0....",0.669467,2,1.334733
4736,4736,Noodle Salad With Chicken and Chile-Scallion Oil,"['2 scallions, thinly sliced', '2 garlic clove...",Cook all ingredients in a small saucepan over ...,noodle-salad-with-chicken-and-chile-scallion-o...,[-2.85713971e-02 -3.82446237e-02 2.84413863e-...,"scallion, garlic, star anise pod, tablespoon r...","A recipe containing chicken, cilantro sprout, ...","[-0.0909311399, -0.0561952814, 0.0471331663, 0...",0.665557,2,1.332779
452,452,Super-Simple Overnight Porridge,"['2 cups packed cooked white rice', 'About 5 c...","In a 4-quart saucepan, combine the rice, chick...",overnight-porridge-congee-chao-andrea-nguyen-v...,[-5.38762547e-02 1.61706433e-02 2.73859072e-...,"white rice, chicken stock vegetable stock stor...",A recipe containing chicken stock vegetable st...,"[-0.0901651308, 0.00144303963, 0.00769881904, ...",0.659771,2,1.329885
1399,1399,Arroz Caldo (Chicken Rice Porridge),"['200 g (7 oz / 1 cup) white glutinous rice', ...","Place both of the rices in a bowl, cover with ...",arroz-caldo-filipino-chicken-rice-porridge,[-6.31129071e-02 -1.47434156e-02 6.27684742e-...,"white glutinous rice, jasmine mediumgrain rice...","A recipe containing chicken jointed bone, cm g...","[-0.121263795, -0.0458940566, 0.0653000474, 0....",0.657902,2,1.328951


# RecipeMatchingEvaluator

To run on test data and test the number of successful queries.


In [11]:
import warnings


class RecipeMatchingEvaluator:
  """
  A class for evaluating the accuracy of the final system.

  This class uses a `RecipeSearcher` instance to search for recipes based on a set of ingredients
  and then evaluates the accuracy of the matching algorithm by checking if specific recipes
  are among the top results returned.

  Attributes:
    recipeSearcher (RecipeSearcher): Instance of RecipeSearcher used for searching recipes.
    recipe_label_dic (dict): Dictionary containing recipe IDs and their associated labels (ingredients).
  """

  def __init__(self, csv_f_path: str, recipe_label_dic: dict):
    """
    Initialize the RecipeMatchingEvaluator class by creating a `RecipeSearcher` instance
    and setting the recipe label dictionary.

    Args:
        csv_f_path (str): Path to the CSV file containing recipe data.
        recipe_label_dic (dict): Dictionary with recipe IDs as keys and lists of ingredient labels as values.
    """
    self.recipeSearcher = RecipeSearcher(csv_f_path=csv_f_path) # Create recipe searcher instance
    self.recipe_label_dic = recipe_label_dic # Dictionary of {true_recipe_id, labels:[]}

  def run_test(self, N: int) -> dict:
    """
    Run a test to evaluate the accuracy of recipe matching.

    This method checks whether the correct recipes (as identified by their recipe ID)
    are among the top N returned recipes for each query derived from the recipe label dictionary.
    It calculates the accuracy based on the number of correct matches found within the top N results.

    Args:
        N (int): The number of top results to consider for matching. Adjust accoridingly, depending on test strictness.

    Returns:
        dict: A dictionary containing:
            - "accuracy": The accuracy of the matching algorithm as a proportion.
            - "matches": The total number of correct matches found.
            - "total_recipes": The total number of recipes evaluated.
            - "unmatched_recipes": A list of recipe IDs that were not found in the top N results.
    """

    # Suppress SettingWithCopyWarning for the scope of this method
    with warnings.catch_warnings():
      warnings.simplefilter(action="ignore", category=pd.errors.SettingWithCopyWarning)

      matches = 0 # Counter for matches
      total_recipes = len(self.recipe_label_dic)

      unmatched_recipes = []

      for recipe_id, labels in self.recipe_label_dic.items(): # iterate through all recipes

        label_set = set(labels)

        query_sentence = self.recipeSearcher.create_ingredients_sentence(", ".join(sorted(label_set)))
        print(query_sentence)

        top_recipes_df = self.recipeSearcher.recipe_search(query_sentence, N) # query

        if recipe_id in top_recipes_df['Unnamed: 0'].values: # if recipe_id found in top N returned recipes
          matches += 1
        else:
          unmatched_recipes.append(recipe_id)

      total_recipes = len(self.recipe_label_dic)
      accuracy = matches / total_recipes if total_recipes > 0 else 0

      return {
          "accuracy":accuracy,
          "matches":matches,
          "total_recipes":total_recipes,
          "unmatched_recipes": unmatched_recipes,
      }



# Test Code for RecipeMatchingEvaluator

In [54]:
# Update fpath below accordingly. CSV should contain pre-computed embeddings
CSV_F_PATH = '/content/drive/MyDrive/Applied CV Project/Recipes and Matching Algorithm/Epicurious_Processed_Recipes_With_Embedding_alphabetical.csv'

# Dummy test data
test = {767:['egg','egg','egg'],
        796:['banana','avocado','avocado'],
        2278:['orange','orange']}

# Set up and load evaluator
evaluator = RecipeMatchingEvaluator(CSV_F_PATH, test)

In [55]:
# Run evaluation
print(evaluator.run_test(5))

A recipe containing egg
A recipe containing avocado, banana
A recipe containing orange
{'accuracy': 0.6666666666666666, 'matches': 2, 'total_recipes': 3, 'unmatched_recipes': [2278]}


In [None]:
# Testing on the labels produced post segmentation + detection on grounded-SAM and DeiT model

In [31]:
# Function to read in text and convert to dict

def parse_txt_to_dict(filepath):
    result_dict = {}
    
    with open(filepath, "r") as f:
        for line in f:
            
            key_part, values_part = line.split(": [")
            key = int(key_part.strip())
            values = values_part.strip("]\n").replace("'", "").split(", ")
            
            result_dict[key] = values
    return result_dict


In [32]:
testset1 = parse_txt_to_dict("../detection_output_newver.txt")
testset2 = parse_txt_to_dict("../detection_output_augmented.txt")


In [33]:
print(testset1)

{7679: ['Egg', 'Melon Piel de Sapo'], 2936: ['Cantaloupe 1', 'Pear Red', 'Cantaloupe 1'], 2972: ['Tomato 3', 'Butter', 'Butter', 'Apple Crimson Snow', 'Tomato 3', 'Pepper Orange', 'Butter', 'Pear Red'], 4922: ['Fig', 'Pitahaya Red', 'Pear Abate', 'Pomelo Sweetie', 'Maracuja', 'Ginger Root'], 7063: ['Limes'], 23: ['Cantaloupe 1', 'Milk'], 11194: ['Flour', 'Egg', 'Nut Pecan', 'Nut Pecan', 'Nut Pecan', 'Nut Pecan', 'Apple Red 3', 'Egg', 'Egg', 'Kiwi'], 5112: ['Maracuja', 'Cantaloupe 2', 'Pepper Green', 'Quince', 'Pepper Green'], 2865: ['Limes', 'Butter'], 7099: ['Apricot', 'Pear 2', 'Cactus fruit', 'Pear Williams', 'Egg', 'Egg', 'Egg'], 11397: ['Pear Abate', 'Apple Red Yellow 1'], 11549: ['Limes', 'Onion Red Peeled'], 828: ['Eggs', 'Pepper Green', 'Mango'], 11350: ['Cantaloupe 2', 'Pear Stone', 'Cantaloupe 1', 'Pear Williams'], 13240: ['Butter', 'Pear Red', 'Pear Red', 'Cantaloupe 1', 'Salmon', 'Butter', 'Butter', 'Butter', 'Butter', 'Butter', 'Butter', 'Butter', 'Butter'], 4983: ['Egg', 

In [36]:
# Function to clean dictionary
def clean_dictionary(input_dict):
    cleaned_dict = {}
    for key, values in input_dict.items():
        cleaned_values = [
            re.sub(r'\d', '', item).strip().lower()  # Remove digits, strip whitespace, and convert to lowercase
            for item in values
        ]
        # Remove duplicates and empty strings, if any
        cleaned_values = list(set(filter(None, cleaned_values)))  # Ensure unique non-empty items
        cleaned_dict[key] = cleaned_values
    return cleaned_dict


In [41]:
testset1 = clean_dictionary(testset1) # Contains real, augmented scenes (non-training image generated ones)

In [42]:
testset2 = clean_dictionary(testset2) # Contains augmented scenes created using augmented images using training/evaluation images

In [43]:
CSV_F_PATH = 'Epicurious_Processed_Recipes_With_Embedding_alphabetical.csv'

# Set up and load evaluator for real scenes and augmented scenes (non-training image generated ones)
evaluator1 = RecipeMatchingEvaluator(CSV_F_PATH, testset1)

In [44]:
# Run evaluation
print(evaluator1.run_test(10)) 

A recipe containing egg, melon piel de sapo
A recipe containing cantaloupe, pear red
A recipe containing apple crimson snow, butter, pear red, pepper orange, tomato
A recipe containing fig, ginger root, maracuja, pear abate, pitahaya red, pomelo sweetie
A recipe containing limes
A recipe containing cantaloupe, milk
A recipe containing apple red, egg, flour, kiwi, nut pecan
A recipe containing cantaloupe, maracuja, pepper green, quince
A recipe containing butter, limes
A recipe containing apricot, cactus fruit, egg, pear, pear williams
A recipe containing apple red yellow, pear abate
A recipe containing limes, onion red peeled
A recipe containing eggs, mango, pepper green
A recipe containing cantaloupe, pear stone, pear williams
A recipe containing butter, cantaloupe, pear red, salmon
A recipe containing egg, mango
A recipe containing apple golden, cantaloupe, chicken, pomelo sweetie
A recipe containing apple red, egg, rice
A recipe containing melon piel de sapo
A recipe containing butt

In [45]:
# Run evaluation
print(evaluator1.run_test(100)) 

A recipe containing egg, melon piel de sapo
A recipe containing cantaloupe, pear red
A recipe containing apple crimson snow, butter, pear red, pepper orange, tomato
A recipe containing fig, ginger root, maracuja, pear abate, pitahaya red, pomelo sweetie
A recipe containing limes
A recipe containing cantaloupe, milk
A recipe containing apple red, egg, flour, kiwi, nut pecan
A recipe containing cantaloupe, maracuja, pepper green, quince
A recipe containing butter, limes
A recipe containing apricot, cactus fruit, egg, pear, pear williams
A recipe containing apple red yellow, pear abate
A recipe containing limes, onion red peeled
A recipe containing eggs, mango, pepper green
A recipe containing cantaloupe, pear stone, pear williams
A recipe containing butter, cantaloupe, pear red, salmon
A recipe containing egg, mango
A recipe containing apple golden, cantaloupe, chicken, pomelo sweetie
A recipe containing apple red, egg, rice
A recipe containing melon piel de sapo
A recipe containing butt

In [47]:
# Run evaluation
print(evaluator1.run_test(500)) 

A recipe containing egg, melon piel de sapo
A recipe containing cantaloupe, pear red
A recipe containing apple crimson snow, butter, pear red, pepper orange, tomato
A recipe containing fig, ginger root, maracuja, pear abate, pitahaya red, pomelo sweetie
A recipe containing limes
A recipe containing cantaloupe, milk
A recipe containing apple red, egg, flour, kiwi, nut pecan
A recipe containing cantaloupe, maracuja, pepper green, quince
A recipe containing butter, limes
A recipe containing apricot, cactus fruit, egg, pear, pear williams
A recipe containing apple red yellow, pear abate
A recipe containing limes, onion red peeled
A recipe containing eggs, mango, pepper green
A recipe containing cantaloupe, pear stone, pear williams
A recipe containing butter, cantaloupe, pear red, salmon
A recipe containing egg, mango
A recipe containing apple golden, cantaloupe, chicken, pomelo sweetie
A recipe containing apple red, egg, rice
A recipe containing melon piel de sapo
A recipe containing butt

In [48]:
# Contains augmented scenes created using augmented images using training/evaluation images
evaluator2 = RecipeMatchingEvaluator(CSV_F_PATH, testset2)

In [49]:
# Run evaluation
print(evaluator2.run_test(10))

A recipe containing ginger root, pineapple
A recipe containing apple red, dates, walnut
A recipe containing apple granny smith, beetroot
A recipe containing dates, limes, mango red, watermelon
A recipe containing limes, pear
A recipe containing apple pink lady, walnut
A recipe containing limes, onion red
A recipe containing avocado, limes
A recipe containing apple golden, egg, nut pecan, peach, strawberry
A recipe containing flour, potato white
A recipe containing apple golden, blueberry
A recipe containing apple granny smith, kohlrabi, limes
A recipe containing pineapple
A recipe containing kiwi, limes, mango red
A recipe containing apple pink lady, grape pink, walnut
A recipe containing cauliflower, limes
A recipe containing avocado, onion red
A recipe containing eggs, potato white
A recipe containing apple golden, guava
A recipe containing cauliflower, flour, limes
A recipe containing apple golden, strawberry
A recipe containing cucumber ripe, limes
A recipe containing apple golden,

In [50]:
# Run evaluation
print(evaluator2.run_test(100))

A recipe containing ginger root, pineapple
A recipe containing apple red, dates, walnut
A recipe containing apple granny smith, beetroot
A recipe containing dates, limes, mango red, watermelon
A recipe containing limes, pear
A recipe containing apple pink lady, walnut
A recipe containing limes, onion red
A recipe containing avocado, limes
A recipe containing apple golden, egg, nut pecan, peach, strawberry
A recipe containing flour, potato white
A recipe containing apple golden, blueberry
A recipe containing apple granny smith, kohlrabi, limes
A recipe containing pineapple
A recipe containing kiwi, limes, mango red
A recipe containing apple pink lady, grape pink, walnut
A recipe containing cauliflower, limes
A recipe containing avocado, onion red
A recipe containing eggs, potato white
A recipe containing apple golden, guava
A recipe containing cauliflower, flour, limes
A recipe containing apple golden, strawberry
A recipe containing cucumber ripe, limes
A recipe containing apple golden,

In [51]:
# Run evaluation
print(evaluator2.run_test(500))

A recipe containing ginger root, pineapple
A recipe containing apple red, dates, walnut
A recipe containing apple granny smith, beetroot
A recipe containing dates, limes, mango red, watermelon
A recipe containing limes, pear
A recipe containing apple pink lady, walnut
A recipe containing limes, onion red
A recipe containing avocado, limes
A recipe containing apple golden, egg, nut pecan, peach, strawberry
A recipe containing flour, potato white
A recipe containing apple golden, blueberry
A recipe containing apple granny smith, kohlrabi, limes
A recipe containing pineapple
A recipe containing kiwi, limes, mango red
A recipe containing apple pink lady, grape pink, walnut
A recipe containing cauliflower, limes
A recipe containing avocado, onion red
A recipe containing eggs, potato white
A recipe containing apple golden, guava
A recipe containing cauliflower, flour, limes
A recipe containing apple golden, strawberry
A recipe containing cucumber ripe, limes
A recipe containing apple golden,

In [57]:
print("FINAL RESULTS:\n")

print("   Test scenes containing real scenes, augmented scenes (created using non-training/validation dataset images):\n")
print("   N=10: 'accuracy': 0.021739130434782608, 'matches': 2, 'total_recipes': 92")
print("   N=100: 'accuracy': 0.06521739130434782, 'matches': 6, 'total_recipes': 92")
print("   N=500: 'accuracy': 0.18478260869565216, 'matches': 17, 'total_recipes': 92")


print("\n-------------------------------------------------------------------------------------------------------------------")
print("\n   Test scenes containing augmented scenes created using training/validation dataset images):\n")
print("   N=10: 'accuracy': 0.4, 'matches': 16, 'total_recipes': 40")
print("   N=100: 'accuracy': 0.75, 'matches': 30, 'total_recipes': 40")
print("   N=500: 'accuracy': 0.875, 'matches': 35, 'total_recipes': 40")



FINAL RESULTS:

   Test scenes containing real scenes, augmented scenes (created using non-training/validation dataset images):

   N=10: 'accuracy': 0.021739130434782608, 'matches': 2, 'total_recipes': 92
   N=100: 'accuracy': 0.06521739130434782, 'matches': 6, 'total_recipes': 92
   N=500: 'accuracy': 0.18478260869565216, 'matches': 17, 'total_recipes': 92

-------------------------------------------------------------------------------------------------------------------

   Test scenes containing augmented scenes created using training/validation dataset images):

   N=10: 'accuracy': 0.4, 'matches': 16, 'total_recipes': 40
   N=100: 'accuracy': 0.75, 'matches': 30, 'total_recipes': 40
   N=500: 'accuracy': 0.875, 'matches': 35, 'total_recipes': 40
