In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from joblib import dump, load

from data.preprocess_data import combine_json_to_dataframe

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma



In [2]:
# Build the recipe DataFrame from the zipped raw data; combine_json_to_dataframe
# is the project helper imported from data.preprocess_data.
recipe_data = combine_json_to_dataframe("../data/recipes_raw.zip")

# Print every column's non-null count and dtype as a quick sanity check of the load.
recipe_data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 124434 entries, p3pKOD6jIHEcjf20CCXohP8uqkG5dGi to 2Q3Zpfgt/PUwn1YABjJ5A9T3ZW8xwVa
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   instructions  124434 non-null  object 
 1   ingredients   124434 non-null  object 
 2   title         124434 non-null  object 
 3   full_text     124434 non-null  object 
 4   num_words     124434 non-null  float64
dtypes: float64(1), object(4)
memory usage: 5.7+ MB


In [3]:
# LangChain embedding wrapper configured with the all-MiniLM-L12-v2
# sentence-transformers model. The same object is handed to Chroma when the
# store is built and again when it is reopened from disk.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2")

In [4]:
# Index the full text of every recipe in a Chroma collection, embedding each
# document with `embeddings` and storing the collection under the given
# persist directory.
db = Chroma.from_texts(
    texts=list(recipe_data["full_text"]),
    embedding=embeddings,
    persist_directory="../embeddings/chromadb",
)

Using embedded DuckDB with persistence: data will be stored in: ../embeddings/chromadb


In [5]:
# Free-text query: a breaded, pan-fried veal/pork cutlet recipe (ingredients
# plus instructions). Used below as the query string for the vector store.
test_recipe = """
Ingredients:

4 veal cutlets (or pork if you can't find veal)
Salt
Pepper
1 cup flour
2 eggs
1 cup breadcrumbs (preferably made from stale bread)
1/2 cup vegetable oil (for frying)
1 lemon (optional)
Instructions:

Pound the cutlets with a meat mallet until they are about 1/4 inch thick. Season both sides with salt and pepper.

Place the flour in a shallow dish. In another dish, beat the eggs. In a third dish, place the breadcrumbs.

Coat each cutlet with flour, shaking off any excess. Dip it into the beaten eggs, and then coat with the breadcrumbs. Repeat this for all of the cutlets.

Heat the vegetable oil in a large frying pan until hot.

Fry each cutlet for about 2-3 minutes on each side, or until golden brown and crispy. Be careful not to overcrowd the pan, you may need to fry them in batches.

Remove the cutlets from the pan with a slotted spoon and place them on paper towels to drain any excess oil.

"""

# Plain similarity search: every hit is a (document, score) tuple, and the
# score is reported below as a distance.
results = db.similarity_search_with_score(test_recipe)

separator = "#" * 50
for hit, distance in results[:3]:
    print(separator)
    print("Distance: ", distance)
    print(hit.page_content[:100])

print("+" * 19 + "MMR RESULTS" + "+" * 20)

# Maximal marginal relevance trades off similarity to the query against
# diversity among the documents already selected.
mmr = db.max_marginal_relevance_search(test_recipe)
for hit in mmr[:3]:
    print(separator)
    print(hit.page_content[:100])



##################################################
Distance:  0.4617595076560974
Recipe title: Pork Cutlets. Ingredients: 1 egg white plus 1 tablespoon water; 1 teaspoon dry mustard
##################################################
Distance:  0.4874432682991028
Recipe title: Crispy Pork Cutlets with Capers, Lemon, Arugula, and Chopped Eggs . Ingredients: 1 lem
##################################################
Distance:  0.49450206756591797
Recipe title: Crispy Pork Cutlets. Ingredients: 2 (1 1/4 pound) fully trimmed pork tenderloins, cut 
+++++++++++++++++++MMR RESULTS++++++++++++++++++++
##################################################
Recipe title: Pork Cutlets. Ingredients: 1 egg white plus 1 tablespoon water; 1 teaspoon dry mustard
##################################################
Recipe title: Becca's Chicken-Fried Pork Chops. Ingredients: 1 1/2 cups all-purpose flour ; 1 1/2 te
##################################################
Recipe title: Pork Tenderloin with Herbed Bread

In [13]:
# Second free-text query: a baked vanilla custard with a caramelized sugar
# topping (ingredients plus numbered steps). Used below as the query string
# for the vector store.
test_recipe_2 =  """Ingredients:
- 2 cups heavy cream
- 1 vanilla bean, split and scraped or 1 tsp vanilla extract
- 5 egg yolks
- 1/2 cup granulated sugar, plus more for caramelizing

Instructions:

1. Preheat the oven to 325°F.

2. In a medium saucepan, heat the cream and vanilla bean (both the seeds and the pod) over medium heat until it just begins to simmer. Remove from heat and let sit for 15 minutes to infuse the vanilla flavor.

3. Whisk together the egg yolks and sugar in a medium bowl until light and fluffy.

4. Remove the vanilla pod from the cream and scrape the seeds back into the cream. Discard the pod.

5. Slowly add the cream to the egg mixture, whisking constantly, until well combined.

6. Divide the mixture among four 6-ounce ramekins or custard cups.

7. Place the ramekins in a baking dish and add enough hot water to the dish to reach halfway up the sides of the ramekins.

8. Bake for 30-35 minutes or until the custard is set but still slightly jiggly in the center.

9. Remove the ramekins from the water bath and let cool to room temperature. Refrigerate for at least 2 hours or overnight.

10. When ready to serve, sprinkle a thin layer of sugar over the top of each custard. Either use a culinary torch to caramelize the sugar or place the ramekins under a broiler until the sugar is melted and caramelized. Serve immediately.
"""

# Plain similarity search: every hit is a (document, score) tuple, and the
# score is reported below as a distance.
results = db.similarity_search_with_score(test_recipe_2)

separator = "#" * 50
for hit, distance in results[:3]:
    print(separator)
    print("Distance: ", distance)
    print(hit.page_content[:100])

print("+" * 19 + "MMR RESULTS" + "+" * 20)

# Maximal marginal relevance trades off similarity to the query against
# diversity among the documents already selected.
mmr = db.max_marginal_relevance_search(test_recipe_2)
for hit in mmr[:3]:
    print(separator)
    print(hit.page_content[:100])


##################################################
Distance:  0.40821030735969543
Recipe title: Vanilla Cream and Glaze . Ingredients: 2 cups whole milk; 1/2 vanilla bean, split leng
##################################################
Distance:  0.44332942366600037
Recipe title: Vanilla Bean Ice Cream. Ingredients: 3 cups heavy cream; 1 cup whole milk; 1 vanilla b
##################################################
Distance:  0.46760663390159607
Recipe title: Vanilla-Bean Ice Cream . Ingredients: 2 cups heavy cream; 1 cup whole milk; 3/4 cup su
+++++++++++++++++++MMR RESULTS++++++++++++++++++++
##################################################
Recipe title: Vanilla Cream and Glaze . Ingredients: 2 cups whole milk; 1/2 vanilla bean, split leng
##################################################
Recipe title: Creme Caramel. Ingredients: 3 cups whole milk; 1 vanilla bean, split lengthwise, seeds
##################################################
Recipe title: Vanilla Sugar. Ingredients: 1 v

In [7]:
# Ensure the vectors are written to disk (the store was created with a
# persist_directory).
db.persist()
# Drop the in-memory handle; the next cell re-creates `db` from the persisted files.
db = None

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

In [11]:
# Now we can load the persisted database from disk and use it as normal.
# No texts are passed here: the store is opened from persist_directory, with the
# same `embeddings` object supplied as its embedding_function.
db = Chroma(persist_directory="../embeddings/chromadb/", embedding_function=embeddings)

Using embedded DuckDB with persistence: data will be stored in: ../embeddings/chromadb/


FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

In [12]:
# Repeat the custard query against the reloaded store. Left as a bare expression
# so the notebook displays the raw list of (Document, score) tuples.
db.similarity_search_with_score(test_recipe_2)

[(Document(page_content='Recipe title: Vanilla Cream and Glaze . Ingredients: 2 cups whole milk; 1/2 vanilla bean, split lengthwise; 4 large egg yolks; 2/3 cup sugar; 1/4 cup cornstarch; 1/4 teaspoon kosher salt; 2 tablespoons unsalted butter; 1/2 cup powdered sugar. Instructions: Pour milk into a medium saucepan; scrape in vanilla beans and add pod. Heat over medium heat until steaming. Remove vanilla pod; discard. Meanwhile, whisk egg yolks, sugar, cornstarch, and salt in a medium bowl. Whisking constantly, gradually add milk. Return to saucepan and cook over medium-low heat, whisking constantly, until thickened and whisk leaves a trail, about 2 minutes. Remove from heat and whisk in butter until melted and mixture is smooth. Transfer to another medium bowl; press plastic wrap directly onto surface of pastry cream. Chill until set, at least 2 hours or up to 4 days. Whisk powdered sugar and 1 tablespoon water in a small bowl until smooth. Dip 1 side of warm sufganiyot in glaze; fill w