In [1]:
from pathlib import Path

import numpy as np
from joblib import dump, load
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from data.preprocess_data import combine_json_to_dataframe

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the recipes from the zip archive into a single DataFrame.
RECIPES_ZIP = "../data/recipes_raw.zip"
recipe_data = combine_json_to_dataframe(RECIPES_ZIP)

# Summarise the columns, dtypes and non-null counts of the loaded frame.
recipe_data.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 124434 entries, p3pKOD6jIHEcjf20CCXohP8uqkG5dGi to 2Q3Zpfgt/PUwn1YABjJ5A9T3ZW8xwVa
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   instructions  124434 non-null  object 
 1   ingredients   124434 non-null  object 
 2   title         124434 non-null  object 
 3   full_text     124434 non-null  object 
 4   num_words     124434 non-null  float64
dtypes: float64(1), object(4)
memory usage: 5.7+ MB


In [6]:
# Embedding the full corpus is very slow (the recorded run took over five hours),
# so reuse embeddings that were already saved to disk when they match the data.
# Delete the file to force a fresh encode, e.g. after changing the model.
EMBEDDINGS_PATH = Path("../embeddings/st_embeddings.joblib")

vectoriser = SentenceTransformer("all-MiniLM-L12-v2")
vectoriser.max_seq_length = 512  # maximum number of tokens read from each text

# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load cache files that were produced by this notebook.
embeddings = load(EMBEDDINGS_PATH) if EMBEDDINGS_PATH.exists() else None

# Encode when nothing is cached, or when the cache has a different number of
# rows than the current data and therefore cannot be aligned with it.
if embeddings is None or embeddings.shape[0] != len(recipe_data):
    embeddings = vectoriser.encode(recipe_data.full_text.tolist(), show_progress_bar=True)
print(embeddings.shape)

Batches: 100%|██████████| 3889/3889 [5:34:50<00:00,  5.17s/it]  


(124434, 384)


In [7]:
def get_most_similar_doc(text, vectorized_corpus, original_data, vectoriser=vectoriser, top_n = 5):
    """Return the ``top_n`` corpus rows most similar to a single query text.

    The query is embedded with ``vectoriser.encode`` and compared against every
    row of ``vectorized_corpus`` using cosine similarity.

    Parameters
    ----------
    text : str or list of str
        One query document, given either as a bare string or as a
        one-element list.
    vectorized_corpus : array-like of shape (n_docs, n_features)
        Pre-computed corpus embeddings. Row ``i`` must correspond to row ``i``
        (by position) of ``original_data``.
    original_data : pandas.DataFrame
        Source frame; must contain the ``title`` and ``ingredients`` columns.
    vectoriser : object with an ``encode`` method, optional
        Encoder used for the query. The default is the notebook-level
        ``vectoriser`` as it existed when this function was defined.
    top_n : int, optional
        Number of rows to return. Values above the corpus size are clamped to
        the corpus size; values of zero or below yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``ingredients`` and ``similarity``, sorted by
        ``similarity`` in descending order, with a fresh integer index.

    Raises
    ------
    ValueError
        If ``text`` encodes to more than one query vector.
    """
    # encode() returns a 1-D vector for a bare string and a 2-D array for a
    # list; cosine_similarity needs 2-D input, so normalise to (1, n_features).
    query_vectors = np.atleast_2d(vectoriser.encode(text))
    if query_vectors.shape[0] != 1:
        # With several queries the flattened similarity matrix would no longer
        # line up with corpus row positions, so refuse instead of mis-ranking.
        raise ValueError(
            "get_most_similar_doc expects exactly one query text, got "
            f"{query_vectors.shape[0]}"
        )

    sim = cosine_similarity(X = vectorized_corpus, Y = query_vectors).ravel()

    # argpartition raises when asked for more elements than exist, and a
    # slice of [-0:] would select everything, so clamp to [0, n_docs].
    top_n = max(0, min(int(top_n), sim.shape[0]))
    if top_n == 0:
        top_idx = np.empty(0, dtype=np.intp)
    else:
        # Unordered positions of the top_n largest similarities; the final
        # ordering is done by sort_values below.
        top_idx = np.argpartition(sim, -top_n)[-top_n:]

    return (
        original_data
        .iloc[top_idx]
        .assign(similarity = sim[top_idx])
        .loc[:, ['title', 'ingredients', 'similarity']]
        .sort_values('similarity', ascending = False)
        .reset_index(drop=True)
    )

In [11]:
# Query 1: a breaded, pan-fried cutlet recipe, wrapped in a one-element list.
test_recipe = ["""
Ingredients:

4 veal cutlets (or pork if you can't find veal)
Salt
Pepper
1 cup flour
2 eggs
1 cup breadcrumbs (preferably made from stale bread)
1/2 cup vegetable oil (for frying)
1 lemon (optional)
Instructions:

Pound the cutlets with a meat mallet until they are about 1/4 inch thick. Season both sides with salt and pepper.

Place the flour in a shallow dish. In another dish, beat the eggs. In a third dish, place the breadcrumbs.

Coat each cutlet with flour, shaking off any excess. Dip it into the beaten eggs, and then coat with the breadcrumbs. Repeat this for all of the cutlets.

Heat the vegetable oil in a large frying pan until hot.

Fry each cutlet for about 2-3 minutes on each side, or until golden brown and crispy. Be careful not to overcrowd the pan, you may need to fry them in batches.

Remove the cutlets from the pan with a slotted spoon and place them on paper towels to drain any excess oil.

"""]

# Display the ten corpus recipes closest to the query.
get_most_similar_doc(
    text=test_recipe,
    vectorized_corpus=embeddings,
    original_data=recipe_data,
    top_n=10,
)

Unnamed: 0,title,ingredients,similarity
0,Pork Cutlets,"[1 egg white plus 1 tablespoon water, 1 teaspo...",0.840093
1,Breaded Pork Chops,"[Four 1/2-inch-thick, bone-in, pork chops (abo...",0.773805
2,Crispy Pork Cutlets,[2 (1 1/4 pound) fully trimmed pork tenderloin...,0.770084
3,Fried Pork Chops,"[4 cups vegetable oil, 8 (8-ounce) bone-in por...",0.761982
4,"Crispy Pork Cutlets with Capers, Lemon, Arugul...","[1 lemon, 1 teaspoon chopped fresh sage, 1 tea...",0.756354
5,Olive Brushed Pork Cutlets,"[8 boneless pork loin cutlets, trimmed of fat ...",0.754591
6,Pork Chops with Mustard and Cream,"[3 tablespoons olive oil, 4 pork chops (1-inch...",0.754549
7,Pork Fries,[1 (19 ounce) package Smithfield® Peppercorn &...,0.753094
8,Pan Fried Pork Chops,"[1 teaspoon seasoned salt, plus more for seaso...",0.746066
9,Paneed Veal with Fried Lemon Slices,"[2 large eggs, 1/2 cup water, 1 1/2 cups fine ...",0.742709


In [12]:
# Query 2: a baked vanilla custard with a caramelised sugar top,
# wrapped in a one-element list.
test_recipe_2 = [
    """Ingredients:
- 2 cups heavy cream
- 1 vanilla bean, split and scraped or 1 tsp vanilla extract
- 5 egg yolks
- 1/2 cup granulated sugar, plus more for caramelizing

Instructions:

1. Preheat the oven to 325°F.

2. In a medium saucepan, heat the cream and vanilla bean (both the seeds and the pod) over medium heat until it just begins to simmer. Remove from heat and let sit for 15 minutes to infuse the vanilla flavor.

3. Whisk together the egg yolks and sugar in a medium bowl until light and fluffy.

4. Remove the vanilla pod from the cream and scrape the seeds back into the cream. Discard the pod.

5. Slowly add the cream to the egg mixture, whisking constantly, until well combined.

6. Divide the mixture among four 6-ounce ramekins or custard cups.

7. Place the ramekins in a baking dish and add enough hot water to the dish to reach halfway up the sides of the ramekins.

8. Bake for 30-35 minutes or until the custard is set but still slightly jiggly in the center.

9. Remove the ramekins from the water bath and let cool to room temperature. Refrigerate for at least 2 hours or overnight.

10. When ready to serve, sprinkle a thin layer of sugar over the top of each custard. Either use a culinary torch to caramelize the sugar or place the ramekins under a broiler until the sugar is melted and caramelized. Serve immediately.
"""
]

# Display the ten corpus recipes closest to the query.
get_most_similar_doc(
    text=test_recipe_2,
    vectorized_corpus=embeddings,
    original_data=recipe_data,
    top_n=10,
)

Unnamed: 0,title,ingredients,similarity
0,Crème Brûlée,"[2 1/2 cups heavy cream, 3/4 cup sugar, 1/2 va...",0.87068
1,Chocolate Sprinkled Creme Brulee,"[2 1/3 cups heavy cream, 1/3 cup half-and-half...",0.822773
2,Vanilla Bean Creme Brulee with Raspberries,"[2 1/3 cups heavy cream, 2/3 cup sugar, plus m...",0.812624
3,Bread Pudding,"[4 cups whole milk, 1 cup granulated sugar, 1 ...",0.80283
4,Creme Caramel,"[2/3 cup sugar, 1/3 cup water, 1 cup milk, 1 c...",0.793676
5,Coffee Crème Brûlée,"[6 large egg yolks, 1 large whole egg, 2/3 cup...",0.793207
6,Vanilla Creme Brulee,"[1 vanilla bean, 2 cups heavy cream, 2 cups ha...",0.791325
7,Chocolate Creme Brulee,"[4 cups heavy whipping cream ADVERTISEMENT, 1 ...",0.78803
8,Vanilla Custard Sauce,"[1 1/2 cups half and half, 1 vanilla bean, spl...",0.786678
9,Black and White Crème Brûlée,"[1/2 cup whole milk, 2 cups heavy cream, 1/2 c...",0.784769


## Save model and embeddings

In [10]:
# Persist the embeddings so later sessions can reuse them instead of re-encoding.
EMBEDDINGS_PATH = Path("../embeddings/st_embeddings.joblib")
# joblib.dump does not create missing directories, so make sure the target
# folder exists (it may be absent on a fresh checkout).
EMBEDDINGS_PATH.parent.mkdir(parents=True, exist_ok=True)
dump(embeddings, EMBEDDINGS_PATH)

['../embeddings/st_embeddings.joblib']