In [7]:
%load_ext autoreload
%autoreload 2

In [268]:
import pandas as pd
import numpy as np
import time
from typing import List
import pickle

import sys

sys.path.insert(0, "../src")  # needed for using the utils file in the notebook.
from utils import nutrition_facts_parser, sort_ingredients_alphabetically
from sentence_transformers import SentenceTransformer, util

from rich import inspect
from tqdm.notebook import tqdm

from tqdm.auto import tqdm

tqdm.pandas()

from transformers import AutoTokenizer, AutoModel
import torch

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

## merging the title of the recipes back:

In [9]:
recipes_path = "../data/full_dataset.csv"
recipe_title_link = pd.read_csv(
    recipes_path,
    index_col=0,
    usecols=["title", "link"],
    dtype={
        "title": "string[pyarrow]",
        "link": "string[pyarrow]",
    },
)
recipe_title_link = recipe_title_link.query(
    " link.str.contains('allrecipes.com') "
)


data = pd.read_csv("all_recipes_data_w_nutrition_facts_unstructured.csv")
data_w_nutrition = data.query(" Nutrition_facts_unstructured.notnull() ")
print(data_w_nutrition.shape)

data_w_nutrition.head()

(41080, 3)


Unnamed: 0,link,NER,Nutrition_facts_unstructured
0,www.allrecipes.com/recipe/241895/deconstructed...,"['orange juice', 'jiggers vodka']",Nutrition Facts\nServings Per Recipe 1\nCalori...
1,www.allrecipes.com/recipe/20808/kettle-corn/,"['vegetable oil', 'white sugar', 'popcorn kern...",Nutrition Facts\nServings Per Recipe 5\nCalori...
2,www.allrecipes.com/recipe/238733/pops-fabulous...,"['eggplants', 'green bell peppers', 'red bell ...",Nutrition Facts\nServings Per Recipe 10\nCalor...
3,www.allrecipes.com/recipe/241254/chipotle-mang...,"['avocados', 'tomatoes', 'mango', 'cilantro', ...",Nutrition Facts\nServings Per Recipe 20\nCalor...
4,www.allrecipes.com/recipe/261696/basic-homemad...,"['milk', 'heavy whipping cream', 'white vinega...",Nutrition Facts\nServings Per Recipe 4\nCalori...


In [10]:
data_w_nutrition = data_w_nutrition.merge(
    recipe_title_link.reset_index(),
    on="link",
    how="left",
    validate="1:1",
    copy=False,
)
print(data_w_nutrition.shape)
data_w_nutrition.head()

(41080, 4)


Unnamed: 0,link,NER,Nutrition_facts_unstructured,title
0,www.allrecipes.com/recipe/241895/deconstructed...,"['orange juice', 'jiggers vodka']",Nutrition Facts\nServings Per Recipe 1\nCalori...,Deconstructed Screwdriver (The Raw Egg)
1,www.allrecipes.com/recipe/20808/kettle-corn/,"['vegetable oil', 'white sugar', 'popcorn kern...",Nutrition Facts\nServings Per Recipe 5\nCalori...,Kettle Corn
2,www.allrecipes.com/recipe/238733/pops-fabulous...,"['eggplants', 'green bell peppers', 'red bell ...",Nutrition Facts\nServings Per Recipe 10\nCalor...,Pop'S Fabulous Ajvar
3,www.allrecipes.com/recipe/241254/chipotle-mang...,"['avocados', 'tomatoes', 'mango', 'cilantro', ...",Nutrition Facts\nServings Per Recipe 20\nCalor...,Chipotle-Mango Guacamole
4,www.allrecipes.com/recipe/261696/basic-homemad...,"['milk', 'heavy whipping cream', 'white vinega...",Nutrition Facts\nServings Per Recipe 4\nCalori...,Basic Homemade Ricotta Cheese


## sort NER ingredients alphabetically:

In [12]:
data_w_nutrition.NER = data_w_nutrition.NER.apply(eval)

data_w_nutrition["sorted_NER"] = data_w_nutrition.NER.apply(
    sort_ingredients_alphabetically
)
data_w_nutrition.head()

Unnamed: 0,link,NER,Nutrition_facts_unstructured,title,sorted_NER
0,www.allrecipes.com/recipe/241895/deconstructed...,"[orange juice, jiggers vodka]",Nutrition Facts\nServings Per Recipe 1\nCalori...,Deconstructed Screwdriver (The Raw Egg),"[jiggers vodka, orange juice]"
1,www.allrecipes.com/recipe/20808/kettle-corn/,"[vegetable oil, white sugar, popcorn kernels]",Nutrition Facts\nServings Per Recipe 5\nCalori...,Kettle Corn,"[popcorn kernels, vegetable oil, white sugar]"
2,www.allrecipes.com/recipe/238733/pops-fabulous...,"[eggplants, green bell peppers, red bell peppe...",Nutrition Facts\nServings Per Recipe 10\nCalor...,Pop'S Fabulous Ajvar,"[cooking spray, corn oil, eggplants, garlic, g..."
3,www.allrecipes.com/recipe/241254/chipotle-mang...,"[avocados, tomatoes, mango, cilantro, red onio...",Nutrition Facts\nServings Per Recipe 20\nCalor...,Chipotle-Mango Guacamole,"[avocados, black pepper, cayenne pepper, cilan..."
4,www.allrecipes.com/recipe/261696/basic-homemad...,"[milk, heavy whipping cream, white vinegar, salt]",Nutrition Facts\nServings Per Recipe 4\nCalori...,Basic Homemade Ricotta Cheese,"[heavy whipping cream, milk, salt, white vinegar]"


## Prepare for Sentence Transformer:
- create singular "recipes" from each unique ingredient in the corpus --> each ingredient would have its own embedding as well.
- get an embedding for each recipe based on the sorted NER ingredients.

In [13]:
all_ingredients = set(data_w_nutrition.NER.explode())
all_ingredients.remove(np.nan)
len(all_ingredients)

9900

In [14]:
data_w_nutrition.columns

Index(['link', 'NER', 'Nutrition_facts_unstructured', 'title', 'sorted_NER'], dtype='object')

In [15]:
# create a 1 ingredient recipe so that we can get the embedding of each ingredient:

singular_ingredient_recipes_data = []

for ingredient in all_ingredients:
    singular_ingredient_recipes_data.append(
        {
            "link": np.nan,
            "NER": [ingredient],
            "Nutrition_facts_unstructured": "",
            "title": ingredient,
            "sorted_NER": [ingredient],
        }
    )
singular_ingredient_recipes = pd.DataFrame(
    data=singular_ingredient_recipes_data
)
print(singular_ingredient_recipes.shape)
singular_ingredient_recipes.head()

(9900, 5)


Unnamed: 0,link,NER,Nutrition_facts_unstructured,title,sorted_NER
0,,[coconut-almond milk blend],,coconut-almond milk blend,[coconut-almond milk blend]
1,,[sushi rice],,sushi rice,[sushi rice]
2,,[roux],,roux,[roux]
3,,[strawberry tomatoes],,strawberry tomatoes,[strawberry tomatoes]
4,,[Vegetable cooking spray],,Vegetable cooking spray,[Vegetable cooking spray]


In [16]:
data_w_nutrition = pd.concat([data_w_nutrition, singular_ingredient_recipes])
print(data_w_nutrition.shape)

(50980, 5)


In [17]:
data_w_nutrition.isnull().sum()

link                            9900
NER                                0
Nutrition_facts_unstructured    9900
title                              0
sorted_NER                         0
dtype: int64

In [18]:
data_w_nutrition["sorted_joined_NER"] = data_w_nutrition.sorted_NER.str.join(
    " "
)

data_w_nutrition.sample()

Unnamed: 0,link,NER,Nutrition_facts_unstructured,title,sorted_NER,sorted_joined_NER
37886,www.allrecipes.com/recipe/214204/roasted-pecan...,"[pecan pieces, butter, mushrooms, onion, fresh...",Nutrition Facts\nServings Per Recipe 4\nCalori...,Roasted Pecan Couscous With Sun Dried Tomatoes...,"[butter, butter, fresh garlic, mushrooms, onio...",butter butter fresh garlic mushrooms onion Par...


In [19]:
# max ingredient count in a recipe:
max_number_ingredients_per_recipe = data_w_nutrition.NER.apply(len).max()
max_number_ingredients_per_recipe

37

In [20]:
device = "mps" if torch.backends.mps.is_available() else "cpu"


# Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[
        0
    ]  # First element of model_output contains all token embeddings
    input_mask_expanded = (
        attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    )
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return sum_embeddings / sum_mask


def get_embedding(sentence: str):
    # Load AutoModel from huggingface model repository
    tokenizer = AutoTokenizer.from_pretrained(
        "sentence-transformers/all-MiniLM-L6-v2"
    )
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model.to(device=device)
    # Tokenize sentences
    encoded_input = tokenizer(
        sentence,
        padding=True,
        truncation=True,
        max_length=max_number_ingredients_per_recipe,
        return_tensors="pt",
    ).to(device=device)

    # Compute token embeddings
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Perform pooling. In this case, mean pooling
    sentence_embeddings = mean_pooling(
        model_output, encoded_input["attention_mask"]
    )
    return sentence_embeddings


# make sure we apply new indexing to retrieve the recipes later:
data_w_nutrition = data_w_nutrition.reset_index(drop=True)

In [21]:
data_w_nutrition.sorted_joined_NER.values[0]

'jiggers vodka orange juice'

## retrieving the embedding of each sorted list of ingredients and pickle-ing them with the data recipe index:

In [23]:
# with open('recipe_title_embeddings.pkl', "rb") as fIn:
#     recipe_title_embeddings = pickle.load(fIn)


# try:
#     for index in tqdm(
#         data_w_nutrition.index.values
#     ):
#         if str(index) not in recipe_title_embeddings.keys():
#             title_index = str(index)
#             # print(title_index)
#             ner = data_w_nutrition.iloc[index].sorted_joined_NER

#             recipe_title_embeddings[title_index] = get_embedding(sentence=ner)
#         # else:
#             # print("in dict already !")
# except (KeyboardInterrupt):
#     with open('recipe_title_embeddings.pkl', "wb") as fOut:
#         pickle.dump(recipe_title_embeddings, fOut, protocol=pickle.HIGHEST_PROTOCOL)


# with open('recipe_title_embeddings.pkl', "wb") as fOut:
#         pickle.dump(recipe_title_embeddings, fOut, protocol=pickle.HIGHEST_PROTOCOL)

In [24]:
# with open('recipe_title_embeddings_final.pkl', "wb") as fOut:
#         pickle.dump(recipe_title_embeddings, fOut, protocol=pickle.HIGHEST_PROTOCOL)

## example cosine similarity - Sesame Seed Oil Hummus:

In [123]:
with open("recipe_title_embeddings_final.pkl", "rb") as fIn:
    recipe_title_embeddings = pickle.load(fIn)

data_w_nutrition.Nutrition_facts_unstructured.fillna("", inplace=True)

In [208]:
hummus_index = 300

hummus_recipe_embedding = recipe_title_embeddings[str(hummus_index)].to(
    device=device
)

# Sesame Seed Oil Hummus
data_w_nutrition.iloc[hummus_index].sorted_NER

['chickpeas',
 'garlic',
 'ground black pepper',
 'ground cumin',
 'lemon juice',
 'olive oil',
 'salt',
 'sesame seed oil',
 'water']

In [225]:
corpus_embeddings.shape

torch.Size([50980, 384])

In [192]:
corpus_embeddings = torch.stack(list(recipe_title_embeddings.values()))
corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
corpus_embeddings = torch.squeeze(corpus_embeddings, dim=1)

query_embedding = hummus_recipe_embedding.to(device)
query_embedding = torch.squeeze(query_embedding, dim=1)

hits = util.semantic_search(
    query_embedding, corpus_embeddings, score_function=util.dot_score
)

hits

[[{'corpus_id': 300, 'score': 50.34003829956055},
  {'corpus_id': 26864, 'score': 45.16669464111328},
  {'corpus_id': 7416, 'score': 43.72105407714844},
  {'corpus_id': 591, 'score': 43.44393539428711},
  {'corpus_id': 21131, 'score': 43.02387237548828},
  {'corpus_id': 23173, 'score': 42.98396682739258},
  {'corpus_id': 29324, 'score': 42.870750427246094},
  {'corpus_id': 11703, 'score': 42.285736083984375},
  {'corpus_id': 19812, 'score': 42.14165496826172},
  {'corpus_id': 5500, 'score': 42.04046630859375}]]

In [195]:
data_w_nutrition.iloc[hits[0][1]["corpus_id"]]

link                            www.allrecipes.com/recipe/245692/fullas-roaste...
NER                             [garlic, olive oil, chickpeas, tahini, olive o...
Nutrition_facts_unstructured    Nutrition Facts\nServings Per Recipe 12\nCalor...
title                                               Fulla'S Roasted Garlic Hummus
sorted_NER                      [chickpeas, garlic, ground black pepper, groun...
sorted_joined_NER               chickpeas garlic ground black pepper ground cu...
Name: 26864, dtype: object

In [167]:
data_w_nutrition.query(" title.str.lower().str.contains('cake') ")

Unnamed: 0,link,NER,Nutrition_facts_unstructured,title,sorted_NER,sorted_joined_NER
37,www.allrecipes.com/recipe/244635/grandma-sadie...,"[white sugar, butter, eggs, honey, flour, baki...",Nutrition Facts\nServings Per Recipe 24\nCalor...,Grandma Sadie'S Honey Cake,"[almonds, baking powder, baking soda, butter, ...",almonds baking powder baking soda butter coffe...
55,www.allrecipes.com/recipe/8327/chocolate-cherr...,"[flour, white sugar, cocoa, baking soda, bakin...",Nutrition Facts\nServings Per Recipe 24\nCalor...,Chocolate Cherry Chip Cake,"[baking powder, baking soda, butter, chocolate...",baking powder baking soda butter chocolate chi...
82,www.allrecipes.com/recipe/246663/easy-ricotta-...,"[yellow cake, eggs, milk, butter, ricotta chee...",Nutrition Facts\nServings Per Recipe 12\nCalor...,Easy Ricotta Cake,"[butter, eggs, milk, ricotta cheese, vanilla, ...",butter eggs milk ricotta cheese vanilla white ...
157,www.allrecipes.com/recipe/9525/christmas-nut-c...,"[almonds, nuts, walnut halves, pecan, dates, c...",Nutrition Facts\nServings Per Recipe 24\nCalor...,Christmas Nut Cake,"[almonds, baking powder, cherries, cherries, d...",almonds baking powder cherries cherries dates ...
174,www.allrecipes.com/recipe/220705/carlees-celeb...,"[chocolate cake, water, vegetable oil, eggs, i...",Nutrition Facts\nServings Per Recipe 24\nCalor...,Carlee'S Celebrate Spring Cupcakes,"[chocolate, chocolate cake, chocolate sprinkle...",chocolate chocolate cake chocolate sprinkles e...
196,www.allrecipes.com/recipe/162899/chocolate-haz...,"[white sugar, flour, ground toasted hazelnuts,...",Nutrition Facts\nServings Per Recipe 18\nCalor...,Chocolate Hazelnut Cupcakes,"[baking powder, baking soda, chocolate-hazelnu...",baking powder baking soda chocolate-hazelnut c...
197,www.allrecipes.com/recipe/7296/quick-black-for...,"[cake, eggs, almond, cherry pie filling, choco...",Nutrition Facts\nServings Per Recipe 14\nCalor...,Quick Black Forest Cake,"[almond, butter, cake, cherry pie filling, cho...",almond butter cake cherry pie filling chocolat...
218,www.allrecipes.com/recipe/263853/fresh-nectari...,"[unsalted butter, eggs, white sugar, salt, flo...",Nutrition Facts\nServings Per Recipe 8\nCalori...,Fresh Nectarine Cake With Blackberries,"[baking powder, blackberries, eggs, flour, mil...",baking powder blackberries eggs flour milk sal...
223,www.allrecipes.com/recipe/26024/cupcake-surprise/,"[cream cheese, confectioners, egg, chocolate c...",Nutrition Facts\nServings Per Recipe 24\nCalor...,Cupcake Surprise,"[butter, chocolate cake, chocolate chips, conf...",butter chocolate cake chocolate chips confecti...
241,www.allrecipes.com/recipe/260538/blini-russian...,"[eggs, white sugar, salt, milk, flour, vegetab...",Nutrition Facts\nServings Per Recipe 6\nCalori...,Blini - Russian Pancakes,"[eggs, flour, milk, salt, vegetable oil, veget...",eggs flour milk salt vegetable oil vegetable o...


In [148]:
nutrition_features = pd.DataFrame(
    list(
        data_w_nutrition.Nutrition_facts_unstructured.progress_apply(
            nutrition_facts_parser
        ).values
    )
)

  0%|          | 0/50980 [00:00<?, ?it/s]

In [156]:
data_w_nutrition.Nutrition_facts_unstructured.iloc[1]

'Nutrition Facts\nServings Per Recipe 5\nCalories 209\n% Daily Value *\nTotal Fat 12g 15%\nSaturated Fat 2g 9%\nSodium 1mg 0%\nTotal Carbohydrate 25g 9%\nDietary Fiber 3g 10%\nTotal Sugars 10g\nProtein 2g\nCalcium 2mg 0%\nIron 1mg 3%\nPotassium 58mg 1%\n* Percent Daily Values are based on a 2,000 calorie diet. Your daily values may be higher or lower depending on your calorie needs.\n** Nutrient information is not available for all ingredients. Amount is based on available nutrient data.\n(-) Information is not currently available for this nutrient. If you are following a medically restrictive diet, please consult your doctor or registered dietitian before preparing this recipe for personal consumption.\nPowered by the ESHA Research Database © 2018, ESHA Research, Inc. All Rights Reserved'

## Merging nutrition features to the recipes' metadata:

In [153]:
cols = ["title", "link", "sorted_NER"]
recipes_with_nutrition_features = data_w_nutrition[cols].join(
    nutrition_features
)
print(
    "recipe with link, ingredients and nutrition facts features: ",
    recipes_with_nutrition_features.shape,
)

recipes_with_nutrition_features.head()

recipe with link, ingredients and nutrition facts features:  (50980, 27)


Unnamed: 0,title,link,sorted_NER,servings,calories,total_fat_g,total_fat_prct_daily,sodium_mg,sodium_prct_daily,total_carbs_g,total_carbs_prct_daily,dietary_fiber_g,dietary_fiber_prct_daily,total_sugars_g,protein_g,vitamin_c_mg,vitamin_c_prct_daily,calcium_mg,calcium_prct_daily,iron_mg,iron_prct_daily,potassium_mg,potassium_prct_daily,saturated_fat_g,saturated_fat_prct_daily,cholesterol_mg,cholesterol_prct_daily
0,Deconstructed Screwdriver (The Raw Egg),www.allrecipes.com/recipe/241895/deconstructed...,"[jiggers vodka, orange juice]",1.0,222.0,0.0,0.0,2.0,0.0,6.0,2.0,0.0,0.0,5.0,0.0,31.0,155.0,7.0,1.0,0.0,1.0,125.0,3.0,,,,
1,Kettle Corn,www.allrecipes.com/recipe/20808/kettle-corn/,"[popcorn kernels, vegetable oil, white sugar]",5.0,209.0,12.0,15.0,1.0,0.0,25.0,9.0,3.0,10.0,10.0,2.0,,,2.0,0.0,1.0,3.0,58.0,1.0,2.0,9.0,,
2,Pop'S Fabulous Ajvar,www.allrecipes.com/recipe/238733/pops-fabulous...,"[cooking spray, corn oil, eggplants, garlic, g...",10.0,201.0,17.0,22.0,22.0,1.0,12.0,4.0,6.0,20.0,6.0,2.0,105.0,527.0,21.0,2.0,1.0,4.0,457.0,10.0,2.0,11.0,,
3,Chipotle-Mango Guacamole,www.allrecipes.com/recipe/241254/chipotle-mang...,"[avocados, black pepper, cayenne pepper, cilan...",20.0,77.0,6.0,8.0,15.0,1.0,6.0,2.0,3.0,11.0,2.0,1.0,8.0,42.0,11.0,1.0,0.0,2.0,242.0,5.0,1.0,6.0,1.0,0.0
4,Basic Homemade Ricotta Cheese,www.allrecipes.com/recipe/261696/basic-homemad...,"[heavy whipping cream, milk, salt, white vinegar]",4.0,359.0,25.0,32.0,473.0,21.0,20.0,7.0,,,19.0,14.0,0.0,1.0,502.0,39.0,0.0,1.0,633.0,13.0,15.0,74.0,84.0,28.0


In [158]:
recipes_with_nutrition_features[nutrition_features.columns].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
servings,41073.0,11.271005,13.506715,1.0,4.0,8.0,12.0,472.0
calories,41075.0,323.3051,218.522628,0.0,170.0,286.0,428.0,5974.0
total_fat_g,40428.0,16.553329,14.944319,0.0,7.0,13.0,23.0,492.0
total_fat_prct_daily,40428.0,21.162338,19.151932,0.0,8.0,17.0,29.0,630.0
sodium_mg,41046.0,559.104298,1674.32983,0.0,127.0,322.0,721.0,181849.0
sodium_prct_daily,41046.0,24.303927,72.79857,0.0,6.0,14.0,31.0,7906.0
total_carbs_g,41043.0,31.681334,25.185341,0.0,14.0,27.0,44.0,622.0
total_carbs_prct_daily,41043.0,11.50374,9.163301,0.0,5.0,10.0,16.0,226.0
dietary_fiber_g,39603.0,2.901422,3.121244,0.0,1.0,2.0,4.0,44.0
dietary_fiber_prct_daily,39603.0,10.254122,11.06364,0.0,3.0,7.0,14.0,159.0


In [171]:
recipes_with_nutrition_features.to_csv(
    "recipes_with_nutrition_features.csv", index=False
)

## Recommend based on ingredients:


In [190]:
ingredients_df = pd.DataFrame(
    recipes_with_nutrition_features.query(" link.isnull() ")["title"]
).reset_index()
ingredients_df.columns = ["ingredient_index", "ingredient"]
ingredients_df.to_csv("../data/ingredient_indices.csv", index=False)

In [207]:
ingredients_df.query(" ingredient.str.lower().str.contains('broccoli') ")

In [254]:
red_tomato_index = (
    ingredients_df.query(" ingredient == 'red tomato' ")
    .ingredient_index.astype(str)
    .squeeze()
)
eggs_index = (
    ingredients_df.query(" ingredient == 'eggs' ")
    .ingredient_index.astype(str)
    .squeeze()
)
broccoli_index = (
    ingredients_df.query(" ingredient == 'broccoli' ")
    .ingredient_index.astype(str)
    .squeeze()
)

red_tomato_index, eggs_index, broccoli_index

('43048', '46668', '46777')

In [279]:
list(recommendable_recipes_embeddings.keys())[-1]

'41079'

In [257]:
# filter out the indices of the ingredients:
all_ingredients_indices = ingredients_df.ingredient_index.astype(str).tolist()

# get the ingredients indices from query:
query_ingredient_indices = [red_tomato_index, eggs_index, broccoli_index]

recommendable_recipes_embeddings = {
    key: value
    for key, value in recipe_title_embeddings.items()
    if key not in all_ingredients_indices
}

corpus_embeddings = torch.stack(list(recommendable_recipes_embeddings.values()))
corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
corpus_embeddings = torch.squeeze(corpus_embeddings, dim=1)


ingredients_embeddings = [
    recipe_title_embeddings[index].to(device)
    for index in query_ingredient_indices
]

query_embeddings = torch.stack(ingredients_embeddings)
query_embeddings_normalized = util.normalize_embeddings(query_embeddings)
query_combined_embedding = torch.mean(query_embeddings_normalized, dim=0)

hits = util.semantic_search(
    query_combined_embedding, corpus_embeddings, score_function=util.dot_score
)

hits

[[{'corpus_id': 29495, 'score': 114.66667938232422},
  {'corpus_id': 21051, 'score': 111.99999237060547},
  {'corpus_id': 39386, 'score': 108.66668701171875},
  {'corpus_id': 19805, 'score': 108.00003051757812},
  {'corpus_id': 2495, 'score': 108.00000762939453},
  {'corpus_id': 16588, 'score': 106.66668701171875},
  {'corpus_id': 33112, 'score': 105.33333587646484},
  {'corpus_id': 5922, 'score': 104.0},
  {'corpus_id': 31578, 'score': 104.0},
  {'corpus_id': 14436, 'score': 103.3333511352539}]]

In [264]:
corpus_embeddings.shape, query_combined_embedding.shape

(torch.Size([41080, 384]), torch.Size([1, 384]))

In [255]:
with open('../data/recommendable_recipes_embeddings.pkl', "wb") as fOut:
    pickle.dump(recommendable_recipes_embeddings, fOut, protocol=pickle.HIGHEST_PROTOCOL)


In [283]:
# data_w_nutrition.iloc[hits[0][1]["corpus_id"]].sorted_joined_NER

data_w_nutrition.iloc[21051].sorted_joined_NER


'Barilla® broccoli rabe chicken sausage extra-virgin olive oil garlic Parmesan cheese Salt white wine'

In [285]:
data_w_nutrition.iloc[14436].sorted_joined_NER


'Barilla® Jumbo Shells chicken stock chili powder cilantro fresco cheese ground coriander ground cumin lemon juice lime juice olive oil orange juice packets sazon seasoning with coriander paprika pork shoulder salt'