### This file connects embeddings information from ingredients of Flavors and Menu to Pre-aggrerated Embedding from Graph Neural Network. This is the pre work needed to build the recommendation agorithm.

In [33]:
import numpy as np
import pandas as pd
import zipfile
import random
import pickle

# Data loading

In [34]:
df_cleaned = pd.read_csv('new_menu.csv')
df_cleaned.head(5)

Unnamed: 0.1,Unnamed: 0,restaurant_id,menu_type,section,menu_item,description,price,dietary_prefences,source,image_name,ingredients,ingredients_mapped
0,0,laylak_toronto,,Cold Dips,Hummus,Chickpea tahini lemon,17,,,,"['chickpeas', 'tahini', 'lemon juice', 'olive ...","chickpea, tahini, lemon_juice, olive_oil, garl..."
1,1,laylak_toronto,,Cold Dips,Baba Ghanoush,Fire roasted eggplant tahini lemon,17,,,,"['eggplant', 'tahini', 'lemon juice', 'olive o...","eggplant, tahini, lemon_juice, olive_oil, garl..."
2,2,laylak_toronto,,Cold Dips,Mohamarah,"Fire roasted red pepper, Aleppo pepper, walnut",17,,,,"['roasted red peppers', 'aleppo pepper', 'waln...","roasted_red_pepper, aleppo_pepper, walnut, bre..."
3,3,laylak_toronto,,Cold Dips,Labneh Bel Toum,Strained yogurt mint garlic,17,,,,"['strained yogurt labneh', 'fresh mint', 'garl...","strained yogurt labneh, of_fresh_mint, garlic,..."
4,4,laylak_toronto,,Cold Dips,Trio,Choice of 3 dips,22,,,,"['combination of hummus', 'baba ghanoush', 'an...","combination of hummus, baba ghanoush, and moha..."


In [35]:
# load dataframe of node_ids and ingredients
id_ing_df = pd.read_csv('new_nodes.csv')

In [36]:
# open and load data from pickle file
file = open('FlavorGraph Node Embedding.pickle', 'rb')

# keys are strings for node_id, value is embedding
data = pickle.load(file)

file.close()


# Preparing Embedding

In [37]:
# getting the embeddings for ingredients
ing_embeddings = {}

for i in range(len(id_ing_df)):
    if id_ing_df.loc[i, 'node_type'] == "ingredient":
        # map the name of the ingredient to the embedding
        ing_embeddings[id_ing_df.loc[i, 'name']] = data[str(id_ing_df.loc[i, 'node_id'])]

In [38]:
def get_embeddings_only(ingredient_string):
    # Split the string into individual ingredients and strip any extra spaces
    ing_list = [ing.strip() for ing in ingredient_string.split(',')]

    emb_list = []

    for ing in ing_list:
        emb = ing_embeddings.get(ing, [])

        if len(emb) != 0:
            emb_list.append(emb)

    return emb_list


In [39]:
def get_missing_ingredients(ingredient_string):
    # Check if the input is null or not a string
    if not isinstance(ingredient_string, str) or pd.isnull(ingredient_string):
        return None

    # Split the string into individual ingredients and strip any extra spaces
    ing_list = [ing.strip() for ing in ingredient_string.split(',')]

    missing_ingredients = []

    for ing in ing_list:
        emb = ing_embeddings.get(ing, [])

        if len(emb) == 0:
            missing_ingredients.append(ing)

    # Return missing ingredients or None if there are none
    return missing_ingredients if missing_ingredients else None

# Embdding datasets

In [40]:
# For menu_data

df_cleaned['Embeddings'] = df_cleaned['ingredients_mapped'].apply(get_embeddings_only)

df_cleaned['Missing_Ingredients'] = df_cleaned['ingredients_mapped'].apply(get_missing_ingredients)


In [41]:
# For flavor

flavor_df = pd.read_csv('new_flavorgraph_df.csv')

flavor_df['Embeddings'] = flavor_df['ingredients_mapped'].apply(get_embeddings_only)

flavor_df['Missing_Ingredients'] = flavor_df['ingredients_mapped'].apply(get_missing_ingredients)

In [42]:
# For cuisine

cuisine_df = pd.read_csv('new_cuisine_df.csv')

cuisine_df['Embeddings'] = flavor_df['ingredients_mapped'].apply(get_embeddings_only)

cuisine_df['Missing_Ingredients'] = flavor_df['ingredients_mapped'].apply(get_missing_ingredients)

In [43]:
cuisine_df.head()

Unnamed: 0.1,Unnamed: 0,flavour,ingredients,ingredients_mapped,Embeddings,Missing_Ingredients
0,0,Cajun,"['roux', 'spice', 'onions', 'celery', 'peppers...","roux, spice, onion, celery, pepper, garlic, ca...","[[-0.23929013, 0.0592715, -0.09344146, 0.26480...",
1,1,Creole,"['roux', 'spice', 'onions', 'celery', 'peppers...","roux, spice, onion, celery, pepper, garlic, ca...","[[0.030734919, 0.05719392, -0.38024616, 0.3099...",
2,2,Caribbean,"['allspice', 'peppers', 'garlic', 'rum', 'jerk...","allspice, pepper, garlic, rum, jerk, corn, pla...","[[0.11249744, -0.28831434, 0.16757697, 0.18586...",
3,3,Chinese,"['soy', 'oil', 'sesame', 'garlic', 'ginger', '...","shoyu, oil, sesame, garlic, ginger, chestnut, ...","[[-0.2989276, 0.105471276, -0.069953315, 0.046...",
4,4,French,"['butter', 'shallots', 'onions', 'celery', 'ca...","butter, shallot, onion, celery, carrot, thyme,...","[[-0.110827886, 0.017259488, 0.028557366, 0.02...",


In [44]:
flavor_df.head()

Unnamed: 0.1,Unnamed: 0,Element,Ingredients,ingredients_mapped,Embeddings,Missing_Ingredients
0,0,Sour,"['lemon', 'lime', 'orange', 'vinegar', 'tomato...","lemon, lime, orange, vinegar, tomato, pickle, ...","[[-0.23929013, 0.0592715, -0.09344146, 0.26480...",
1,1,Sweet,"['carrots', 'sweetpotatoes', 'corn', 'butternu...","carrot, sweet_potato, corn, butter, sugar, fen...","[[0.030734919, 0.05719392, -0.38024616, 0.3099...",
2,2,Salty,"['kosher', 'sea', 'fishsauce', 'soy', 'seaweed...","kosher_salt, steak, fish_sauce, shoyu, seed, p...","[[0.11249744, -0.28831434, 0.16757697, 0.18586...",
3,3,Spicy,"['hotsauce', 'wasabi', 'horseradish', 'dijon',...","hot_sauce, wasabi, horseradish, onion, harissa...","[[-0.2989276, 0.105471276, -0.069953315, 0.046...",
4,4,Umami,"['mushrooms', 'bacon', 'meats', 'soy', 'tomato...","mushroom, bacon, meat, shoyu, tomato, anchovy,...","[[-0.110827886, 0.017259488, 0.028557366, 0.02...",


In [45]:
# Save
#df_cleaned[['ingredients_mapped', 'Missing_Ingredients']].to_csv('Flavor_embeded.csv')

In [46]:
df_cleaned.head()

Unnamed: 0.1,Unnamed: 0,restaurant_id,menu_type,section,menu_item,description,price,dietary_prefences,source,image_name,ingredients,ingredients_mapped,Embeddings,Missing_Ingredients
0,0,laylak_toronto,,Cold Dips,Hummus,Chickpea tahini lemon,17,,,,"['chickpeas', 'tahini', 'lemon juice', 'olive ...","chickpea, tahini, lemon_juice, olive_oil, garl...","[[0.0121464785, 0.18054625, -0.07192151, -0.19...",
1,1,laylak_toronto,,Cold Dips,Baba Ghanoush,Fire roasted eggplant tahini lemon,17,,,,"['eggplant', 'tahini', 'lemon juice', 'olive o...","eggplant, tahini, lemon_juice, olive_oil, garl...","[[0.20060113, 0.15472414, -0.32121348, -0.0977...",
2,2,laylak_toronto,,Cold Dips,Mohamarah,"Fire roasted red pepper, Aleppo pepper, walnut",17,,,,"['roasted red peppers', 'aleppo pepper', 'waln...","roasted_red_pepper, aleppo_pepper, walnut, bre...","[[-0.3076723, -0.08307011, -0.14726506, 0.1041...",
3,3,laylak_toronto,,Cold Dips,Labneh Bel Toum,Strained yogurt mint garlic,17,,,,"['strained yogurt labneh', 'fresh mint', 'garl...","strained yogurt labneh, of_fresh_mint, garlic,...","[[-0.3328095, -0.124348424, 0.04044519, -0.085...",[strained yogurt labneh]
4,4,laylak_toronto,,Cold Dips,Trio,Choice of 3 dips,22,,,,"['combination of hummus', 'baba ghanoush', 'an...","combination of hummus, baba ghanoush, and moha...",[],"[combination of hummus, baba ghanoush, and moh..."


In [47]:
sum(df_cleaned['Missing_Ingredients'].notna())

160

In [48]:
#df_cleaned[['Ingredients','Embeddings']].to_pickle('Ingredients_Only_Cleaned_Embeddings.pkl')

In [49]:
df_cleaned[['ingredients', 'ingredients_mapped', 'Missing_Ingredients']].to_csv('Missing_Ingredients3.csv', index=False)