In [1]:
import pandas as pd

In [2]:
from sklearn.feature_extraction.text import CountVectorizer
import ast

In [3]:
# Load the recipe table from CSV. Later cells read its 'ingredients' and
# 'name' columns and add a 'flavor_profile' column to this same frame.
df = pd.read_csv('RAW_recipes_cleaned.csv')

In [4]:
import pandas as pd

# Read the node table, then show its first rows followed by its column index.
nodes_df = pd.read_csv('nodes.csv')
print(nodes_df.head(), nodes_df.columns, sep='\n')

   node_id                    name  id   node_type  is_hub
0        0       1%_fat_buttermilk NaN  ingredient  no_hub
1        1   1%_fat_cottage_cheese NaN  ingredient  no_hub
2        3               10%_cream NaN  ingredient  no_hub
3        4               100%_bran NaN  ingredient  no_hub
4        5  10_inch_flour_tortilla NaN  ingredient  no_hub
Index(['node_id', 'name', 'id', 'node_type', 'is_hub'], dtype='object')


In [5]:
# Two-way lookup between node ids (cast to str) and lowercased node names.
# Rows are visited in frame order, so on a repeated key the last row wins.
id2name = {
    node_id: node_name
    for node_id, node_name in zip(
        nodes_df['node_id'].astype(str), nodes_df['name'].str.lower()
    )
}
name2id = {
    node_name: node_id
    for node_id, node_name in zip(
        nodes_df['node_id'].astype(str), nodes_df['name'].str.lower()
    )
}

In [6]:
import re

def normalize_ingredient(ingredient):
    """Turn a free-text ingredient name into a lowercase, underscore-joined token.

    The steps are, in order: lowercase the text; delete the whole-word
    descriptors fresh, dried, chopped, sliced, grated, minced, powdered and
    frozen; collapse runs of whitespace to a single space and trim the ends;
    drop one trailing 's' unless the text ends in 'ss'; replace spaces with
    underscores.

    Args:
        ingredient (str): Raw ingredient text, e.g. "Large Fresh Eggs".

    Returns:
        str: The normalised token, e.g. "large_egg". May be empty when the
        input consists only of descriptors and whitespace.

    Note:
        The plural rule is purely textual: any word ending in a single 's'
        loses it, so "asparagus" becomes "asparagu".
    """
    ingredient = ingredient.lower()
    ingredient = re.sub(
        r'\b(?:fresh|dried|chopped|sliced|grated|minced|powdered|frozen)\b',
        '',
        ingredient,
    )
    # Deleting a descriptor from the middle of a phrase leaves two adjacent
    # spaces ("large  eggs"); collapse them so the result does not contain a
    # doubled underscore. split() with no argument also trims both ends.
    ingredient = ' '.join(ingredient.split())
    if ingredient.endswith('s') and not ingredient.endswith('ss'):
        ingredient = ingredient[:-1]
    return ingredient.replace(' ', '_')

In [7]:
# Read the per-ingredient flavour scores and preview the first five rows.
flavor_labels = pd.read_csv('ingredient-flavor.csv')
print(flavor_labels.head(5))

   index      ingredient  spicy  sweet  umami  sour  salty
0      1            Salt   0.00    0.0    0.0   0.0    1.0
1      2           Sugar   0.00    1.0    0.0   0.0    0.0
2      3    Black Pepper   0.70    0.0    0.0   0.0    0.0
3      4  Cayenne Pepper   0.95    0.0    0.0   0.0    0.0
4      5    Chili Powder   0.80    0.0    0.0   0.0    0.0


In [8]:
# Map each labelled ingredient's name (stripped and lowercased) to a dict of
# its five flavour scores. The name goes through str() first so a non-string
# cell does not break the loop; a missing (NaN) name ends up under 'nan'.
ingredient_flavor_map = {}
for _, row in flavor_labels.iterrows():
    key = str(row['ingredient']).strip().lower()
    ingredient_flavor_map[key] = {
        axis: row[axis]
        for axis in ("spicy", "sweet", "umami", "sour", "salty")
    }

In [9]:
def compose_flavor_profile(ingredient_list):
    """Average the flavour scores of the ingredients that have a label.

    Each name is stripped and lowercased, then looked up in the module-level
    ``ingredient_flavor_map``. Names without an entry are ignored and do not
    count towards the average.

    Args:
        ingredient_list: Iterable of ingredient name strings.

    Returns:
        dict: Keys "spicy", "sweet", "umami", "sour", "salty". Each value is
        the mean score over the matched ingredients rounded to two decimals,
        or 0 for every key when nothing matched.
    """
    axes = ("spicy", "sweet", "umami", "sour", "salty")
    totals = dict.fromkeys(axes, 0)
    matched = 0

    for raw_name in ingredient_list:
        scores = ingredient_flavor_map.get(raw_name.strip().lower())
        if scores is None:
            continue
        matched += 1
        for axis in axes:
            totals[axis] += scores[axis]

    if not matched:
        return totals
    return {axis: round(total / matched, 2) for axis, total in totals.items()}

In [10]:
import ast

# Each 'ingredients' cell is parsed with ast.literal_eval (presumably a
# list-of-strings literal) and the parsed value is scored by
# compose_flavor_profile; the resulting dicts form the new column.
df['flavor_profile'] = (
    df['ingredients']
    .apply(ast.literal_eval)
    .apply(compose_flavor_profile)
)
# Preview the recipe name and its profile for the first 20 rows.
print(df.head(20)[['name', 'flavor_profile']])

                                          name  \
0   arriba   baked winter squash mexican style   
1             a bit different  breakfast pizza   
2                    all in the kitchen  chili   
3                           alouette  potatoes   
4           amish  tomato ketchup  for canning   
5                      apple a day  milk shake   
6                        aww  marinated olives   
7               backyard style  barbecued ribs   
8                     bananas 4 ice cream  pie   
9                      beat this  banana bread   
10                 berry  good sandwich spread   
11               better than sex  strawberries   
12             better then bush s  baked beans   
13                  boat house  collard greens   
14                     calm your nerves  tonic   
15            chicken lickin  good  pork chops   
16                              chile rellenos   
17                              chinese  candy   
18                          chinese  chop suey   


In [11]:
# Write the frame, including the 'flavor_profile' column added in the previous
# cell, to CSV without the row index.
# NOTE(review): the dict values in 'flavor_profile' will presumably be written
# as their string form; confirm that downstream readers parse them back.
df.to_csv('recipes_with_flavour_profiles.csv', index=False)