In [1]:
# Imports
import pandas as pd
import numpy as np

import ast
import re

In [2]:
df = pd.read_csv('../data/recipes_cleaned.csv')
df.head()

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,directions,recipe_url,calories,fat,carbs,protein,cleaned_ingredients,time_category,calorie_category,fat_category,carbs_category,protein_category
0,French Silk Pie Bars,These French silk pie bars are sooo good. They...,40.0,20.0,300,16.0,"['1/3 cup butter, melted', '1/4 cup white suga...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5,"['butter', 'white sugar', 'chocolate graham cr...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10)
1,No Bake Espresso Martini Cheesecakes,These no bake espresso martini cheesecakes hav...,20.0,5.0,25,6.0,"['1 cup dark chocolate chips', '20 creme-fille...",['Melt chocolate chips in a microwave-safe bow...,https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10,"['chocolate chips', 'cremefilled chocolate coo...",30 minutes or less!,Don't look! (>1000),High Fat! (> 25),High Carbs! (> 80),Low Protein! (< 10)
2,Blackout Cake,Blackout cake is a moist and tender cake with ...,40.0,20.0,135,12.0,"['cooking spray', '2 1/4 cups all-purpose flou...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9,"['cooking spray', 'allpurpose flour', 'white s...",Livin in the kitchin!,High (600–1000),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10)
3,Sleeping Gingerbread Treats,Shhhh they're sleeping!,15.0,15.0,30,9.0,"['1 sheet of prepared puff pastry, thawed', '9...",['Gather the ingredients. Preheat the oven to ...,https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3,"['puff pastry', 'chocolate squares', 'gingerbr...",30 minutes or less!,Low cal (<300),Average Fat! (11-25),Average Carbs! (25-80),Low Protein! (< 10)
4,Little Debbie Brownie Tree Dip,Turn your favorite sweet treat into the best h...,15.0,,135,6.0,"['5 Little Debbie® Christmas Tree Brownies', '...","['Cut 4 brownies into small pieces, and set as...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6,"['little debbie® christmas tree brownies', 'cr...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10)


In [3]:
df['cleaned_ingredients'][0]

"['butter', 'white sugar', 'chocolate graham crackers', 'chocolate', 'eggs', 'white sugar', 'brown sugar', 'water', 'salt', 'vanilla extract', 'unsalted butter', 'heavy cream', 'cream cheese', 'white sugar', 'vanilla extract', 'salt', 'heavy cream', 'chocolate sprinkles']"

### Lemmatanize

In [4]:
import ast
from nltk.stem import WordNetLemmatizer
import nltk

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to normalize and lemmatize each ingredient
def normalize_ingredient(ingredient):
    
    # Lemmatize each word in the ingredient
    words = ingredient.split()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    
    return ' '.join(lemmatized_words)

# Convert string to a list of ingredients
df['cleaned_ingredients'] = df['cleaned_ingredients'].apply(ast.literal_eval)

# Now apply normalization and lemmatization to each ingredient in the list
df['normalized_ingredients'] = df['cleaned_ingredients'].apply(
    lambda ingr_list: [normalize_ingredient(i) for i in ingr_list]
)

# Example output
df['normalized_ingredients'].iloc[0]

['butter',
 'white sugar',
 'chocolate graham cracker',
 'chocolate',
 'egg',
 'white sugar',
 'brown sugar',
 'water',
 'salt',
 'vanilla extract',
 'unsalted butter',
 'heavy cream',
 'cream cheese',
 'white sugar',
 'vanilla extract',
 'salt',
 'heavy cream',
 'chocolate sprinkle']

In [5]:
def underscore_ingredients(ingredients):
    # Replace spaces in multi-word ingredients with underscores
    underscored = [ingredient.replace(' ', '_') for ingredient in ingredients]
    
    # Join into a space-separated string 
    return ' '.join(underscored)

# Apply the function to the 'normalized_ingredients' column
df['ingredients_str'] = df['normalized_ingredients'].apply(underscore_ingredients)

In [6]:
df['ingredients_str'][0]

'butter white_sugar chocolate_graham_cracker chocolate egg white_sugar brown_sugar water salt vanilla_extract unsalted_butter heavy_cream cream_cheese white_sugar vanilla_extract salt heavy_cream chocolate_sprinkle'

### TF-IDF
- tokenize ingredients

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [8]:
# Custom tokenizer that ensures multi-word ingredients remain with underscores
def custom_tokenizer(text):
    # Replace multi-word ingredient phrases with underscores and split by spaces
    return text.split(' ')

# Instantiate
tfidf = TfidfVectorizer(tokenizer = custom_tokenizer)

In [9]:
# Fit
tfidf_matrix = tfidf.fit_transform(df['ingredients_str'])



In [10]:
# Get the feature names (words in the vocabulary)
feature_names = tfidf.get_feature_names_out()

In [11]:
print(feature_names[:20])

['achiote_powder' 'active_yeast' 'adobo_sauce'
 'adobo_sauce_chipotle_pepper' 'adobo_seasoning' 'agave_nectar'
 'agave_syrup' 'aleppo_chile' 'aleppo_chili' 'aleppo_chili_any_chili'
 'aleppo_pepper' 'alfredo_sauce' 'all_purpose_flour' 'allpurpose_baking'
 'allpurpose_flour' 'allpurpose_flour_bread_flour'
 'allpurpose_flour_coating' 'allpurpose_flour_toss_berry'
 'allpurpose_flour_work_surface' 'allspice']


In [12]:
# Example pantry entry
user_input = ['butter', 'chocolate', 'white_sugar', 'eggs', 'vanilla_extract']
pantry_vector = tfidf.transform([' '.join(user_input)])

In [13]:
# Look at cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
cosine_sim = cosine_similarity(pantry_vector, tfidf_matrix)

In [15]:
cosine_sim

array([[0.58046792, 0.10284715, 0.20325993, ..., 0.        , 0.06620717,
        0.06576008]])

In [16]:
cosine_sim_flat = cosine_sim.flatten()

In [17]:
# Get the indices of the top 5 most similar recipes
top_n_indices = cosine_sim_flat.argsort()[::-1][:5]

In [18]:
top_recipes = df.iloc[top_n_indices]

In [19]:
top_recipes[['title', 'intro', 'recipe_url']]

Unnamed: 0,title,intro,recipe_url
0,French Silk Pie Bars,These French silk pie bars are sooo good. They...,https://www.allrecipes.com/french-silk-pie-bar...
20,Chocolate Mousse for Beginners,This chocolate mousse for beginners is the all...,https://www.allrecipes.com/chocolate-mousse-fo...
1594,Very Chocolate Ice Cream,"This chocolate ice cream is a rich, custard-st...",https://www.allrecipes.com/recipe/56803/very-c...
19,Banana Bread Brownies,These banana bread brownies are chocolate brow...,https://www.allrecipes.com/banana-bread-browni...
10,Chocolate-Graham Cracker S’mores Sliders,These baked s’mores sliders bring the campfire...,https://www.allrecipes.com/chocolate-graham-cr...


## Word2Vec Model (Chosen Model)

In [20]:
df.head()

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,directions,recipe_url,calories,...,carbs,protein,cleaned_ingredients,time_category,calorie_category,fat_category,carbs_category,protein_category,normalized_ingredients,ingredients_str
0,French Silk Pie Bars,These French silk pie bars are sooo good. They...,40.0,20.0,300,16.0,"['1/3 cup butter, melted', '1/4 cup white suga...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/french-silk-pie-bar...,405,...,28,5,"[butter, white sugar, chocolate graham cracker...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[butter, white sugar, chocolate graham cracker...",butter white_sugar chocolate_graham_cracker ch...
1,No Bake Espresso Martini Cheesecakes,These no bake espresso martini cheesecakes hav...,20.0,5.0,25,6.0,"['1 cup dark chocolate chips', '20 creme-fille...",['Melt chocolate chips in a microwave-safe bow...,https://www.allrecipes.com/no-bake-espresso-ma...,1058,...,113,10,"[chocolate chips, cremefilled chocolate cookie...",30 minutes or less!,Don't look! (>1000),High Fat! (> 25),High Carbs! (> 80),Low Protein! (< 10),"[chocolate chip, cremefilled chocolate cooky, ...",chocolate_chip cremefilled_chocolate_cooky but...
2,Blackout Cake,Blackout cake is a moist and tender cake with ...,40.0,20.0,135,12.0,"['cooking spray', '2 1/4 cups all-purpose flou...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/blackout-cake-recip...,824,...,80,9,"[cooking spray, allpurpose flour, white sugar,...",Livin in the kitchin!,High (600–1000),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[cooking spray, allpurpose flour, white sugar,...",cooking_spray allpurpose_flour white_sugar coc...
3,Sleeping Gingerbread Treats,Shhhh they're sleeping!,15.0,15.0,30,9.0,"['1 sheet of prepared puff pastry, thawed', '9...",['Gather the ingredients. Preheat the oven to ...,https://www.allrecipes.com/sleeping-gingerbrea...,231,...,29,3,"[puff pastry, chocolate squares, gingerbread m...",30 minutes or less!,Low cal (<300),Average Fat! (11-25),Average Carbs! (25-80),Low Protein! (< 10),"[puff pastry, chocolate square, gingerbread me...",puff_pastry chocolate_square gingerbread_men_c...
4,Little Debbie Brownie Tree Dip,Turn your favorite sweet treat into the best h...,15.0,,135,6.0,"['5 Little Debbie® Christmas Tree Brownies', '...","['Cut 4 brownies into small pieces, and set as...",https://www.allrecipes.com/little-debbie-brown...,534,...,61,6,"[little debbie® christmas tree brownies, cream...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[little debbie® christmas tree brownie, cream ...",little_debbie®_christmas_tree_brownie cream_ch...


In [21]:
# Split the ingredients string into tokens (words)
df['tokenized_ingredients'] = df['ingredients_str'].apply(lambda x: x.split())

# Check the tokenized ingredients
df['tokenized_ingredients'].head()

0    [butter, white_sugar, chocolate_graham_cracker...
1    [chocolate_chip, cremefilled_chocolate_cooky, ...
2    [cooking_spray, allpurpose_flour, white_sugar,...
3    [puff_pastry, chocolate_square, gingerbread_me...
4    [little_debbie®_christmas_tree_brownie, cream_...
Name: tokenized_ingredients, dtype: object

In [22]:
from gensim.models import Word2Vec # learns word vectors (embeddings) based on data (clean_ingredients)

In [23]:
# Train the Word2Vec model on tokenized ingredients
model = Word2Vec(sentences=df['tokenized_ingredients'], vector_size=100, window=5, min_count=1, workers=4)

In [24]:
# View vocabulary to see which words are included
vocab = list(model.wv.index_to_key)
vocab[:10] # Display first 10 words 

['salt',
 'garlic',
 'black_pepper',
 'olive_oil',
 'onion',
 'egg',
 'water',
 'butter',
 'soy_sauce',
 'white_sugar']

In [25]:
word_vector = model.wv['butter']
word_vector

array([-0.08202236,  0.2144164 ,  0.21778657,  0.10251838,  0.07281725,
       -0.7517202 ,  0.15523085,  0.90929025, -0.3727912 , -0.25988725,
        0.0127749 , -0.64406437,  0.1695776 ,  0.36813936,  0.12516041,
       -0.25998142,  0.05710654, -0.46497715,  0.13425374, -1.0114855 ,
        0.40225035,  0.17786236,  0.05830863, -0.2232007 , -0.19830573,
        0.1025289 , -0.26110822, -0.11110315, -0.27451304,  0.12529112,
        0.41336155, -0.01048626,  0.03757157, -0.26441035, -0.14755023,
        0.5289455 ,  0.11161505, -0.11893066, -0.03982544, -0.74318975,
        0.09803057, -0.394326  , -0.22621508,  0.09455436,  0.38722375,
       -0.13427782, -0.31455866, -0.10174282,  0.11715883, -0.01273359,
        0.23353417, -0.3481821 , -0.07799574,  0.01579941, -0.19922987,
        0.19573863,  0.20306647, -0.08925352, -0.3952784 ,  0.06074984,
        0.03943221, -0.01829366,  0.11814557, -0.08226629, -0.53122765,
        0.5122635 ,  0.22844604,  0.4456882 , -0.57864934,  0.43

In [26]:
similar_words = model.wv.most_similar('butter', topn=5)
similar_words

[('cilantro', 0.9997397065162659),
 ('salt', 0.999713122844696),
 ('water', 0.9996798038482666),
 ('unsalted_butter', 0.9996736645698547),
 ('onion', 0.99966961145401)]

In [27]:
# Function to compute average vector for each recipe
def get_average_embedding(tokens, model):
    vectors = [model.wv[word] for word in tokens if word in model.wv]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(model.vector_size)

# Apply to each row
df['embedding'] = df['tokenized_ingredients'].apply(lambda tokens: get_average_embedding(tokens, model))

In [28]:
from numpy.linalg import norm

# Stack recipe vectors into a matrix
recipe_vectors = np.vstack(df['embedding'].values)

In [29]:
# Compute pairwise cosine similarity
cosine_sim = cosine_similarity(recipe_vectors)

In [30]:
# Example user ingredients
user_ingredients = ['butter', 'chocolate', 'white_sugar', 'eggs', 'vanilla_extract']
user_vector = get_average_embedding(user_ingredients, model)

In [31]:
user_vector = user_vector.reshape(1, -1)
similarities = cosine_similarity(user_vector, recipe_vectors)[0]

In [32]:
# Get indices of top 5 matches
top_indices = similarities.argsort()[::-1][:5]

# Show top recipes
top_recipes = df.iloc[top_indices][['title', 'ingredients_str']]
print(top_recipes)

                                        title  \
1488  Cheesecake-Stuffed Snickerdoodle Cookie   
1453                 Banoffee Cheesecake Bars   
0                        French Silk Pie Bars   
55           Chocolate Chocolate Chip Cookies   
53                            No-Bake Cookies   

                                        ingredients_str  
1488  cream_cheese white_sugar butter brown_sugar wh...  
1453  graham_cracker white_sugar butter butter brown...  
0     butter white_sugar chocolate_graham_cracker ch...  
55    white_sugar butter egg vanilla_extract allpurp...  
53    white_sugar butter milk cocoa_powder peanut_bu...  


### Ingredient Substitution Matching

In [33]:
df['ingredients_str']

0       butter white_sugar chocolate_graham_cracker ch...
1       chocolate_chip cremefilled_chocolate_cooky but...
2       cooking_spray allpurpose_flour white_sugar coc...
3       puff_pastry chocolate_square gingerbread_men_c...
4       little_debbie®_christmas_tree_brownie cream_ch...
                              ...                        
1865    milk allpurpose_flour egg vegetable_oil almond...
1866    beef_chuck salt black_pepper hickory_smoked_ba...
1867    avocado rom_tomato shallot jalapeno_pepper lem...
1868    french_bread_dough olive_oil potato smoked_bac...
1869    allpurpose_flour salt black_pepper stew_meat b...
Name: ingredients_str, Length: 1870, dtype: object

In [34]:
def find_substitutable_matches_for_df(df, user_pantry):
    
    def find_substitutable_matches(recipe_ingredients, user_pantry):
        matches = []
        missing = []
        
        recipe_list = recipe_ingredients.split()  # Split ingredients into words
        
        for ingredient in recipe_list:
            if ingredient in user_pantry:
                matches.append((ingredient, ingredient))  # Perfect match
            else:
                # Check word-level overlap
                ingredient_words = set(ingredient.split('_'))
                found = False
                for pantry_item in user_pantry:
                    pantry_words = set(pantry_item.split('_'))
                    if ingredient_words & pantry_words:  # Overlap between words
                        matches.append((ingredient, pantry_item))  # Substitution match
                        found = True
                        break
                if not found:
                    missing.append(ingredient)  # No match or substitution
        
        return matches, missing
    
    # Apply the substitution matching to each recipe in the dataframe
    df['matches'], df['missing'] = zip(*df['ingredients_str'].apply(lambda x: find_substitutable_matches(x, user_pantry)))
    
    return df

In [35]:
# Example user pantry
user_pantry = ['butter', 'sugar', 'almond_milk', 'egg', 'cheese']

# Apply the substitution logic to the dataframe
#df_with_substitutions = find_substitutable_matches_for_df(df, user_pantry)

# df_with_substitutions[['title','ingredients_str', 'matches', 'missing']].head()

In [36]:
df.head()

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,directions,recipe_url,calories,...,cleaned_ingredients,time_category,calorie_category,fat_category,carbs_category,protein_category,normalized_ingredients,ingredients_str,tokenized_ingredients,embedding
0,French Silk Pie Bars,These French silk pie bars are sooo good. They...,40.0,20.0,300,16.0,"['1/3 cup butter, melted', '1/4 cup white suga...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/french-silk-pie-bar...,405,...,"[butter, white sugar, chocolate graham cracker...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[butter, white sugar, chocolate graham cracker...",butter white_sugar chocolate_graham_cracker ch...,"[butter, white_sugar, chocolate_graham_cracker...","[-0.07107284, 0.17288677, 0.16692406, 0.079809..."
1,No Bake Espresso Martini Cheesecakes,These no bake espresso martini cheesecakes hav...,20.0,5.0,25,6.0,"['1 cup dark chocolate chips', '20 creme-fille...",['Melt chocolate chips in a microwave-safe bow...,https://www.allrecipes.com/no-bake-espresso-ma...,1058,...,"[chocolate chips, cremefilled chocolate cookie...",30 minutes or less!,Don't look! (>1000),High Fat! (> 25),High Carbs! (> 80),Low Protein! (< 10),"[chocolate chip, cremefilled chocolate cooky, ...",chocolate_chip cremefilled_chocolate_cooky but...,"[chocolate_chip, cremefilled_chocolate_cooky, ...","[-0.032688417, 0.0794256, 0.07708426, 0.035861..."
2,Blackout Cake,Blackout cake is a moist and tender cake with ...,40.0,20.0,135,12.0,"['cooking spray', '2 1/4 cups all-purpose flou...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/blackout-cake-recip...,824,...,"[cooking spray, allpurpose flour, white sugar,...",Livin in the kitchin!,High (600–1000),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[cooking spray, allpurpose flour, white sugar,...",cooking_spray allpurpose_flour white_sugar coc...,"[cooking_spray, allpurpose_flour, white_sugar,...","[-0.060761597, 0.14692679, 0.1416942, 0.070401..."
3,Sleeping Gingerbread Treats,Shhhh they're sleeping!,15.0,15.0,30,9.0,"['1 sheet of prepared puff pastry, thawed', '9...",['Gather the ingredients. Preheat the oven to ...,https://www.allrecipes.com/sleeping-gingerbrea...,231,...,"[puff pastry, chocolate squares, gingerbread m...",30 minutes or less!,Low cal (<300),Average Fat! (11-25),Average Carbs! (25-80),Low Protein! (< 10),"[puff pastry, chocolate square, gingerbread me...",puff_pastry chocolate_square gingerbread_men_c...,"[puff_pastry, chocolate_square, gingerbread_me...","[-0.037950676, 0.09743297, 0.08909875, 0.03889..."
4,Little Debbie Brownie Tree Dip,Turn your favorite sweet treat into the best h...,15.0,,135,6.0,"['5 Little Debbie® Christmas Tree Brownies', '...","['Cut 4 brownies into small pieces, and set as...",https://www.allrecipes.com/little-debbie-brown...,534,...,"[little debbie® christmas tree brownies, cream...",Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),"[little debbie® christmas tree brownie, cream ...",little_debbie®_christmas_tree_brownie cream_ch...,"[little_debbie®_christmas_tree_brownie, cream_...","[-0.019250043, 0.04749311, 0.046183024, 0.0218..."


In [37]:
df.drop(columns = ['prep_time', 'cook_time', 'cleaned_ingredients', 'normalized_ingredients'], inplace = True)

In [38]:
model_df = df

In [39]:
model_df.head()

Unnamed: 0,title,intro,total_time,servings,ingredients,directions,recipe_url,calories,fat,carbs,protein,time_category,calorie_category,fat_category,carbs_category,protein_category,ingredients_str,tokenized_ingredients,embedding
0,French Silk Pie Bars,These French silk pie bars are sooo good. They...,300,16.0,"['1/3 cup butter, melted', '1/4 cup white suga...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5,Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),butter white_sugar chocolate_graham_cracker ch...,"[butter, white_sugar, chocolate_graham_cracker...","[-0.07107284, 0.17288677, 0.16692406, 0.079809..."
1,No Bake Espresso Martini Cheesecakes,These no bake espresso martini cheesecakes hav...,25,6.0,"['1 cup dark chocolate chips', '20 creme-fille...",['Melt chocolate chips in a microwave-safe bow...,https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10,30 minutes or less!,Don't look! (>1000),High Fat! (> 25),High Carbs! (> 80),Low Protein! (< 10),chocolate_chip cremefilled_chocolate_cooky but...,"[chocolate_chip, cremefilled_chocolate_cooky, ...","[-0.032688417, 0.0794256, 0.07708426, 0.035861..."
2,Blackout Cake,Blackout cake is a moist and tender cake with ...,135,12.0,"['cooking spray', '2 1/4 cups all-purpose flou...",['Gather all ingredients.\n\n \n\n\n\n\n\n \n ...,https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9,Livin in the kitchin!,High (600–1000),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),cooking_spray allpurpose_flour white_sugar coc...,"[cooking_spray, allpurpose_flour, white_sugar,...","[-0.060761597, 0.14692679, 0.1416942, 0.070401..."
3,Sleeping Gingerbread Treats,Shhhh they're sleeping!,30,9.0,"['1 sheet of prepared puff pastry, thawed', '9...",['Gather the ingredients. Preheat the oven to ...,https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3,30 minutes or less!,Low cal (<300),Average Fat! (11-25),Average Carbs! (25-80),Low Protein! (< 10),puff_pastry chocolate_square gingerbread_men_c...,"[puff_pastry, chocolate_square, gingerbread_me...","[-0.037950676, 0.09743297, 0.08909875, 0.03889..."
4,Little Debbie Brownie Tree Dip,Turn your favorite sweet treat into the best h...,135,6.0,"['5 Little Debbie® Christmas Tree Brownies', '...","['Cut 4 brownies into small pieces, and set as...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6,Livin in the kitchin!,Average (300–600),High Fat! (> 25),Average Carbs! (25-80),Low Protein! (< 10),little_debbie®_christmas_tree_brownie cream_ch...,"[little_debbie®_christmas_tree_brownie, cream_...","[-0.019250043, 0.04749311, 0.046183024, 0.0218..."


In [40]:
df.to_csv('../data/model_df.csv', index=False)

In [41]:
model_df.shape

(1870, 19)

In [42]:
model_df.dtypes

title                     object
intro                     object
total_time                 int64
servings                 float64
ingredients               object
directions                object
recipe_url                object
calories                   int64
fat                        int64
carbs                      int64
protein                    int64
time_category             object
calorie_category          object
fat_category              object
carbs_category            object
protein_category          object
ingredients_str           object
tokenized_ingredients     object
embedding                 object
dtype: object