In [1]:
# Imports
import pandas as pd
import numpy as np

import ast
import re

from sentence_transformers import SentenceTransformer

In [2]:
# Load the cleaned recipe table; the path is resolved relative to the
# notebook's working directory.
RECIPES_CSV = '../data/recipes_cleaned.csv'
df = pd.read_csv(RECIPES_CSV)

# Show the first rows as the cell output
df.head(n=5)

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,recipe_url,calories,fat,carbs,protein
0,French Silk Pie Bars,french silk pie bar sooo good consist chocolat...,40,20.0,300.0,16.0,"['butter melted', 'white sugar', 'finely crush...",https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5
1,No Bake Espresso Martini Cheesecakes,bake espresso martini cheesecakes base layer c...,20,5.0,25.0,6.0,"['dark chocolate chips', 'creme-filled chocola...",https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10
2,Blackout Cake,blackout cake moist tender cake deep cocoa fla...,40,20.0,135.0,12.0,"['cooking spray', 'all-purpose flour', 'white ...",https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9
3,Sleeping Gingerbread Treats,shhhh sleeping,15,15.0,30.0,9.0,"['of prepared puff pastry thawed', 'chocolate ...",https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3
4,Little Debbie Brownie Tree Dip,turn favorite sweet treat good holiday snack,15,0.0,135.0,6.0,"['little debbie® christmas tree brownies', 'cr...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6


In [3]:
# Inspect the raw `ingredients` value stored under row label 0 (still a string at this point)
df.loc[0, 'ingredients']

"['butter melted', 'white sugar', 'finely crushed chocolate or regular graham crackers', 'bittersweet chocolate', 'large eggs at room temperature', 'white sugar', 'packed brown sugar', 'water at room temperature', 'salt', 'vanilla extract', 'unsalted butter at room temperature cut into pieces', 'heavy cream', 'cream cheese softened', 'white sugar', 'vanilla extract', 'salt', 'heavy cream', 'chocolate curls or chocolate sprinkles']"

In [4]:
# Parse each stringified list literal into an actual Python list
df['ingredients_list'] = df['ingredients'].map(ast.literal_eval)

In [5]:
# Load a pretrained sentence-embedding model (chosen by the author as a good
# balance of speed and performance)
model = SentenceTransformer('all-MiniLM-L6-v2')

# `ingredients_list` is already produced by the earlier literal_eval cell, so
# the ingredient strings are not parsed a second time here.

# Join each recipe's ingredient list into a single comma-separated string
df['ingredients_text'] = df['ingredients_list'].apply(', '.join)

# Encode every recipe in one batched call instead of calling model.encode()
# once per row: encode() accepts a list of sentences and returns one vector
# per input, which lets the model process the texts in batches.
ingredient_embeddings = model.encode(df['ingredients_text'].tolist())

# Store one 1-D vector per row so later cells can np.vstack() the column
df['embedding'] = list(ingredient_embeddings)

In [6]:
# Preview the first five rows to check the ingredients_list, ingredients_text
# and embedding columns
df.head(n=5)

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,recipe_url,calories,fat,carbs,protein,ingredients_list,ingredients_text,embedding
0,French Silk Pie Bars,french silk pie bar sooo good consist chocolat...,40,20.0,300.0,16.0,"['butter melted', 'white sugar', 'finely crush...",https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5,"[butter melted, white sugar, finely crushed ch...","butter melted, white sugar, finely crushed cho...","[-0.045996264, -0.06792308, 0.06503015, 0.0440..."
1,No Bake Espresso Martini Cheesecakes,bake espresso martini cheesecakes base layer c...,20,5.0,25.0,6.0,"['dark chocolate chips', 'creme-filled chocola...",https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10,"[dark chocolate chips, creme-filled chocolate ...","dark chocolate chips, creme-filled chocolate s...","[-0.082189634, -0.037129056, 0.019682711, 0.07..."
2,Blackout Cake,blackout cake moist tender cake deep cocoa fla...,40,20.0,135.0,12.0,"['cooking spray', 'all-purpose flour', 'white ...",https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9,"[cooking spray, all-purpose flour, white sugar...","cooking spray, all-purpose flour, white sugar,...","[-0.017987631, -0.013042789, 0.06342994, 0.006..."
3,Sleeping Gingerbread Treats,shhhh sleeping,15,15.0,30.0,9.0,"['of prepared puff pastry thawed', 'chocolate ...",https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3,"[of prepared puff pastry thawed, chocolate squ...","of prepared puff pastry thawed, chocolate squa...","[0.0102335075, 0.030051216, 0.043988287, 0.081..."
4,Little Debbie Brownie Tree Dip,turn favorite sweet treat good holiday snack,15,0.0,135.0,6.0,"['little debbie® christmas tree brownies', 'cr...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6,"[little debbie® christmas tree brownies, cream...","little debbie® christmas tree brownies, cream ...","[0.0012255397, -0.041548047, 0.04948033, 0.000..."


In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# Stack the per-recipe vectors held in df['embedding'] into one 2-D array
# (one row per recipe)
embeddings = np.vstack(df['embedding'].to_numpy())

# Cosine similarity between every pair of recipes
similarity_matrix = cosine_similarity(embeddings)

# Label both axes with recipe titles so similarities can be looked up by name
similarity_df = pd.DataFrame(similarity_matrix, index=df['title'], columns=df['title'])

# Rank all recipes by similarity to the chosen title. Six rows are kept because
# the chosen recipe matches itself with similarity 1.0 and takes the first
# slot, leaving five other recipes.
title = "French Silk Pie Bars"
ranked_by_similarity = similarity_df[title].sort_values(ascending=False)
most_similar_recipes = ranked_by_similarity.head(6)
print(most_similar_recipes)

title
French Silk Pie Bars                    1.000000
Funfetti Cheesecake Sandwich Cookies    0.872046
S'Mores Cupcakes                        0.872032
Lemon Crumb Bars                        0.870518
Christmas Cheesecake Bars               0.869854
Tiramisu Blondies                       0.868965
Name: French Silk Pie Bars, dtype: float32


In [8]:
# Create categories for total_time
def categorize_time(minutes):
    """Bucket a total time given in minutes into a descriptive label.

    Parameters
    ----------
    minutes : int, float or None
        Total time in minutes. May be missing (NaN/None).

    Returns
    -------
    str or None
        'Quick and Easy!' for values <= 25, 'Hour or Less' for values <= 60,
        'Live in the Kitchen' for anything longer, and None when the value
        is missing.
    """
    # NaN fails every `<=` comparison, so without this guard a missing time
    # would silently fall through to the longest bucket.
    if pd.isna(minutes):
        return None

    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((25, 'Quick and Easy!'), (60, 'Hour or Less')):
        if minutes <= upper_bound:
            return label
    return 'Live in the Kitchen'

# Label every recipe with its time bucket
df['time_level'] = df['total_time'].map(categorize_time)

In [9]:
# Preview the first five rows to check the new time_level column
df.head(n=5)

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,recipe_url,calories,fat,carbs,protein,ingredients_list,ingredients_text,embedding,time_level
0,French Silk Pie Bars,french silk pie bar sooo good consist chocolat...,40,20.0,300.0,16.0,"['butter melted', 'white sugar', 'finely crush...",https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5,"[butter melted, white sugar, finely crushed ch...","butter melted, white sugar, finely crushed cho...","[-0.045996264, -0.06792308, 0.06503015, 0.0440...",Live in the Kitchen
1,No Bake Espresso Martini Cheesecakes,bake espresso martini cheesecakes base layer c...,20,5.0,25.0,6.0,"['dark chocolate chips', 'creme-filled chocola...",https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10,"[dark chocolate chips, creme-filled chocolate ...","dark chocolate chips, creme-filled chocolate s...","[-0.082189634, -0.037129056, 0.019682711, 0.07...",Quick and Easy!
2,Blackout Cake,blackout cake moist tender cake deep cocoa fla...,40,20.0,135.0,12.0,"['cooking spray', 'all-purpose flour', 'white ...",https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9,"[cooking spray, all-purpose flour, white sugar...","cooking spray, all-purpose flour, white sugar,...","[-0.017987631, -0.013042789, 0.06342994, 0.006...",Live in the Kitchen
3,Sleeping Gingerbread Treats,shhhh sleeping,15,15.0,30.0,9.0,"['of prepared puff pastry thawed', 'chocolate ...",https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3,"[of prepared puff pastry thawed, chocolate squ...","of prepared puff pastry thawed, chocolate squa...","[0.0102335075, 0.030051216, 0.043988287, 0.081...",Hour or Less
4,Little Debbie Brownie Tree Dip,turn favorite sweet treat good holiday snack,15,0.0,135.0,6.0,"['little debbie® christmas tree brownies', 'cr...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6,"[little debbie® christmas tree brownies, cream...","little debbie® christmas tree brownies, cream ...","[0.0012255397, -0.041548047, 0.04948033, 0.000...",Live in the Kitchen


In [10]:
# Create categories for calories
def categorize_calories(cals):
    """Label a calorie value as "Low", "Medium" or "High".

    Values up to and including 300 are "Low", values up to and including 600
    are "Medium", and everything else is "High".
    """
    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((300, "Low"), (600, "Medium")):
        if cals <= upper_bound:
            return label
    return "High"

# Label every recipe with its calorie bucket
df['calorie_level'] = df['calories'].map(categorize_calories)

In [11]:
def categorize_protein(p):
    """Label a protein value as "Low", "Average" or "High".

    Values up to and including 25 are "Low", values up to and including 40
    are "Average", and everything else is "High".
    """
    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((25, "Low"), (40, "Average")):
        if p <= upper_bound:
            return label
    return "High"

# Label every recipe with its protein bucket
df['protein_level'] = df['protein'].map(categorize_protein)

In [12]:
def categorize_carbs(c):
    """Label a carbohydrate value as "Low", "Average" or "High".

    Values up to and including 40 are "Low", values up to and including 75
    are "Average", and everything else is "High".
    """
    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((40, "Low"), (75, "Average")):
        if c <= upper_bound:
            return label
    return "High"

# Label every recipe with its carbohydrate bucket
df['carb_level'] = df['carbs'].map(categorize_carbs)

In [13]:
def categorize_fat(f):
    """Label a fat value as "Low", "Average" or "High".

    Values up to and including 15 are "Low", values up to and including 30
    are "Average", and everything else is "High".
    """
    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((15, "Low"), (30, "Average")):
        if f <= upper_bound:
            return label
    return "High"

# Label every recipe with its fat bucket
df['fat_level'] = df['fat'].map(categorize_fat)

In [14]:
# Preview the first five rows to check the calorie, protein, carb and fat level columns
df.head(n=5)

Unnamed: 0,title,intro,prep_time,cook_time,total_time,servings,ingredients,recipe_url,calories,fat,carbs,protein,ingredients_list,ingredients_text,embedding,time_level,calorie_level,protein_level,carb_level,fat_level
0,French Silk Pie Bars,french silk pie bar sooo good consist chocolat...,40,20.0,300.0,16.0,"['butter melted', 'white sugar', 'finely crush...",https://www.allrecipes.com/french-silk-pie-bar...,405,31,28,5,"[butter melted, white sugar, finely crushed ch...","butter melted, white sugar, finely crushed cho...","[-0.045996264, -0.06792308, 0.06503015, 0.0440...",Live in the Kitchen,Medium,Low,Low,High
1,No Bake Espresso Martini Cheesecakes,bake espresso martini cheesecakes base layer c...,20,5.0,25.0,6.0,"['dark chocolate chips', 'creme-filled chocola...",https://www.allrecipes.com/no-bake-espresso-ma...,1058,65,113,10,"[dark chocolate chips, creme-filled chocolate ...","dark chocolate chips, creme-filled chocolate s...","[-0.082189634, -0.037129056, 0.019682711, 0.07...",Quick and Easy!,High,Low,High,High
2,Blackout Cake,blackout cake moist tender cake deep cocoa fla...,40,20.0,135.0,12.0,"['cooking spray', 'all-purpose flour', 'white ...",https://www.allrecipes.com/blackout-cake-recip...,824,55,80,9,"[cooking spray, all-purpose flour, white sugar...","cooking spray, all-purpose flour, white sugar,...","[-0.017987631, -0.013042789, 0.06342994, 0.006...",Live in the Kitchen,High,Low,High,High
3,Sleeping Gingerbread Treats,shhhh sleeping,15,15.0,30.0,9.0,"['of prepared puff pastry thawed', 'chocolate ...",https://www.allrecipes.com/sleeping-gingerbrea...,231,12,29,3,"[of prepared puff pastry thawed, chocolate squ...","of prepared puff pastry thawed, chocolate squa...","[0.0102335075, 0.030051216, 0.043988287, 0.081...",Hour or Less,Low,Low,Low,Low
4,Little Debbie Brownie Tree Dip,turn favorite sweet treat good holiday snack,15,0.0,135.0,6.0,"['little debbie® christmas tree brownies', 'cr...",https://www.allrecipes.com/little-debbie-brown...,534,31,61,6,"[little debbie® christmas tree brownies, cream...","little debbie® christmas tree brownies, cream ...","[0.0012255397, -0.041548047, 0.04948033, 0.000...",Live in the Kitchen,Medium,Low,Average,High


In [15]:
# Look up the recipes most similar to "Steak Burrito" in the precomputed
# similarity table. Six rows are kept because the recipe's own entry
# (similarity 1.0) takes the first slot, leaving five other recipes.
title = "Steak Burrito"
ranked_by_similarity = similarity_df[title].sort_values(ascending=False)
most_similar_recipes = ranked_by_similarity.head(6)
print(most_similar_recipes)

title
Steak Burrito                        1.000000
Black Bean Tacos                     0.825561
Sheet Pan Quesadillas                0.824234
Shrimp Quesadillas                   0.823019
Everything Bagel Seared Tuna Bowl    0.812544
Lobster Tacos                        0.809856
Name: Steak Burrito, dtype: float32
