# Recipe Recommendation System

#### This script processes a recipe dataset to implement Content-Based Filtering, Collaborative Filtering, and Knowledge-Based Filtering. It provides personalized recipe recommendations based on user preferences.

In [1]:
# Importing Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast
from typing import List, Optional
import matplotlib.pyplot as plt

## Helper Functions

### Data Cleaning and Preprocessing
	•	Cleans recipe and review datasets.
	•	Parses nutritional info and ingredients.
	•	Merges aggregated ratings.

In [2]:
def preprocess_data(recipes_df, reviews_df):
    """
    Preprocess recipes and reviews datasets: remove outliers, clean text fields, and extract nutrition details.
    """
    # Rename ID column for clarity
    recipes_df = recipes_df.rename(columns={'id': 'recipe_id'})

    # Convert 'nutrition' column values from string to list
    recipes_df['nutrition'] = recipes_df['nutrition'].apply(eval)

    # Create separate columns for each nutrition component
    nutrition_columns = ['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 
                         'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']
    recipes_df[nutrition_columns] = pd.DataFrame(recipes_df['nutrition'].tolist(), index=recipes_df.index)

    # Remove the original 'nutrition' column
    recipes_df = recipes_df.drop(['nutrition'], axis=1)

    # Drop duplicate rows
    recipes_df = recipes_df.drop_duplicates()

    # Process ingredients field
    recipes_df["ingredients"] = recipes_df["ingredients"].fillna("[]")
    recipes_df["ingredients"] = recipes_df["ingredients"].apply(
        lambda x: [ingredient.lower() for ingredient in ast.literal_eval(x)] if isinstance(x, str) else []
    )

    # Filter reviews with no ratings
    reviews_df = reviews_df[reviews_df["rating"] != 0]

    return recipes_df, reviews_df

In [3]:
# TF-IDF Vectorization for Content-Based Filtering
def create_tfidf_matrix(recipes_df):
    """
    Create TF-IDF matrix from the 'steps' column of the recipes dataset.
    """
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(recipes_df["steps"].fillna(""))
    return tfidf, tfidf_matrix


# Cosine Similarity for Recommendations
def get_cosine_similarity_recommendations(tfidf_matrix, query_text, tfidf, num_recommendations=5):
    """
    Recommend recipes based on cosine similarity between query text and TF-IDF vectors.
    """
    query_vector = tfidf.transform([query_text])
    similarity_scores = cosine_similarity(tfidf_matrix, query_vector).flatten()
    top_indices = similarity_scores.argsort()[-num_recommendations - 1 : -1][::-1]
    return top_indices, similarity_scores


# Knowledge-Based Filtering
def filter_recipes_by_preferences(
    recipes_df, preferred_ingredients=None, max_time=None, max_calories=None, sort_by="rating"
):
    """
    Filter recipes based on user preferences for ingredients, time, and calories.
    """
    filtered_df = recipes_df.copy()

    if preferred_ingredients:
        filtered_df = filtered_df[
            filtered_df["ingredients"].apply(lambda x: set(preferred_ingredients).issubset(set(x)))
        ]

    if max_time:
        filtered_df = filtered_df[filtered_df["minutes"] <= max_time]

    if max_calories:
        filtered_df = filtered_df[filtered_df["calories"] <= max_calories]

    if sort_by == "rating":
        filtered_df = filtered_df.sort_values(by="mean_rating", ascending=False)

    return filtered_df


# Display Recommendations
def display_recommendations(recommendations_df, num_recommendations=5):
    """
    Display the top recommended recipes.
    """
    print("\nTop Recommendations:")
    display_cols = ["name", "minutes", "ingredients", "mean_rating"]
    print(recommendations_df[display_cols].head(num_recommendations))



## Main Functionality
	•	Asks the user to enter query text and number of recommendations.
	•	Outputs the top recommended recipes.

In [4]:
#Loading the data
recipes_df = pd.read_csv("RAW_recipes.csv")
reviews_df = pd.read_csv("RAW_interactions.csv")
#Renaming ID -> Recipe_ID
recipes_df = recipes_df.rename(columns = {'id': 'recipe_id'})
recipes_df, reviews_df = preprocess_data(recipes_df, reviews_df)

# Aggregate Ratings for Recipes
agg_ratings = reviews_df.groupby("recipe_id").agg(
    mean_rating=("rating", "mean"), number_of_ratings=("rating", "count")
).reset_index()
recipes_df = recipes_df.merge(agg_ratings, on="recipe_id", how="left")

# Content-Based Filtering
tfidf, tfidf_matrix = create_tfidf_matrix(recipes_df)

In [5]:
import joblib

# Create a folder for processed data
import os
os.makedirs("processed", exist_ok=True)

# Save preprocessed DataFrame
recipes_df.to_pickle("processed/recipes_df.pkl")

# Save TF-IDF vectorizer and matrix
joblib.dump(tfidf, "processed/tfidf_vectorizer.pkl")
joblib.dump(tfidf_matrix, "processed/tfidf_matrix.pkl")

['processed/tfidf_matrix.pkl']

### Content-Based Filtering:
	•	Uses TF-IDF on recipe steps.
	•	Recommends recipes similar to user input via cosine similarity.

In [5]:
# Interactive Content-Based Recommendations
print("\n--- Content-Based Recommendations ---")
recipe_query = input("Enter a recipe or list of ingredients to base recommendations on: ").lower()
num_recommendations = int(input("How many recommendations would you like? "))

top_indices, scores = get_cosine_similarity_recommendations(
    tfidf_matrix, recipe_query, tfidf, num_recommendations
)
recommended_recipes = recipes_df.iloc[top_indices]

# Display Content-Based Recommendations
display_recommendations(recommended_recipes, num_recommendations)


--- Content-Based Recommendations ---

Top Recommendations:
                                                     name  minutes  \
50417                      chocolate crunch cake  no bake      140   
4347    always tastes great  gluten free easy cake wit...       40   
77112                easy melt in your mouth coconut cake       45   
77771                     easy quick mix gluten free cake       40   
33898                  cake mix chocolate mayonnaise cake       55   
104449                         heavenly light yellow cake       45   
69576                   danish layer cake  dansk lagekage       75   
72792                                donut  birthday cake       15   
200480                               strawberry 7 up cake      180   
216703  tropical coconut cake  aka better than sex ver...       90   

                                              ingredients  mean_rating  
50417   [dark chocolate, butter, condensed milk, diges...     4.500000  
4347    [gluten-free s

###	Knowledge-Based Filtering:
	•	Filters recipes based on user-specified:
	•	Ingredients
	•	Maximum cooking time
	•	Maximum calories
	•	Optionally sorts by rating.

In [6]:
# Knowledge-Based Recommendations
print("\n--- Knowledge-Based Filtering ---")

# Step 1: Collect User Preferences
# Ask for preferred ingredients
preferred_ingredients = input("Enter preferred ingredients (comma-separated, or leave blank for no preference): ").lower().split(",")
if preferred_ingredients == [""]:
    preferred_ingredients = None  # Handle no preference case

# Ask for time constraint
try:
    max_time = int(input("Enter maximum preparation time (in minutes, or -1 for no limit): "))
    max_time = None if max_time == -1 else max_time  # Handle no limit case
except ValueError:
    max_time = None  # Default to no limit if input is invalid

# Ask for calorie constraint
try:
    max_calories = int(input("Enter maximum calories (or -1 for no limit): "))
    max_calories = None if max_calories == -1 else max_calories  # Handle no limit case
except ValueError:
    max_calories = None  # Default to no limit if input is invalid

# Step 2: Filter Recipes Based on Preferences
kb_recommendations = filter_recipes_by_preferences(
    recipes_df,
    preferred_ingredients=preferred_ingredients,
    max_time=max_time,
    max_calories=max_calories,
    sort_by="rating"  # Sort by mean rating
)

# Step 3: Display Knowledge-Based Recommendations
num_recommendations = int(input("How many recommendations would you like to see? "))
display_recommendations(kb_recommendations, num_recommendations)

print("\nThank you for using the Recipe Recommendation System!")


--- Knowledge-Based Filtering ---

Top Recommendations:
Empty DataFrame
Columns: [name, minutes, ingredients, mean_rating]
Index: []

Thank you for using the Recipe Recommendation System!


In [7]:
recipes_df.head()

Unnamed: 0,name,recipe_id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),mean_rating,number_of_ratings
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0,5.0,3.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0,4.666667,3.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0,4.0,1.0
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0,4.5,2.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0,5.0,1.0


In [8]:
reviews_df.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."
