# Recipe Recommendation Project
### By: Salvatore Palmeri

In [1]:
#Import libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

In [2]:
#Load content based dataset
recipes = pd.read_csv('C:/Users/salpa/OneDrive/CSC Stuff/GitHub/recipe-recommender/data/RAW_recipes.csv')

In [3]:
#Print content based dataset
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [4]:
#Load collaborative based dataset
interactions = df = pd.read_csv('C:/Users/salpa/OneDrive/CSC Stuff/GitHub/recipe-recommender/data/RAW_interactions.csv')

In [5]:
#Print collaborative based dataset
interactions.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


# Exploratory Data Analysis

#### Content Based Dataset

In [6]:
recipes.shape

(231637, 12)

In [7]:
recipes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   name            231636 non-null  object
 1   id              231637 non-null  int64 
 2   minutes         231637 non-null  int64 
 3   contributor_id  231637 non-null  int64 
 4   submitted       231637 non-null  object
 5   tags            231637 non-null  object
 6   nutrition       231637 non-null  object
 7   n_steps         231637 non-null  int64 
 8   steps           231637 non-null  object
 9   description     226658 non-null  object
 10  ingredients     231637 non-null  object
 11  n_ingredients   231637 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 21.2+ MB


In [8]:
#First few recipes in the list
recipes['name'][0:21]

0     arriba   baked winter squash mexican style
1               a bit different  breakfast pizza
2                      all in the kitchen  chili
3                             alouette  potatoes
4             amish  tomato ketchup  for canning
5                        apple a day  milk shake
6                          aww  marinated olives
7                 backyard style  barbecued ribs
8                       bananas 4 ice cream  pie
9                        beat this  banana bread
10                   berry  good sandwich spread
11                 better than sex  strawberries
12               better then bush s  baked beans
13                    boat house  collard greens
14                       calm your nerves  tonic
15              chicken lickin  good  pork chops
16                                chile rellenos
17                                chinese  candy
18                            chinese  chop suey
19             cream  of cauliflower soup  vegan
20                  

#### Collaborative Based Dataset

In [9]:
interactions.shape

(1132367, 5)

In [10]:
interactions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 5 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   user_id    1132367 non-null  int64 
 1   recipe_id  1132367 non-null  int64 
 2   date       1132367 non-null  object
 3   rating     1132367 non-null  int64 
 4   review     1132198 non-null  object
dtypes: int64(3), object(2)
memory usage: 43.2+ MB


# Data Preprocessing

In [11]:
#Creating subsets of each dataset because of their large size
recipes_subset = recipes.head(5000)
interactions_subset = interactions.head(5000)

In [12]:
#Merge datasets on contributor_id and user_id
merged_data = pd.merge(interactions_subset, recipes_subset, left_on='user_id', right_on='contributor_id')

#New merged dataset
merged_data.head()

Unnamed: 0,user_id,recipe_id,date,rating,review,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,almond chicken with savory plum sauce,434158,35,126440,2010-08-03,"['60-minutes-or-less', 'time-to-make', 'course...","[1310.2, 135.0, 125.0, 98.0, 135.0, 51.0, 25.0]",15,"['preheat oven 375 deg', 'combine first four i...",i've had this for years and don't remember whe...,"['egg', 'cornstarch', 'soy sauce', 'garlic clo...",12
1,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",almond cinnamon rolls,136388,35,57222,2005-09-06,"['60-minutes-or-less', 'time-to-make', 'course...","[381.6, 25.0, 44.0, 7.0, 16.0, 41.0, 16.0]",16,"['crumble yeast into a bowl', 'melt butter , a...",cinnamon rolls made even more delicious with a...,"['butter', 'milk', 'fresh yeast', 'ground card...",11
2,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,3 ingredient chocolate cake,81108,35,52282,2004-01-14,"['60-minutes-or-less', 'time-to-make', 'course...","[464.6, 73.0, 3.0, 3.0, 24.0, 142.0, 5.0]",6,"['preheat oven to 375', 'line the bottom of an...","this is decadent, delicious, and so easy! and ...","['semisweet chocolate', 'unsalted butter', 'ex...",4
3,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,3 potato salad,66520,45,52282,2003-07-10,"['60-minutes-or-less', 'time-to-make', 'course...","[282.5, 11.0, 25.0, 10.0, 10.0, 5.0, 17.0]",7,"['heat the grill to high', 'wrap the potatoes ...","this recipe is from jack mcdavid, a wonderful ...","['sweet potato', 'yukon gold potato', 'purple ...",9
4,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,5 steak rubs for grilling,93881,5,52282,2004-06-21,"['15-minutes-or-less', 'time-to-make', 'course...","[2308.6, 102.0, 195.0, 3651.0, 204.0, 47.0, 15...",4,"['for each rub:', 'combine all ingredients in ...","from cooks illustrated, these are great for an...","['dried chipotle peppers', 'dried ancho chiles...",16


# Building Content Based Recommender

In [13]:
#Select the important recipe preference features
features = ['name', 'steps', 'description', 'ingredients', 'tags']

for feature in features:
    merged_data[feature] = merged_data[feature].fillna('') #Removing nan with empty space

In [14]:
#Create a new column that takes the value/info from the selected features
def combined_features(row):
    return str(row['name'])+ " "+str(row['steps'])+" " +str(row['description'])+" "+str(row['ingredients'])+" "+str(row['tags'])

In [15]:
merged_data["combined_features"] = merged_data.apply(combined_features, axis=1)
merged_data.head()

Unnamed: 0,user_id,recipe_id,date,rating,review,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,combined_features
0,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...,almond chicken with savory plum sauce,434158,35,126440,2010-08-03,"['60-minutes-or-less', 'time-to-make', 'course...","[1310.2, 135.0, 125.0, 98.0, 135.0, 51.0, 25.0]",15,"['preheat oven 375 deg', 'combine first four i...",i've had this for years and don't remember whe...,"['egg', 'cornstarch', 'soy sauce', 'garlic clo...",12,almond chicken with savory plum sauce ['prehea...
1,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin...",almond cinnamon rolls,136388,35,57222,2005-09-06,"['60-minutes-or-less', 'time-to-make', 'course...","[381.6, 25.0, 44.0, 7.0, 16.0, 41.0, 16.0]",16,"['crumble yeast into a bowl', 'melt butter , a...",cinnamon rolls made even more delicious with a...,"['butter', 'milk', 'fresh yeast', 'ground card...",11,almond cinnamon rolls ['crumble yeast into a b...
2,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,3 ingredient chocolate cake,81108,35,52282,2004-01-14,"['60-minutes-or-less', 'time-to-make', 'course...","[464.6, 73.0, 3.0, 3.0, 24.0, 142.0, 5.0]",6,"['preheat oven to 375', 'line the bottom of an...","this is decadent, delicious, and so easy! and ...","['semisweet chocolate', 'unsalted butter', 'ex...",4,3 ingredient chocolate cake ['preheat oven to ...
3,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,3 potato salad,66520,45,52282,2003-07-10,"['60-minutes-or-less', 'time-to-make', 'course...","[282.5, 11.0, 25.0, 10.0, 10.0, 5.0, 17.0]",7,"['heat the grill to high', 'wrap the potatoes ...","this recipe is from jack mcdavid, a wonderful ...","['sweet potato', 'yukon gold potato', 'purple ...",9,"3 potato salad ['heat the grill to high', 'wra..."
4,52282,120345,2005-05-21,4,very very sweet. after i waited the 2 days i b...,5 steak rubs for grilling,93881,5,52282,2004-06-21,"['15-minutes-or-less', 'time-to-make', 'course...","[2308.6, 102.0, 195.0, 3651.0, 204.0, 47.0, 15...",4,"['for each rub:', 'combine all ingredients in ...","from cooks illustrated, these are great for an...","['dried chipotle peppers', 'dried ancho chiles...",16,"5 steak rubs for grilling ['for each rub:', 'c..."


In [16]:
#Show combined features only                                      REMOVE THE [''] FROM OUTPUT
merged_data['combined_features'].head(10)

0    almond chicken with savory plum sauce ['prehea...
1    almond cinnamon rolls ['crumble yeast into a b...
2    3 ingredient chocolate cake ['preheat oven to ...
3    3 potato salad ['heat the grill to high', 'wra...
4    5 steak rubs for grilling ['for each rub:', 'c...
5    acini di pepe with spinach and feta ['cook aci...
6    acini di pepe with zucchini and peppers ['brin...
7    adobo steak ['mix all marinade ingredients', '...
8    almond rice mix ['add all ingredients , mixed ...
9    a different chicken dijonaisse ['saute or swea...
Name: combined_features, dtype: object

In [17]:
#Convert the chosen content for each recipe in vector form
count_vector = CountVectorizer()
sample_count_matrix = count_vector.fit_transform(merged_data["combined_features"])

In [18]:
cosine_sim = cosine_similarity(sample_count_matrix)
cosine_sim

array([[1.        , 0.46642196, 0.50071582, ..., 0.52467693, 0.38762421,
        0.34979996],
       [0.46642196, 1.        , 0.48043513, ..., 0.48983494, 0.37504952,
        0.33949341],
       [0.50071582, 0.48043513, 1.        , ..., 0.49646748, 0.36404027,
        0.43728818],
       ...,
       [0.52467693, 0.48983494, 0.49646748, ..., 1.        , 0.46459367,
        0.38372662],
       [0.38762421, 0.37504952, 0.36404027, ..., 0.46459367, 1.        ,
        0.28520666],
       [0.34979996, 0.33949341, 0.43728818, ..., 0.38372662, 0.28520666,
        1.        ]], shape=(11692, 11692))

In [19]:
#Example recipes tried before
recipe_tried = 'adobo steak'
# recipe_tried = '5 spice tea'
# recipe_tried = 'almond cinnamon rolls'
# recipe_tried = '3 potato salad'

In [20]:
#Get the index for recipe tried
def get_id_from_name(name):
    return merged_data[merged_data.name == name]["id"].index.values[0]

recipe_index = get_id_from_name(recipe_tried)
recipe_index

np.int64(7)

In [21]:
#Similarity score of tried recipe with other recipes
similar_recipes = list(enumerate(cosine_sim[recipe_index]))

similar_recipes[0:10]

[(0, np.float64(0.2521075188450377)),
 (1, np.float64(0.150488486987378)),
 (2, np.float64(0.24432430262247337)),
 (3, np.float64(0.2729696161617951)),
 (4, np.float64(0.34008102854029515)),
 (5, np.float64(0.25775179176713703)),
 (6, np.float64(0.24288211770337312)),
 (7, np.float64(0.9999999999999987)),
 (8, np.float64(0.29287700892335283)),
 (9, np.float64(0.25736237549520374))]

In [22]:
#Sort the list from highest to lowest similarity
sorted_similar_recipes = sorted(similar_recipes,
                               key=lambda x:x[1],
                               reverse=True)
sorted_similar_recipes[0:10]

[(7, np.float64(0.9999999999999987)),
 (3649, np.float64(0.9999999999999987)),
 (6074, np.float64(0.9999999999999987)),
 (7401, np.float64(0.9999999999999987)),
 (8529, np.float64(0.9999999999999987)),
 (147, np.float64(0.5105296229091597)),
 (4047, np.float64(0.5105296229091597)),
 (4258, np.float64(0.5105296229091597)),
 (6143, np.float64(0.5105296229091597)),
 (6486, np.float64(0.5105296229091597))]

In [23]:
#Find the recipe from the sorted list
def get_name_from_index(index):
    return recipes[recipes.index == index]["name"].values[0]

In [24]:
#Output the first few recipes based on tried recipe
print("Your next recipes based on trying",recipe_tried, "may be")
print("**********************************************************")
i=0
for recipe in sorted_similar_recipes[1:]:

    print(get_name_from_index(recipe[0]))
    i=i+1
    if i>10:
        break
print("**********************************************************")

Your next recipes based on trying adobo steak may be
**********************************************************
almond chocolate cake with ganache
apple   quark pie bars
apple tuna sandwiches
armenian basterma  dried cured beef
ant  kelly s london broil marinade
almost fat free asian cole slaw
alternative rice crispy treats
apple and cranberry galette
apple cider glazed spiced carrots
apple pear crisp  3 ww points
asparagus with thyme
**********************************************************


# Building Collaborative Based Recommender

#### Item user approach

In [25]:
#Item user matrix
item_user_matrix = merged_data.pivot_table(index='recipe_id',
                                   columns='user_id',
                                   values='rating')
item_user_matrix.head()

user_id,1535,2312,4291,4439,4470,5060,6258,6357,6651,6836,...,1677099,1706426,1783373,1844337,1925885,1926889,2198132,2198343,2549237,2597942
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
355,,,,,,,,,,,...,,,,,,,,,,
696,,,,,,,,,,,...,,,,,,,,,,
4034,,,,,,,,,,,...,,,,,,,,,,
5139,,,,,,,,,,,...,,,,,,,,,,
8507,,,,,,,,,,,...,,,,,,,,,,


In [26]:
#Replace nan values with 0
item_user_matrix = item_user_matrix.fillna(0)
item_user_matrix.head()

user_id,1535,2312,4291,4439,4470,5060,6258,6357,6651,6836,...,1677099,1706426,1783373,1844337,1925885,1926889,2198132,2198343,2549237,2597942
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
355,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4034,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8507,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
#Create a KNN model using cosine similarity as a distance metric
knn = NearestNeighbors(metric='cosine',
                       algorithm='auto',
                       n_neighbors=10)

knn.fit(item_user_matrix)

0,1,2
,n_neighbors,10
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


In [28]:
#Get the recipe_id based on name
recipe_id = merged_data[merged_data['name'] == '5 spice tea']['recipe_id'].values[0]
recipe_id

np.int64(373842)

In [29]:
#Generate closest 10 neighbors based on recipe_id
distances, indices = knn.kneighbors(item_user_matrix.loc[recipe_id, :].values.reshape(1,-1), n_neighbors = 10)

In [30]:
#Get the recipe_ids that are closest to the given recipe
neighbor_ids = item_user_matrix.iloc[indices[0]].index.values
neighbor_ids

array([373842,  18204, 435829, 412809, 311260, 195833, 203281,  42648,
       484456,  49413])

In [31]:
#Function to get the knn based on given recipe name
def get_knn_recipe(recipe_name,k):
  recipe_id = merged_data[merged_data['name'] == recipe_name]['recipe_id'].values[0]
  distances, indices = knn.kneighbors(item_user_matrix.loc[recipe_id, :].values.reshape(1,-1), n_neighbors = k)
  neighbor_ids = item_user_matrix.iloc[indices[0]].index.values
  return neighbor_ids

In [32]:
#Example
get_knn_recipe('5 spice tea',10)

array([373842,  18204, 435829, 412809, 311260, 195833, 203281,  42648,
       484456,  49413])

In [33]:
#Function that makes the recommendations
def recommend_recipes(k):

  #Prompts user for target recipe
  recipe_name = input("Enter a Recipe Name: ")
  print('Recommendations for {} (Collaborative filtering):'.format(recipe_name))
  print("**********************************************************")

  #KNN for the target recipe
  knn_recipes = get_knn_recipe(recipe_name,k)

  #Weighted average rating for each KNN recipe
  ratings_dict = {}
  for neighbor_id in knn_recipes:
    ratings_dict[neighbor_id] = 0
    for user_id in item_user_matrix.columns:
      if item_user_matrix.loc[neighbor_id, user_id] !=0:
        ratings_dict[neighbor_id] += item_user_matrix.loc[neighbor_id, user_id]
    ratings_dict[neighbor_id] /= len(item_user_matrix)

  #Sorts recipes by weighted average rating
  sorted_recipes = sorted(ratings_dict.items(), key=lambda x: x[1], reverse=True)


  #Calculates the top 10 recommendations
  recommended_recipes = [x[0] for x in sorted_recipes if x[0] != merged_data[merged_data['name'] == recipe_name]['recipe_id'].values[0]]
  recommended_recipe_names = merged_data[merged_data['recipe_id'].isin(recommended_recipes)]['name'].values

  #Formatting
  output_str = f"Recommended recipes for '{recipe_name}':\n"
  for i, recipe_name in enumerate(recommended_recipe_names):
      output_str += f"{i+1}. {recipe_name}\n"

  return output_str

In [35]:
#Some recipes to input
#5 spice tea
#5 tacos
#almond cinnamon rolls

recommendations = recommend_recipes(k=5)
print(recommendations)
('**********************************************************')

Enter a Recipe Name:  5 tacos


Recommendations for 5 tacos (Collaborative filtering):
**********************************************************
Recommended recipes for '5 tacos':
1. 2 minute carbonara
2. 4 salad dressings
3. aioli with herbs
4. all meat casserole
5. almond bread  cookie biscotti
6. almond crumbed chicken schnitzel with avocado salad
7. american sweet potato pudding
8. 2 minute carbonara
9. 4 salad dressings
10. aioli with herbs
11. all meat casserole
12. almond bread  cookie biscotti
13. almond crumbed chicken schnitzel with avocado salad
14. american sweet potato pudding
15. 2 minute carbonara
16. 4 salad dressings
17. aioli with herbs
18. all meat casserole
19. almond bread  cookie biscotti
20. almond crumbed chicken schnitzel with avocado salad
21. american sweet potato pudding
22. 1 favorite chinese steamed whole fish by sy
23. abuelita s almond chicken
24. almond bread pudding
25. 2 minute carbonara
26. 4 salad dressings
27. aioli with herbs
28. all meat casserole
29. almond bread  cookie bisc

'**********************************************************'

#### User item approach

In [36]:
#Resolve duplicates by aggregating
merged_data = merged_data.groupby(['user_id', 'recipe_id'], as_index=False)['rating'].mean()

In [37]:
#Create a user-item rating matrix
user_recipe_ratings = merged_data.pivot(index='user_id',
                                   columns='recipe_id',
                                   values='rating').fillna(0)
user_recipe_ratings.head()

recipe_id,355,696,4034,5139,8507,8587,8846,8905,10637,11415,...,502734,503977,504394,507198,511790,513016,514852,523223,523426,532629
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2312,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4470,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
#Initialize the KNN model
knn_model = NearestNeighbors(n_neighbors=5,
                             metric='cosine',
                             algorithm='brute')

#Fit the KNN model
knn_model.fit(user_recipe_ratings.values)

0,1,2
,n_neighbors,5
,radius,1.0
,algorithm,'brute'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


In [39]:
#Define a function to get recipe recommendations for a given user
def get_recipe_recommendations(user_id, num_recommendations=5):
    user_ratings = user_recipe_ratings.loc[user_id].values.reshape(1, -1)
    distances, indices = knn_model.kneighbors(user_ratings, n_neighbors=num_recommendations)
    recommended_recipe_ids = [user_recipe_ratings.columns[i] for i in indices.flatten()]
    return recommended_recipe_ids

In [40]:
#Example
get_recipe_recommendations(user_id=52282, num_recommendations=5)

[np.int64(62125),
 np.int64(379102),
 np.int64(321038),
 np.int64(367080),
 np.int64(338256)]

In [44]:
# Merge datasets and ensure unique (user_id, recipe_id) pairs
merged_data = pd.merge(
    interactions,
    recipes[['id', 'name']].rename(columns={'id': 'recipe_id'}),  # Rename `id` to `recipe_id`
    on='recipe_id'
)[['user_id', 'recipe_id', 'rating', 'name']].drop_duplicates()

In [47]:
#Function that makes the recommendations
def get_recipe_recommendations(user_id, num_recommendations=5):
    if user_id not in user_recipe_ratings.index:
        return "User ID not found."

    user_ratings = user_recipe_ratings.loc[user_id].values.reshape(1, -1)
    distances, indices = knn_model.kneighbors(user_ratings, n_neighbors=num_recommendations)
    recommended_recipe_ids = [user_recipe_ratings.columns[i] for i in indices.flatten()]

    #Filter `merged_data` to get unique recipe names based on recipe IDs
    recommended_recipes = merged_data[merged_data['recipe_id'].isin(recommended_recipe_ids)][['recipe_id', 'name']].drop_duplicates()

    return recommended_recipes.set_index('recipe_id')

#Example usage
user_id = 126440
recommended_recipes = get_recipe_recommendations(user_id, num_recommendations=5)
print(f"Recommended recipes for user {user_id}:")

for recipe_id, recipe_name in recommended_recipes['name'].items():
    print(f"Recipe ID: {recipe_id}, Name: {recipe_name}")

Recommended recipes for user 126440:
Recipe ID: 32723, Name: masters black eyed peas
Recipe ID: 269907, Name: sweet baked apples with dates   apricots
Recipe ID: 269786, Name: portuguese fried potatoes    batas a portuguesa
Recipe ID: 269849, Name: creamy oatmeal berry swirl
Recipe ID: 269788, Name: semolina with banana   nuts


# Building Hybrid Based Recommender

In [48]:
#Print the first 10 recipe names from the dataset to use
print(merged_data[['name']].head(10))

                                                     name
527262                    beef in red wine mushroom sauce
948572               christmas rum balls or bourbon balls
594083                                 three garlic pasta
364997                       cilantro curry chicken salad
230135                     mushroom   swiss cheese quiche
996115              electric mixer sweet butter pie crust
782211        bean casserole with eggplant and red pepper
11539                                    tex mex carnitas
202519  pork and stir fried vegetables with spicy asia...
350201                               toasted walnut bread


In [49]:
merged_data.reset_index(drop=True, inplace=True)

In [51]:
def build_cosine_sim_matrix(data, text_column):
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(data[text_column])
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    return cosine_sim

def hybrid_recommendation(user_id, title, merged_data, user_recipe_ratings, k=10):
    cosine_sim = build_cosine_sim_matrix(merged_data, 'name')

    title_index = get_index_from_title(title, merged_data)
    if title_index is None:
        print("Title not found.")
        return []

    if title_index >= len(cosine_sim):
        print(f"Title index {title_index} is out of bounds for the cosine similarity matrix size {len(cosine_sim)}.")
        return []

    content_scores = list(enumerate(cosine_sim[title_index]))
    content_recommendations = sorted(content_scores, key=lambda x: x[1], reverse=True)[1:26]

    if user_id not in user_recipe_ratings.index:
        print("User ID not found in the ratings matrix.")
        return []

    user_index = user_recipe_ratings.index.get_loc(user_id)
    user_ratings = user_recipe_ratings.iloc[user_index].values.reshape(1, -1)

    distances, indices = knn_model.kneighbors(user_ratings, n_neighbors=10)
    collaborative_ids = [user_recipe_ratings.columns[i] for i in indices.flatten() if i < len(user_recipe_ratings.columns)]

    combined_scores = {}
    for idx, score in content_recommendations:
        combined_scores[idx] = combined_scores.get(idx, 0) + score
    for idx, score in [(collaborative_ids[i], 1 / (1 + distances[0][i])) for i in range(len(collaborative_ids))]:
        combined_scores[idx] = combined_scores.get(idx, 0) + score

    sorted_combined = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:k]
    top_recipe_ids = [idx for idx, _ in sorted_combined]

    final_recommendations = [merged_data.iloc[idx]['name'] for idx in top_recipe_ids if idx < len(merged_data)]

    return final_recommendations

In [52]:
def get_index_from_title(title, data):
    try:
        return data[data['name'].str.lower() == title.lower()].index[0]
    except IndexError:
        return None 

In [53]:
#Testing the function
test_title = 'beef in red wine mushroom sauce'
test_index = get_index_from_title(test_title, merged_data)
print(f"Index for '{test_title}': {test_index}")

Index for 'beef in red wine mushroom sauce': 0


In [54]:
#Example usage
user_id = 126440
title = 'beef in red wine mushroom sauce'
recommendations = hybrid_recommendation(user_id, title, merged_data, user_recipe_ratings, k=5)
print("Recommended recipes based on", title, "and user preferences:")
for recommendation in recommendations:
    print(recommendation)

Recommended recipes based on beef in red wine mushroom sauce and user preferences:
beef enchiladas with red sauce
red wine and garlic mushrooms
cranberries in red wine
catfish fillets in white wine and mushroom sauce
