In [1]:
import pandas as pd
import ast
from scipy.spatial.distance import cosine, euclidean, hamming
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from time import time

In [2]:
# Load the cleaned recipe table: title, rating, review count, four nutrition
# columns, ingredients and directions (plus a leftover 'Unnamed: 0' column that
# later cells drop).
recipe = pd.read_csv('cleaned1-Copy1.csv')

In [3]:
# Preview the first rows to confirm which columns were loaded.
recipe.head()

Unnamed: 0.1,Unnamed: 0,title,rating,review_nums,calories,protein,fat,sodium,ingredients,directions_
0,0,Homemade Bacon,5.0,3,308,21,23,2017,pork belly\nsmoked paprika\nkosher salt\ngroun...,Prep\n5 m\nCook\n2 h 45 m\nReady In\n11 h 50 m...
1,1,"Pork Loin, Apples, and Sauerkraut",4.8,29,371,36,11,2606,sauerkraut drained\nGranny Smith apples sliced...,Prep\n15 m\nCook\n2 h 30 m\nReady In\n2 h 45 m...
2,2,Foolproof Rosemary Chicken Wings,4.6,12,335,23,23,762,chicken wings\nsprigs rosemary\nhead garlic\no...,Prep\n20 m\nCook\n40 m\nReady In\n1 h\nPreheat...
3,3,Chicken Pesto Paninis,4.6,163,640,32,29,1075,focaccia bread quartered\nprepared basil pesto...,Prep\n15 m\nCook\n5 m\nReady In\n20 m\nPreheat...
4,4,Potato Bacon Pizza,4.5,2,162,7,7,189,red potatoes\nstrips bacon\nSauce:\nheavy whip...,Prep\n20 m\nCook\n45 m\nReady In\n1 h 10 m\nPl...


In [4]:
# Load the table holding one `recipe_id` per row; later cells copy that column
# onto the recipe frames.
# NOTE(review): the name `id` shadows Python's built-in id() for the rest of the
# session. Renaming it means updating every later cell that reads id['recipe_id'].
id=pd.read_csv('id.csv')

In [5]:
# Preview the id table (a leftover 'Unnamed: 0' column plus `recipe_id`).
id.head()

Unnamed: 0.1,Unnamed: 0,recipe_id
0,0,222388
1,1,240488
2,2,218939
3,3,87211
4,4,245714


In [6]:
# Keep only the numeric nutrition features (calories, protein, fat, sodium) by
# discarding the leftover index column, the text fields and the rating fields.
data = recipe.drop(
    labels=[
        'Unnamed: 0',
        'title',
        'rating',
        'review_nums',
        'ingredients',
        'directions_',
    ],
    axis=1,
)

In [7]:
# Preview the nutrition-only frame.
data.head()

Unnamed: 0,calories,protein,fat,sodium
0,308,21,23,2017
1,371,36,11,2606
2,335,23,23,762
3,640,32,29,1075
4,162,7,7,189


In [8]:
# Attach each row's recipe id (pandas aligns the two frames on their row labels)
# and promote it to the index. drop=False keeps the column as well; the next
# cell removes it.
data = data.assign(recipe_id=id['recipe_id']).set_index('recipe_id', drop=False)

In [9]:
# Preview: rows are now labelled by recipe_id, which is still present as a column too.
data.head()

Unnamed: 0_level_0,calories,protein,fat,sodium,recipe_id
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
222388,308,21,23,2017,222388
240488,371,36,11,2606,240488
218939,335,23,23,762,218939
87211,640,32,29,1075,87211
245714,162,7,7,189,245714


In [10]:
# The id now lives in the index, so remove the redundant column and leave only
# the four nutrition features.
data = data.drop('recipe_id', axis='columns')


In [11]:
# Preview the final feature frame: recipe_id index, four nutrition columns.
data.head()

Unnamed: 0_level_0,calories,protein,fat,sodium
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
222388,308,21,23,2017
240488,371,36,11,2606
218939,335,23,23,762
87211,640,32,29,1075
245714,162,7,7,189


In [12]:
# Raw (un-normalised) nutrition values for recipe 222388.
data.loc[222388].values

array([ 308,   21,   23, 2017], dtype=int64)

In [13]:
# Raw (un-normalised) nutrition values for recipe 75710.
data.loc[75710].values

array([135,   9,  10, 895], dtype=int64)

In [14]:
# Rescale each nutrition column independently (axis=0 normalises per feature,
# using sklearn's default norm) and rebuild a frame carrying the original row
# and column labels.
df_normalized = pd.DataFrame(
    normalize(data, axis=0),
    index=data.index,
    columns=data.columns,
)
df_normalized.head()

Unnamed: 0_level_0,calories,protein,fat,sodium
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
222388,0.003553,0.00482,0.004773,0.006547
240488,0.004279,0.008263,0.002283,0.008459
218939,0.003864,0.005279,0.004773,0.002473
87211,0.007382,0.007345,0.006018,0.003489
245714,0.001869,0.001607,0.001453,0.000613


In [15]:
# Column total after normalisation. It is not 1: presumably because sklearn's
# default norm is L2 (unit sum of squares), not L1 — confirm if a unit sum was intended.
df_normalized.calories.sum()

184.36238095082484

In [16]:
# Normalised feature vector for recipe 222388 (compare with the raw values above).
df_normalized.loc[222388].values

array([0.00355277, 0.00481999, 0.00477258, 0.00654674])

In [17]:
# Preview the normalised feature frame used by the recommenders below.
df_normalized.head()

Unnamed: 0_level_0,calories,protein,fat,sodium
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
222388,0.003553,0.00482,0.004773,0.006547
240488,0.004279,0.008263,0.002283,0.008459
218939,0.003864,0.005279,0.004773,0.002473
87211,0.007382,0.007345,0.006018,0.003489
245714,0.001869,0.001607,0.001453,0.000613


In [18]:
 def nutrition_recommender(distance_method, recipe_id, N):
     """Return the ``N`` recipes whose normalised nutrition profile is closest to ``recipe_id``.

     Parameters
     ----------
     distance_method : callable
         Function of two 1-D vectors returning a scalar distance, for example
         ``scipy.spatial.distance.euclidean``. It is called as
         ``distance_method(query_row, candidate_row)`` with two pandas Series.
     recipe_id : hashable
         Index label of the query recipe in the module-level ``df_normalized``.
     N : int
         Maximum number of neighbours to return.

     Returns
     -------
     pandas.DataFrame
         Columns ``recipe_id`` and ``distance``, sorted by ascending distance.
         The row labels are each recipe's row position in ``df_normalized``.
         The query recipe itself is never included.

     Raises
     ------
     KeyError
         If ``recipe_id`` is not a label of ``df_normalized``.
     """
     # Look the query vector up once instead of once per candidate. Selecting
     # with a list always yields a frame, so a duplicated id cannot turn the
     # query into a 2-D object; the first matching row is used.
     target = df_normalized.loc[[recipe_id]].iloc[0]

     ids = pd.Series(df_normalized.index, name="recipe_id")
     keep = ids != recipe_id

     # Walk candidate rows by position rather than by label, so every row is
     # scored exactly once even if a recipe id appears more than once.
     candidates = df_normalized.reset_index(drop=True)[keep]
     distances = [distance_method(target, row) for _, row in candidates.iterrows()]

     # assign() builds a fresh frame, avoiding column assignment on a filtered
     # slice (the pattern that triggers SettingWithCopyWarning).
     ranked = ids[keep].to_frame().assign(distance=distances)
     return ranked.sort_values(["distance"]).head(N)

In [19]:
# Example: the 15 recipes nearest to recipe 222388 by euclidean distance on the
# normalised nutrition features.
nutrition_recommender(euclidean, 222388, 15)


Unnamed: 0,recipe_id,distance
1799,258814,0.000516
12531,21563,0.000792
3529,257428,0.000868
46885,99705,0.0009
14534,234503,0.000951
3003,219803,0.001004
4533,38046,0.001011
74,216090,0.001026
6710,232096,0.00114
12913,31026,0.001156


In [20]:
def nutrition_hybrid_recommender(recipe_id, N):
    """Draft hybrid recommender: two nearest recipes per metric, then a name lookup.

    Ranks every other recipe in ``df_normalized`` by cosine, euclidean and
    hamming distance to ``recipe_id``, keeps the two closest per metric,
    concatenates the three pairs and keeps the first ``N`` rows. It then builds
    a list of ids and a list of "id  name" labels.

    NOTE(review): there is no ``return`` statement, so this returns None and the
    lists built at the end are discarded. A later cell defines a function with
    the same name, which replaces this one as soon as that cell runs.
    """
    # NOTE(review): `start` is never read in the visible body.
    start = time()
    
    # One (recipe_id, distance) table per metric, each excluding the query recipe.
    allRecipes_cosine = pd.DataFrame(df_normalized.index)
    allRecipes_cosine = allRecipes_cosine[allRecipes_cosine.recipe_id != recipe_id]
    allRecipes_cosine["distance"] = allRecipes_cosine["recipe_id"].apply(lambda x: cosine(df_normalized.loc[recipe_id], df_normalized.loc[x]))
    
    allRecipes_euclidean = pd.DataFrame(df_normalized.index)
    allRecipes_euclidean = allRecipes_euclidean[allRecipes_euclidean.recipe_id != recipe_id]
    allRecipes_euclidean["distance"] = allRecipes_euclidean["recipe_id"].apply(lambda x: euclidean(df_normalized.loc[recipe_id], df_normalized.loc[x]))
    
    allRecipes_hamming = pd.DataFrame(df_normalized.index)
    allRecipes_hamming = allRecipes_hamming[allRecipes_hamming.recipe_id != recipe_id]
    allRecipes_hamming["distance"] = allRecipes_hamming["recipe_id"].apply(lambda x: hamming(df_normalized.loc[recipe_id], df_normalized.loc[x]))
    
    # Two closest recipes per metric (the second sort repeats the first ordering).
    Top2Recommendation_cosine = allRecipes_cosine.sort_values(["distance"]).head(2).sort_values(by=['distance'])
    Top2Recommendation_euclidean = allRecipes_euclidean.sort_values(["distance"]).head(2).sort_values(by=['distance'])
    Top2Recommendation_hamming = allRecipes_hamming.sort_values(["distance"]).head(2).sort_values(by=['distance'])
    
    # NOTE(review): the `recipe.head()` preview earlier in the notebook shows no
    # 'recipe_id' column on `recipe` (the ids are loaded separately into `id`);
    # if that holds, this set_index raises KeyError — confirm.
    recipe_df = recipe.set_index('recipe_id')
    # Concatenation order is cosine, euclidean, hamming; the same recipe may appear more than once.
    hybrid_Top6Recommendation = pd.concat([Top2Recommendation_cosine, Top2Recommendation_euclidean, Top2Recommendation_hamming])
    #aver_rate_list = []
    #review_nums_list = []
    #for recipeid in hybrid_Top6Recommendation.recipe_id:
    #    aver_rate_list.append(recipe_df.at[recipeid, 'aver_rate'])
    #    review_nums_list.append(recipe_df.at[recipeid, 'review_nums'])
    #hybrid_Top6Recommendation['aver_rate'] = aver_rate_list
    #hybrid_Top6Recommendation['review_nums'] = review_nums_list
    # At most six rows exist, so N only has an effect when it is below six.
    TopNRecommendation = hybrid_Top6Recommendation.head(N).drop(columns=['distance'])
    
    # Rebinds the parameter: from here on `recipe_id` is a list starting with the query id.
    recipe_id = [recipe_id]   
    recipe_list = []
    for recipeid in TopNRecommendation.recipe_id:
        recipe_id.append(recipeid)   # list of recipe id of selected recipe and recommended recipe(s)
        # NOTE(review): 'recipe_name' is not among the columns shown by
        # `recipe.head()`; 'title' looks like the name column — confirm.
        recipe_list.append("{}  {}".format(recipeid, recipe_df.at[recipeid, 'recipe_name']))

In [21]:
def nutrition_hybrid_recommender(recipe_id, N):
    """Recommend recipes by pooling the two nearest neighbours under three metrics.

    Every other recipe in the module-level ``df_normalized`` is scored against
    ``recipe_id`` with cosine, euclidean and hamming distance. The two closest
    recipes under each metric are concatenated in that order (cosine,
    euclidean, hamming) and the first ``N`` rows are returned.

    Parameters
    ----------
    recipe_id : hashable
        Index label of the query recipe in ``df_normalized``.
    N : int
        Maximum number of rows to return. At most six rows are available (two
        per metric), so any ``N >= 6`` returns all of them.

    Returns
    -------
    pandas.DataFrame
        Columns ``recipe_id`` and ``distance``; row labels are each recipe's row
        position in ``df_normalized``. Distances come from different metrics
        and are not comparable across rows. A recipe that ranks in the top two
        under several metrics appears once per metric.

    Raises
    ------
    KeyError
        If ``recipe_id`` is not a label of ``df_normalized``.

    Notes
    -----
    NOTE(review): scipy's hamming distance is the fraction of components that
    differ exactly, so on continuous normalised features it mostly reflects how
    many nutrition values happen to be identical. Confirm this metric is wanted.
    """
    # Fetch the query vector once; the list selector guarantees a frame even if
    # the id is duplicated, and the first matching row is used.
    target = df_normalized.loc[[recipe_id]].iloc[0].to_numpy()

    ids = pd.Series(df_normalized.index, name="recipe_id")
    keep = ids != recipe_id
    candidate_ids = ids[keep]
    # Plain array rows avoid a label lookup per candidate per metric.
    candidate_vectors = df_normalized.to_numpy()[keep.to_numpy()]

    per_metric_top = []
    for metric in (cosine, euclidean, hamming):
        ranked = candidate_ids.to_frame().assign(
            distance=[metric(target, vector) for vector in candidate_vectors]
        )
        per_metric_top.append(ranked.sort_values(["distance"]).head(2))

    # Honour N instead of silently ignoring it.
    return pd.concat(per_metric_top).head(N)

In [22]:
# Hybrid recommendations for recipe 245714. The recommender pools two neighbours
# per metric, so at most six rows come back and N=15 does not enlarge the result.
hybrid_Top6Recommendation=nutrition_hybrid_recommender(245714, 15)

In [43]:
# Full recipe table (text fields included) without the leftover index column.
data1 = recipe.drop('Unnamed: 0', axis=1)

In [44]:
# Preview the full recipe table before the ids are attached.
data1.head()

Unnamed: 0,title,rating,review_nums,calories,protein,fat,sodium,ingredients,directions_
0,Homemade Bacon,5.0,3,308,21,23,2017,pork belly\nsmoked paprika\nkosher salt\ngroun...,Prep\n5 m\nCook\n2 h 45 m\nReady In\n11 h 50 m...
1,"Pork Loin, Apples, and Sauerkraut",4.8,29,371,36,11,2606,sauerkraut drained\nGranny Smith apples sliced...,Prep\n15 m\nCook\n2 h 30 m\nReady In\n2 h 45 m...
2,Foolproof Rosemary Chicken Wings,4.6,12,335,23,23,762,chicken wings\nsprigs rosemary\nhead garlic\no...,Prep\n20 m\nCook\n40 m\nReady In\n1 h\nPreheat...
3,Chicken Pesto Paninis,4.6,163,640,32,29,1075,focaccia bread quartered\nprepared basil pesto...,Prep\n15 m\nCook\n5 m\nReady In\n20 m\nPreheat...
4,Potato Bacon Pizza,4.5,2,162,7,7,189,red potatoes\nstrips bacon\nSauce:\nheavy whip...,Prep\n20 m\nCook\n45 m\nReady In\n1 h 10 m\nPl...


In [49]:
# Add the recipe id as a new last column (pandas aligns the two frames on their
# row labels).
data1 = data1.assign(recipe_id=id['recipe_id'])

In [55]:
# Make recipe_id the row label so recipes can be fetched with data1.loc[<id>].
# Not re-runnable as is: once the column has moved into the index, a second run
# raises KeyError.
data1=data1.set_index('recipe_id')

In [56]:
# Preview the id-indexed recipe table.
data1.head()

Unnamed: 0_level_0,title,rating,review_nums,calories,protein,fat,sodium,ingredients,directions_
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
222388,Homemade Bacon,5.0,3,308,21,23,2017,pork belly\nsmoked paprika\nkosher salt\ngroun...,Prep\n5 m\nCook\n2 h 45 m\nReady In\n11 h 50 m...
240488,"Pork Loin, Apples, and Sauerkraut",4.8,29,371,36,11,2606,sauerkraut drained\nGranny Smith apples sliced...,Prep\n15 m\nCook\n2 h 30 m\nReady In\n2 h 45 m...
218939,Foolproof Rosemary Chicken Wings,4.6,12,335,23,23,762,chicken wings\nsprigs rosemary\nhead garlic\no...,Prep\n20 m\nCook\n40 m\nReady In\n1 h\nPreheat...
87211,Chicken Pesto Paninis,4.6,163,640,32,29,1075,focaccia bread quartered\nprepared basil pesto...,Prep\n15 m\nCook\n5 m\nReady In\n20 m\nPreheat...
245714,Potato Bacon Pizza,4.5,2,162,7,7,189,red potatoes\nstrips bacon\nSauce:\nheavy whip...,Prep\n20 m\nCook\n45 m\nReady In\n1 h 10 m\nPl...


In [58]:
# Persist the id-indexed recipe table; index=True writes recipe_id as the first
# CSV column.
# NOTE(review): absolute, machine-specific Windows path. This cell fails on any
# machine without that directory; consider a configurable data directory.
data1.to_csv(r'D:\food data\cleaned data\cleaned1id.csv',index= True)

In [26]:
# Show the pooled recommendations. Each pair of rows comes from a different
# metric, so the distance values are not comparable across pairs.
hybrid_Top6Recommendation

Unnamed: 0,recipe_id,distance
35936,24512,1e-05
6831,65478,5.5e-05
38997,216674,0.000226
34018,42657,0.000244
40226,231922,0.5
19340,202261,0.5


In [27]:
# Coerce the id column to a numeric dtype so the ids can be used as lookup keys.
hybrid_Top6Recommendation = hybrid_Top6Recommendation.assign(
    recipe_id=pd.to_numeric(hybrid_Top6Recommendation["recipe_id"])
)

In [28]:
# First recommendation as a raw array. The id shows as a float because the row
# mixes it with the float distance.
hybrid_Top6Recommendation.iloc[0].values

array([2.45120000e+04, 9.54296872e-06])

In [29]:
# Id of the first recommendation, read from the id column so it keeps its integer type.
hybrid_Top6Recommendation.recipe_id.iloc[0]

24512

In [30]:
# Title of recipe 42657, one of the recipes recommended above.
data1.loc[42657, "title"]

"Patti's Mussels a la Mariniere"

In [31]:
# Compare the ingredient list of a recommended recipe (42657) with that of the
# query recipe (245714), separated by a dotted line.
recommended_ingredients = data1.loc[42657, "ingredients"]
query_ingredients = data1.loc[245714, "ingredients"]
print(recommended_ingredients, ".....", query_ingredients, sep="\n")


fresh mussels
extra virgin olive oil
garlic
white wine
margarine
green onions
fresh parsley
roma (plum) tomatoes chopped
salt and pepper to taste
.....
red potatoes
strips bacon
Sauce:
heavy whipping cream
butter
minced garlic
grated Parmesan cheese
Crust:
warm water (125 degrees F 52 degrees C)
honey
active dry yeast
vegetable oil
all-purpose flour
shredded mozzarella cheese
