In [5]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

import matplotlib.pyplot as plt
plt.style.use('ggplot')

%matplotlib inline

In [6]:
# path config
df = pd.read_csv("newfood.csv",usecols=['id','title','userId','stars',],dtype={'id': 'int32', 'title': 'str', 'userId' : 'int32', 'stars' : 'int32' })

In [7]:
df.head()

Unnamed: 0,id,title,stars,userId
0,1,Fried Anchovies with Sage,3,1
1,2,Anchovies Appetizer With Breadcrumbs & Scallions,1,1
2,3,"Carrots, Cauliflower And Anchovies",4,1
3,4,Bap Story Stir Fried Anchovies (Myulchi Bokkeum),3,1
4,5,"Bread, Butter And Anchovies",5,1


In [8]:
combine_recipe_rating = df.dropna(axis = 0, subset = ['title'])
recipe_ratingCount = (combine_recipe_rating.
     groupby(by = ['title'])['stars'].
     count().
     reset_index().
     rename(columns = {'stars': 'totalRatingCount'})
     [['title', 'totalRatingCount']]
    )
recipe_ratingCount.head()

Unnamed: 0,title,totalRatingCount
0,Tonight Caesar salad,1
1,Tonight Cornmeal-Crusted Pan-Fried Trout,1
2,Tonight Grilled Caesar salad,1
3,Tonight Mackerel with Currants,1
4,Tonight Marinated Chickpea salad,1


In [9]:
rating_with_totalRatingCount = combine_recipe_rating.merge(recipe_ratingCount, left_on = 'title', right_on = 'title', how = 'left')
rating_with_totalRatingCount.head()

Unnamed: 0,id,title,stars,userId,totalRatingCount
0,1,Fried Anchovies with Sage,3,1,1
1,2,Anchovies Appetizer With Breadcrumbs & Scallions,1,1,1
2,3,"Carrots, Cauliflower And Anchovies",4,1,1
3,4,Bap Story Stir Fried Anchovies (Myulchi Bokkeum),3,1,1
4,5,"Bread, Butter And Anchovies",5,1,1


In [10]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(recipe_ratingCount['totalRatingCount'].describe())

count   1645.000
mean       1.047
std        0.348
min        1.000
25%        1.000
50%        1.000
75%        1.000
max        8.000
Name: totalRatingCount, dtype: float64


In [11]:
popularity_threshold = 1
rating_popular_recipe= rating_with_totalRatingCount.query('totalRatingCount >= @popularity_threshold')
rating_popular_recipe.head()

Unnamed: 0,id,title,stars,userId,totalRatingCount
0,1,Fried Anchovies with Sage,3,1,1
1,2,Anchovies Appetizer With Breadcrumbs & Scallions,1,1,1
2,3,"Carrots, Cauliflower And Anchovies",4,1,1
3,4,Bap Story Stir Fried Anchovies (Myulchi Bokkeum),3,1,1
4,5,"Bread, Butter And Anchovies",5,1,1


In [12]:
rating_popular_recipe.shape

(1722, 5)

In [13]:
## First lets create a Pivot matrix

recipe_features_df=rating_popular_recipe.pivot_table(index='title',columns='userId',values='stars').fillna(1)
recipe_features_df.head()

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Tonight Caesar salad,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Tonight Cornmeal-Crusted Pan-Fried Trout,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0
Tonight Grilled Caesar salad,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Tonight Mackerel with Currants,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0
Tonight Marinated Chickpea salad,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [22]:
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

recipe_features_df_matrix = csr_matrix(recipe_features_df.values)

model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(recipe_features_df_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

In [19]:
recipe_features_df_matrix.shape

(1645, 14)

In [16]:
query_index = np.random.choice(recipe_features_df.shape[0])
print(query_index)
distances, indices = model_knn.kneighbors(recipe_features_df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 10)

764


In [17]:
recipe_features_df.head()

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Tonight Caesar salad,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Tonight Cornmeal-Crusted Pan-Fried Trout,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0
Tonight Grilled Caesar salad,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Tonight Mackerel with Currants,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0
Tonight Marinated Chickpea salad,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [18]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(recipe_features_df.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, recipe_features_df.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for Lemon-horseradish Sole:

1: Roasted Pompano with Citrus Avocado Salsa, Cilantro Butter & Spicy Plantains, with distance of 0.0:
2: Fried Red Fish, with distance of 0.0:
3: Fried Smelt Recipe, with distance of 0.0:
4: Summer Ceviche, with distance of 0.0:
5: Hooked On Halibut, with distance of 0.0:
6: Pescado Almendrado, with distance of 0.0:
7: Grilled Salmon with Mustard-Wine side dish, with distance of 0.0:
8: Brook Trout with Pecans, Lemon, and Parsley Brown Butter, with distance of 0.0:
9: Thyme-Crusted Buttery Halibut with Parsley side dish, with distance of 0.0:
