In [2]:
import pandas as pd
from surprise import Dataset
from surprise import Reader

In [79]:
# Load only the first 500,000 ratings. `nrows` stops parsing at that row, so
# the remainder of the CSV is never read into memory; the rows kept are the
# same ones that slicing `iloc[0:500000]` after a full read would keep.
df = pd.read_csv('./data/ratings.csv', nrows=500000)

# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# Build the Surprise dataset from the user, item and rating columns
# (passed in that order).
data = Dataset.load_from_df(df[["userId", "recipeId", "rating"]], reader)

In [92]:
from surprise import KNNWithMeans

# Similarity settings for a user-based KNN model using cosine similarity.
sim_options = dict(
    name="cosine",
    user_based=True,  # compare users with each other rather than items
    min_support=3,  # fewest items two users must share to be considered for similarity
)
# NOTE(review): `algo` is rebound by later cells in this notebook; confirm
# whether this KNNWithMeans instance is still meant to be fitted and used.
algo = KNNWithMeans(sim_options=sim_options)

In [4]:
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

# Fix the seed so that the random split and SVD's random initialisation give
# the same RMSE on every Restart & Run All.
RANDOM_STATE = 42

# Hold out 25% of the ratings, chosen at random, as the test set.
trainset, testset = train_test_split(data, test_size=.25, random_state=RANDOM_STATE)

# SVD matrix-factorisation model with the library's default hyper-parameters.
algo = SVD(random_state=RANDOM_STATE)

# Fit on the training split, then predict every rating in the test split.
algo.fit(trainset)
predictions = algo.test(testset)

# Report the root-mean-squared error of those predictions; `accuracy.rmse`
# prints the value and also returns it, so it is shown as the cell output.
accuracy.rmse(predictions)


RMSE: 0.8021


0.8021216100404636

In [81]:
import pandas as pd
import os 
import numpy as np

# '__file__' is a quoted string literal here (notebooks have no module-level
# __file__), so abspath() resolves it against the current working directory
# and dirname() strips it again: `dirname` is the working directory.
dirname = os.path.dirname(os.path.abspath('__file__'))
filename = os.path.join(dirname, './data/recipesData.json')

# Read the recipes file, keep only the id and name columns, and rename
# "name" to "recName".
df = (
    pd.read_json(filename)
    .loc[:, ['RecId', 'name']]
    .rename(columns={"name": "recName"})
)

# Preview up to the first 20 recipes.
df.head(20)

Unnamed: 0,RecId,recName
0,23945,Hot Artichoke Dip with Green Chiles
1,161178,Party Plentiful Guacamole
2,20945,Magaricz
3,24301,Christmas Chip Dip
4,165246,Christmas Dip
5,23931,Crackle Thins
6,25500,Texas Stuffed Mushrooms
7,26694,Antipasto Platter
8,242204,Corn in a Cup (Elote en Vaso)
9,178551,Green Onion Ranch Dip


In [82]:
# Lookup tables between recipe ids and recipe names, built directly from the
# two columns instead of looping over `df.iterrows()` (which materialises a
# Series for every row). If an id or a name occurs more than once, the last
# row wins, exactly as with a row-by-row assignment loop.
ID_to_name = dict(zip(df["RecId"], df["recName"]))
name_to_ID = dict(zip(df["recName"], df["RecId"]))

In [63]:
# Spot-check the lookup tables. This first lookup only verifies that the key
# exists (KeyError otherwise); its value is not displayed because it is not
# the last expression of the cell.
ID_to_name[23945]
# Reverse direction: recipe name -> recipe id, displayed as the cell output.
recipe_raw_id = name_to_ID['Christmas Dip']
recipe_raw_id

165246

In [93]:
# import io  # needed because of weird encoding of u.item file

from surprise import KNNBaseline
from surprise import Dataset


# Fit an item-based KNN model on every rating in `data` (no hold-out split),
# so that similarities between items are available for the neighbour lookup.
trainset = data.build_full_trainset()
# `user_based=False` makes the similarity matrix item-vs-item, using the
# 'pearson_baseline' similarity measure.
sim_options = dict(name='pearson_baseline', user_based=False)
algo = KNNBaseline(sim_options=sim_options)
algo.fit(trainset)


Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x10fbe4760>

In [96]:
# Recipe to query and how many neighbours to list; `n_neighbors` feeds both
# the lookup and the printed heading so the two cannot drift apart.
recipeName = 'Artichoke, Cheese and Olive Antipasto'
n_neighbors = 10

# Recipe name -> raw recipe id -> inner id of the fitted trainset.
recipe_raw_id = name_to_ID[recipeName]
recipe_inner_id = algo.trainset.to_inner_iid(recipe_raw_id)

# Inner ids of the nearest neighbours of the selected recipe.
neighbor_inner_ids = algo.get_neighbors(recipe_inner_id, k=n_neighbors)

# Map inner ids back to raw ids and then to names. A list (rather than a
# chain of generators rebinding the same name) keeps `recipe_neighbors`
# usable after it has been printed once.
recipe_neighbors = [
    ID_to_name[algo.trainset.to_raw_iid(inner_id)]
    for inner_id in neighbor_inner_ids
]

print()
print('The {} nearest neighbors of {} are:'.format(n_neighbors, recipeName))
for recipe in recipe_neighbors:
    print(recipe)


The 10 nearest neighbors of Artichoke, Cheese and Olive Antipasto are:
Double Tomato Bruschetta
Annie's Fruit Salsa and Cinnamon Chips
Marinated Mushrooms II
Better Than Sex Cake II
Artichoke Salsa
Cilantro and Lime Butter
Cocktail Meatballs I
Best Ever Crab Cakes
Best Bruschetta Ever
Pig Picking Cake III
