## Surprise

In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
import sys
from surprise import Dataset, Reader
from surprise import KNNBasic, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.dataset import DatasetAutoFolds

In [2]:
df = pd.read_csv('/home/suppra/Desktop/GourmetGirls/data/user_recipe_interactions.csv')
print(len(df))
print(len(df['user_id'].unique().tolist()))
print(len(df['recipe_id'].unique().tolist()))

95
19
32


In [3]:
reader = Reader(rating_scale=(0,10)) # rating scale range
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)
print(type(data))

<class 'surprise.dataset.DatasetAutoFolds'>


In [4]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.25)
print(type(trainset))

<class 'surprise.trainset.Trainset'>


In [5]:
import itertools

for uid, iid, rating in itertools.islice(trainset.all_ratings(), 5):
    print(f"User {uid} rated item {iid} with a rating of {rating}")

print()
for uid, iid, rating in testset[:5]:
    print(f"User {uid} rated item {iid} with a rating of {rating}")
    
print()    
print(trainset.n_ratings,len(testset))

User 0 rated item 0 with a rating of 1.0
User 0 rated item 10 with a rating of 1.0
User 0 rated item 12 with a rating of 1.0
User 0 rated item 20 with a rating of 1.0
User 1 rated item 1 with a rating of 0.85

User delishrelish564 rated item R_31 with a rating of 1.0
User tArLaDaLaL rated item R_2 with a rating of 0.54
User I<3Food rated item R_23 with a rating of 1.0
User chefMaster rated item R_5 with a rating of 0.68
User aditi@456 rated item R_3qXgYDV8zRo9TUF5aH39vk with a rating of 0.85

71 24


In [6]:
# Use the SVD algorithm to train the model
#algo = KNNBasic()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7ba2a65e82f0>

In [7]:
# Test the model on the testing set
predictions = algo.test(testset)

### est should be as close to r_ui

In [8]:
for prediction in predictions[0:5]:
    print(prediction)

user: delishrelish564 item: R_31       r_ui = 1.00   est = 0.85   {'was_impossible': False}
user: tArLaDaLaL item: R_2        r_ui = 0.54   est = 0.43   {'was_impossible': False}
user: I<3Food    item: R_23       r_ui = 1.00   est = 0.95   {'was_impossible': False}
user: chefMaster item: R_5        r_ui = 0.68   est = 0.70   {'was_impossible': False}
user: aditi@456  item: R_3qXgYDV8zRo9TUF5aH39vk r_ui = 0.85   est = 0.82   {'was_impossible': False}


In [9]:
# Print the performance metrics
rmse = round(accuracy.rmse(predictions), 2)


RMSE: 0.1744


In [10]:
true_ratings = [pred.r_ui for pred in predictions]
est_ratings = [pred.est for pred in predictions]
uids = [pred.uid for pred in predictions]

In [11]:
users=list(set(uids))
print(users[0:5]) # --> users who were part of test set

['sanjayKapoor2004', 'foodLover123', 'aditi@456', 'emptyPlate45', 'tArLaDaLaL']


### Recommendations for users for data they haven't seen

In [12]:
import operator
# items which the user not yet evaluate
items = trainset.build_anti_testset()
for user in users[0:5]:
    user_items = list(filter(lambda x: x[0] == user, items))
    print()
    print(user,len(user_items))
    
    # generate recommendation
    recommendations = algo.test(user_items)
    recommendations.sort(key=operator.itemgetter(3), reverse=True)
    print(f"Recommendations for {user}")
    for r in recommendations[0:5]:
        print(f" recipe{r}, [Estimated Rating] {round(r[3],3)}")


sanjayKapoor2004 27
Recommendations for sanjayKapoor2004
 recipeuser: sanjayKapoor2004 item: R_24       r_ui = 0.81   est = 1.15   {'was_impossible': False}, [Estimated Rating] 1.147
 recipeuser: sanjayKapoor2004 item: R_10       r_ui = 0.81   est = 1.05   {'was_impossible': False}, [Estimated Rating] 1.051
 recipeuser: sanjayKapoor2004 item: R_33       r_ui = 0.81   est = 1.05   {'was_impossible': False}, [Estimated Rating] 1.051
 recipeuser: sanjayKapoor2004 item: R_13       r_ui = 0.81   est = 0.95   {'was_impossible': False}, [Estimated Rating] 0.947
 recipeuser: sanjayKapoor2004 item: R_21       r_ui = 0.81   est = 0.93   {'was_impossible': False}, [Estimated Rating] 0.935

foodLover123 26
Recommendations for foodLover123
 recipeuser: foodLover123 item: R_27       r_ui = 0.81   est = 1.06   {'was_impossible': False}, [Estimated Rating] 1.064
 recipeuser: foodLover123 item: R_14       r_ui = 0.81   est = 1.04   {'was_impossible': False}, [Estimated Rating] 1.043
 recipeuser: foodL

### Predictions for a particular user

In [13]:
all_items = trainset.all_items()
# Convert internal surprise-assigned itemIDs back to what I gave
all_items_raw = [trainset.to_raw_iid(iid) for iid in all_items]


In [14]:
# Get items the user has already interacted with
user_id = 'suparna@123'
user_inner_id = trainset.to_inner_uid(user_id)
items_user_has_rated = set([j for (j, _) in trainset.ur[user_inner_id]])

# Recommend items the user hasn't rated
items_to_predict = [iid for iid in all_items if iid not in items_user_has_rated]

# Predict ratings
predictions = [
    (trainset.to_raw_iid(iid), algo.predict(user_id, trainset.to_raw_iid(iid)).est)
    for iid in items_to_predict
]

# Sort by predicted rating
predictions.sort(key=lambda x: x[1], reverse=True)

# Top-N Recommendations
top_n = predictions
for item, rating in top_n:
    print(f"Recommend item {item} with predicted rating {rating:.2f}")

Recommend item R_11 with predicted rating 1.00
Recommend item R_19 with predicted rating 0.91
Recommend item R_24 with predicted rating 0.91
Recommend item R_10 with predicted rating 0.89
Recommend item R_31 with predicted rating 0.89
Recommend item R_16 with predicted rating 0.87
Recommend item R_27 with predicted rating 0.86
Recommend item R_12 with predicted rating 0.85
Recommend item R_32 with predicted rating 0.81
Recommend item R_4 with predicted rating 0.81
Recommend item R_40 with predicted rating 0.81
Recommend item R_14 with predicted rating 0.79
Recommend item R_38 with predicted rating 0.79
Recommend item R_6 with predicted rating 0.78
Recommend item R_13 with predicted rating 0.78
Recommend item R_17 with predicted rating 0.78
Recommend item R_15 with predicted rating 0.77
Recommend item R_36 with predicted rating 0.75
Recommend item R_21 with predicted rating 0.74
Recommend item R_1 with predicted rating 0.74
Recommend item R_23 with predicted rating 0.71
Recommend item R

In [15]:
algo.predict('delishrelish546', 'R_20')

Prediction(uid='delishrelish546', iid='R_20', r_ui=None, est=0.81, details={'was_impossible': False})

### Saving the model and trainset

In [16]:
import pickle

# After training your model
with open(f'models/svd_model_{str(rmse)[2:]}.pkl', 'wb') as f:
    pickle.dump(algo, f)

with open(f'models/trainset_{str(rmse)[2:]}.pkl', 'wb') as f:
    pickle.dump(trainset, f)