Make predictions

## Surprise

In [68]:
import numpy as np
import pandas as pd
from scipy import sparse
import sys
from surprise import Dataset, Reader
from surprise import KNNBasic, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.dataset import DatasetAutoFolds

In [69]:
df = pd.read_csv('/home/suppra/Desktop/GourmetGirls/data/user_recipe_interactions.csv')
print(len(df))
print(len(df['user_id'].unique().tolist()))
print(len(df['recipe_id'].unique().tolist()))

95
19
28


In [70]:
reader = Reader(rating_scale=(0,10)) # rating scale range
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)
print(type(data))

<class 'surprise.dataset.DatasetAutoFolds'>


In [71]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.25)
print(type(trainset))

<class 'surprise.trainset.Trainset'>


In [72]:
import itertools

for uid, iid, rating in itertools.islice(trainset.all_ratings(), 5):
    print(f"User {uid} rated item {iid} with a rating of {rating}")

print()
for uid, iid, rating in testset[:5]:
    print(f"User {uid} rated item {iid} with a rating of {rating}")
    
print()    
print(trainset.n_ratings,len(testset))

User 0 rated item 0 with a rating of 1.0
User 0 rated item 7 with a rating of 1.0
User 0 rated item 17 with a rating of 1.0
User 0 rated item 23 with a rating of 1.0
User 1 rated item 1 with a rating of 0.46

User gourmetGal rated item R_27 with a rating of 1.0
User burpista@89 rated item R_33 with a rating of 0.39
User g0rgeMehigan rated item R_16 with a rating of 0.46
User marcoPierre rated item R_29 with a rating of 1.0
User I<3Food rated item R_23 with a rating of 1.0

71 24


In [73]:
# Use the SVD algorithm to train the model
#algo = KNNBasic()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x73cc4d80b0e0>

In [74]:
# Test the model on the testing set
predictions = algo.test(testset)

### est should be as close to r_ui

In [75]:
for prediction in predictions[0:5]:
    print(prediction)

user: gourmetGal item: R_27       r_ui = 1.00   est = 0.87   {'was_impossible': False}
user: burpista@89 item: R_33       r_ui = 0.39   est = 0.79   {'was_impossible': False}
user: g0rgeMehigan item: R_16       r_ui = 0.46   est = 0.68   {'was_impossible': False}
user: marcoPierre item: R_29       r_ui = 1.00   est = 0.72   {'was_impossible': False}
user: I<3Food    item: R_23       r_ui = 1.00   est = 0.83   {'was_impossible': False}


In [76]:
# Print the performance metrics
rmse = round(accuracy.rmse(predictions), 2)


RMSE: 0.2192


In [77]:
true_ratings = [pred.r_ui for pred in predictions]
est_ratings = [pred.est for pred in predictions]
uids = [pred.uid for pred in predictions]

In [78]:
users=list(set(uids))
print(users[0:5]) # --> users who were part of test set

['foodLover123', 'sihikahi34', 'sanjayKapoor2004', 'yumCook123', 'aditi@456']


### Recommendations for users for data they haven't seen

In [79]:
import operator
# items which the user not yet evaluate
items = trainset.build_anti_testset()
for user in users[0:5]:
    user_items = list(filter(lambda x: x[0] == user, items))
    print()
    print(user,len(user_items))
    
    # generate recommendation
    recommendations = algo.test(user_items)
    recommendations.sort(key=operator.itemgetter(3), reverse=True)
    print(f"Recommendations for {user}")
    for r in recommendations[0:5]:
        print(f" recipe{r}, [Estimated Rating] {round(r[3],3)}")


foodLover123 23
Recommendations for foodLover123
 recipeuser: foodLover123 item: R_13       r_ui = 0.84   est = 1.13   {'was_impossible': False}, [Estimated Rating] 1.128
 recipeuser: foodLover123 item: R_8        r_ui = 0.84   est = 1.07   {'was_impossible': False}, [Estimated Rating] 1.067
 recipeuser: foodLover123 item: R_31       r_ui = 0.84   est = 1.05   {'was_impossible': False}, [Estimated Rating] 1.047
 recipeuser: foodLover123 item: R_10       r_ui = 0.84   est = 1.03   {'was_impossible': False}, [Estimated Rating] 1.031
 recipeuser: foodLover123 item: R_11       r_ui = 0.84   est = 1.02   {'was_impossible': False}, [Estimated Rating] 1.015

sihikahi34 23
Recommendations for sihikahi34
 recipeuser: sihikahi34 item: R_36       r_ui = 0.84   est = 1.09   {'was_impossible': False}, [Estimated Rating] 1.088
 recipeuser: sihikahi34 item: R_7        r_ui = 0.84   est = 1.05   {'was_impossible': False}, [Estimated Rating] 1.054
 recipeuser: sihikahi34 item: R_29       r_ui = 0.84  

### Predictions for a particular user

In [80]:
all_items = trainset.all_items()
# Convert internal surprise-assigned itemIDs back to what I gave
all_items_raw = [trainset.to_raw_iid(iid) for iid in all_items]


In [81]:
# Get items the user has already interacted with
user_id = 'delishrelish564'
user_inner_id = trainset.to_inner_uid(user_id)
items_user_has_rated = set([j for (j, _) in trainset.ur[user_inner_id]])

# Recommend items the user hasn't rated
items_to_predict = [iid for iid in all_items if iid not in items_user_has_rated]

# Predict ratings
predictions = [
    (trainset.to_raw_iid(iid), algo.predict(user_id, trainset.to_raw_iid(iid)).est)
    for iid in items_to_predict
]

# Sort by predicted rating
predictions.sort(key=lambda x: x[1], reverse=True)

# Top-N Recommendations
top_n = predictions
for item, rating in top_n:
    print(f"Recommend item {item} with predicted rating {rating:.2f}")

Recommend item R_8 with predicted rating 1.10
Recommend item R_16 with predicted rating 1.05
Recommend item R_4 with predicted rating 1.02
Recommend item R_18 with predicted rating 1.02
Recommend item R_13 with predicted rating 0.99
Recommend item R_3 with predicted rating 0.95
Recommend item R_6 with predicted rating 0.94
Recommend item R_24 with predicted rating 0.93
Recommend item R_38 with predicted rating 0.91
Recommend item R_1 with predicted rating 0.89
Recommend item R_5 with predicted rating 0.89
Recommend item R_27 with predicted rating 0.88
Recommend item R_23 with predicted rating 0.87
Recommend item R_32 with predicted rating 0.86
Recommend item R_7 with predicted rating 0.86
Recommend item R_10 with predicted rating 0.85
Recommend item R_12 with predicted rating 0.84
Recommend item R_11 with predicted rating 0.81
Recommend item R_17 with predicted rating 0.81
Recommend item R_14 with predicted rating 0.76
Recommend item R_9 with predicted rating 0.69
Recommend item R_2 wi

In [82]:
algo.predict('delishrelish546', 'R_20')

Prediction(uid='delishrelish546', iid='R_20', r_ui=None, est=0.8438028169014085, details={'was_impossible': False})

### Saving the model and trainset

In [83]:
import pickle

# After training your model
with open(f'models/svd_model_{rmse}.pkl', 'wb') as f:
    pickle.dump(algo, f)

with open(f'models/trainset_{rmse}.pkl', 'wb') as f:
    pickle.dump(trainset, f)