Make predictions

## Surprise

In [2]:
import numpy as np
import pandas as pd
from scipy import sparse
import sys
from surprise import Dataset, Reader
from surprise import KNNBasic, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.dataset import DatasetAutoFolds

In [3]:
df = pd.read_csv('/home/suppra/Desktop/GourmetGirls/data/user_recipe_interactions.csv')
print(len(df))
print(len(df['user_id'].unique().tolist()))
print(len(df['recipe_id'].unique().tolist()))

95
19
28


In [4]:
reader = Reader(rating_scale=(0,10)) # rating scale range
data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)
print(type(data))

<class 'surprise.dataset.DatasetAutoFolds'>


In [5]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.25)
print(type(trainset))

<class 'surprise.trainset.Trainset'>


In [6]:
import itertools

for uid, iid, rating in itertools.islice(trainset.all_ratings(), 5):
    print(f"User {uid} rated item {iid} with a rating of {rating}")

print()
for uid, iid, rating in testset[:5]:
    print(f"User {uid} rated item {iid} with a rating of {rating}")
    
print()    
print(trainset.n_ratings,len(testset))

User 0 rated item 0 with a rating of 0.85
User 0 rated item 9 with a rating of 0.77
User 0 rated item 12 with a rating of 0.85
User 1 rated item 1 with a rating of 1.0
User 1 rated item 8 with a rating of 1.0

User yumCook123 rated item R_1 with a rating of 0.46
User food4lyf rated item R_33 with a rating of 0.47
User foodLover123 rated item R_36 with a rating of 1.0
User g0rgeMehigan rated item R_15 with a rating of 0.46
User suparna@123 rated item R_2 with a rating of 0.85

71 24


In [7]:
# Use the SVD algorithm to train the model
#algo = KNNBasic()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x78749c527140>

In [8]:
# Test the model on the testing set
predictions = algo.test(testset)

### est should be as close to r_ui

In [9]:
for prediction in predictions[0:5]:
    print(prediction)

user: yumCook123 item: R_1        r_ui = 0.46   est = 0.59   {'was_impossible': False}
user: food4lyf   item: R_33       r_ui = 0.47   est = 0.73   {'was_impossible': False}
user: foodLover123 item: R_36       r_ui = 1.00   est = 0.93   {'was_impossible': False}
user: g0rgeMehigan item: R_15       r_ui = 0.46   est = 0.64   {'was_impossible': False}
user: suparna@123 item: R_2        r_ui = 0.85   est = 0.66   {'was_impossible': False}


In [10]:
# Print the performance metrics
rmse = round(accuracy.rmse(predictions), 2)


RMSE: 0.1661


In [11]:
true_ratings = [pred.r_ui for pred in predictions]
est_ratings = [pred.est for pred in predictions]
uids = [pred.uid for pred in predictions]

In [12]:
users=list(set(uids))
print(users[0:5]) # --> users who were part of test set

['g0rgeMehigan', 'gourmetGal', 'burpista@89', 'suparna@123', 'marcoPierre']


### Recommendations for users for data they haven't seen

In [13]:
import operator
# items which the user not yet evaluate
items = trainset.build_anti_testset()
for user in users[0:5]:
    user_items = list(filter(lambda x: x[0] == user, items))
    print()
    print(user,len(user_items))
    
    # generate recommendation
    recommendations = algo.test(user_items)
    recommendations.sort(key=operator.itemgetter(3), reverse=True)
    print(f"Recommendations for {user}")
    for r in recommendations[0:5]:
        print(f" recipe{r}, [Estimated Rating] {round(r[3],3)}")


g0rgeMehigan 23
Recommendations for g0rgeMehigan
 recipeuser: g0rgeMehigan item: R_13       r_ui = 0.85   est = 1.02   {'was_impossible': False}, [Estimated Rating] 1.024
 recipeuser: g0rgeMehigan item: R_27       r_ui = 0.85   est = 0.97   {'was_impossible': False}, [Estimated Rating] 0.966
 recipeuser: g0rgeMehigan item: R_36       r_ui = 0.85   est = 0.91   {'was_impossible': False}, [Estimated Rating] 0.908
 recipeuser: g0rgeMehigan item: R_7        r_ui = 0.85   est = 0.90   {'was_impossible': False}, [Estimated Rating] 0.898
 recipeuser: g0rgeMehigan item: R_40       r_ui = 0.85   est = 0.84   {'was_impossible': False}, [Estimated Rating] 0.837

gourmetGal 24
Recommendations for gourmetGal
 recipeuser: gourmetGal item: R_31       r_ui = 0.85   est = 1.01   {'was_impossible': False}, [Estimated Rating] 1.01
 recipeuser: gourmetGal item: R_33       r_ui = 0.85   est = 1.00   {'was_impossible': False}, [Estimated Rating] 0.999
 recipeuser: gourmetGal item: R_18       r_ui = 0.85   

### Predictions for a particular user

In [14]:
all_items = trainset.all_items()
# Convert internal surprise-assigned itemIDs back to what I gave
all_items_raw = [trainset.to_raw_iid(iid) for iid in all_items]


In [15]:
# Get items the user has already interacted with
user_id = 'delishrelish564'
user_inner_id = trainset.to_inner_uid(user_id)
items_user_has_rated = set([j for (j, _) in trainset.ur[user_inner_id]])

# Recommend items the user hasn't rated
items_to_predict = [iid for iid in all_items if iid not in items_user_has_rated]

# Predict ratings
predictions = [
    (trainset.to_raw_iid(iid), algo.predict(user_id, trainset.to_raw_iid(iid)).est)
    for iid in items_to_predict
]

# Sort by predicted rating
predictions.sort(key=lambda x: x[1], reverse=True)

# Top-N Recommendations
top_n = predictions
for item, rating in top_n:
    print(f"Recommend item {item} with predicted rating {rating:.2f}")

Recommend item R_24 with predicted rating 1.16
Recommend item R_27 with predicted rating 1.08
Recommend item R_17 with predicted rating 1.05
Recommend item R_1 with predicted rating 1.01
Recommend item R_23 with predicted rating 0.99
Recommend item R_16 with predicted rating 0.99
Recommend item R_8 with predicted rating 0.95
Recommend item R_32 with predicted rating 0.94
Recommend item R_38 with predicted rating 0.93
Recommend item R_4 with predicted rating 0.92
Recommend item R_10 with predicted rating 0.91
Recommend item R_3 with predicted rating 0.91
Recommend item R_11 with predicted rating 0.89
Recommend item R_15 with predicted rating 0.89
Recommend item R_13 with predicted rating 0.89
Recommend item R_12 with predicted rating 0.88
Recommend item R_2 with predicted rating 0.81
Recommend item R_7 with predicted rating 0.81
Recommend item R_9 with predicted rating 0.78
Recommend item R_18 with predicted rating 0.72
Recommend item R_14 with predicted rating 0.65


In [16]:
algo.predict('delishrelish546', 'R_20')

Prediction(uid='delishrelish546', iid='R_20', r_ui=None, est=0.8453521126760564, details={'was_impossible': False})

### Saving the model and trainset

In [18]:
import pickle

# After training your model
with open(f'models/svd_model_{str(rmse)[2:]}.pkl', 'wb') as f:
    pickle.dump(algo, f)

with open(f'models/trainset_{str(rmse)[2:]}.pkl', 'wb') as f:
    pickle.dump(trainset, f)