# Restaurant Recommender System SVD

In [9]:
# import libraries
from collections import defaultdict

import pandas as pd
import numpy as np
from surprise.model_selection import train_test_split
from surprise import SVD, SVDpp
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.reader import Reader
from surprise import accuracy

In [10]:
# import data 

ratings = pd.read_csv('Desktop/pmf.csv')
ratings.head()

Unnamed: 0,Gender,Age,LatestRestaurants,Top3Fav,Top3Unfav,Another Hound,Four season,Town House,Vivarium,MCDONALD'S,...,Tudari,Bonchon,Veganarie,Viet cuisine,La Creperie,Piri Piri flaming grill,Kloset,Boon Tong Kee,Coffee Beans by Dao,Suki Masa
0,หญิง,19-25 ปี,"Another Hound, MCDONALD'S, Burger King, Subway...","Another Hound, แหลมเจริญซีฟู้ด, Bankara Ramen","Chabuton, Sabotan, Sushi Den",5.0,3.0,3.0,3.0,5.0,...,,4.0,,,,,,,,
1,หญิง,19-25 ปี,"Four Season, Subway, Tacobell, Evaime Shabu, V...","Four Season, แหลมเจริญซีฟู้ด, Veganarie","MCDONALD'S, KFC, ฝ้ายซอคำ",4.0,5.0,,,1.0,...,,3.0,5.0,4.0,4.0,,2.0,,,
2,หญิง,19-25 ปี,"แหลมเจริญซีฟู้ด, Ootoya, บ้านไอซ์, Veganarie, ...","Ootoya, Veganarie, Viet cuisine","KFC, Oishi Grand Buffet, Wa Q Yakiniku",4.0,3.0,,,3.0,...,,3.0,5.0,5.0,3.0,,,4.0,,
3,หญิง,19-25 ปี,"Mos Burger, Subway, Ootoya, รสนิยม, Viet cuisine","Subway, แหลมเจริญซีฟู้ด, Viet cuisine","Oishi Grand Buffet, Crab & Claw, เตี๋ยวตาโต",4.0,4.0,3.0,3.0,3.0,...,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,,
4,ชาย,19-25 ปี,"MCDONALD'S, COCO ichibanya, Ootoya, ตะลิงปลิง,...","MCDONALD'S, COCO ichibanya, เนื้อคู่","MK Gold Restaurant, Oishi Grand Buffet, Scala ...",4.0,3.0,3.0,3.0,4.0,...,3.0,4.0,,4.0,,4.0,,4.0,,


In [11]:
original_ratings = ratings

ratings = ratings.drop(['Gender','Age','LatestRestaurants','Top3Fav','Top3Unfav'], axis=1)

ratings.head()

Unnamed: 0,Another Hound,Four season,Town House,Vivarium,MCDONALD'S,Burger King,KFC,Mos Burger,Subway,Tacobell,...,Tudari,Bonchon,Veganarie,Viet cuisine,La Creperie,Piri Piri flaming grill,Kloset,Boon Tong Kee,Coffee Beans by Dao,Suki Masa
0,5.0,3.0,3.0,3.0,5.0,5.0,5.0,3.0,5.0,4.0,...,,4.0,,,,,,,,
1,4.0,5.0,,,1.0,2.0,1.0,,4.0,4.0,...,,3.0,5.0,4.0,4.0,,2.0,,,
2,4.0,3.0,,,3.0,3.0,2.0,,4.0,4.0,...,,3.0,5.0,5.0,3.0,,,4.0,,
3,4.0,4.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,...,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,,
4,4.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,...,3.0,4.0,,4.0,,4.0,,4.0,,


In [12]:
ratings_df = ratings.stack().reset_index()
ratings_df.columns = ['userId','Restaurant','ratings']
ratings_df

Unnamed: 0,userId,Restaurant,ratings
0,0,Another Hound,5.0
1,0,Four season,3.0
2,0,Town House,3.0
3,0,Vivarium,3.0
4,0,MCDONALD'S,5.0
...,...,...,...
16391,496,Veganarie,5.0
16392,496,Viet cuisine,4.0
16393,496,Piri Piri flaming grill,3.0
16394,496,Boon Tong Kee,3.0


## SVD instance

In [13]:
# create Surpirse.Dataset from pandas.DataFrame
dataset = Dataset.load_from_df(ratings_df[['userId', 'Restaurant', 'ratings']], Reader())
trainset = dataset.build_full_trainset()

algo = SVD(random_state=0)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x10ff9ca00>

In [14]:
def get_user_prediction(uid, model):
    return [model.predict(uid, res) for res in ratings.columns]

def get_all_users_predictions(model):
    return [preds for uid in ratings.index for preds in get_user_prediction(uid, model)]

def get_top_n(predictions, n=10):
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

def recommend_new_restaurant(uid, n=5):
    global algo
    predictions = get_all_users_predictions(algo)
    top_n = get_top_n(predictions, n=n*3)[uid]
    already_tried_restaurant = set(ratings_df[ratings_df["userId"] == uid]["Restaurant"].unique())
    filtered_top_n = [x[0] for x in top_n if x[0] not in already_tried_restaurant][:n]
    return filtered_top_n

def update_data(uid, restaurant_name, rating):
    global ratings_df, algo
    ratings_df = ratings_df.append({"userId": uid, "Restaurant": restaurant_name, "ratings": rating}, ignore_index=True)
    dataset = Dataset.load_from_df(ratings_df[['userId', 'Restaurant', 'ratings']], Reader())
    trainset = dataset.build_full_trainset()

    algo.fit(trainset)

def similar_user(uid, n=5):
    n_similar_users_with_scores = sorted(enumerate(algo.compute_similarities()[uid]), key=lambda x:-x[1])[:n]
    return [u[0] for u in n_similar_users_with_scores]

In [50]:
top3_actual = original_ratings["Top3Fav"].apply(lambda x: np.array(x.split(", "))).values

In [45]:
def my_loss(x, y):
    return 1.0 - len(set(x).intersection(set(y)))/len(x)

## Baseline prediction(random)

In [15]:
from surprise import NormalPredictor

In [16]:
random_algo = NormalPredictor()

In [17]:
random_algo.fit(trainset)

<surprise.prediction_algorithms.random_pred.NormalPredictor at 0x10fcf3850>

In [18]:
random_top_3 = get_top_n(get_all_users_predictions(random_algo), n=3)
random_top3_preds = np.array([[rec[0] for rec in random_top_3[i]] for i in original_ratings.index])

In [24]:
random_top3_preds

array([['KFC\xa0', 'Saboten', 'Scoozi Pizza'],
       ['Bankara Ramen', 'Ootoya', 'แสนแซ่บ'],
       ['Chilli Thai Restaurant', 'ฝ้ายซอคำ', 'ทองสมิทธิ์'],
       ...,
       ['AOI', 'ตะลิงปลิง', 'เตี๋ยวตาโต'],
       ['Vivarium', 'MK Gold Restaurant', 'Crab & Claw'],
       ['แหลมเจริญซีฟู๊ด', 'Scoozi Pizza', 'Saboten']], dtype='<U32')

In [79]:
random_loss_acc = []
for i in range(len(top3_actual)):
    random_loss_acc.append(my_loss(top3_actual[i], random_top3_preds[i]))

In [80]:
np.mean(random_loss_acc)

0.9537223340040242

## Baseline prediction(popular)

In [37]:
popular_top3 = ratings_df.groupby("Restaurant").mean()["ratings"].sort_values(ascending=False)[:3].keys().to_numpy().astype("<U24")

In [46]:
popular_loss_acc = []
for i in range(len(top3_actual)):
    popular_loss_acc.append(my_loss(top3_actual[i], popular_top3))

In [47]:
np.mean(popular_loss_acc)

0.8819584171696847

## Multi-label classification evaluation



In [48]:
original_ratings.head()

Unnamed: 0,Gender,Age,LatestRestaurants,Top3Fav,Top3Unfav,Another Hound,Four season,Town House,Vivarium,MCDONALD'S,...,Tudari,Bonchon,Veganarie,Viet cuisine,La Creperie,Piri Piri flaming grill,Kloset,Boon Tong Kee,Coffee Beans by Dao,Suki Masa
0,หญิง,19-25 ปี,"Another Hound, MCDONALD'S, Burger King, Subway...","Another Hound, แหลมเจริญซีฟู้ด, Bankara Ramen","Chabuton, Sabotan, Sushi Den",5.0,3.0,3.0,3.0,5.0,...,,4.0,,,,,,,,
1,หญิง,19-25 ปี,"Four Season, Subway, Tacobell, Evaime Shabu, V...","Four Season, แหลมเจริญซีฟู้ด, Veganarie","MCDONALD'S, KFC, ฝ้ายซอคำ",4.0,5.0,,,1.0,...,,3.0,5.0,4.0,4.0,,2.0,,,
2,หญิง,19-25 ปี,"แหลมเจริญซีฟู้ด, Ootoya, บ้านไอซ์, Veganarie, ...","Ootoya, Veganarie, Viet cuisine","KFC, Oishi Grand Buffet, Wa Q Yakiniku",4.0,3.0,,,3.0,...,,3.0,5.0,5.0,3.0,,,4.0,,
3,หญิง,19-25 ปี,"Mos Burger, Subway, Ootoya, รสนิยม, Viet cuisine","Subway, แหลมเจริญซีฟู้ด, Viet cuisine","Oishi Grand Buffet, Crab & Claw, เตี๋ยวตาโต",4.0,4.0,3.0,3.0,3.0,...,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,,
4,ชาย,19-25 ปี,"MCDONALD'S, COCO ichibanya, Ootoya, ตะลิงปลิง,...","MCDONALD'S, COCO ichibanya, เนื้อคู่","MK Gold Restaurant, Oishi Grand Buffet, Scala ...",4.0,3.0,3.0,3.0,4.0,...,3.0,4.0,,4.0,,4.0,,4.0,,


In [49]:
top_3 = get_top_n(get_all_users_predictions(algo), n=3)
top3_preds = np.array([[rec[0] for rec in top_3[i]] for i in original_ratings.index])

In [53]:
top3_preds

array([['COCO ichibanya', 'KFC\xa0', 'Burger King'],
       ['Another Hound', 'Coffee Beans by Dao', 'Veganarie'],
       ['Veganarie', 'Another Hound', 'Nara'],
       ...,
       ['Burger King', 'Mos Burger', 'KFC\xa0'],
       ['Burger King', "MCDONALD'S", 'COCO ichibanya'],
       ['Veganarie', 'Another Hound', 'Nara']], dtype='<U24')

In [56]:
loss_acc = []
for i in range(len(top3_actual)):
    loss_acc.append(my_loss(top3_actual[i], top3_preds[i]))
    # hammer_loss from sklearn current'y doesn't support multi-label for now,
    # so we need to do it one by one for each user

In [57]:
np.mean(loss_acc)

0.6525821596244132

In [19]:
min(loss_acc)
# This means there's some user that get recommented with 3 restaurants that are in their fav list

0.0

## Update data (intended to be used by new user)

In [20]:
update_data(1, "Vivarium", 5.0)

In [21]:
get_user_prediction(1, algo)
# see the result, Vivarium's data is updated for user 1

[Prediction(uid=1, iid='Another Hound', r_ui=None, est=4.774424609547722, details={'was_impossible': False}),
 Prediction(uid=1, iid='Four season', r_ui=None, est=4.235746953919358, details={'was_impossible': False}),
 Prediction(uid=1, iid='Town House', r_ui=None, est=3.8194073929836687, details={'was_impossible': False}),
 Prediction(uid=1, iid='Vivarium', r_ui=None, est=3.978936874763135, details={'was_impossible': False}),
 Prediction(uid=1, iid="MCDONALD'S", r_ui=None, est=1.6847269196342947, details={'was_impossible': False}),
 Prediction(uid=1, iid='Burger King', r_ui=None, est=2.1660495919280844, details={'was_impossible': False}),
 Prediction(uid=1, iid='KFC\xa0', r_ui=None, est=1.764251940112009, details={'was_impossible': False}),
 Prediction(uid=1, iid='Mos Burger', r_ui=None, est=2.8020967490751705, details={'was_impossible': False}),
 Prediction(uid=1, iid='Subway', r_ui=None, est=3.310027168993658, details={'was_impossible': False}),
 Prediction(uid=1, iid='Tacobell', r_

## Get similar user

In [22]:
similar_user(1, n=7)

[1, 193, 264, 196, 425, 254, 433]

## Recommend new restaurant for given user

In [23]:
recommend_new_restaurant(1, n=3)

['Coffee Beans by Dao', 'ลูกไก่ทอง', 'Nara']