# Model Based Collaborative Filtering Experiments

Importing libraries from surprise module for the algorithms

In [1]:
import pandas as pd
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy
from surprise.model_selection import train_test_split


In [2]:
from surprise import SVDpp
from surprise import KNNBasic
from surprise import KNNWithMeans

Using the vegas restaurants review pandas dataframe to build the dataset 

In [3]:
df = pd.read_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_review.pickle")
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'business_id', 'stars']], reader)

Importing the graph for later testing of the model

In [8]:
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

from group_recommender_system import *
import networkx as nx
import random

In [None]:
G = nx.read_gpickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_graph_small.gpickle")

Splitting the data in a train set and a test set

In [4]:
trainset, testset = train_test_split(data, test_size=0.20)

### Single value decomposition

In [5]:
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1f3d0cc25c0>

In [12]:
predictions = model.test(testset)
accuracy.rmse(predictions)

RMSE: 1.2886


1.288601130104512

### K-Nearest Neighbors

In [6]:
import pandas as pd
import numpy as np 
import seaborn as sn 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [8]:
review_df = pd.read_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_review.pickle")
rest_df = pd.read_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\restaurant_in_vegas.pickle")

In [14]:
categories_iter = (set(c.split(", ")) for c in rest_df.categories)
categories = sorted(set.union(*categories_iter))

In [30]:
data = pd.DataFrame(np.zeros((len(rest_df), len(categories)), dtype=np.int), columns=categories)

In [31]:
ids = rest_df['business_id'].tolist()
data['business_id'] = ids
data.head(3)

Unnamed: 0,Acai Bowls,Accessories,Active Life,Acupuncture,Adult Entertainment,Afghan,African,Air Duct Cleaning,Aircraft Repairs,Airport Shuttles,...,Windshield Installation & Repair,Wine & Spirits,Wine Bars,Wineries,Women's Clothing,Wraps,Yelp Events,Yoga,Zoos,business_id
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,fnZrZlqW1Z8iWgTVDfv_MA
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,fhNf_sg-XzZ3e7HEVGuOZg
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,OWkS1FXNJbozn-qPg3LWxg


In [32]:
for index, row in rest_df.iterrows():
    for cat in row['categories'].split(", "):
        data.at[index, cat] = 1
    data.at[index, 'business_id'] = row['business_id']

31886


In [42]:
data.to_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\rest_cat_df.pickle")

In [45]:
review_reduced = review_df.filter(['user_id', 'business_id', 'stars'])

In [49]:
review_reduced.to_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_review_reduced.pickle")

In [50]:
small_review_reduced = review_reduced.sample(n=100000)

In [52]:
dataset = pd.merge(small_review_reduced, data, on='business_id')

In [64]:
dataset = dataset.drop_duplicates(subset=['user_id', 'business_id', 'stars'])

In [65]:
dataset = dataset.dropna()

In [67]:
dataset.to_pickle("C:\\Users\\ineso\\FEUP-3ano\\gulbenkian-ai\\data\\vegas-restaurants\\vegas_model_df.pickle")

In [68]:
train, test = train_test_split(dataset, test_size=0.2)

In [83]:
X = dataset.drop(['stars', 'business_id', 'user_id'], axis=1)

In [84]:
y = dataset['stars']

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

In [86]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

KNeighborsClassifier()

In [87]:
y_pred = knn.predict(X_test)

In [88]:
metrics.accuracy_score(y_test, y_pred)

0.44251377368771566

### Testing on the graph

In [9]:
user = 'ld9Q7kd06mbl1kMzISS02g'
similar_users = most_similar_users(G, user)
print("Similar users: ", similar_users)

category_ratings = predict_category_ratings(G, user, similar_users)
print(category_ratings, "\n")

recommendations = set()
for u in similar_users:
    recommendations.update(recommend_restaurants(G, u, user))

for rest in recommendations:
    pred = model.predict(user, rest)
    print("*", G.nodes[rest]['name'], "-", pred.est)

Similar users:  ['N3oNEwh0qgPqPP3Em6wJXw']
{'Ethiopian': 4, 'Vegetarian': 4, 'Cafes': 4, 'Breakfast & Brunch': 3.5, 'Bagels': 4, 'Juice Bars & Smoothies': 4, 'Food': 3.5, 'Coffee & Tea': 4, 'Bakeries': 4, 'Tapas/Small Plates': 3, 'Tapas Bars': 3, 'Nightlife': 3, 'American (New)': 3, 'Beer': 3, 'Wine & Spirits': 3, 'Bars': 3, 'Wine Bars': 3, 'Lounges': 3, 'British': 3, 'Pubs': 3, 'Southern': 5, 'American (Traditional)': 5, 'Soul Food': 5} 

* M&M Soul Food Cafe - 4.317309477153939
* Queen Victoria Pub - 3.741470549869204
* Madhouse Coffee - 4.0505097112505535
* Hearthstone Kitchen & Cellar - 3.9171737161089797


In [13]:
user = 'ld9Q7kd06mbl1kMzISS02g'
similar_users = most_similar_users(G, user)
print("Similar users: ", similar_users)

category_ratings = predict_category_ratings(G, user, similar_users)
print(category_ratings, "\n")

recommendations = set()
for u in similar_users:
    recommendations.update(recommend_restaurants(G, u, user))

for rest in recommendations:
    pred = model.predict(user, rest)
    print("*", G.nodes[rest]['name'], "-", pred.est)

Similar users:  ['N3oNEwh0qgPqPP3Em6wJXw']
{'Ethiopian': 4, 'Vegetarian': 4, 'Cafes': 4, 'Breakfast & Brunch': 3.5, 'Bagels': 4, 'Juice Bars & Smoothies': 4, 'Food': 3.5, 'Coffee & Tea': 4, 'Bakeries': 4, 'Tapas/Small Plates': 3, 'Tapas Bars': 3, 'Nightlife': 3, 'American (New)': 3, 'Beer': 3, 'Wine & Spirits': 3, 'Bars': 3, 'Wine Bars': 3, 'Lounges': 3, 'British': 3, 'Pubs': 3, 'Southern': 5, 'American (Traditional)': 5, 'Soul Food': 5} 

* M&M Soul Food Cafe - 3.680951433341492
* Queen Victoria Pub - 3.45632326650157
* Madhouse Coffee - 4.21599197243397
* Hearthstone Kitchen & Cellar - 4.385042005866605
