In [45]:
import sys
sys.path.append('modules')
import pandas as pd
import numpy as np
import random 
import pickle

from recommend import *
from evaluate import *
from similarity import Similarity
from preference_generation import *
from present_score_generation import *
from user_restaurant_matrix import *

from sklearn.metrics.pairwise import cosine_similarity
import warnings
# Notebook-wide settings: suppress every warning and let pandas print up to
# 4000 rows before truncating a displayed frame.
warnings.filterwarnings(action='ignore')
pd.set_option('display.max_rows', 4000)




Read in the user, business and review files <br>
Initialise the lists of valid users and valid businesses

In [41]:
# Load the three pruned Yelp tables (users, businesses, reviews) from Data/.
# The tuple order below fixes both the read order and the unpacking order.
user_df, business, review_df = (
    pd.read_csv('Data/' + 'yelp_academic_dataset_' + table + '_pruned.csv')
    for table in ('user', 'business', 'review')
)

# Distinct identifiers present in the user and business tables.
valid_users = user_df.user_id.unique()
valid_businesses = business.business_id.unique()

Drop all attributes that are not relevant

In [24]:
# Keep the restaurant names aside; they are only needed for display later on.
business_names = business['name']

# Columns to discard: every object-typed column plus a few numeric ones,
# except the business identifier, which must survive the drop.
attrs = [column for column in business.columns if business[column].dtype == 'object']
attrs.extend(['latitude', 'longitude', 'stars', 'review_count'])
attrs.remove('business_id')

business_df = business.drop(columns=attrs)

Generate a k-user subset representing the group of users for whom the recommendation is to be made <br>
Configure k as required

In [13]:
# Size of the user group that receives a joint recommendation.
k = 3
# Draw k distinct users at random from the valid users.
# NOTE(review): no seed is set, so the group differs on every run.
user_group = random.sample(population=list(valid_users), k=k)

Generate the preference (n_user x n_businesses) score matrix

In [5]:
# Build the preference score matrix from the user, review and business tables.
# get_preferences comes from the project's preference_generation module.
preference_df = get_preferences(
    user_df,
    review_df,
    business_df,
)

Generate the (k x n_businesses) present score matrix 

In [None]:
# Build the present score matrix for the sampled user group.
# get_present_score_mat comes from the project's present_score_generation module.
present_score = get_present_score_mat(
    user_df,
    review_df,
    business_df,
    preference_df,
    user_group,
)

Generate the restaurant-similarity matrix

In [26]:
# Map each business_id to its row position in business_df.
mapping = {
    business_id: position
    for position, business_id in enumerate(business_df.business_id)
}

# Pairwise cosine similarity between the restaurant feature rows (identifier
# column excluded), wrapped together with the id -> position mapping.
restaurant_similarity = Similarity(
    cosine_similarity(business_df.drop(columns='business_id')),
    mapping,
)

Generate the (k x n_businesses) imputed ratings

In [27]:
# Impute the group's ratings from the present scores and the restaurant
# similarity structure. make_user_rest_matrix is a project helper.
imputed_ratings = make_user_rest_matrix(
    present_score,
    business_df,
    user_group,
    restaurant_similarity,
)

In [30]:
# The identifier column is no longer required, so only the feature columns
# remain from here on. errors='ignore' makes the cell safe to re-run:
# without it a second execution raises KeyError because the column is gone.
business_df = business_df.drop(columns='business_id', errors='ignore')

Compute the aggregate score for each restaurant

In [31]:
# Aggregate the imputed ratings with the project helper aggregate_scores.
# The result is used below as a positional indexer into business_df.
top_pick=aggregate_scores(imputed_ratings)
# Feature row(s) of the top pick, selected by position.
top_pick_business=business_df.iloc[top_pick]

Specify the constraints and synthesise an ideal restaurant that satisfies these constraints

In [32]:
# Attribute values that the synthesised restaurant is required to have.
constraints = {
    'attributes.DogsAllowed': 1,
    'attributes.DietaryRestrictions.halal': 1,
    'attributes.DietaryRestrictions.vegan': 0,
}
# Combine the constraints with the top pick via the project helper.
ideal_restaurant = get_ideal_restaurant(constraints, top_pick_business)

Specify the number of recommendations to be returned (i.e. top n recommendations) <br>
Compute the cosine similarity of the synthesised restaurant to all other restaurants in the dataset and choose the n most similar of them

In [33]:
# Number of recommendations to return (top n). This is independent of k,
# the size of the user group.
n = 10

# Cosine similarity between the synthesised ideal restaurant (reshaped to a
# single row) and every row of business_df.
ideal_similarity = list(cosine_similarity(ideal_restaurant.reshape(1, -1), business_df)[0])

# argsort is ascending, so reverse it to rank from most to least similar and
# keep the first n positions. The slice previously used k (the group size),
# which left n unused and returned only k recommendations.
top_k = list(np.argsort(ideal_similarity)[::-1][:n])

Compute the Least Satisfaction and Average Satisfaction criteria for each of the recommended restaurants

In [None]:
# Preference rows of the group members, without the identifier column.
# Built once: the selection does not depend on the restaurant being scored,
# so rebuilding it twice per loop iteration was wasted work.
# NOTE(review): assumes least_satisfaction / avg_satisfaction do not modify
# their arguments -- confirm in the evaluate module.
group_preferences = preference_df.loc[preference_df.user_id.isin(user_group)].drop('user_id', axis=1)

least = []
avg = []
for i in top_k:
    # Feature row of the recommended restaurant at position i.
    restaurant = business_df.iloc[i]
    least.append(least_satisfaction(group_preferences, restaurant))
    avg.append(avg_satisfaction(group_preferences, restaurant))

print("Least Satisfaction Scores:")
print(least)
print("Average Satisfaction Scores:")
print(avg)


Sort the recommended restaurants based on the least satisfaction score <br>
Display the results based on least satisfaction

In [None]:
# argsort orders ascending; walking it backwards lists the restaurant with
# the highest least-satisfaction score first.
for rank in np.argsort(least)[::-1]:
    print(business_names[top_k[rank]])

Sort the recommended restaurants based on the average satisfaction score <br>
Display the results based on average satisfaction

In [None]:
# argsort orders ascending; walking it backwards lists the restaurant with
# the highest average-satisfaction score first.
for rank in np.argsort(avg)[::-1]:
    print(business_names[top_k[rank]])