# Recommender System

The recommender system is aimed to help restaurant owners study their top competitors based on their profiles on Yelp.

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
data_org = pd.read_csv('vegas_cuisine.csv')
data_org.shape

(3779, 422)

We pick these following columns as important columns based on key features ranked by our Decision Tree and Logistic Regression models.

In [9]:
important_cols = ['org_index','name','postal_code','latitude','longitude','stars','review_count',
                  'asian','european','namerica','samerica','other',              
'BusinessParking_street',
 'BusinessParking_validated',
 'BusinessParking_valet',
 'class',
 'Arts & Entertainment',
 'Asian Fusion',
 'Bakeries',
 'Barbeque',
 'Bars',
 'Beer',
 'Breakfast & Brunch',
 'Buffets',
 'Cafes',
 'Casinos',
 'Caterers',
 'Cocktail Bars',
 'Coffee & Tea',
 'Desserts',
 'Diners',
 'Event Planning & Services',
 'Food',
 'Food Delivery Services',
 'Food Stands',
 'Food Trucks',
 'Gluten-Free',
 'Grocery',
 'Hawaiian',
 'Ice Cream & Frozen Yogurt',
 'Juice Bars & Smoothies',
 'Lounges',
 'Nightlife',
 'Pubs',
 'Restaurants',
 'Salad',
 'Seafood',
 'Shopping',
 'Soup',
 'Specialty Food',
 'Sports Bars',
 'Steakhouses',
 'Sushi Bars']

In [10]:
data = data_org[important_cols]
data = data.fillna(data.mean())

data[data['asian'] == True]['name']

In [42]:
def get_title_from_index(data,index):
    name = data[data.index == index]["name"].values
#     if type(name) == list:
#         name = name[0]
    return name

def get_index_from_title(data, name):
    idx = data[data.name == name].index
    if len(idx) > 0:
        idx = idx[0]
    return idx

In [51]:
id = get_title_from_index(data,0)
id

array(['Subway'], dtype=object)

### First version
The first version of the recommender system will output 10 competing restaurants that are found to be the most similar to our target restaurant.

In [75]:
#Get a list of similar restaurants (given a target restaurant) in descending order of similarity score
#1 only look at restaurants of the same category
cuisine_list = ['asian','namerica','samerica','european']

def top_similar_res(data):
    #Get user's input - a name of a restaurant. Here we assume that the restaurant has a Yelp profile and is in our dataset
    title = str(input('Enter a name of a restaurant: ')) 
    
    #Get the cuisine of the restaurant
    cuisine = str(input('Please specify the cuisine type (choose among these following options: asian, namerica, samerica or european): '))
    
    #Print confirmation
    print('The selected restaurant and cuisine type are: '+ title +', '+cuisine)
    
    #Get index of a restaurant from its name 
    res_index = int(get_index_from_title(data,title))
    
    #only compare the restaurants with others of the same cuisine
    datause = data[data[cuisine] ==True]
    
    for i in cuisine_list:
        if i != cuisine:
            datause = datause[datause[i] == False]
    datause.reset_index(inplace = True) 
            
    print('There are ' + str(datause.shape[0]) + ' restaurants of the same cuisine')
    res_index = int(get_index_from_title(data,title))
        
    #Select features and compute cosine sim matrix
    features = datause.columns.tolist()
    features.remove('name')
    features.remove('org_index')
    
    dfuse = np.matrix(datause[features])
    cosine_sim = cosine_similarity(dfuse)
    similar_res =  list(enumerate(cosine_sim[res_index]))
    sorted_similar_res = sorted(similar_res,key=lambda x:x[1],reverse=True)

    # Print
    print(' ')
    print('List of top 10 competitors: ')
    i=0
    for element in sorted_similar_res:
        title_sim = get_title_from_index(datause, element[0])
        stars = datause['stars'][element[0]]
        review = datause['review_count'][element[0]]
        
        if title_sim != title:
            print(title_sim, stars, review)
        i=i+1
        if i>10:
            break

In [76]:
top_similar_res(data)

Enter a name of a restaurant: Subway
Please specify the cuisine type (choose among these following options: asian, namerica, samerica or european): namerica
The selected restaurant and cuisine type are: Subway, namerica
There are 1775 restaurants of the same cuisine
 
List of top 10 competitors: 
['Popeyes Louisiana Kitchen'] 1.5 16
['Pizza Hut'] 2.5 19
['Metro Pizza'] 2.5 21
["Domino's Pizza"] 2.5 27
['Fatburger'] 2.5 22
['KFC'] 2.0 21
["Charlie's Market Buffet"] 2.5 15
['Taste My Love'] 4.5 11
['Ace Of Steaks'] 4.5 35
['Jack in the Box'] 3.0 33


Based on this recommender system, the restaurants that are most similar to Subway is Popeyes Louisiana Kitchen, Pizza Hut and Metro Pizza. This makes sense since they all serve fast food.

### Second Version
The second version of the restaurant will ask users how many competitors they want to examine. It will output that number of competitors if the competitors are beating our target restaurant in certain aspects (either in star rating or review counts)

In [82]:
#Get a list of similar restaurants (given a target restaurant) in descending order of similarity score
#1 only look at restaurants of the same category
def get_stars_reviewcount_from_index(data,index):
    star = data[data.index == index]["stars"].values
    review = data[data.index == index]["review_count"].values
    return star, review

def top_similar_res(data):
    #Get user's input - a name of a restaurant. Here we assume that the restaurant has a Yelp profile and is in our dataset
    title = str(input('Enter a name of a restaurant: ')) 
    
    #Get the cuisine of the restaurant
    cuisine = str(input('Please specify the cuisine type (choose among these following options: asian, namerica, samerica or european): '))
    
    #Print confirmation
    print('The selected restaurant and cuisine type are: '+ title +', '+cuisine)
    
    #Get the number of competitors users want to see
    numofcomp = int(input('Enter the number of competitors use want to see: '))
    
    #Get index of a restaurant from its name 
    res_index = int(get_index_from_title(data,title))
    
    #Get the stars and review counts of the input restaurant
    input_star, input_review = get_stars_reviewcount_from_index(data,res_index)
    print('Rating star and number of reviews for '+ title +' are: '+ str(input_star) + ', ' + str(input_review))
        
    #Get index of a restaurant from its name
    res_index = int(get_index_from_title(data,title))
    print('The selected restaurant and cuisine type are: '+ title +', '+cuisine)
    
    #only compare the restaurants with others of the same cuisine
    datause = data[data[cuisine] ==True]
    
    for i in cuisine_list:
        if i != cuisine:
            datause = datause[datause[i] == False]
    datause.reset_index(inplace = True) 
            
    print('There are ' + str(datause.shape[0]) + ' restaurants of the same cuisine')
        
    #Select features and compute cosine sim matrix
    features = datause.columns.tolist()
    features.remove('name')
    features.remove('org_index')
    
    dfuse = np.matrix(datause[features])
    cosine_sim = cosine_similarity(dfuse)
    similar_res =  list(enumerate(cosine_sim[res_index]))
    sorted_similar_res = sorted(similar_res,key=lambda x:x[1],reverse=True)

    # Print
    print(' ')
    print('List of top 10 competitors: ')
    i=0
    for element in sorted_similar_res:
        title_sim = get_title_from_index(datause, element[0])
        stars = datause['stars'][element[0]]
        review = datause['review_count'][element[0]]
        if title_sim != title:
            if stars > input_star or review > input_review:
                print(title_sim, stars, review)
                i=i+1
        if i> numofcomp:
            break

In [83]:
top_similar_res(data)

Enter a name of a restaurant: Subway
Please specify the cuisine type (choose among these following options: asian, namerica, samerica or european): namerica
The selected restaurant and cuisine type are: Subway, namerica
Enter the number of competitors use want to see: 15
Rating star and number of reviews for Subway are: [2.5], [13]
The selected restaurant and cuisine type are: Subway, namerica
There are 1775 restaurants of the same cuisine
 
List of top 10 competitors: 
['Popeyes Louisiana Kitchen'] 1.5 16
['Pizza Hut'] 2.5 19
['Metro Pizza'] 2.5 21
["Domino's Pizza"] 2.5 27
['Fatburger'] 2.5 22
['KFC'] 2.0 21
["Charlie's Market Buffet"] 2.5 15
['Taste My Love'] 4.5 11
['Ace Of Steaks'] 4.5 35
['Jack in the Box'] 3.0 33
['Bagels And Subs'] 4.5 21
['Port of Subs'] 3.0 15
["Jason's Deli"] 3.5 47
['Popeyes Louisiana Kitchen'] 2.0 56
['Pizza Hut'] 2.5 25
['Checkers'] 2.0 59
