<font size = '5' style="font-weight:bold">Recommendation Models</font>

In [21]:
%pylab inline
import warnings
warnings.filterwarnings('ignore')

Populating the interactive namespace from numpy and matplotlib


In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [23]:
# Loading the reviews dataset
reviews_frame = pd.read_csv('reviews_restaurants_text.csv')

In [24]:
import pandas as pd
import numpy as np
from numpy.linalg import norm
from sklearn.pipeline import FeatureUnion
from transformers import *
from scipy.sparse import coo_matrix

In [25]:
# Loading business and user dataset
import pandas as pd
import simplejson as json
from datetime import datetime
from sklearn.cross_validation import train_test_split

def get_data(line, columns):
    d = json.loads(line)
    return dict((key, d[key]) for key in columns)

print ('Loading user dataset started--------')
# Loading user data
columns = ('user_id', 'name','average_stars')
with open('user.json') as f:
    user_frame = pd.DataFrame(get_data(line, columns) for line in f)
user_frame = user_frame.sort_values('user_id')

print ('Loading user dataset completed--------')

print ('Loading business dataset started--------')

# Loading business data
columns = ('business_id', 'name','categories','attributes','city','stars')
with open('business.json') as f:
    business = pd.DataFrame(get_data(line, columns) for line in f)

business = business.sort_values('business_id')

# Trimming the dataset by city
business_by_city = business['city'] == "Las Vegas"
business = business[business_by_city]

# Trimming the dataset by the category 'Restaurants'
business_frame = business
count = 0
for row in business_frame.itertuples():
    count = count + 1
    if (count%5000 == 0):
        print (count)
    if 'Restaurants' not in row.categories:
        business_frame.drop([row.Index], inplace=True)
print (len(business_frame))
print ('Loading business dataset completed--------')

Loading user dataset started--------
Loading user dataset completed--------
Loading business dataset started--------
5000
10000
15000
20000
25000
5899
Loading business dataset completed--------


<font size = '5' style="font-weight:bold">Content Based Filtering Model</font>

In [17]:
# Feature Extraction
print ('Feature Extraction started---------')

encoding_category = One_Hot_Encoder('categories', 'list', sparse=False)
encoding_attribute = One_Hot_Encoder('attributes', 'dict', sparse=False)
encoding_city= One_Hot_Encoder('city', 'value', sparse=False)
rating = Column_Selector(['stars'])
encoding_union = FeatureUnion([ ('cat', encoding_category),('attr', encoding_attribute),('city', encoding_city), ('rating', rating) ])
encoding_union.fit(business_frame)

print ('Feature Extraction completed---------')

Feature Extraction started---------
Feature Extraction completed---------


In [18]:
# Generating profile of the user

user = 'tL2pS5UOmN6aAOi3Z-qFGg' 

print ('Businesses for the reviews given by the selected user-----')

reviews_given_by_user = reviews_frame.ix[reviews_frame.user_id == user]
reviews_given_by_user['stars'] = reviews_given_by_user['stars'] - float(user_frame.average_stars[user_frame.user_id == user])
reviews_given_by_user = reviews_given_by_user.sort_values('business_id')
display (reviews_given_by_user)

# list of ids of the businesses reviewed by the user
reviewed_business_id_list = reviews_given_by_user['business_id'].tolist()
reviewed_business = business_frame[business_frame['business_id'].isin(reviewed_business_id_list)]
reviewed_business = reviewed_business.sort_values('business_id')
display (reviewed_business)

print ('Profile creation started-------')

features = encoding_union.transform(reviewed_business)
profile = np.matrix(reviews_given_by_user.stars) * features

print ('Profile creation completed-------')

Businesses for the reviews given by the selected user-----


Unnamed: 0,review_id,business_id,user_id,text,stars
5,HBB5H80QrvJO9yp2K8Cnww,-P8dGzSVhJi-5oZ-8U2y0w,tL2pS5UOmN6aAOi3Z-qFGg,"b""We ate here on a recent visit to Vegas. We f...",-0.63
4102,qqsjcSouBI9vV161PAn5Rw,54WM5coZonA0izd2y04p_A,tL2pS5UOmN6aAOi3Z-qFGg,"b""We've gotten take out from here a couple of ...",0.37
9,RNLRW8dfpYF2lH75YLPGjQ,QC9bLPcNk6ymAOHf03pHhQ,tL2pS5UOmN6aAOi3Z-qFGg,b'Stopped in here recently to grab a bite on t...,0.37
8,xGYGeu0zSTqLsUlG-_h6kQ,SVGApDPNdpFlEjwRQThCxA,tL2pS5UOmN6aAOi3Z-qFGg,b'Stopped in here with my family for dinner to...,0.37
2,adNS6X4TnaxuFFxzoezGzg,So132GP_uy3XbGs0KNyzyw,tL2pS5UOmN6aAOi3Z-qFGg,"b""I've eaten here a couple of times. Great foo...",1.37
10,SFjQVy4pREFzU2jCWvAvNg,T00dMi_gQdyQkE9GKJFVSQ,tL2pS5UOmN6aAOi3Z-qFGg,"b""This one is right across the street from me....",0.37
7,8h8_hkjA76CL0WDwazRqDw,T6VKCBhHojQkCWp_IYO24w,tL2pS5UOmN6aAOi3Z-qFGg,b'Typical McDonalds. Food was typical. Employe...,-0.63
4,ClB81WvkDJwDnHN6evg84w,WzkbnhI-fxdH_tMzT3evtA,tL2pS5UOmN6aAOi3Z-qFGg,b'This is one of the best bars in town. Not ma...,1.37
3,JyOWXyxpN0PmPAF3OXkfCQ,cZBCVzd4lg_jx8IiFz-Iag,tL2pS5UOmN6aAOi3Z-qFGg,"b""Have grabbed a quick bite to eat hear many t...",0.37
1,xryg94pDLOO71veGcQINuQ,dfRAK2mgdHbL2_YsFqtCdQ,tL2pS5UOmN6aAOi3Z-qFGg,"b""I used to come here back in the 90s when thi...",-2.63


Unnamed: 0,attributes,business_id,categories,city,name,stars
118019,"{'RestaurantsTableService': True, 'GoodForMeal...",-P8dGzSVhJi-5oZ-8U2y0w,"[Diners, Buffets, Restaurants]",Las Vegas,Cannery Row Buffet,2.5
42612,"{'RestaurantsTableService': True, 'GoodForMeal...",54WM5coZonA0izd2y04p_A,"[Sandwiches, Restaurants, Barbeque]",Las Vegas,Famous Dave's Barbeque,3.5
75812,"{'RestaurantsTableService': False, 'GoodForMea...",QC9bLPcNk6ymAOHf03pHhQ,"[Restaurants, Mexican]",Las Vegas,Del Taco,3.5
105246,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",SVGApDPNdpFlEjwRQThCxA,"[Restaurants, Bars, Nightlife, Mexican]",Las Vegas,Juan's Flaming Fajitas & Cantina,4.5
132243,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",So132GP_uy3XbGs0KNyzyw,"[Restaurants, Pizza, Seafood, American (New), ...",Las Vegas,Casa Di Amore,4.5
122782,"{'RestaurantsTableService': False, 'GoodForMea...",T00dMi_gQdyQkE9GKJFVSQ,"[Mexican, Tex-Mex, Restaurants, Fast Food]",Las Vegas,Taco Bell,2.5
153255,"{'GoodForMeal': {'dessert': False, 'latenight'...",T6VKCBhHojQkCWp_IYO24w,"[Restaurants, Fast Food, Burgers]",Las Vegas,McDonald's,2.0
75343,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",WzkbnhI-fxdH_tMzT3evtA,"[Restaurants, Nightlife, Sports Bars, Karaoke,...",Las Vegas,Mr D's Sportsbar & Grill,3.5
158260,"{'RestaurantsTableService': False, 'GoodForMea...",cZBCVzd4lg_jx8IiFz-Iag,"[Burgers, Fast Food, Restaurants]",Las Vegas,Carl's Jr,2.5
18003,"{'BusinessParking': {'garage': False, 'street'...",dfRAK2mgdHbL2_YsFqtCdQ,"[Music Venues, Restaurants, Bars, Dive Bars, A...",Las Vegas,Cheyenne Saloon,3.0


Profile creation started-------
Profile creation completed-------


In [26]:
# Calculating cosine similarity of the unreviewed reviews with the user's profile
print ('Cosine similarity calculation started-----')

test_frame = business_frame[0:1000]
test_frame = test_frame.sort_values('business_id')
business_id_list = test_frame['business_id'].tolist()
features = encoding_union.transform(test_frame)
similarity = np.asarray(profile * features.T) * 1./(norm(profile) * norm(features, axis = 1))

print ('Cosine similarity calculation completed-----')

Cosine similarity calculation started-----
Cosine similarity calculation completed-----


In [54]:
# Output the recommended restaurants
index_arr = (-similarity).argsort()[:10][0][0:10]
print ('Hi ' + user_frame.name[user_frame.user_id == user].values[0] + '\nCheck out these restaurants: ')
for i in index_arr:
    resturant = business_frame[business_frame.business_id == business_id_list[i]]
    print (str(resturant['name'].values[0]))
#     display(business_frame[business_frame['name'] == str(resturant['name'].values[0])])

Hi Mark
Check out these restaurants: 
Chapalas Mexican Restaurants
Dal Toro Ristorante
Argana
Paymons Mediterranean Cafe & Hookah Lounge
Wahoo's Fish Tacos
Rhodes Ranch Club House Restaurant
Payless Pizza #2 and Ribs
Krung Siam Thai
Mr B's Bar & Grill
Andiron Steak & Sea


<font size = '5' style="font-weight:bold">User based Collaborative Filtering Model</font>

In [55]:
reviews_frame['stars'] = reviews_frame.groupby('business_id')['stars'].transform(lambda x : x - x.mean())

def get_idx(user_id): 
    global running_index
    running_index = running_index + 1
    return pd.Series(np.zeros(len(user_id)) + running_index) 

running_index = -1 
reviews_frame['user_idx'] = reviews_frame.groupby('user_id')['user_id'].transform(get_idx)

# Work in terms of sparse matrix
print ('Processing utility matrix started------')

def convert_to_sparse(group):
    ratings = coo_matrix( (np.array(group['stars']), (np.array(group['user_idx']), np.zeros(len(group)))), 
                          shape = (len(user_frame), 1) ).tocsc()
    return ratings / np.sqrt(float(ratings.T.dot(ratings).toarray()))

utility = reviews_frame.groupby('business_id')[['stars', 'user_idx']].apply(convert_to_sparse) 

print ('Processing utility matrix completed------')

# Get top recommendatiokns
print ('Generating recommendations started-----')

def cosine_similarity(v1, v2):
    return float(v1.T.dot(v2).toarray())

def get_recommended_businesses(n, business_id):
    util_to_match = utility[utility.index == business_id]
    similarity = utility.apply(lambda x: cosine_similarity(util_to_match.values[0], x))
    similarity.sort_values(ascending=False)
    return similarity[1:(n+1)]

fav_business = reviews_frame.business_id[ reviews_frame.stars[ user_frame.user_id == user ].argmax() ]

rec = pd.DataFrame(get_recommended_businesses(10, fav_business), columns=['similarity'])
rec['name'] = [ business_frame.name[ business_frame.business_id == business_id ].values[0] for business_id in rec.index]

print ('Generating recommendations completed-----')

# Output recommendation
print ('Hi ' + user_frame.name[user_frame.user_id == user].values[0] + '\nCheck out these restaurants: ')
for name in rec.name:
    print (name)

Processing utility matrix started------
Processing utility matrix completed------
Generating recommendations started-----
Generating recommendations completed-----
Hi Mark
Check out these restaurants: 
Double Play Sports Bar
Jody Maroni's Sausage Kingdom
Bavette's Steakhouse & Bar
Red Ginseng Narita Sushi & BBQ
Michael Mina
Sin City Thai Restaurant
Fresh Buffet
Smashburger
Ichi Ramen House
Woo Chun Korean BBQ
