In [1]:
import time
import json
import re
import os 
import tensorflow as tf
import pandas as pd
import numpy as np
from itertools import compress
import matplotlib.pyplot as plt

In [2]:
def json_to_csv(directory, fileNames, createSample=False):
    """
    json_to_csv: loops through specified JSON files and converts them to csv files.
                 option to also create a sample csv, which uses np.random.seed 9001 to create a sample dataset with 10% of the observations
    
                 pandas has a read_json function, but returns a 'Trailing data error' when working with these specific files
                 
                 
    Inputs: -directory of JSON files
            -list of JSON filenames
            -createSample flag
            
    """
    
    start = time.time()

    jsonData = []

    for fileName in fileNames:
        with open(directory + fileName,  encoding="utf8") as file:
            print('{0} opened'.format(fileName))
            for line in file:
                #I use an rstrip here because some of the files have trailing blank spaces
                jsonData.append(json.loads(line.rstrip()))
        
        df = pd.DataFrame.from_dict(jsonData)
        
        csvFileName = fileName[:len(fileName)-5] + '.csv'
        
        df.to_csv(directory + csvFileName)
        print('{0} created'.format(csvFileName))
        
        
        if createSample:
            np.random.seed(9001)
            msk = np.random.rand(len(df)) <= 0.1
            sample = df[msk]
            
            csvSampleFileName = fileName[:len(fileName)-5] + '_sample.csv'
            
            sample.to_csv(directory + csvSampleFileName)
            print('{0} created'.format(csvSampleFileName))
        
    print('This function took {} minutes to run'.format((time.time()-start)/60))
    

In [3]:
# fileNameList = ['user.json',
#                 'business.json',
#                 'review.json']

# json_to_csv('data/', fileNameList, createSample=True)

In [4]:
df_business = pd.read_json('data/business.json', lines=True)
df_business.dropna(inplace=True, subset = ['categories'], axis=0)
df_business.loc[df_business['categories'].str.contains('Restaurants')]
df_business['categories'].value_counts()

Restaurants, Pizza                                                                                                                                 1042
Nail Salons, Beauty & Spas                                                                                                                         1031
Pizza, Restaurants                                                                                                                                  993
Beauty & Spas, Nail Salons                                                                                                                          947
Food, Coffee & Tea                                                                                                                                  888
Mexican, Restaurants                                                                                                                                885
Coffee & Tea, Food                                                                      

In [5]:
df_business.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,85016,5,3.0,AZ
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128,2.5,ON
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170,4.0,NC
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,85338,3,5.0,AZ
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1,35.190012,-80.887223,Queen City Plumbing,28217,4,4.0,NC


In [6]:
df_business.count()

address         192127
attributes      163773
business_id     192127
categories      192127
city            192127
hours           147778
is_open         192127
latitude        192127
longitude       192127
name            192127
postal_code     192127
review_count    192127
stars           192127
state           192127
dtype: int64

In [7]:
# reading filter_review 
df_review = pd.read_csv('data/filtered_reviews.csv', index_col=0)
df_review.dropna

  mask |= (ar1 == a)


<bound method DataFrame.dropna of                     business_id  stars                 user_id
0        ujmEBvifdJM6h6RLv4wQIg    1.0  hG7b0MtEbXx5QzbzE6C_VA
2        WTqjgwHlXbSFevF32_DJVw    5.0  n6-Gk65cPZL6Uz8qRm3NYw
3        ikCg8xy5JIg_NGPx-MSIDA    5.0  dacAIZ6fTM6mqwW5uxkskg
6        3fw2X5bZYeW9xCz_zGhOHg    3.0  jlu4CztcSxrKx56ba1a5AQ
7        zvO-PJCpNk4fgAVUnExYAA    1.0  d6xvYpyzcfbF_AZ8vMB7QA
8        b2jN2mm9Wf3RcrZCgfo1cg    2.0  sG_h0dIzTKWa3Q6fmb4u-g
9        oxwGyA17NL6c5t1Etg5WgQ    3.0  nMeCE5-xsdleyxYuNZ_7rA
10       8mIrX_LrOnAqWsB5JrOojQ    4.0  FIk4lQQu1eTe2EpzQ4xhBA
12       FxLfqxdYPA6Z85PFKaqLrg    4.0  GYNnVehQeXjty0xH7-6Fhw
14       AakkkTuGZA2KBodKi2_u8A    1.0  TpyOT5E16YASd7EWjLQlrw
15       YvrylyuWgbP90RgMqZQVnQ    5.0  NJlxGtouq06hhC7sS2ECYw
16       NyLYY8q1-H3hfsTwuwLPCg    4.0  86J5DwcFk4f4In1Vxe2TvA
17       cHdJXLlKNWixBXpDwEGb_A    3.0  JSrP-dUmLlwZiI7Dp3PQ2A
18       6lj2BJ4tJeu7db5asGHQ4w    5.0  6Fz_nus_OG4gar721OKgZA
19       y-Iw6dZflNix

In [8]:
# inumerating business_in and user_id with bid and uid 
def build_fmap_invmap(ser):
    uni_ele = ser.unique()
    fmap = {v:i for i, v in enumerate(uni_ele)}
    invmap = {i:v for i, v in enumerate(uni_ele)}
    return fmap, invmap

In [9]:
# setting debuging enviroment on (dbg =1) to turn it off (dbg = 0)
dbg = 1
if dbg:
    df_review = df_review.head(100000)

In [10]:
bus_fmap, bus_invmap = build_fmap_invmap(df_review['business_id'])
u_fmap, u_invmap = build_fmap_invmap(df_review['user_id'])

In [11]:
df_review['bid'] = df_review['business_id'].map(bus_fmap)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [12]:
df_review['uid'] = df_review['user_id'].map(u_fmap)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [13]:
df_review.head()

Unnamed: 0,business_id,stars,user_id,bid,uid
0,ujmEBvifdJM6h6RLv4wQIg,1.0,hG7b0MtEbXx5QzbzE6C_VA,0,0
2,WTqjgwHlXbSFevF32_DJVw,5.0,n6-Gk65cPZL6Uz8qRm3NYw,1,1
3,ikCg8xy5JIg_NGPx-MSIDA,5.0,dacAIZ6fTM6mqwW5uxkskg,2,2
6,3fw2X5bZYeW9xCz_zGhOHg,3.0,jlu4CztcSxrKx56ba1a5AQ,3,3
7,zvO-PJCpNk4fgAVUnExYAA,1.0,d6xvYpyzcfbF_AZ8vMB7QA,4,4


In [14]:
n_users, n_bus = df_review['uid'].nunique(), df_review['bid'].nunique()

In [15]:
n_dim = 5

In [16]:
# Initializing tensor flow at a randon number | n_users * n_dim (layers) initializing at some random number between
# -1 to 1 both for business and users. For internal layer. 
# PS > Create a function with code below: 

user_vector_raw = tf.Variable(tf.random_uniform([n_users, n_dim], minval = -1., maxval = 1.))
bus_vector_raw = tf.Variable(tf.random_uniform([n_bus, n_dim], minval = -1., maxval = 1.))

# running the tanh function to find 
user_vector = tf.tanh(user_vector_raw)
bus_vector = tf.tanh(bus_vector_raw)

# Stipulating the imput layer. 
users = tf.placeholder(tf.int32, shape=(None))
businesses = tf.placeholder(tf.int32, shape=(None))
ratings = tf.placeholder(tf.float32, shape=(None))

UserSampled = tf.nn.embedding_lookup(user_vector, users)
BusinessSampled = tf.nn.embedding_lookup(bus_vector, businesses)
UserSampled.set_shape([None, n_dim])
BusinessSampled.set_shape([None, n_dim])

# input tensors for products, users, ratings

Instructions for updating:
Colocations handled automatically by placer.


In [17]:
# Defining the output
# transfer into a fucntion
estimatedaffinitiesraw = tf.reduce_sum(UserSampled * BusinessSampled, 1)
estimatedaffinities = tf.sigmoid(estimatedaffinitiesraw)*5

In [18]:
# estimatedaffinities - ratings ask Lee to clarify ratings, where that ratings comes from? ask to explain the loss function 
# transfer into a function 
loss = tf.reduce_sum(tf.square(estimatedaffinities - ratings))
opt = tf.train.RMSPropOptimizer(learning_rate=.1).minimize(loss)

Instructions for updating:
Use tf.cast instead.


In [19]:
# Setting the session and intialize it 

sess = tf.Session()

In [20]:
# picking up 64 randon rows in order to run under memory capacity
rows = np.random.choice(df_review.shape[0], 64)

In [21]:
sess.run(tf.global_variables_initializer())

In [22]:
# Creating a loop to train under 64 random rows 
for i in range(10000):
    rows = np.random.choice(df_review.shape[0], 64)
    dfrows = df_review.iloc[rows]
    fd = {users:dfrows['uid'].values,
         businesses:dfrows['bid'].values,
         ratings:dfrows['stars'].values}
    _, l2loss = sess.run([opt, loss], fd)
    if i % 1000 == 0:
        print(l2loss)

214.91202
194.90646
152.51094
74.84643
92.67352
65.45526
48.34632
41.532845
75.51494
21.321947


In [23]:
user_values, bus_values = sess.run([user_vector, bus_vector])

In [24]:
bus_vec_df = pd.DataFrame(data = bus_values, index = 
                          [bus_invmap[i] for i in range(n_bus)])



In [25]:
bus_vec_df

Unnamed: 0,0,1,2,3,4
ujmEBvifdJM6h6RLv4wQIg,0.999997,0.999280,0.408935,0.333874,0.999996
WTqjgwHlXbSFevF32_DJVw,-0.658855,0.950752,0.076261,0.714559,-0.999990
ikCg8xy5JIg_NGPx-MSIDA,-0.997689,0.989382,0.926184,0.966205,-0.997900
3fw2X5bZYeW9xCz_zGhOHg,0.763555,0.663143,0.520106,-0.474499,0.763286
zvO-PJCpNk4fgAVUnExYAA,-0.441752,0.859989,0.753026,0.276535,-0.862949
b2jN2mm9Wf3RcrZCgfo1cg,-0.999741,0.998258,-0.983448,0.999998,-0.811568
oxwGyA17NL6c5t1Etg5WgQ,0.999999,-0.888191,-0.965882,-0.252030,0.372227
8mIrX_LrOnAqWsB5JrOojQ,-1.000000,-1.000000,0.999997,-0.388426,-1.000000
FxLfqxdYPA6Z85PFKaqLrg,-0.949160,0.226390,0.789684,-0.879125,0.309351
AakkkTuGZA2KBodKi2_u8A,0.449540,-0.700350,0.877886,0.075232,-0.187281


In [26]:
# Joining df_business + bus_vec_df
df_allBusiness = df_business.join(bus_vec_df, on='business_id', how='right')

In [27]:
df_allBusiness = df_allBusiness.dropna()

In [28]:
# Pulling user ID 4 and comparing to inverse map on uid (not sure why, maybe to check accuracy?)
uid = 4
u_invmap[uid]


'd6xvYpyzcfbF_AZ8vMB7QA'

In [29]:
bname = 'cHdJXLlKNWixBXpDwEGb_A'
bid = bus_fmap[bname]

In [30]:
df_allBusiness

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state,0,1,2,3,4
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1.0,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128.0,2.5,ON,0.235235,0.755847,0.506182,0.657664,-0.209576
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1.0,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170.0,4.0,NC,-0.997862,-0.999683,-0.999945,0.998321,-0.999907
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1.0,35.190012,-80.887223,Queen City Plumbing,28217,4.0,4.0,NC,-0.391483,-0.154326,0.443073,0.361585,0.758199
7,"4545 E Tropicana Rd Ste 8, Tropicana","{'RestaurantsPriceRange2': '3', 'GoodForKids':...",gbQN7vr_caG_A1ugSmGhWg,"Hair Salons, Hair Stylists, Barbers, Men's Hai...",Las Vegas,"{'Monday': '10:0-19:0', 'Tuesday': '10:0-19:0'...",1.0,36.099872,-115.074574,Supercuts,89121,3.0,3.5,NV,-0.989363,-0.971699,-0.991149,0.993350,0.823146
9,21689 Lorain Rd,"{'ByAppointmentOnly': 'False', 'BusinessAccept...",4GBVPIYRvzGh4K4TkRQ_rw,"Beauty & Spas, Nail Salons, Day Spas, Massage",Fairview Park,"{'Tuesday': '9:0-21:0', 'Wednesday': '9:0-21:0...",1.0,41.440825,-81.854097,Options Salon & Spa,44126,8.0,4.5,OH,-0.866507,-0.455834,-0.312869,-0.747360,-0.139751
11,2450 E Indian School Rd,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",1Dfx3zM-rW4n-31KeC8sJg,"Restaurants, Breakfast & Brunch, Mexican, Taco...",Phoenix,"{'Monday': '7:0-0:0', 'Tuesday': '7:0-0:0', 'W...",1.0,33.495194,-112.028588,Taco Bell,85016,18.0,3.0,AZ,0.902087,0.762534,0.426091,0.489029,0.468676
12,"119 Landings Dr, Ste 101","{'BusinessParking': '{'garage': False, 'street...",5t3KVdMnFgAYmSl1wYLhmA,"Bars, Nightlife, Pubs, Barbers, Beauty & Spas,...",Mooresville,"{'Monday': '10:0-1:0', 'Tuesday': '10:0-1:0', ...",1.0,35.527410,-80.868003,The Kilted Buffalo Langtree,28117,9.0,3.5,NC,0.804303,-0.255422,-0.015206,0.999301,-0.233003
13,5981 Andrews Rd,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...",fweCYi8FmbJXHCqLnwuk8w,"Italian, Restaurants, Pizza, Chicken Wings",Mentor-on-the-Lake,"{'Monday': '10:0-0:0', 'Tuesday': '10:0-0:0', ...",1.0,41.708520,-81.359556,Marco's Pizza,44060,16.0,4.0,OH,0.480978,-0.995069,-0.742991,-0.931080,0.987280
14,4145 Erie St,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",-K4gAv8_vjx8-2BxkVeRkA,"Bakeries, Food",Willoughby,"{'Tuesday': '11:0-17:0', 'Wednesday': '11:0-17...",1.0,41.639860,-81.406396,Baby Cakes,44094,7.0,3.0,OH,-0.538094,-0.350610,0.165029,-0.852983,-0.247670
18,,{'BusinessAcceptsCreditCards': 'True'},nh_kQ16QAoXWwqZ05MPfBQ,"Event Planning & Services, Photographers, Prof...",Las Vegas,"{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W...",1.0,36.116549,-115.088115,Myron Hensel Photography,89121,21.0,5.0,NV,-0.999776,-0.999971,-0.999434,0.999153,0.999953


In [31]:
df_allBusiness.count()

address         9895
attributes      9895
business_id     9895
categories      9895
city            9895
hours           9895
is_open         9895
latitude        9895
longitude       9895
name            9895
postal_code     9895
review_count    9895
stars           9895
state           9895
0               9895
1               9895
2               9895
3               9895
4               9895
dtype: int64

In [32]:
df_allBusiness.loc[df_allBusiness['categories'].str.contains('Restaurant') & 
           df_allBusiness['categories'].str.contains('Japanese')]

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state,0,1,2,3,4
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1.0,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170.0,4.0,NC,-0.997862,-0.999683,-0.999945,0.998321,-0.999907
73,436 Market St,"{'OutdoorSeating': 'False', 'HasTV': 'True', '...",v-scZMU6jhnmV955RSzGJw,"Japanese, Sushi Bars, Restaurants",Pittsburgh,"{'Monday': '11:0-20:0', 'Tuesday': '11:0-20:0'...",1.0,40.441062,-80.002126,No. 1 Sushi Sushi,15222,106.0,4.5,PA,0.999911,-0.999974,0.999959,0.999993,-0.997477
80,529-17th Avenue SW,"{'WiFi': ''free'', 'GoodForDancing': 'False', ...",PkDghu4aan2_wxrhXjTEgg,"Nightlife, Italian, Restaurants, Japanese, Lou...",Calgary,"{'Tuesday': '14:0-23:0', 'Wednesday': '14:0-23...",0.0,51.037778,-114.073351,MiraKuru,T2S 0A9,16.0,3.5,AB,-0.913820,-0.309494,0.178883,0.533210,0.982643
185,2945 Lake Shore Boulevard,"{'RestaurantsDelivery': 'True', 'HasTV': 'True...",SJBzyJDCR_f6dx5tpYAABA,"Sushi Bars, Japanese, Restaurants",Toronto,"{'Monday': '16:0-22:0', 'Tuesday': '11:30-22:0...",1.0,43.600523,-79.505516,Kibo Sushi House,M8V 1J5,15.0,4.0,ON,-0.999100,-0.998028,0.999414,0.999140,-0.999368
343,10624 S Eastern Ave,"{'RestaurantsTakeOut': 'True', 'NoiseLevel': '...",jX9DocoiY4Bo9EUkaTSqvg,"Restaurants, Hawaiian, Chinese, Japanese, Poke...",Henderson,"{'Monday': '0:0-0:0', 'Tuesday': '11:0-19:0', ...",1.0,35.998220,-115.102246,China AAA,89052,149.0,4.5,NV,0.999977,-0.998614,0.999995,-0.999994,-0.999966
346,1909 E Ray Rd,"{'RestaurantsDelivery': 'False', 'RestaurantsT...",ecJri9ozyke4dOCWulZiRQ,"Asian Fusion, Japanese, Restaurants, Ramen, Ta...",Chandler,"{'Monday': '17:0-21:30', 'Tuesday': '17:0-21:3...",1.0,33.320006,-111.809675,Nishikawa Ramen,85225,427.0,4.0,AZ,-0.969119,-0.999998,1.000000,-0.252163,0.999999
444,3339 Boulevard des Sources,"{'RestaurantsDelivery': 'False', 'RestaurantsA...",4B8VnRAstRRshxiUzm9yPw,"Restaurants, Sushi Bars, Japanese",Dollard-des-Ormeaux,"{'Monday': '17:0-22:0', 'Tuesday': '17:0-23:0'...",1.0,45.484315,-73.795652,Maiko Sushi - DDO,H9B 1Z8,51.0,4.0,QC,0.999945,0.999610,0.999912,-0.999971,0.737059
507,"6989 N Hayden Rd, Ste A12","{'RestaurantsAttire': ''casual'', 'OutdoorSeat...",QS3QxI7u5PRdtbGgI0-UsA,"Asian Fusion, Restaurants, Seafood, Sushi Bars...",Scottsdale,"{'Monday': '0:0-0:0', 'Tuesday': '17:30-21:0',...",1.0,33.538029,-111.905676,Sakana Sushi & Grill,85250,347.0,4.0,AZ,-0.999572,-0.643248,0.997075,1.000000,0.999999
524,338-8338 18 Street SE,"{'GoodForMeal': '{'dessert': False, 'latenight...",bPBZEDuHbE-I7bxUWIYMhQ,"Japanese, Restaurants, Sushi Bars",Calgary,"{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",1.0,50.977178,-114.013257,Oishii Sushi,T2C 4E4,78.0,3.5,AB,0.585926,-0.060686,0.826284,-0.760823,0.999998
577,1418 Rue Cartier,"{'RestaurantsPriceRange2': '3', 'OutdoorSeatin...",9ELnhtgMF8_h8Zky4A7BSA,"Restaurants, Japanese",Montréal,"{'Monday': '17:0-22:0', 'Tuesday': '17:0-22:0'...",0.0,45.523226,-73.551979,Saiken Sushi,H2K 4C8,4.0,3.0,QC,0.968894,-0.133106,-0.352658,0.479079,0.799655


# Testing | Passing Train 

In [33]:
bid

12

In [34]:
chipotle = bus_values[bid]

In [35]:
chipotle

array([-0.7978502 ,  0.99910045,  0.99999845,  1.        , -0.9999982 ],
      dtype=float32)

In [36]:
japaneselover = user_values[uid]

In [37]:
bus_values

array([[ 0.9999975 ,  0.99928015,  0.40893543,  0.33387396,  0.9999958 ],
       [-0.6588551 ,  0.9507522 ,  0.07626127,  0.71455854, -0.9999901 ],
       [-0.99768925,  0.989382  ,  0.9261841 ,  0.9662053 , -0.99789953],
       ...,
       [-0.05813452, -0.44171482,  0.8609466 ,  0.37003392, -0.61952525],
       [-0.79474425,  0.94767684, -0.97984105,  0.78057665, -0.9207304 ],
       [ 0.9171797 , -0.6928156 , -0.32345986, -0.54975665,  0.7389049 ]],
      dtype=float32)

In [38]:
np.square(bus_values - japaneselover[None,:]).sum(1).argsort()

array([ 3450, 12690, 10475, ...,   739,  4252,  1108])

In [39]:
np.square(bus_values - chipotle[None,:]).sum(1).argsort()

array([   12, 12174, 10946, ...,   663,   610,   842])

In [40]:
def closest_businesses_to(business = None, user = None, df = None):
    if business is not None:
        target = bus_values[bus_fmap[business]]
    if user is not None:
        target = user_values[u_fmap[user]]
    if df is None:
        df = bus_values
    best_restaurants = np.square(df - target[None,:]).sum(1).argsort()
    return best_restaurants

In [41]:
midtown_japanese_restaurants = bus_values[:30,:]

In [42]:
closest_businesses_to(business = 'cHdJXLlKNWixBXpDwEGb_A')


array([   12, 12174, 10946, ...,   663,   610,   842])

In [43]:
closest_businesses_to(user = 'ri7itn7-CdpsaPxTToK5cQ')

array([11290,  6789, 10964, ...,  2798,  3991,  4311])

In [44]:
closest_businesses_to(user = 'ri7itn7-CdpsaPxTToK5cQ', df = midtown_japanese_restaurants)

array([ 8, 17, 26, 16,  4,  1, 27, 21,  5,  2,  3, 12, 19, 18,  0, 22,  9,
       20, 15, 11, 29,  7, 28, 13, 10, 24,  6, 25, 23, 14])

# Dropping NaN 

In [45]:
df_userSample = pd.read_csv('data/user_sample.csv')
df_userSample = df_userSample.dropna()

In [46]:
df_userSample.count()

Unnamed: 0            7229
average_stars         7229
compliment_cool       7229
compliment_cute       7229
compliment_funny      7229
compliment_hot        7229
compliment_list       7229
compliment_more       7229
compliment_note       7229
compliment_photos     7229
compliment_plain      7229
compliment_profile    7229
compliment_writer     7229
cool                  7229
elite                 7229
fans                  7229
friends               7229
funny                 7229
name                  7229
review_count          7229
useful                7229
user_id               7229
yelping_since         7229
dtype: int64

In [47]:
df_reviewSample = pd.read_csv('data/review_sample.csv')
df_reviewSample = df_reviewSample.dropna()

In [48]:
df_reviewSample.count()

Unnamed: 0     668330
business_id    668330
cool           668330
date           668330
funny          668330
review_id      668330
stars          668330
text           668330
useful         668330
user_id        668330
dtype: int64

In [49]:
def find_ftres_with_nan(df):
    all_nan = df.columns[df.isnull().all()].tolist()
    some_nan = df.columns[df.isnull().any()].tolist()
    print("All NaN Features: ", len(all_nan), all_nan, "Some NaN Features: ", len(some_nan), some_nan)
    return all_nan, some_nan

In [50]:
business = pd.read_csv('data/business.csv',encoding = "ISO-8859-1",index_col=0)
all_nan, some_nan = find_ftres_with_nan(business)

  interactivity=interactivity, compiler=compiler, result=result)


All NaN Features:  0 [] Some NaN Features:  33 ['address', 'attributes', 'average_stars', 'business_id', 'categories', 'city', 'compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', 'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', 'compliment_plain', 'compliment_profile', 'compliment_writer', 'cool', 'elite', 'fans', 'friends', 'funny', 'hours', 'is_open', 'latitude', 'longitude', 'name', 'postal_code', 'stars', 'state', 'useful', 'user_id', 'yelping_since']


### Number of businesses that have both "food" and "restaurant" in their category:

In [51]:

# create a mask for restaurants

mask_restaurants = business['categories'].str.contains('Restaurants')

# create a mask for food
mask_food = business['categories'].str.contains('Food')

# apply both masks
restaurants_and_food = business[mask_restaurants & mask_food]

# number of businesses that have food and restaurant in their category
restaurants_and_food['categories'].count()

21311

### Even after taking buisnesses that have both food and restaurant in their categories, there are still irrelevant business categories in the data.

In [52]:
# an example row
restaurants_and_food.head(1)['categories'].values

array(['Specialty Food, Restaurants, Dim Sum, Imported Food, Food, Chinese, Ethnic Food, Seafood'],
      dtype=object)

### Thus, we manually identified additional categories that needed to be excluded specifically.

In [53]:
categoryDF = restaurants_and_food['categories'].apply(lambda x: x[1:-1].split(',')).apply(pd.Series)
uniqueCategories = pd.DataFrame(categoryDF.stack().str.strip().unique())

In [54]:
categoriesToRemove = ['Grocery','Drugstores','Convenience Stores','Beauty & Spas','Photography Stores & Services',
                      'Cosmetics & Beauty Supply','Discount Store','Fashion','Department Stores','Gas Stations',
                      'Automotive','Music & Video','Event Planning & Services','Mobile Phones','Health & Medical',
                      'Weight Loss Centers','Home & Garden','Kitchen & Bath','Jewelry',"Children's Clothing",
                      'Accessories','Home Decor','Bus Tours','Auto Glass Services','Auto Detailing',
                      'Oil Change Stations', 'Auto Repair','Body Shops','Car Window Tinting','Car Wash',
                      'Gluten-Free','Fitness & Instruction','Nurseries & Gardening','Wedding Planning',
                      'Embroidery & Crochet','Dance Schools','Performing Arts',
                      'Wholesale Stores','Tobacco Shops','Nutritionists','Hobby Shops','Pet Services',
                      'Electronics','Plumbing','Gyms','Yoga','Walking Tours','Toy Stores','Pet Stores',
                      'Pet Groomers','Vape Shops','Head Shops',
                      'Souvenir Shops','Pharmacy','Appliances & Repair','Wholesalers','Party Equipment Rentals',
                      'Tattoo','Funeral Services & Cemeteries','Sporting Goods','Dog Walkers',
                      'Pet Boarding/Pet Sitting','Scavenger Hunts','Contractors','Trainers', 
                      'Customized Merchandise', 'Dry Cleaning & Laundry', 'Art Galleries'
                      'Tax Law', 'Bankruptcy Law', 'Tax Services', 'Estate Planning Law', 
                      'Business Consulting', 'Lawyers', 'Pet Adoption', 'Escape Games', 
                      'Animal Shelters', 'Commercial Real Estate', 'Real Estate Agents', 
                      'Real Estate Services', 'Home Inspectors']

In [55]:

restaurants_df = restaurants_and_food[~restaurants_and_food['categories'].str.contains('|'.join(categoriesToRemove))]

In [56]:
restaurants_df.to_csv('data/restaurants.csv')
restaurants_df = pd.read_csv('data/restaurants.csv', encoding='ISO-8859-1', index_col=0)
restaurants_df = restaurants_df.dropna(axis=1)

In [57]:
restaurants_df.head()

Unnamed: 0,business_id,categories,city,is_open,latitude,longitude,name,review_count,stars,state
1637139,QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,1.0,43.605499,-79.652289,Emerald Chinese Restaurant,128,2.5,ON
1637149,1Dfx3zM-rW4n-31KeC8sJg,"Restaurants, Breakfast & Brunch, Mexican, Taco...",Phoenix,1.0,33.495194,-112.028588,Taco Bell,18,3.0,AZ
1637161,1RHY4K3BD22FK7Cfftn8Mg,"Sandwiches, Salad, Restaurants, Burgers, Comfo...",Pittsburgh,1.0,40.496177,-80.246011,Marathon Diner,35,4.0,PA
1637163,tstimHoMcYbkSC4eBA1wEg,"Mexican, Restaurants, Patisserie/Cake Shop, Fo...",Las Vegas,1.0,36.195615,-115.040529,Maria's Mexican Restaurant & Bakery,184,4.5,NV
1637167,NDuUMJfrWk52RA-H-OtrpA,"Juice Bars & Smoothies, Food, Restaurants, Fas...",Toronto,1.0,43.642889,-79.425429,Bolt Fresh Bar,57,3.0,ON


# Building 

In [58]:
#task 1
def get_restaurants(keyword):
    return restaurants_df.loc[restaurants_df['categories'].str.contains(keyword)]

In [59]:
get_restaurants('Japanese')

Unnamed: 0,business_id,categories,city,is_open,latitude,longitude,name,review_count,stars,state
1637481,jX9DocoiY4Bo9EUkaTSqvg,"Restaurants, Hawaiian, Chinese, Japanese, Poke...",Henderson,1.0,35.998220,-115.102246,China AAA,149,4.5,NV
1637904,Z3UBeP5EoKIXIDmEdA3JmQ,"Poke, Asian Fusion, Food, Restaurants, Hawaiia...",Toronto,0.0,43.664603,-79.412934,Big Tuna PokÃÂ© Bar,65,3.0,ON
1638149,OiZsIGscvwlL_0yhWmjJtw,"Food, Malaysian, Restaurants, Japanese",Solon,0.0,41.386068,-81.444693,Pacific East,11,3.5,OH
1638819,bZs7FJ7fHIJxVecfp1qvug,"Ramen, Restaurants, Japanese, Tapas/Small Plat...",Las Vegas,1.0,36.125381,-115.195980,Takopa,51,4.0,NV
1638985,YL1MmnQXbdqmO5zexUNvlg,"Food, Japanese, Cafes, Tea Rooms, Restaurants,...",Toronto,0.0,43.656809,-79.399300,Matcha Tea & Dessert,27,2.5,ON
1639220,a7mTbEi2N8Zd-r-8jlReww,"Japanese, Korean, Restaurants, Barbeque, Fast ...",Toronto,0.0,43.775210,-79.257088,Koryo Korean Barbeque,7,3.5,ON
1640369,XuZqb9sVliEQuz-RuESOlQ,"Food, Specialty Food, Restaurants, Japanese, S...",Mississauga,1.0,43.574781,-79.608195,Kanda Sushi Japanese Buffet,63,3.0,ON
1640669,jJ2ca6wqp_YwTsdAWzplkQ,"Poke, Sushi Bars, Restaurants, Japanese, Food,...",Las Vegas,0.0,36.045795,-115.244117,Ted's Sushi Burrito,98,4.5,NV
1642797,HKt_BDQJmQTc6WhU-3hyaA,"Fast Food, Japanese, Restaurants",Glendale,1.0,33.642100,-112.225533,Little Tokyo,16,4.0,AZ
1643060,gRMYm-CeEdjXtaQcVo8vsw,"Beer, Wine & Spirits, Food, Restaurants, Sushi...",Gilbert,1.0,33.304874,-111.756109,Ryu Sushi & Teppanyaki,234,4.0,AZ


In [60]:
#task 2
def get_reviews_for(rest_id):
    return df_reviewSample.loc[df_reviewSample['business_id']==rest_id]

In [61]:
get_reviews_for('19fdSca3MUoaGFNX2BrjTQ')


Unnamed: 0.1,Unnamed: 0,business_id,cool,date,funny,review_id,stars,text,useful,user_id
68133,679434,19fdSca3MUoaGFNX2BrjTQ,0,2018-06-09 14:13:04,0,bDhAERq8suVl40xdk00zKQ,3.0,I was super excited to go here but was slightl...,0,6aklT92xV0_duyUI9l84Pw
80631,804232,19fdSca3MUoaGFNX2BrjTQ,0,2018-05-10 18:09:25,0,TVzDDSiC6qQ2A2mjdwwV8w,5.0,This is exactly what this area needs! You want...,0,96s7b2PBjmkzEeQTzmKp7w
81639,814150,19fdSca3MUoaGFNX2BrjTQ,1,2018-01-13 15:48:11,0,MWyp6vwBVbReDbJAYQUCpw,5.0,I happened upon this place randomly and just H...,1,uBHRgjD2xy77JCVm7CwmOA
100279,1000397,19fdSca3MUoaGFNX2BrjTQ,0,2018-08-30 01:30:04,0,FgHiEau2OaB2sDkw4p15Dw,5.0,"Amazing! Quality ingredients, all sauces are h...",1,wlTd-6M1pkO7zs5CgopQOQ
100896,1006603,19fdSca3MUoaGFNX2BrjTQ,0,2018-07-12 19:27:38,0,3HhyFikgJUW-SXWnp-dUtQ,5.0,Incredible. I stopped in today and I'm blown a...,0,oOB27OMd4k5oPTayEPClag
120537,1201834,19fdSca3MUoaGFNX2BrjTQ,0,2018-01-30 19:27:07,0,tmT-drziC2y64jfXH3HTow,5.0,I just stopped in today as I was on my way bac...,0,YYKuS5-8NGiAPNMwX9nO7g
124197,1237683,19fdSca3MUoaGFNX2BrjTQ,0,2018-04-13 02:29:18,0,rWufRpJ9HWivqaa_5EW6uA,5.0,Stopped in hereafter a workout session in the ...,1,7GIcGERUfVvOx_TNYomGcA
131198,1308059,19fdSca3MUoaGFNX2BrjTQ,1,2018-01-06 21:55:16,0,NOe4hvLsJqHHkwE8brfrPQ,5.0,Not one negative thing to say! Service was qui...,3,pz69IIgBrek4B__VV1-HIA


In [62]:
import pdb

In [63]:
#task 3
def get_recommendations_for(user_id = None, business_id = None):
    if user_id is not None:
        bids = closest_businesses_to(user = user_id)
    else:
        bids = closest_businesses_to(business = business_id)
    bnames = [bus_invmap[b] for b in bids]
    return restaurants_df.set_index('business_id').loc[
        [b for b in bnames if b in restaurants_df['business_id'].values]]#.dropna()

In [64]:
get_recommendations_for(business_id = 'a7mTbEi2N8Zd-r-8jlReww')

Unnamed: 0_level_0,categories,city,is_open,latitude,longitude,name,review_count,stars,state
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a7mTbEi2N8Zd-r-8jlReww,"Japanese, Korean, Restaurants, Barbeque, Fast ...",Toronto,0.0,43.775210,-79.257088,Koryo Korean Barbeque,7,3.5,ON
50Anorn0DJXFhBr9a9_gHQ,"Burgers, Food Stands, Restaurants, Sandwiches,...",Madison,0.0,43.076578,-89.399486,Lakefront Cafe on Langdon,9,3.0,WI
JhqsE8IGN-UXwNli474w7g,"Chicken Shop, Fast Food, Restaurants, Chicken ...",Henderson,1.0,36.012400,-114.964000,KFC,35,2.0,NV
1_qcKXfm8H8zecygxHjeWw,"Restaurants, Chicken Shop, Chicken Wings, Fast...",Toronto,1.0,43.669056,-79.386501,KFC,20,2.5,ON
FbUzIBAj2ziuqL2grUgw6Q,"Pizza, Restaurants, Food",Lakewood,0.0,41.485168,-81.787163,Grab-N-Go Pizza Express,19,4.0,OH
RUbB6951mueG7dm6lIpkcg,"Burgers, Fast Food, Restaurants",Mississauga,1.0,43.636922,-79.718319,McDonald's,6,1.0,ON
Js5dRDxKpB8TqcO8tNPEvA,"Restaurants, Middle Eastern, Food",Toronto,1.0,43.661072,-79.382759,Lebanon Express,59,3.5,ON
wFU96ah70juScw822O4Zpw,"Sandwiches, Fast Food, Restaurants",Las Vegas,1.0,36.169666,-115.144171,Subway,10,2.5,NV
o10-_WmF3MF5uXwMWGg5VQ,"Burgers, Fast Food, Restaurants",Cleveland,1.0,41.521022,-81.534733,Wendy's,15,1.0,OH
h0fQdldYxuG1Utc_ZK1z0A,"Sandwiches, Coffee & Tea, Food, Restaurants, B...",Las Vegas,1.0,36.103287,-115.141945,Einstein Bros,12,2.5,NV


In [65]:
get_recommendations_for(user_id= '96s7b2PBjmkzEeQTzmKp7w')

Unnamed: 0_level_0,categories,city,is_open,latitude,longitude,name,review_count,stars,state
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3rY7LYNynXGRbCT3sLABHg,"Sandwiches, Fast Food, Restaurants",Chandler,1.0,33.307342,-111.873993,Subway,6,2.5,AZ
ia1Q47ANt6OZDMpQPzkqJQ,"Fast Food, Italian, Pizza, Restaurants, Chicke...",Tempe,1.0,33.364451,-111.913171,Pizza Hut,11,2.0,AZ
_kHEtMBckbAqxLUiyuQOuw,"Ethnic Food, Restaurants, Chinese, Specialty F...",Georgetown,1.0,43.650438,-79.914591,Choy's Chinese Restaurant,4,4.0,ON
Zs-Gk02SPk_CU7NgMAVTJw,"Coffee & Tea, Cafes, Restaurants, Food, Coffee...",Pittsburgh,1.0,40.488455,-79.882650,Coffee Tree Roasters,23,3.5,PA
6xowh_GdOsRrz9QWN6nMxA,"Food, Patisserie/Cake Shop, Desserts, Restaura...",Toronto,1.0,43.654923,-79.386795,Uncle Tetsu's Japanese Angel Cafe,168,3.5,ON
JO5_Frcbp9J732VNn-I3Ew,"American (New), Burgers, Diners, Restaurants, ...",Brunswick,1.0,41.237801,-81.806002,Steak Ã¢ÂÂn Shake,44,2.0,OH
5GuB6dY1ZWlb0D5zJJ08xg,"Sandwiches, Restaurants, Food",Medina,1.0,41.157469,-81.861339,Dave's Cosmic Subs,8,3.0,OH
xXtWrRKn8A708BfaNs1nZw,"Sandwiches, Juice Bars & Smoothies, Coffee & T...",Las Vegas,0.0,36.145220,-115.176651,"Coffee, Tea, or Me",19,4.0,NV
r_w-dmplsEDZpomAGUERzw,"American (New), Restaurants, Local Flavor, Bar...",Deerfield,1.0,43.030995,-89.150017,Nora's,3,2.0,WI
b6vi3Ub0tbGzGtG-pVgJfQ,"Bakeries, Restaurants, Food",Pittsburgh,1.0,40.456112,-79.896542,Dana's Bakery,9,4.5,PA


### Building Geo Table 

In [66]:
restaurantsGeo_df = restaurants_df.drop(['is_open', 'review_count'], 1)

In [68]:
lat = 44
lon = -70
distance = np.sqrt((restaurantsGeo_df['latitude'] - lat)**2 + (restaurantsGeo_df['longitude'] - lon)**2)

restaurantsGeo_df.loc[distance < 5]

Unnamed: 0,business_id,categories,city,latitude,longitude,name,stars,state
1637447,LF66oShCh30k5NOV6pPJTQ,"Coffee & Tea, Cafes, Restaurants, Bakeries, Ju...",MontrÃÂ©al,45.501661,-73.556605,Flyjin CafÃÂ©,4.5,QC
1637603,G7sVtpD6aqpuUB4F3LEG_w,"Breakfast & Brunch, Food, Restaurants, Coffee ...",MontrÃÂ©al,45.552487,-73.539985,Hoche CafÃÂ©,4.0,QC
1637691,uHSmRkzznq0GpWXWuvzuhw,"Restaurants, Fast Food, Noodles",Montreal,45.504364,-73.560705,Noobox,2.0,QC
1637750,BkIZ8UDA23qxnp_kR1Sv8Q,"Burgers, American (Traditional), Cafes, Breakf...",MontrÃÂ©al,45.507291,-73.565672,Eggspectation,4.0,QC
1637865,gTIColgrMq27NsSu60iq3g,"Food Trucks, Burgers, Food, Sandwiches, Restau...",MontrÃÂ©al,45.549945,-73.574223,Frite Alors,3.5,QC
1637959,RJdSwvgAd2TRdQw4iOsrag,"Food, Breakfast & Brunch, Ice Cream & Frozen Y...",MontrÃÂ©al,45.515404,-73.571156,CafÃÂ© Mezcal,4.0,QC
1638169,FGWc5hjTaOql-Rut_bEbVw,"Burgers, Breakfast & Brunch, Restaurants, Fast...",MontrÃÂ©al,45.504753,-73.572587,A&W,3.5,QC
1638243,Tlc317wt-t9OOOqBK4qxEA,"Bakeries, Food, Restaurants, Cafes, Coffee & Tea",MontrÃÂ©al,45.490172,-73.583981,Pekarna,3.5,QC
1638406,-n8jrgEb8_JlYbk4hXWFMg,"Restaurants, Bakeries, Coffee & Tea, Sandwiche...",MontrÃÂ©al,45.536646,-73.607713,BÃÂ©mol & Levian,4.0,QC
1638480,k2DR7Fu7bZOQpLPOEWP6zw,"Fast Food, Restaurants",Dollard-Des-Ormeaux,45.485130,-73.796286,Kojax Souflaki,4.0,QC


In [69]:
#task 4
def filter_by_location(df, lat, lon, max_distance):
    distance = np.sqrt((df['latitude'] - lat)**2 + (df['longitude'] - lon)**2)
    return df.loc[distance < max_distance]


In [70]:

    
    
    
def get_recommendations_for_locally(user_id = None, business_id = None, lat = 0, lon = 0, max_distance = 100):
    if user_id is not None:
        bids = closest_businesses_to(user = user_id)
    else:
        bids = closest_businesses_to(business = business_id)
    bnames = [bus_invmap[b] for b in bids]
    closest_businesses = restaurants_df.set_index('business_id').loc[
        [b for b in bnames if b in restaurants_df['business_id'].values]]#.dropna()
    filtered_by_location = filter_by_location(closest_businesses, lat, lon, max_distance)
    return filtered_by_location

In [71]:
def filter_by_keyword(df, keyword_include = None, keyword_exclude = None):
    if keyword_include is not None:
        #throw away restaurants who don't include keyword
        df = df.loc[df['categories'].str.contains(keyword_include)]
    if keyword_exclude is not None:
        #throw away restaurants who include keyword
        df = df.loc[~df['categories'].str.contains(keyword_exclude)]
    return df

In [74]:

def get_recommendations_for_locally_by_keyword(
        user_id = None, business_id = None, lat = 0, lon = 0, max_distance = None,
       keyword_include = None, keyword_exclude = None):
    if user_id is not None:
        bids = closest_businesses_to(user = user_id)
    else:
        bids = closest_businesses_to(business = business_id)
    bnames = [bus_invmap[b] for b in bids]
    df = restaurants_df.set_index('business_id').loc[
        [b for b in bnames if b in restaurants_df['business_id'].values]]#.dropna()
    if max_distance is not None:
        df = filter_by_location(df, lat, lon, max_distance)
    if keyword_include is not None or keyword_exclude is not None:
        df = filter_by_keyword(df, keyword_include, keyword_exclude)
    return df

In [76]:
get_recommendations_for_locally_by_keyword(
    user_id= '96s7b2PBjmkzEeQTzmKp7w', lat = 44, lon = -70, max_distance = 10,
keyword_include = 'Korean',keyword_exclude = 'Sushi')

Unnamed: 0_level_0,categories,city,is_open,latitude,longitude,name,review_count,stars,state
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
_kbxNHWv2Z6o-TJZKWmzhg,"Food, Desserts, Restaurants, Coffee & Tea, Korean",North York,1.0,43.77838,-79.414777,Cafe Bene,83,3.5,ON
v-z5Tw5ds2sEvtk3Kj5x4w,"Imported Food, Food, Korean, Restaurants, Ethn...",Toronto,1.0,43.779326,-79.417569,Yupdduk,29,3.0,ON
ZCYCNWA98718SZ0A4bHD7A,"Ice Cream & Frozen Yogurt, Food, Restaurants, ...",Toronto,1.0,43.80675,-79.288858,Dear & Fro,120,3.5,ON
dM8i3hklYuGE2xiVsQZ4Yg,"Coffee & Tea, Food, Bakeries, Korean, Desserts...",Vaughan,1.0,43.797964,-79.425003,Red Bean Waffle House,71,3.5,ON
G1Als7Z5qCruetTGyIAWuA,"Fast Food, Korean, Restaurants",Vaughan,1.0,43.798918,-79.421372,Wow Korean Restaurant,7,4.0,ON
JImufwPgbxHdGzoxKE5Ijw,"Desserts, Korean, Restaurants, Food, Ice Cream...",Toronto,1.0,43.670686,-79.391561,Cafe Bora,37,3.5,ON
xF2vsjsLBVDdjysSvbU7Cw,"Sandwiches, Korean, Coffee & Tea, Food, Restau...",Newmarket,1.0,44.05195,-79.456383,Cafe Hesed,8,4.0,ON
a7mTbEi2N8Zd-r-8jlReww,"Japanese, Korean, Restaurants, Barbeque, Fast ...",Toronto,0.0,43.77521,-79.257088,Koryo Korean Barbeque,7,3.5,ON
Y1jrsVAWK1bFC_s1CJmICw,"Korean, Restaurants, Specialty Food, Desserts,...",Toronto,1.0,43.774884,-79.414001,Go Topoki,90,4.0,ON


In [78]:

def get_recommendations_for_locally(user_id = None, business_id = None, lat = 0, lon = 0, max_distance = 100):
    if user_id is not None:
        bids = closest_businesses_to(user = user_id)
    else:
        bids = closest_businesses_to(business = business_id)
    bnames = [bus_invmap[b] for b in bids]
    closest_businesses = restaurants_df.set_index('business_id').loc[
        [b for b in bnames if b in restaurants_df['business_id'].values]]#.dropna()
    filtered_by_location = filter_by_location(closest_businesses, lat, lon, max_distance)
    return filtered_by_location

In [79]:
get_recommendations_for_locally(business_id = 'a7mTbEi2N8Zd-r-8jlReww', lat = 44, lon = -70, max_distance = 10)

Unnamed: 0_level_0,categories,city,is_open,latitude,longitude,name,review_count,stars,state
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
a7mTbEi2N8Zd-r-8jlReww,"Japanese, Korean, Restaurants, Barbeque, Fast ...",Toronto,0.0,43.775210,-79.257088,Koryo Korean Barbeque,7,3.5,ON
1_qcKXfm8H8zecygxHjeWw,"Restaurants, Chicken Shop, Chicken Wings, Fast...",Toronto,1.0,43.669056,-79.386501,KFC,20,2.5,ON
RUbB6951mueG7dm6lIpkcg,"Burgers, Fast Food, Restaurants",Mississauga,1.0,43.636922,-79.718319,McDonald's,6,1.0,ON
Js5dRDxKpB8TqcO8tNPEvA,"Restaurants, Middle Eastern, Food",Toronto,1.0,43.661072,-79.382759,Lebanon Express,59,3.5,ON
Ri_K4vaiRNQjlyutXgadog,"Restaurants, Chinese, Thai, Fast Food",Toronto,1.0,43.768698,-79.413163,California Thai,7,2.0,ON
YQHTkxWKgrrFLr8dPaXLVw,"Ethnic Food, Mexican, Tex-Mex, Specialty Food,...",Brampton,1.0,43.737682,-79.700232,Quesada Burritos & Tacos,17,4.0,ON
rmASvV7pnjBuSMmLPtp6SA,"Cheese Shops, Delis, Sandwiches, Restaurants, ...",North York,1.0,43.764539,-79.395919,Hungarian Honey Bear Delicatessen,6,2.5,ON
uWbmDMJcuxVgryNihVx3rw,"Cafes, Food, Coffee & Tea, Restaurants",Toronto,1.0,43.661713,-79.381104,Starbucks,10,3.5,ON
Bal9D1fzSgPvQPMzrvJF8g,"Specialty Food, Restaurants, Food, Imported Fo...",Toronto,1.0,43.645908,-79.387822,California Thai,6,3.5,ON
d9uahbi1_mGyMbiPOPGMmg,"Fast Food, Restaurants, Chicken Wings",Richmond Hill,1.0,43.893534,-79.439595,Popeyes Louisiana Kitchen,16,2.5,ON


In [80]:
get_recommendations_for_locally(user_id= '96s7b2PBjmkzEeQTzmKp7w')

Unnamed: 0_level_0,categories,city,is_open,latitude,longitude,name,review_count,stars,state
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
_kHEtMBckbAqxLUiyuQOuw,"Ethnic Food, Restaurants, Chinese, Specialty F...",Georgetown,1.0,43.650438,-79.914591,Choy's Chinese Restaurant,4,4.0,ON
Zs-Gk02SPk_CU7NgMAVTJw,"Coffee & Tea, Cafes, Restaurants, Food, Coffee...",Pittsburgh,1.0,40.488455,-79.882650,Coffee Tree Roasters,23,3.5,PA
6xowh_GdOsRrz9QWN6nMxA,"Food, Patisserie/Cake Shop, Desserts, Restaura...",Toronto,1.0,43.654923,-79.386795,Uncle Tetsu's Japanese Angel Cafe,168,3.5,ON
JO5_Frcbp9J732VNn-I3Ew,"American (New), Burgers, Diners, Restaurants, ...",Brunswick,1.0,41.237801,-81.806002,Steak Ã¢ÂÂn Shake,44,2.0,OH
5GuB6dY1ZWlb0D5zJJ08xg,"Sandwiches, Restaurants, Food",Medina,1.0,41.157469,-81.861339,Dave's Cosmic Subs,8,3.0,OH
r_w-dmplsEDZpomAGUERzw,"American (New), Restaurants, Local Flavor, Bar...",Deerfield,1.0,43.030995,-89.150017,Nora's,3,2.0,WI
b6vi3Ub0tbGzGtG-pVgJfQ,"Bakeries, Restaurants, Food",Pittsburgh,1.0,40.456112,-79.896542,Dana's Bakery,9,4.5,PA
t1bYPK5ccZgYzqsrGqe3Vw,"Food, Specialty Food, Chinese, Restaurants, As...",Kirkland,1.0,45.457942,-73.862744,Le Palais ImpÃÂ©rial,6,2.5,QC
hQYhybAvH3hbheZWvW41_A,"Delis, Restaurants, Bakeries, Food, Bagels, Br...",North York,1.0,43.755495,-79.440730,Bagel Plus,19,3.5,ON
GKEl7LNGUqJiQWNQ0vpH8Q,"Restaurants, Nightlife, Bars, American (Tradit...",Toronto,1.0,43.776756,-79.259008,Milestones Restaurants,71,3.0,ON
