In [1]:
import pandas as pd
import numpy as np
import time
import os
import re
import string

from nltk.tokenize import WordPunctTokenizer
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer

from sklearn.feature_extraction.text import CountVectorizer

In [2]:
pho_res = pd.read_csv("pho_res.csv")

In [3]:
pho_res.head()

Unnamed: 0.1,Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,100089,8729 N Central Ave,"{'Music': ""{'dj': False, 'background_music': F...",rjlAUHT8B6KhKAmkZqS1xw,"Bars, Mexican, Nightlife, Restaurants",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '15:0-22:0', ...",1,33.56379,-112.073486,Ladera Taverna y Cocina,85020.0,310,4.5,AZ
1,100402,"5310 E High St, Ste 101","{'RestaurantsAttire': ""u'casual'"", 'BusinessAc...",67X6JB3kvZ2B_p8pucgNXA,"Sushi Bars, American (Traditional), Hawaiian, ...",Phoenix,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,33.67681,-111.966607,Kona Grill,85054.0,363,3.5,AZ
2,100491,4041 N 15th Ave,"{'RestaurantsReservations': 'True', 'GoodForKi...",xzj6q131hJE59tBkhe3Gag,"Sandwiches, Mediterranean, American (New), Gre...",Phoenix,"{'Monday': '10:30-21:0', 'Tuesday': '10:30-21:...",1,33.494362,-112.091132,Crazy Jim's,85015.0,200,4.0,AZ
3,100515,534 W McDowell Rd,"{'HasTV': 'False', 'RestaurantsTakeOut': 'True...",rQtQbCcRPyoVR8-Qrti6pA,"Sandwiches, Salad, Fast Food, Delis, Restauran...",Phoenix,"{'Monday': '11:0-20:0', 'Tuesday': '11:0-20:0'...",1,33.466,-112.081829,Potbelly Sandwich Shop,85003.0,79,4.0,AZ
4,100560,4747 E Bell Rd,"{'BikeParking': 'True', 'RestaurantsAttire': ""...",len2nrEtgD9bjnoYSyLPjA,"Middle Eastern, Mediterranean, Restaurants, Gr...",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '11:0-21:0', ...",1,33.639783,-111.97826,Saba's Mediterranean Kitchen,85032.0,396,4.5,AZ


In [4]:
pho_res.isna().sum()

Unnamed: 0        0
address          15
attributes       21
business_id       0
categories        0
city              0
hours           151
is_open           0
latitude          0
longitude         0
name              0
postal_code       3
review_count      0
stars             0
state             0
dtype: int64

In pho_res dataframe we still have some null values however we don't need all the info  
address: we will use cordinates  
attributes: we might use them for filtering in the future for now it's okay  
hours: again doesn't matter much  
postal_code: doesn't matter at all

In [5]:
# Dropping Unnamed: 0 column
pho_res.drop("Unnamed: 0", axis=1, inplace=True)

In [6]:
pho_rev = pd.read_csv("pho_rev.csv")

In [7]:
pho_rev.head()

Unnamed: 0.1,Unnamed: 0,review_id,user_id,stars,text,business_id
0,0,6BnQwlxRn7ZuWdzninM9sQ,JSrP-dUmLlwZiI7Dp3PQ2A,3.0,I love chinese food and I love mexican food. W...,cHdJXLlKNWixBXpDwEGb_A
1,1,vzMkIQm34QWBCYaHdV-2mQ,jAVtSgE5L-Dt6_I5FIiVGA,5.0,Flat out fantastic! Have been here twice and ...,EIL41z-hvVCeYHqfA9PyWQ
2,2,JYdhCDyR6lYfN2qnSspLlQ,6kEFHccntnYMF_7cdqCcKg,5.0,First off food is DELICIOUS! By far my favorit...,yNPh5SO-7wr8HPpVCDPbXQ
3,3,W6VRRz7mVGZLsidyHelHwg,To4ATBBNlnC5gzj0dwXfuA,5.0,This place is quite possibly my favorite resta...,EgwGTDZ705TwudPJwAY0yQ
4,4,RKGH2ZQHyBNgJwQ84lKMFg,pHKISjytTmP0LrP952_32w,4.0,I'm not that familiar with Scottsdale so I'm j...,xS5HGqgk0KY2jFWU-l_nrA


In [8]:
pho_rev.isna().sum()

Unnamed: 0        0
review_id         0
user_id           0
stars             0
text           1306
business_id       0
dtype: int64

We need to drop the reviews with no text because we will be using for our analysis.

In [9]:
pho_rev = pho_rev.dropna()

In [10]:
# Dropping Unnamed: 0 column
pho_rev.drop("Unnamed: 0", axis=1, inplace=True)

In [11]:
# Found online !
def clean_text(text):
    ## Remove puncuation
    text = text.translate(string.punctuation)
    
    ## Convert words to lower case and split them
    text = text.lower().split()
    
    ## Remove stop words
    stops = set(stopwords.words("english"))
    text = [w for w in text if not w in stops and len(w) >= 3]
    
    text = " ".join(text)
    
    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ! ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ^ ", text)
    text = re.sub(r"\+", " + ", text)
    text = re.sub(r"\-", " - ", text)
    text = re.sub(r"\=", " = ", text)
    text = re.sub(r"'", " ", text)
    text = re.sub(r"(\d+)(k)", r"\g<1>000", text)
    text = re.sub(r":", " : ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    text = re.sub(r"\0s", "0", text)
    text = re.sub(r" 9 11 ", "911", text)
    text = re.sub(r"e - mail", "email", text)
    text = re.sub(r"j k", "jk", text)
    text = re.sub(r"\s{2,}", " ", text)    
    return text

In [12]:
# Cleaning the texts
pho_rev['text'] = pho_rev['text'].apply(clean_text)

In [13]:
# Reading labels.txt file. This file contains labels from photos
f=open("/Users/kerimbasbug/PycharmProjects/IronHack_Lessons/labels.txt", "r")
if f.mode == 'r':
    contents =f.read()

In [14]:
label_set = set(contents[1:-1].lower().replace("[", "").replace("]", "").replace('"', "").replace("'", "").split(", "))

In [15]:
label_set

{'burrito',
 'cuisine',
 'dish',
 'food',
 'gordita',
 'ingredient',
 'korean taco',
 'meatdish',
 'mission burrito',
 'produce',
 'sandwich wrap',
 'staple food',
 'taco'}

In [16]:
common = set(["cuisine", "food", "dish", "produce", "ingredient", "staple food", "korean taco"])

In [17]:
label_set = label_set - common

In [18]:
label_list = list(label_set)

In [19]:
label_list

['meatdish', 'burrito', 'sandwich wrap', 'mission burrito', 'taco', 'gordita']

In [20]:
pho_rev['text'][0]

'love chinese food love mexican food wrong couple things first things first place rice bowl kind place thought going diverse far menu goes mainly rice bowls get different kinds meats ordering little confusing first one employees helped got 2 - item bowl got jade chicken hengrenade chicken rice jerk also ordered jade chicken quesadilla side n ni am gonna admit place looks kinda dirty think arizona uses health department letter grade system like california does judge looked inside i would give c grade lol waited minutes finally got food took ate hotel room n nmmmm food alright jade chicken nothing special tasted like generic chinese fast food orange chicken sesame chicken variant hengrenade chicken although less spicier version jerk chicken still pretty spicy me warned jerk chicken super spicy sure ask sample restaurant ordering way spicy me n nthe jade chicken quesadilla decent nothing special imagine orange chicken tortilla cheese friend mine ordered jade chicken burrito confused pulle

In [21]:
def filter_by_photo(text):
    res = []
    for names in label_list:
        res.append(names in text)
    if True in res:
        return True
    else:
        return False

In [22]:
filter_by_photo(pho_rev['text'][0])

True

In [23]:
pho_rev['photo'] = pho_rev['text'].apply(filter_by_photo)

In [24]:
pho_rev.head()

Unnamed: 0,review_id,user_id,stars,text,business_id,photo
0,6BnQwlxRn7ZuWdzninM9sQ,JSrP-dUmLlwZiI7Dp3PQ2A,3.0,love chinese food love mexican food wrong coup...,cHdJXLlKNWixBXpDwEGb_A,True
1,vzMkIQm34QWBCYaHdV-2mQ,jAVtSgE5L-Dt6_I5FIiVGA,5.0,flat fantastic ! twice croissants bomb ca not ...,EIL41z-hvVCeYHqfA9PyWQ,False
2,JYdhCDyR6lYfN2qnSspLlQ,6kEFHccntnYMF_7cdqCcKg,5.0,first food delicious ! far favorite restaurant...,yNPh5SO-7wr8HPpVCDPbXQ,False
3,W6VRRz7mVGZLsidyHelHwg,To4ATBBNlnC5gzj0dwXfuA,5.0,place quite possibly favorite restaurant phoen...,EgwGTDZ705TwudPJwAY0yQ,False
4,RKGH2ZQHyBNgJwQ84lKMFg,pHKISjytTmP0LrP952_32w,4.0,i am familiar scottsdale i am guessing restaur...,xS5HGqgk0KY2jFWU-l_nrA,False


In [25]:
pho_rev.loc[pho_rev["photo"] == True].sort_values(by="stars", ascending=False)

Unnamed: 0,review_id,user_id,stars,text,business_id,photo
198257,y4NlP7hnE3FYg42FQa-UAQ,g3P-fvv3dv96TOZvDv-Eug,5.0,incredible absolutely incredible got mocha fre...,S_bG5WWbtNXhhY9Y0GTcSg,True
214865,Q8TzoC-BfE1ZN5fUKSwFNg,VF-LOaYiKhE4zbRQuVBvsw,5.0,loved server ! enjoyed pastor pollo tingy taco...,_iEl9sCLsvXEFHUWPvgsAg,True
215083,aajDkKklbYA7R8ehHc7YsQ,oTVQ05hwLF3HCu-cgkAk_w,5.0,favorite place mexican food every week since c...,daqYMX3Y4QR8xl-BUlYBPw,True
215053,HJ13OOR1hw0h4sO1XerCLg,R4jR9v4lOxh2ANT7npQMsw,5.0,labor day bit busy ate delicious chorizo burri...,5eK_pgro9_LxPYDoRVJnEA,True
215012,QSeM8MLP5b7-zXi-VH6wuA,NyANqDL4eKC3ew2d8gDcnw,5.0,place hands favorite place stop mexican food e...,Ynm7H4c2ll5vZEX3iLdxmw,True
214968,SqJFS-MLF3sB7t0lTtfTcg,VCZiqBtvDkzbksTzA-Q41w,5.0,went taco guild happy service provided waitres...,3C5Z9homtzkWHouH2BHXYQ,True
214911,zoXKsPcLb8tYd2NVnbORxA,DRlUwi4ENwjklKpX3TwokA,5.0,love queso guacamole red salsa here street cor...,3C5Z9homtzkWHouH2BHXYQ,True
214907,8gUjQL4OJe-IgCCAEVoevA,fmXpSKwBtAm7haVSW5iATQ,5.0,mmmmm quinoa tacos freaking awwwwwwesome ! ! !...,LelAlfuj5oVRF9CQdWLsNQ,True
214856,-sJ1ppiXbXruuvch9et-Zw,Eye6rSFj_klHwZxLLRlX9g,5.0,tried willie taco joint yet - you truly missin...,g384tK63QxXXm3UNhtGMuA,True
215098,F1HGlYX_nZXf3rUidRIgFA,BGfgtMpBUczc7AGxurCiLw,5.0,i am vegetarian dad one seemed really happy me...,QsJ6orXv_VB7xz-MpqHzSQ,True


In [26]:
pho_rev["photo"].value_counts()

False    326901
True      28957
Name: photo, dtype: int64

In [27]:
pho_rev_photo = pho_rev.loc[pho_rev["photo"] == True]

In [28]:
# Vectorizing the reviews
""" 
vectorizer_reviews = CountVectorizer(max_features=30 ,min_df = .01,max_df = .99, tokenizer = WordPunctTokenizer().tokenize)
vectorized_reviews = vectorizer_reviews.fit_transform(pho_rev['text'])
"""

vectorizer_reviews = CountVectorizer(max_features=30 ,min_df = .01,max_df = .99, tokenizer = WordPunctTokenizer().tokenize)
vectorized_reviews = vectorizer_reviews.fit_transform(pho_rev_photo['text'])

In [29]:
print(vectorized_reviews.shape)

(28957, 30)


In [30]:
' | '.join(vectorizer_reviews.get_feature_names()[:100])

'! | - | also | back | best | burrito | chicken | delicious | food | get | good | got | great | have | i | it | like | mexican | n | one | order | ordered | place | really | salsa | service | taco | tacos | time | would'

In [31]:
# Vectorizing the categories
vectorizer_categories = CountVectorizer(min_df = 1, max_df = 1., tokenizer = lambda x: x.split(', '))

res_ids = list(pho_rev_photo["business_id"])
pho_res = pho_res.loc[pho_res["business_id"].isin(res_ids)]

print(pho_res.shape)
vectorized_categories = vectorizer_categories.fit_transform(pho_res['categories'])

(999, 14)


In [32]:
print(vectorized_categories.shape)

(999, 204)


In [33]:
' | '.join(vectorizer_categories.get_feature_names()[:100])

'active life | afghan | african | airports | american (new) | american (traditional) | arcades | art classes | art galleries | art museums | arts & entertainment | asian fusion | automotive | bagels | bakeries | barbeque | bars | beauty & spas | beer | beer bar | beer gardens | beverage store | bikes | bowling | breakfast & brunch | breweries | brewpubs | british | buffets | burgers | cafes | cajun/creole | cambodian | cantonese | caribbean | caterers | cheesesteaks | chicken shop | chicken wings | chinese | cinema | club crawl | cocktail bars | coffee & tea | coffee roasteries | comedy clubs | comfort food | community service/non-profit | convenience stores | cosmetics & beauty supply | country clubs | creperies | cuban | custom cakes | dance clubs | day spas | delis | dentists | desserts | dim sum | diners | discount store | dive bars | djs | do-it-yourself food | donuts | eatertainment | education | ethiopian | ethnic food | event planning & services | farmers market | farms | fashi

In [34]:
from scipy import sparse
"""
businessxreview = sparse.csr_matrix(pd.get_dummies(pho_rev['business_id']).values)
"""
businessxreview = sparse.csr_matrix(pd.get_dummies(pho_rev_photo['business_id']).values)

In [35]:
# Shapes
print('restuarants x categories: \t', vectorized_categories.shape) 

print('restuarants x reviews: \t\t' , businessxreview.shape) 

print('reviews x words: \t\t', vectorized_reviews.shape)

restuarants x categories: 	 (999, 204)
restuarants x reviews: 		 (28957, 999)
reviews x words: 		 (28957, 30)


In [39]:
pho_res.sort_values(by = "stars", ascending=False)

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
675,918 N 5th St,"{'Caters': 'True', 'BusinessAcceptsCreditCards...",fEUsAP6znkde43BYNoxLvQ,"Venues & Event Spaces, Event Planning & Servic...",Phoenix,"{'Tuesday': '6:0-18:0', 'Wednesday': '6:0-18:0...",1,33.458334,-112.067683,Rise Craft Coffee + Eatery,85004.0,32,5.0,AZ
177,N 19th Ave W Thunderbird Rd,"{'DogsAllowed': 'True', 'OutdoorSeating': 'Tru...",o60ONA3T9gzCX6_8LjWB8w,"Restaurants, Hot Dogs, Mexican, Tacos",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '9:0-21:0', '...",1,33.611859,-112.099512,Emilio's Tacos & Hotdogs,85023.0,28,5.0,AZ
261,306 W Yavapai St,"{'BusinessAcceptsCreditCards': 'True', 'Outdoo...",mss-LiOfL1vtoNo3WoEoJw,"Restaurants, Tacos, American (New), Salad, Chi...",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '10:0-19:0', ...",1,33.436169,-112.077588,Kiss Pollos Estilo Sinaloa,85003.0,173,5.0,AZ
1660,811 W Deer Valley Rd,"{'Alcohol': ""u'none'"", 'RestaurantsGoodForGrou...",0jDvRJS-z9zdMgOUXgr6rA,"Food, Gluten-Free, Specialty Food, Caterers, R...",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '8:0-16:0', '...",1,33.683407,-112.084804,Sunfare,85027.0,31,5.0,AZ
1688,15820 N 35th Ave,"{'OutdoorSeating': 'True', 'WiFi': ""u'free'"", ...",WZVnmFXoE42coc4FmcbEDQ,"Bakeries, Food, Sandwiches, Caterers, Restaura...",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '7:30-18:0', ...",1,33.629494,-112.134310,My Gal Sal Bakery & Catering,85053.0,67,5.0,AZ
196,845 W Southern Ave,"{'Ambience': ""{'touristy': False, 'hipster': F...",y1-FeVRdx0GhgtZKWghKXA,"Restaurants, Mexican, Tacos",Phoenix,"{'Monday': '10:0-20:0', 'Tuesday': '10:0-20:0'...",1,33.391362,-112.083070,Tortas Manny,85041.0,39,5.0,AZ
1724,3333 E Van Buren St,"{'OutdoorSeating': 'False', 'GoodForKids': 'Tr...",E3qxMkbKxQR6Aca36c53GA,"Cafes, Salad, Restaurants, Sandwiches",Phoenix,"{'Monday': '7:0-15:0', 'Tuesday': '7:0-15:0', ...",1,33.450829,-112.009453,"Helpings Cafe, Market and Catering",85008.0,46,5.0,AZ
935,4301 E University Dr,"{'BusinessAcceptsCreditCards': 'True', 'Restau...",Xg5qEQiB-7L6kGJ5F4K3bQ,"Barbeque, Restaurants",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '11:0-16:0', ...",1,33.421877,-111.989344,Little Miss BBQ,85034.0,1936,5.0,AZ
1135,4044 S 16th St,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",9MVKjEMN5T59uzG1xoD2BQ,"Mexican, Restaurants",Phoenix,"{'Monday': '0:0-0:0', 'Tuesday': '11:0-21:0', ...",1,33.408767,-112.047381,Cocina Madrigal,85040.0,128,5.0,AZ
601,"814 E Union Hills Dr, Ste C-6","{'BusinessParking': ""{'garage': False, 'street...",Llm_iXzE0-8_XKwI2e4JdA,"Food Trucks, Restaurants, Pizza, Persian/Irani...",Phoenix,"{'Tuesday': '9:0-14:30', 'Wednesday': '9:0-14:...",1,33.656123,-112.062959,Saffron JAK,85024.0,133,5.0,AZ


In [42]:
# Picking a restaurant with high reviews  # wxzG81ZyWpBje_mU4aFLaw italian
business_choose = pho_res.sort_values(by = "stars", ascending=False).iloc[0]["business_id"]

In [43]:
# Getting restaurant reviews
"""
new_reviews = pho_rev.loc[pho_rev['business_id'] == business_choose, 'text']
"""
new_reviews = pho_rev_photo.loc[pho_rev_photo['business_id'] == business_choose, 'text']

In [46]:
# Printing restaurant reviews
print('\n'.join([r[:1000] for r in new_reviews.tolist()]))

husband absolutely love restaurant food delicious atmosphere unique fun live phoenix anymore hard get there anytime we are town try stop in chipotle cherry steak taco absolutely die for peeps could sing top mountain would 
tried place today duck taco well done shrimp taco also captured heart coffee braised beef flavor liking all atmosphere cool like use historical structure seating comfortable church way pews high bar stools place looking classic tacos amazing new twist em 
place pretty awesome d cor like nothing ever seen restaurant old church stain glass windows high cathedral ceilings sat high - top table bar area served quickly tried classic margarita disappointed all tasted fresh margarita - mix like enjoyed elote guacamole pastor tacos food memorable margaritas though sure means drinks amazing really
first time arizona stopped place lunch place inside old church kept beautiful stained glass artwork staff welcoming attentive made even enjoyable i am giving ordered street corn shav

In [45]:
# Getting restaurant categories
new_categories = pho_res.loc[pho_res['business_id'] == business_choose, 'categories']
new_categories

675    Venues & Event Spaces, Event Planning & Servic...
Name: categories, dtype: object

In [46]:
# Distance matrices

from scipy.spatial.distance import cdist
# Reviews
dists1 = cdist(vectorizer_reviews.transform(new_reviews).todense().mean(axis=0), 
              vectorized_reviews.T.dot(businessxreview).T.todense(), 
               metric='correlation')
# Categories
dists2 = cdist(vectorizer_categories.transform(new_categories).todense().mean(axis=0), 
              vectorized_categories.todense(), 
               metric='correlation')

In [47]:
new_categories

675    Venues & Event Spaces, Event Planning & Servic...
Name: categories, dtype: object

In [48]:
dists1

array([[7.30440367e-01, 7.57418239e-01, 2.79463154e-01, 6.98503446e-01,
        4.71891756e-01, 4.65938963e-01, 2.05349445e-01, 3.53464005e-01,
        6.82819539e-01, 8.97680329e-01, 4.11750278e-01, 7.49376232e-01,
        4.70504079e-01, 8.02572257e-01, 8.91575839e-01, 3.89422775e-01,
        8.74754801e-01, 7.68723360e-01, 3.45710457e-01, 4.23296388e-01,
        9.39434138e-01, 2.50108769e-01, 9.33065215e-01, 3.76878415e-01,
        3.72478729e-01, 5.40788575e-01, 7.78683255e-01, 2.53994052e-01,
        7.52381692e-01, 1.77578656e-01, 6.21729415e-01, 5.75238578e-01,
        3.31349355e-01, 2.62079193e-01, 8.35369857e-01, 8.42470805e-01,
        8.06226662e-01, 8.78894689e-01, 1.31287560e+00, 6.98809807e-01,
        3.48789531e-01, 9.55663642e-01, 4.19658666e-01, 5.57741212e-01,
        2.06483546e-01, 3.83389968e-01, 4.75928692e-01, 7.20414647e-01,
        6.33423096e-01, 4.21215902e-01, 9.68592061e-01, 4.55720206e-01,
        2.81347091e-01, 2.82872212e-01, 4.60066747e-01, 2.682208

In [49]:
dists2

array([[0.81536276, 0.83995965, 0.88570076, 0.83995965, 0.72285801,
        0.72285801, 0.81536276, 0.83995965, 0.7802413 , 0.7802413 ,
        0.83995965, 0.72285801, 0.90514649, 0.87343178, 0.81536276,
        0.63639488, 0.39685169, 0.7802413 , 0.60610723, 0.7802413 ,
        0.71404958, 0.72285801, 0.51515152, 0.87343178, 0.88570076,
        0.72285801, 0.57030551, 0.87343178, 0.55466739, 0.65232614,
        0.73623252, 0.81536276, 0.81536276, 0.83995965, 0.60610723,
        0.51515152, 0.81536276, 0.72285801, 0.60610723, 0.43729604,
        0.7802413 , 0.71404958, 0.72285801, 0.7802413 , 0.65232614,
        0.72285801, 0.72285801, 0.68686869, 0.46469263, 0.51515152,
        0.72285801, 0.72285801, 0.81536276, 0.72285801, 0.81536276,
        0.68686869, 0.71404958, 0.72285801, 0.71404958, 0.63639488,
        0.7802413 , 0.72285801, 0.81536276, 0.87343178, 0.81536276,
        0.65232614, 0.83995965, 0.55466739, 0.88570076, 0.46469263,
        0.72285801, 0.83995965, 0.85858586, 0.81

In [50]:
# Merging dist1 and dist2
dists_together = np.vstack([dists1.ravel(), dists2.ravel()]).T

In [51]:
dists_together

array([[0.73044037, 0.81536276],
       [0.75741824, 0.83995965],
       [0.27946315, 0.88570076],
       ...,
       [0.82286049, 0.51515152],
       [0.93258482, 0.60610723],
       [0.70292797, 0.71404958]])

In [52]:
dists = dists_together.mean(axis=1)

In [53]:
# Getting closest 10 restaurants
closest = dists.argsort().ravel()[:40]
"""closest = dists1.argsort().ravel()[:20]"""

'closest = dists1.argsort().ravel()[:20]'

In [54]:
# Gives the indices of restaurants
closest

array([693, 695, 900, 815, 989, 304, 877, 245, 462,  69, 952, 288, 575,
       442, 147, 450, 628, 738, 531, 716, 619, 515, 993,  29, 740, 250,
       727, 382, 762, 189, 688,  44, 258, 761, 373, 603, 647, 650, 331,
       609])

In [55]:
# My favorite restaurant
pho_res.loc[pho_res['business_id'] == business_choose, ['business_id', 'categories', 'name', 'stars']]

Unnamed: 0,business_id,categories,name,stars
675,fEUsAP6znkde43BYNoxLvQ,"Venues & Event Spaces, Event Planning & Servic...",Rise Craft Coffee + Eatery,5.0


In [56]:
# Recommendation
recommend = pho_res.loc[pho_res['business_id'].isin(pho_res['business_id'].iloc[closest]), ['business_id', 'categories', 'name', 'stars', 'latitude', 'longitude']]

In [57]:
rec = recommend.sort_values(by = "stars", ascending=False).head()

In [58]:
rec

Unnamed: 0,business_id,categories,name,stars,latitude,longitude
47,KDh82ODr4adz9ck6ltQ_YQ,"Vegan, Food, Restaurants, American (New), Vege...",Tastybox,5.0,33.455411,-112.064847
580,IZFODW0fifph9urOnl6pGQ,"Food, Cafes, Coffee Roasteries, Coffee & Tea, ...",King Coffee Roastery,4.5,33.655624,-112.011827
1412,4RV97YE8VEw05tu0WO425g,"Restaurants, Food Trucks, Food, Mexican",La Frontera,4.5,33.450534,-112.047722
1193,MeSrAvgBNWHP2Js2--b6zw,"Restaurants, Coffee & Tea, Creperies, Ice Crea...",P.A.K Crepes & Coffee,4.5,33.495506,-112.222148
73,tU90S8KAgeg_2dZQeYfg0w,"Food, Bagels, Breakfast & Brunch, Donuts, Rest...",Sunrise Donuts,4.5,33.378942,-112.134609


In [59]:
lat = list(rec.latitude)
lon = list(rec.longitude)

In [60]:
lat

[33.4554106, 33.655624269200004, 33.4505338, 33.4955059, 33.3789419421]

In [61]:
lon

[-112.0648466,
 -112.01182723049999,
 -112.0477217,
 -112.2221483,
 -112.1346091984]

In [62]:
import folium
import numpy as np
import pandas as pd
from collections import namedtuple

def get_arrows(locations, color='blue', size=6, n_arrows=3):
    Point = namedtuple('Point', field_names=['lat', 'lon'])
    
    # creating point from our Point named tuple
    p1 = Point(locations[0][0], locations[0][1])
    p2 = Point(locations[1][0], locations[1][1])
    
    # getting the rotation needed for our marker.  
    # Subtracting 90 to account for the marker's orientation
    # of due East(get_bearing returns North)
    rotation = get_bearing(p1, p2) - 90
    
    # get an evenly space list of lats and lons for our arrows
    # note that I'm discarding the first and last for aesthetics
    # as I'm using markers to denote the start and end
    arrow_lats = np.linspace(p1.lat, p2.lat, n_arrows + 2)[1:n_arrows+1]
    arrow_lons = np.linspace(p1.lon, p2.lon, n_arrows + 2)[1:n_arrows+1]
    
    arrows = []
    
    #creating each "arrow" and appending them to our arrows list
    for points in zip(arrow_lats, arrow_lons):
        arrows.append(folium.RegularPolygonMarker(location=points, 
                      fill_color=color, number_of_sides=3, 
                      radius=size, rotation=rotation))
    return arrows
def get_middle(p1,p2):
    return [(p1[0]+p2[0])/2, (p1[1]+p2[1])/2]

def get_bearing(p1, p2):    
    long_diff = np.radians(p2.lon - p1.lon)
    
    lat1 = np.radians(p1.lat)
    lat2 = np.radians(p2.lat)
    
    x = np.sin(long_diff) * np.cos(lat2)
    y = (np.cos(lat1) * np.sin(lat2) 
        - (np.sin(lat1) * np.cos(lat2) 
        * np.cos(long_diff)))
    bearing = np.degrees(np.arctan2(x, y))
    
    # adjusting for compass bearing
    if bearing < 0:
        return bearing + 360
    return bearing

p1 = []
p1.append(lat[0])
p1.append(lon[0])

p2 = []
p2.append(lat[1])
p2.append(lon[1])

p3 = []
p3.append(lat[2])
p3.append(lon[2])

p4 = []
p4.append(lat[3])
p4.append(lon[3])
p5 = []
p5.append(lat[4])
p5.append(lon[4])

# using omaha coordinates 
center_lat = 33.211898
center_lon = -111.394936

lats = np.random.uniform(low=center_lat - .25, high=center_lat + .25, size=(2,))
lons = np.random.uniform(low=center_lon - .25, high=center_lon + .25, size=(2,))

some_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)
folium.Marker(location=p1,  icon=folium.Icon(color='blue')).add_to(some_map)
#icon = folium.features.CustomIcon(icon_url,icon_size=(50, 50))
folium.Marker(location=p2, icon=folium.Icon(color='blue')).add_to(some_map)
folium.Marker(location=p3, icon=folium.Icon(color='blue')).add_to(some_map)
folium.Marker(location=p3, icon=folium.Icon(color='blue')).add_to(some_map)
folium.Marker(location=p4, icon=folium.Icon(color='blue')).add_to(some_map)
folium.Marker(location=p5, icon=folium.Icon(color='blue')).add_to(some_map)

#folium.PolyLine(locations=[p1, p2], color='red').add_to(some_map)



"""
arrows = get_arrows(locations=[p1, p2], n_arrows=1)
for arrow in arrows:
    arrow.add_to(some_map)
    
"""

some_map