In [2]:
import requests
import json
import io
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from IPython.display import clear_output
from bokeh.io import show, output_file, save, output_notebook
from bokeh.plotting import figure
from bokeh.palettes import Cividis256
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource
from bokeh.resources import CDN
from bokeh.embed import file_html

### Get neighborhood boundaries 

In [3]:
# NYC neighborhood boundaries (GeoJSON) published by insideairbnb.com, 2019-06-02 snapshot
neigh_geojson_url = 'http://data.insideairbnb.com/united-states/ny/new-york-city/2019-06-02/visualisations/neighbourhoods.geojson'

# read the polygons into a GeoDataFrame and show its (rows, columns)
nycNeigh = gpd.read_file(neigh_geojson_url)
nycNeigh.shape

(233, 3)

In [4]:
# centroid of every neighborhood polygon (used later as marker / popup position)
neigh_centroids = nycNeigh.geometry.centroid
nycNeigh['neighCenter'] = neigh_centroids

# free-text location sent to Yelp: "<neighbourhood>, <borough>, NY"
name_and_borough = nycNeigh['neighbourhood'].str.cat(nycNeigh['neighbourhood_group'], sep=', ')
nycNeigh['search_query'] = name_and_borough + ', NY'

nycNeigh.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry,neighCenter,search_query
0,Bayswater,Queens,"(POLYGON ((-73.76670799999999 40.614911, -73.7...",POINT (-73.769503479281 40.61740017645801),"Bayswater, Queens, NY"
1,Allerton,Bronx,"(POLYGON ((-73.848597 40.87167, -73.845823 40....",POINT (-73.8599842270222 40.86472735568424),"Allerton, Bronx, NY"
2,City Island,Bronx,"(POLYGON ((-73.782822 40.843919, -73.782572 40...",POINT (-73.78665350533366 40.84693161668218),"City Island, Bronx, NY"
3,Ditmars Steinway,Queens,"(POLYGON ((-73.90160299999999 40.76777, -73.90...",POINT (-73.90600334689506 40.77780367906778),"Ditmars Steinway, Queens, NY"
4,Ozone Park,Queens,"(POLYGON ((-73.83754399999999 40.691364, -73.8...",POINT (-73.84636439104509 40.67908264800706),"Ozone Park, Queens, NY"


### Show on map

In [5]:
# basemap centred on New York City, given as (latitude, longitude)
NYC_COORDINATES = (40.7128, -74.0060)
nycMap = folium.Map(location=NYC_COORDINATES, tiles='cartodbpositron', zoom_start=10)


# add neighborhood boundaries as thin gray outlines
folium.GeoJson(
    nycNeigh['geometry'],
    name='geojson',
    style_function=lambda feature: {
        'fillColor': None,
        'color' : 'gray',
        'weight' : 1,
        'fillOpacity' : 0.3,
        }).add_to(nycMap)


# add one circle per neighborhood centroid, with the neighborhood name as popup.
# Names and centroids are paired positionally with zip() instead of looking the
# name up by label with an enumerate() counter, so the loop does not depend on
# the frame having a 0..n-1 index.
for neigh_name, center in zip(nycNeigh['neighbourhood'], nycNeigh['neighCenter']):
    folium.Circle(radius=200,
                  location=[center.y, center.x],  # point x is longitude, y is latitude
                  popup=neigh_name,
                  color='crimson',
                  fill=True).add_to(nycMap)


# last expression: render the map inline
nycMap

Map showing the boundaries of each neighborhood and the neighborhood centroid.

In [6]:
# Save the neighborhood map to an HTML file next to the notebook.
nycMap.save('nyc_neighborhoods.html')

### Crawl Yelp API

In [7]:
def getYelpData(apiKey, businessCat, location, timeout=10):

    """Calls the Yelp business search API and returns the businesses found in the given location.

    Args:
        apiKey (str): Yelp API key, sent as a Bearer token.
        businessCat (str): Business category for the request (sent as the `term` parameter).
        location (str): Location of interest (e.g. East Village, NY).
        timeout (float): Seconds to wait for the server before the request is aborted.

    Returns:
        dataframe : One row per entry of the response's `businesses` list, with every field
        returned for it (e.g. name, rating, address, etc.). Empty when the response
        contains no businesses.

    Raises:
        requests.HTTPError: If the API answers with an error status (4xx / 5xx).

    """

    # authorize API call
    headers = {'Authorization': 'Bearer %s' % apiKey}

    # API url
    url = 'https://api.yelp.com/v3/businesses/search'

    # set request parameters (up to 50 results per call)
    params = {'term': businessCat, 'location': location, 'limit': 50}

    # make the request; the timeout keeps a stalled connection from hanging the crawl
    req = requests.get(url, params=params, headers=headers, timeout=timeout)

    # fail with a clear HTTP error instead of a KeyError on 'businesses' further down
    req.raise_for_status()

    # read request as json
    j = req.json()

    # convert to dataframe
    df = pd.DataFrame(j.get('businesses', []))

    return df

In [8]:
# Yelp API key. Get yours here: https://www.yelp.com/developers/v3/manage_app
# The file is read as plain text inside a context manager so the handle is closed,
# and surrounding whitespace (e.g. a trailing newline) is stripped because the key
# is placed verbatim into the Authorization header.
with open("yelp-api-key.json", "r") as f:
    apiKey = f.read().strip()

#### Request restaurant info for all neighborhoods

In [9]:
# Collect one frame per neighborhood and concatenate once at the end: calling
# pd.concat inside the loop re-copies every previously fetched row on each iteration.
n_queries = len(nycNeigh['search_query'])
frames = []

for idx, neighbourhood in enumerate(nycNeigh['search_query']):

    # call API for a neighborhood
    yelp_data = getYelpData(apiKey, 'restaurants', neighbourhood)
    yelp_data['neighbourhood'] = neighbourhood  # tag each row with the query it came from
    frames.append(yelp_data)

    # progress report on every 20th request
    if idx % 20 == 0:
        clear_output()
        print ("Processed %s / %s examples" %(idx,str(n_queries)))

# put all data together; each frame keeps its own row labels, as before.
# pd.concat raises on an empty list, so fall back to an empty frame.
data = pd.concat(frames) if frames else pd.DataFrame()

Processed 220 / 233 examples


In [10]:
# (rows, columns) of the combined Yelp results
data.shape

(11535, 17)

In [11]:
# Preview the first rows of the combined Yelp results
data.head()

Unnamed: 0,alias,categories,coordinates,display_phone,distance,id,image_url,is_closed,location,name,phone,price,rating,review_count,transactions,url,neighbourhood
0,dredsurfer-grill-far-rockaway-2,"[{'alias': 'tradamerican', 'title': 'American ...","{'latitude': 40.594113, 'longitude': -73.750546}",(718) 471-1691,1853.71395,9EiSjoAmpySrzJjEQzwP7w,https://s3-media2.fl.yelpcdn.com/bphoto/rFCNBo...,False,"{'address1': '16-27 Seagirt Blvd', 'address2':...",DredSurfer Grill,17184711691,$$,4.0,10,"[pickup, delivery]",https://www.yelp.com/biz/dredsurfer-grill-far-...,"Bayswater, Queens, NY"
1,mamas-cooking-far-rockaway,"[{'alias': 'caribbean', 'title': 'Caribbean'},...","{'latitude': 40.603509, 'longitude': -73.753601}",(718) 327-3720,1135.135763,B2d_xUEt9QoC2DluIbeb3g,https://s3-media2.fl.yelpcdn.com/bphoto/BeaOfz...,False,"{'address1': '1044 Beach 21st St', 'address2':...",Mama's Cooking,17183273720,,4.5,3,[],https://www.yelp.com/biz/mamas-cooking-far-roc...,"Bayswater, Queens, NY"
2,dred-surfer-bar-far-rockaway,"[{'alias': 'caribbean', 'title': 'Caribbean'},...","{'latitude': 40.594113, 'longitude': -73.750547}",(646) 463-3271,1853.651289,eXSiOni-X1oUsxFPWypfrA,https://s3-media3.fl.yelpcdn.com/bphoto/JzhWSL...,False,"{'address1': '16-27 Seagirt Blvd', 'address2':...",Dred Surfer Bar,16464633271,,5.0,1,[],https://www.yelp.com/biz/dred-surfer-bar-far-r...,"Bayswater, Queens, NY"
3,tacos-y-mas-mexican-restaurant-far-rockaway,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...","{'latitude': 40.6029614, 'longitude': -73.7507...",(718) 327-3001,1384.4963,gMMwaw61b722ngFem5abhA,https://s3-media4.fl.yelpcdn.com/bphoto/CDkcSG...,False,"{'address1': '1831 Mott Ave', 'address2': '', ...",Tacos Y Mas Mexican Restaurant,17183273001,$,3.5,53,"[pickup, delivery]",https://www.yelp.com/biz/tacos-y-mas-mexican-r...,"Bayswater, Queens, NY"
4,marios-far-rockaway-2,"[{'alias': 'chicken_wings', 'title': 'Chicken ...","{'latitude': 40.6028743817863, 'longitude': -7...",(718) 471-5103,1406.522481,BQxIP160lPc1ido3TKbA9g,https://s3-media1.fl.yelpcdn.com/bphoto/ph4fgx...,False,"{'address1': '1813 Mott Ave', 'address2': '', ...",Mario's,17184715103,$,4.5,10,"[pickup, delivery]",https://www.yelp.com/biz/marios-far-rockaway-2...,"Bayswater, Queens, NY"


In [12]:
# Persist the raw results without the row index.
# NOTE(review): nested columns (categories, coordinates, location) are presumably
# written as their string representation — confirm before reloading this file.
data.to_csv(r'yelp_ratings.csv', index=False)

In [13]:
# Mean rating per search query ("<neighbourhood>, <borough>, NY"), highest first
data.groupby('neighbourhood')['rating'].mean().sort_values(ascending=False)

neighbourhood
Sunnyside, Queens, NY                        4.380000
Lower East Side, Manhattan, NY               4.360000
Greenpoint, Brooklyn, NY                     4.340000
West Village, Manhattan, NY                  4.300000
Bedford-Stuyvesant, Brooklyn, NY             4.290000
Ditmars Steinway, Queens, NY                 4.280000
Flushing, Queens, NY                         4.270000
Bushwick, Brooklyn, NY                       4.260000
NoHo, Manhattan, NY                          4.260000
Whitestone, Queens, NY                       4.260000
South Slope, Brooklyn, NY                    4.250000
Williamsburg, Brooklyn, NY                   4.250000
Chinatown, Manhattan, NY                     4.250000
Navy Yard, Brooklyn, NY                      4.250000
East Elmhurst, Queens, NY                    4.250000
Ridgewood, Queens, NY                        4.240000
East Village, Manhattan, NY                  4.240000
Astoria, Queens, NY                          4.240000
Two Bridges, M

In [14]:
# Mean rating per search query, highest first. Computed once and reused for both
# columns instead of running the same groupby/sort pipeline twice.
mean_rating = data.groupby('neighbourhood')['rating'].mean().sort_values(ascending=False)

# 'neighborhood' holds the full query string ("<neighbourhood>, <borough>, NY")
avg_ratings = pd.DataFrame({'neighborhood': mean_rating.index,
                            'avg_rating': mean_rating.values})

### All together
Merge the Yelp data with the neighborhood GeoJSON data

In [15]:
# Attach the neighborhood columns (name, borough, geometry, centroid) to each average
# rating by matching the rating's query string against nycNeigh['search_query'].
yelp_ratings = avg_ratings.merge(nycNeigh, left_on='neighborhood', right_on='search_query')

In [16]:
# Preview the merged ratings + geometry frame
yelp_ratings.head()

Unnamed: 0,neighborhood,avg_rating,neighbourhood,neighbourhood_group,geometry,neighCenter,search_query
0,"Sunnyside, Queens, NY",4.38,Sunnyside,Queens,"(POLYGON ((-73.91189 40.748715, -73.911642 40....",POINT (-73.92012593552306 40.74020523800707),"Sunnyside, Queens, NY"
1,"Lower East Side, Manhattan, NY",4.36,Lower East Side,Manhattan,"(POLYGON ((-73.97489299999999 40.715152, -73.9...",POINT (-73.98461604379614 40.71608729315376),"Lower East Side, Manhattan, NY"
2,"Greenpoint, Brooklyn, NY",4.34,Greenpoint,Brooklyn,"(POLYGON ((-73.96226299999999 40.732916, -73.9...",POINT (-73.94607148318792 40.72762983871867),"Greenpoint, Brooklyn, NY"
3,"West Village, Manhattan, NY",4.3,West Village,Manhattan,"(POLYGON ((-74.00881 40.742378, -73.996797 40....",POINT (-74.0058486584057 40.73483919051645),"West Village, Manhattan, NY"
4,"Bedford-Stuyvesant, Brooklyn, NY",4.29,Bedford-Stuyvesant,Brooklyn,"(POLYGON ((-73.941149 40.700281, -73.905479 40...",POINT (-73.9382015250131 40.68706821542903),"Bedford-Stuyvesant, Brooklyn, NY"


### Choropleth

In [17]:
# Same insideairbnb.com neighborhood GeoJSON as loaded above, passed to folium by URL
url = 'http://data.insideairbnb.com/united-states/ny/new-york-city/2019-06-02/visualisations'
neigh_geo = f'{url}/neighbourhoods.geojson'


# Initialize the map
m = folium.Map(location=NYC_COORDINATES, tiles='cartodbpositron', zoom_start=10)


# Define choropleth: polygons are matched to ratings by neighbourhood name and
# colored over 10 evenly spaced bin edges between the lowest and highest average rating
choropleth = folium.Choropleth(geo_data=neigh_geo,
    data=yelp_ratings,
    bins=np.linspace(yelp_ratings.avg_rating.min(), yelp_ratings.avg_rating.max(), 10),
    name='yelp_ratings_choropleth',
    columns=['neighbourhood', 'avg_rating'],
    key_on='feature.properties.neighbourhood',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2).add_to(m)



# Find the neighborhood with highest average rating
maxRating = gpd.GeoDataFrame(yelp_ratings[yelp_ratings.avg_rating==yelp_ratings.avg_rating.max()])
maxRating.crs = {'init' :'epsg:4326'}
# drop the centroid column so only the polygon geometry remains
# (presumably because the frame is serialized to GeoJSON by folium — TODO confirm)
del maxRating['neighCenter']

# highlight the top-rated neighborhood in red on top of the choropleth
folium.GeoJson(
    data=maxRating,
    name='highest_rated_neighborhood',
    style_function=lambda feature: {
        'fillColor': 'firebrick',
        'color' : 'gray',
        'weight' : 1,
        'fillOpacity' : 0.8,
        }
    ).add_to(m)


# hacky way to show popups in choropleth: a circle at each centroid carries the popup.
# Ratings are looked up in a name -> rating Series built once (first row wins for a
# repeated name), instead of filtering yelp_ratings inside the loop. A neighborhood
# without a rating row gets 'n/a' rather than raising IndexError on `.values[0]`.
rating_by_name = yelp_ratings.drop_duplicates('neighbourhood').set_index('neighbourhood')['avg_rating']

for neigh_name, loc in zip(nycNeigh['neighbourhood'], nycNeigh['neighCenter']):
    rating = rating_by_name.get(neigh_name)
    rating_text = 'n/a' if rating is None else str(rating)
    folium.Circle(radius=1000,
                  location=[loc.y, loc.x],  # point x is longitude, y is latitude
                  popup=neigh_name + '\n AvgRating: ' + rating_text,
                  color=None,
                  fill_opacity=0,
                  line_opacity=0,
                  fill=True).add_to(m)


# To control the layers shown
#folium.LayerControl().add_to(m)

m

In [18]:
# Save the choropleth map to an HTML file next to the notebook.
m.save('yelp_choropleth.html')

### Cuisine with highest rating

In [19]:
# Work on a copy so changes made to data2 do not affect `data`
data2 = data.copy()

In [20]:
# The same business can be returned for several neighborhood queries:
# keep the first row per Yelp business id and report the shape before / after.
print(data2.shape)
data2 = data2.drop_duplicates(subset=['id'], keep='first')
print(data2.shape)

(11535, 17)
(4548, 17)


In [21]:
# extract cuisine label: the alias of the first entry in each business's category list.
# Businesses whose category list is empty or missing get None instead of raising on x[0].
data2['cuisine'] = data2.categories.map(
    lambda cats: cats[0]['alias'] if isinstance(cats, (list, tuple)) and cats else None
)

In [22]:
# find top 30 cuisines in data, ranked by number of rated businesses.
# The ranking is computed once and reused for both the printout and the filter.
top30_cuisines = data2.groupby('cuisine')['rating'].count().sort_values(ascending=False).head(30).index
print(top30_cuisines.values)

# keep only the businesses whose cuisine is one of those 30
top30 = data2[data2.cuisine.isin(top30_cuisines)]

['italian' 'pizza' 'mexican' 'chinese' 'newamerican' 'caribbean'
 'japanese' 'seafood' 'bars' 'tradamerican' 'thai' 'latin' 'mediterranean'
 'delis' 'sushi' 'spanish' 'indpak' 'korean' 'burgers' 'french' 'greek'
 'bbq' 'steak' 'breakfast_brunch' 'ramen' 'vietnamese' 'cafes' 'wine_bars'
 'cocktailbars' 'diners']


In [23]:
# Mean rating for each of the 30 most common cuisines, highest first
top30.groupby('cuisine')['rating'].mean().sort_values(ascending=False)

cuisine
cocktailbars        4.243902
wine_bars           4.238095
korean              4.185714
french              4.174603
greek               4.125000
mediterranean       4.115385
cafes               4.090909
indpak              4.054054
ramen               4.053191
newamerican         4.036649
breakfast_brunch    4.009804
thai                4.000000
italian             4.000000
mexican             3.989407
japanese            3.988806
vietnamese          3.978723
bars                3.938462
sushi               3.935897
pizza               3.850904
delis               3.848101
caribbean           3.831034
bbq                 3.774510
seafood             3.772388
chinese             3.747826
latin               3.727723
steak               3.715686
burgers             3.636364
spanish             3.614865
tradamerican        3.588462
diners              3.576923
Name: rating, dtype: float64

#### Interactive bar chart

In [29]:
# variables to plot: mean rating per cuisine, highest first
# (computed once and reused for both the labels and the values)
cuisine_means = top30.groupby('cuisine')['rating'].mean().sort_values(ascending=False)
cuisines = cuisine_means.index.values
cuisines_ratings = cuisine_means.values


# set custom colormap: every 8th color of Cividis256, one per bar.
# Sized to the number of cuisines so all ColumnDataSource columns have the same length.
cividis30 = list(Cividis256[::8][:len(cuisines)])


# left edge of the bars and of the x axis
rating_axis_min = 3.5


# define figure source
source = ColumnDataSource(data=dict(
    x=list(cuisines_ratings),
    y=list(cuisines),
    colorMap = cividis30
))


# define figure (y_range reversed so the best-rated cuisine is drawn at the top)
p = figure(y_range=cuisines[::-1], x_range=(rating_axis_min, cuisines_ratings.max()),
           plot_width=450, plot_height=600, toolbar_location=None,
           title="Yelp ratings for the most popular NYC food categories",
           tools="hover", tooltips="@x{1.11}")

# plot bar chart
p.hbar(y='y', left=rating_axis_min, right='x', height=0.5, source=source, color='colorMap')

# figure parameters
p.xaxis.axis_label = "Average Yelp rating"
p.ygrid.grid_line_color = None
p.outline_line_color = None

# output: show inline, then write the chart to its own HTML file.
# The former output_file('flowers.html') call was dropped: it made show() also write
# an unrelated 'flowers.html'. Resources and title are passed to save() explicitly
# because no output_file() defaults exist any more.
output_notebook()
show(p)
save(p, 'cuisineYelpRatings.html', resources=CDN,
     title="Yelp ratings for the most popular NYC food categories")

'/Volumes/GoogleDrive/My Drive/PythonProjects/TooMuchFreeTime/Yelp API requests/cuisineYelpRatings.html'

In [28]:
# Render the figure as a complete HTML document titled "cuisinesEmbed", loading
# Bokeh's JS/CSS from the CDN, and print the markup
# (presumably so it can be pasted into another page — TODO confirm).
foo = file_html(p, CDN, "cuisinesEmbed")
print(foo)





<!DOCTYPE html>
<html lang="en">
  
  <head>
    
      <meta charset="utf-8">
      <title>cuisinesEmbed</title>
      
      
        
          
        <link rel="stylesheet" href="https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css" type="text/css" />
        
        
          
        <script type="text/javascript" src="https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js"></script>
        <script type="text/javascript">
            Bokeh.set_log_level("info");
        </script>
        
      
      
    
  </head>
  
  
  <body>
    
      
        
          
          
            
              <div class="bk-root" id="df364612-44a6-4956-9da3-e7005881ae01" data-root-id="1002"></div>
            
          
        
      
      
        <script type="application/json" id="1250">
          {"7b604bcc-15d7-4093-95a8-e97cd245a9ff":{"roots":{"references":[{"attributes":{"callback":null,"end":4.2439024390243905,"start":3.5},"id":"1005","type":"Range1d"},{"attribute