In [1]:
import pandas as pd
import numpy as np
import folium
import requests

# Culinary diversity in Boston neighborhoods

In this notebook, we use the Foursquare API to explore restaurants in the neighborhoods of Boston and classify the neighborhoods based on the diversity of their culinary options.

## Geographical data for Boston neighborhoods

The geographical coordinates for the neighborhoods in Boston are available from the City of Boston: https://data.boston.gov/dataset/boston-neighborhoods. We will load this into the variable `geodata`.

In [99]:
url = 'http://bostonopendata-boston.opendata.arcgis.com/datasets/3525b0ee6e6b427f9aab5d0a1d0a1a28_0.geojson'
geodata = requests.get(url).json()
geodata

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'OBJECTID': 27,
    'Name': 'Roslindale',
    'Acres': 1605.5682375,
    'Neighborhood_ID': '15',
    'SqMiles': 2.51,
    'ShapeSTArea': 69938272.92557049,
    'ShapeSTLength': 53563.912597056624},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-71.12592717485386, 42.272013107957406],
       [-71.12610933458738, 42.2716219294518],
       [-71.12603188298199, 42.27158985153841],
       [-71.12571713956957, 42.27152070474045],
       [-71.12559042372907, 42.27146017841939],
       [-71.12523676125656, 42.271387313901805],
       [-71.12522437821433, 42.271425073651166],
       [-71.12489533053173, 42.27134458090032],
       [-71.12482468090687, 42.271318140479686],
       [-71.12485155056099, 42.27124753819149],
       [-71.12476329046935, 42.270292339717635],
       [-71.12470249712558, 42.270295367758344],
       [-71.12259088359436, 42.2700534081311],
       [-71.1223931813923, 42.270030

`geodata['features']` is a list of dictionaries with information for each neighborhood. We will create a dataframe to store the neighborhood name and geographical coordinates. Note the coordinates are given as either a `Polygon` or `MultiPolygon` boundary. `MultiPolygon` coordinates are in a list four layers deep, while `Polygon` coordinates are in a list only three layers deep. To account for this, we add a layer of nesting in the `Polygon` coordinate lists.

In [100]:
df = pd.DataFrame(columns=['neighborhood', 'coordinates'])

for f in geodata['features']:
    coords = f['geometry']['coordinates']
    if f['geometry']['type'] == 'Polygon':
        coords = [coords]
    
    df = df.append({'neighborhood': f['properties']['Name'],
               'coordinates': coords}, ignore_index=True)

In [101]:
df.head()

Unnamed: 0,neighborhood,coordinates
0,Roslindale,"[[[[-71.12592717485386, 42.272013107957406], [..."
1,Jamaica Plain,"[[[[-71.10499218689807, 42.326101682808066], [..."
2,Mission Hill,"[[[[-71.0904343142608, 42.33576996328494], [-7..."
3,Longwood,"[[[[-71.09810894210769, 42.33673037764089], [-..."
4,Bay Village,"[[[[-71.06662924918761, 42.34878268384542], [-..."


We should also find the geographical centers of the neighborhoods. We will find these by finding the center of the bounding box for the polygon coordinates. We will have to loop through the outer two list layers to get down to the [longitude, latitude] coordinates.

In [107]:
c[0][0][0]

[-71.12592717485386, 42.272013107957406]

In [113]:
df['center'] = ''

for i, c in enumerate(df['coordinates']):
    lng_min, lng_max, lat_min, lat_max = 0, -1000, 1000, 0
    for j in range(len(c)):
        for k in range(len(c[j])):
            for l in range(len(c[j][k])):
                lng, lat = c[j][k][l]
                if lng < lng_min:
                    lng_min = lng
                elif lng > lng_max:
                    lng_max = lng
                if lat < lat_min:
                    lat_min = lat
                elif lat > lat_max:
                    lat_max = lat
    df['center'][i] = np.array([lng_min + (lng_max-lng_min)/2, lat_min + (lat_max-lat_min)/2])

In [114]:
df.head()

Unnamed: 0,neighborhood,coordinates,center
0,Roslindale,"[[[[-71.12592717485386, 42.272013107957406], [...","[-71.12370041552998, 42.283002807183536]"
1,Jamaica Plain,"[[[[-71.10499218689807, 42.326101682808066], [...","[-71.11763985121655, 42.308218916603565]"
2,Mission Hill,"[[[[-71.0904343142608, 42.33576996328494], [-7...","[-71.10223689942548, 42.33189946360892]"
3,Longwood,"[[[[-71.09810894210769, 42.33673037764089], [-...","[-71.10438844396899, 42.33904686848325]"
4,Bay Village,"[[[[-71.06662924918761, 42.34878268384542], [-...","[-71.06963369151094, 42.34935274336108]"


# Exploring restaurants in neighborhoods

We will now use the Foursquare API to explore these neighborhoods. Below is the information needed to make requests to the API.

In [10]:
CLIENT_ID = 'ESQILQUJ45MEFP5CYAUTQAPRNT1YA22FASZDU0VCI5VIE2EP'
CLIENT_SECRET = 'MZIKSDQUL4OHRIPALRKSUQSKUMMVKM2PCGAMGJPWKJ3G0MBZ'
VERSION = '20200415' # Foursquare API version
LIMIT = 100

In [11]:
lat, lng = df['coordinates'][0]
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?\
                    &client_id={}\
                    &client_secret={}\
                    &v={}\
                    &ll={},{}\
                    &radius={}\
                    &limit={}'.format(
                        CLIENT_ID, 
                        CLIENT_SECRET, 
                        VERSION, 
                        lat, 
                        lng, 
                        radius, 
                        LIMIT)

ValueError: too many values to unpack (expected 2)

In [127]:
requests.get(url).json()#["response"]['groups'][0]['items']

{'meta': {'code': 200, 'requestId': '5e978c0402a172001bac8d9e'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Wakefield',
  'headerFullLocation': 'Wakefield, Bronx',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 9,
  'suggestedBounds': {'ne': {'lat': 40.899205181110005,
    'lng': -73.84125857127495},
   'sw': {'lat': 40.89020517211, 'lng': -73.8531424698231}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c537892fd2ea593cb077a28',
       'name': 'Lollipops Gelato',
       'location': {'address': '4120 Baychester Ave',
        'crossStreet': 'Edenwald & Bussing Ave',
        'lat': 40.894123150205274,
        'lng': -73.84589162362325,
        'labeledLatLn

# Visualizing neighborhoods

Let's visualize the neighborhoods with a map. To find the geographical center, let's find the average latitude/longitude.

In [115]:
# find average latitude/longitude
lng_avg, lat_avg = df['center'].mean()

In [136]:
m = folium.Map(location=[lng_avg, lat_avg], zoom_start=12)

folium.GeoJson(geodata).add_to(m)

# loop over neighborhoods
for i in range(df.shape[0]):
    s = df['neighborhood'][i]
    label = folium.Popup(s)

    print(df['center'][i])
    folium.CircleMarker(
        df['center'][i][::-1],
        radius=4,
        popup=label).add_to(m)  

    
m

[-71.12370042  42.28300281]
[-71.11763985  42.30821892]
[-71.1022369   42.33189946]
[-71.10438844  42.33904687]
[-71.06963369  42.34935274]
[-71.05780046  42.35095551]
[-71.06287765  42.34940914]
[-71.05365978  42.36531226]
[-71.08662537  42.31682119]
[-71.07195335  42.34020802]
[-71.08078381  42.34926115]
[-71.01598897  42.37183231]
[-71.06414684  42.38190985]
[-71.06547457  42.36508809]
[-71.06883559  42.35662434]
[-71.05916881  42.35435669]
[-71.09714245  42.34397147]
[-71.15501232  42.34850652]
[-71.15966034  42.27842522]
[-71.12201083  42.25282226]
[-71.08755835  42.27807833]
[-71.06421207  42.29918337]
[-71.03956093  42.34657798]
[-71.0407409   42.33381424]
[-71.1289327   42.35982217]
[-70.97022699  42.33294972]


In [9]:
map = folium.Map(zoom_start=10)
folium.GeoJson(geodata).add_to(map)
map