# Import Libraries

In [211]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster
import ast

# Import Data

In [212]:
# read in dataset of geocoded addresses with coordinates
data = pd.read_csv('./data/Geocoded_Addresses.csv', usecols=['ADDRESS', 'POINT'])

In [213]:
data.head()

Unnamed: 0,ADDRESS,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(40.7257247, -73.9970561, 0.0)"
4,60 WALL STREET MANHATTAN NY 10005,"(40.70617305, -74.0085161961879, 0.0)"


In [214]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461 entries, 0 to 460
Data columns (total 2 columns):
ADDRESS    461 non-null object
POINT      461 non-null object
dtypes: object(2)
memory usage: 7.3+ KB


In [215]:
# read in data of restaurant names and addresses
restaurants = pd.read_csv('./data/NYC_Restaurant_Inspections_Data_Critical_2019.csv', usecols=['RESTAURANT NAME', 'ADDRESS'])

In [216]:
restaurants.head()

Unnamed: 0,RESTAURANT NAME,ADDRESS
0,WENDY'S,469 FLATBUSH AVENUE BROOKLYN NY 11225
1,WILKEN'S FINE FOOD,7114 AVENUE U BROOKLYN NY 11234
2,NATHAN'S FAMOUS,1310 SURF AVENUE BROOKLYN NY 11224
3,ANGELIKA FILM CENTER,18 WEST HOUSTON STREET MANHATTAN NY 10012
4,HO MEI RESTAURANT,10305 37 AVENUE QUEENS NY 11368


In [217]:
restaurants.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9620 entries, 0 to 9619
Data columns (total 2 columns):
RESTAURANT NAME    9620 non-null object
ADDRESS            9620 non-null object
dtypes: object(2)
memory usage: 150.4+ KB


# Data Wrangling

In [218]:
# check an individual value in POINT column to see how it is formatted - notice it is a string
data['POINT'][0]

'(40.66293035, -73.9617257983564, 0.0)'

In [219]:
# de-string the values in the POINT column
data['POINT'] = data['POINT'].apply(lambda x: ast.literal_eval(x))

In [220]:
# check the format of the individual value again
data['POINT'][0]

(40.66293035, -73.9617257983564, 0.0)

In [221]:
# split the POINT column into 3 separate columns for each coordinate
data[['X', 'Y', 'Z']] = pd.DataFrame(data['POINT'].tolist(), index=data.index)

In [222]:
data.head()

Unnamed: 0,ADDRESS,POINT,X,Y,Z
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(40.66293035, -73.9617257983564, 0.0)",40.66293,-73.961726,0.0
1,7114 AVENUE U BROOKLYN NY 11234,"(40.619892, -73.9068502510335, 0.0)",40.619892,-73.90685,0.0
2,1310 SURF AVENUE BROOKLYN NY 11224,"(40.57530105, -73.9814687031484, 0.0)",40.575301,-73.981469,0.0
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(40.7257247, -73.9970561, 0.0)",40.725725,-73.997056,0.0
4,60 WALL STREET MANHATTAN NY 10005,"(40.70617305, -74.0085161961879, 0.0)",40.706173,-74.008516,0.0


In [223]:
# look at the value counts of column Z. 
data.Z.value_counts()

0.0    461
Name: Z, dtype: int64

In [224]:
# drop column Z since they are all 0s here
data.drop('Z', axis=1, inplace=True)

In [225]:
data.head()

Unnamed: 0,ADDRESS,POINT,X,Y
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(40.66293035, -73.9617257983564, 0.0)",40.66293,-73.961726
1,7114 AVENUE U BROOKLYN NY 11234,"(40.619892, -73.9068502510335, 0.0)",40.619892,-73.90685
2,1310 SURF AVENUE BROOKLYN NY 11224,"(40.57530105, -73.9814687031484, 0.0)",40.575301,-73.981469
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(40.7257247, -73.9970561, 0.0)",40.725725,-73.997056
4,60 WALL STREET MANHATTAN NY 10005,"(40.70617305, -74.0085161961879, 0.0)",40.706173,-74.008516


In [226]:
# merge restaurant dataset to geocoded data so that final dataset has restuarant names with coordinates and addresses
locations = data.merge(restaurants, on='ADDRESS', how='left')

In [227]:
locations.shape

(486, 5)

In [228]:
locations.head()

Unnamed: 0,ADDRESS,POINT,X,Y,RESTAURANT NAME
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(40.66293035, -73.9617257983564, 0.0)",40.66293,-73.961726,WENDY'S
1,7114 AVENUE U BROOKLYN NY 11234,"(40.619892, -73.9068502510335, 0.0)",40.619892,-73.90685,WILKEN'S FINE FOOD
2,1310 SURF AVENUE BROOKLYN NY 11224,"(40.57530105, -73.9814687031484, 0.0)",40.575301,-73.981469,NATHAN'S FAMOUS
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(40.7257247, -73.9970561, 0.0)",40.725725,-73.997056,ANGELIKA FILM CENTER
4,60 WALL STREET MANHATTAN NY 10005,"(40.70617305, -74.0085161961879, 0.0)",40.706173,-74.008516,THE COUNTRY CAFE


In [229]:
locations.tail()

Unnamed: 0,ADDRESS,POINT,X,Y,RESTAURANT NAME
481,160 BROADWAY MANHATTAN NY 10038,"(40.70937965, -74.0100296338982, 0.0)",40.70938,-74.01003,MILK N' HONEY
482,917 MANOR ROAD STATEN ISLAND NY 10314,"(40.60439225, -74.1205629156551, 0.0)",40.604392,-74.120563,MANOR HOUSE RESTAURANT
483,129 GATES AVENUE BROOKLYN NY 11238,"(40.6849008, -73.9629318, 0.0)",40.684901,-73.962932,(LEWIS DRUG STORE) LOCANDA VINI E OLII
484,132 CROSBY STREET MANHATTAN NY 10012,"(40.7247777, -73.996323, 0.0)",40.724778,-73.996323,LAHORE DELICATESSEN
485,802-804 KINGS HIGHWAY BROOKLYN NY 11223,"(40.6064537755102, -73.9644138979592, 0.0)",40.606454,-73.964414,CAFE RENAISSANCE


In [230]:
# check for null values
locations.isnull().sum()

ADDRESS            0
POINT              0
X                  0
Y                  0
RESTAURANT NAME    0
dtype: int64

# Visualization

### Individual Points

In [239]:
# create a map with coordinates for NYC
m = folium.Map(
                        location = [40.7230018,-73.9911482], 
                        tiles = 'Stamen Toner',
                        zoom_start=11)


In [240]:
# add all coordinates to map
locations.apply(lambda row: folium.CircleMarker(
                                location=[row['X'], row['Y']], 
                                radius=10, 
                                popup=row['RESTAURANT NAME'], 
                                color='red', 
                                fill=True, 
                                fill_color='red').add_to(m), 
                        axis=1)

0      <folium.vector_layers.CircleMarker object at 0...
1      <folium.vector_layers.CircleMarker object at 0...
2      <folium.vector_layers.CircleMarker object at 0...
3      <folium.vector_layers.CircleMarker object at 0...
4      <folium.vector_layers.CircleMarker object at 0...
5      <folium.vector_layers.CircleMarker object at 0...
6      <folium.vector_layers.CircleMarker object at 0...
7      <folium.vector_layers.CircleMarker object at 0...
8      <folium.vector_layers.CircleMarker object at 0...
9      <folium.vector_layers.CircleMarker object at 0...
10     <folium.vector_layers.CircleMarker object at 0...
11     <folium.vector_layers.CircleMarker object at 0...
12     <folium.vector_layers.CircleMarker object at 0...
13     <folium.vector_layers.CircleMarker object at 0...
14     <folium.vector_layers.CircleMarker object at 0...
15     <folium.vector_layers.CircleMarker object at 0...
16     <folium.vector_layers.CircleMarker object at 0...
17     <folium.vector_layers.Ci

In [241]:
m

In [238]:
# save map to file
# m.save('./images/inspections_map_individual.html')
# m.save('./images/inspections_map_individual.png')

### Clustered Points

In [242]:
# create a map with coordinates for NYC
mp = folium.Map(
                        location = [40.7230018,-73.9911482],
                        zoom_start=11)

# customize basemap to be CartoDB Dark Matter
folium.TileLayer('cartodbdark_matter').add_to(mp)

<folium.raster_layers.TileLayer at 0x824f4c320>

In [243]:
# create cluster and add to map
cluster = MarkerCluster().add_to(mp)

In [244]:
# add all coordinates to cluster
locations.apply(lambda row: folium.Marker(
                                location=[row['X'], row['Y']],
                                popup = "{}, \n {}".format(row['RESTAURANT NAME'], row['ADDRESS']),
                                color = 'red',
                                fill_color = 'red').add_to(cluster), axis=1)

0      <folium.map.Marker object at 0x824f53080>
1      <folium.map.Marker object at 0x824f532b0>
2      <folium.map.Marker object at 0x824ed3e10>
3      <folium.map.Marker object at 0x824ed3c50>
4      <folium.map.Marker object at 0x824a04b00>
5      <folium.map.Marker object at 0x824a040f0>
6      <folium.map.Marker object at 0x824f534a8>
7      <folium.map.Marker object at 0x824f535f8>
8      <folium.map.Marker object at 0x824f53748>
9      <folium.map.Marker object at 0x824f53898>
10     <folium.map.Marker object at 0x824f539e8>
11     <folium.map.Marker object at 0x824f53b38>
12     <folium.map.Marker object at 0x824f53c88>
13     <folium.map.Marker object at 0x824f53dd8>
14     <folium.map.Marker object at 0x824f53f28>
15     <folium.map.Marker object at 0x824f53f60>
16     <folium.map.Marker object at 0x824f6b208>
17     <folium.map.Marker object at 0x824f6b358>
18     <folium.map.Marker object at 0x824f6b4a8>
19     <folium.map.Marker object at 0x824f6b5f8>
20     <folium.map.M

In [245]:
mp

In [237]:
# save map to file
# mp.save('./images/inspections_map_clustered.html')
# mp.save('./images/inspections_map_clustered.png')