# Import Libraries

In [40]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Import Data

In [41]:
# read in data into pandas dataframe
data = pd.read_csv('./data/NYC_Restaurant_Inspections_Data_Critical_2019.csv', usecols=['ID', 'RESTAURANT NAME', 'ADDRESS'])

In [42]:
data.head()

Unnamed: 0,ID,RESTAURANT NAME,ADDRESS
0,30112340,WENDY'S,469 FLATBUSH AVENUE BROOKLYN NY 11225
1,40356483,WILKEN'S FINE FOOD,7114 AVENUE U BROOKLYN NY 11234
2,40359705,NATHAN'S FAMOUS,1310 SURF AVENUE BROOKLYN NY 11224
3,40362274,ANGELIKA FILM CENTER,18 WEST HOUSTON STREET MANHATTAN NY 10012
4,40362432,HO MEI RESTAURANT,10305 37 AVENUE QUEENS NY 11368


In [43]:
# create a new dataframe with a list of unique addresses
addresses = pd.DataFrame(data.ADDRESS.unique().tolist(), columns=['ADDRESS']).copy()

In [44]:
addresses.head()

Unnamed: 0,ADDRESS
0,469 FLATBUSH AVENUE BROOKLYN NY 11225
1,7114 AVENUE U BROOKLYN NY 11234
2,1310 SURF AVENUE BROOKLYN NY 11224
3,18 WEST HOUSTON STREET MANHATTAN NY 10012
4,10305 37 AVENUE QUEENS NY 11368


In [45]:
addresses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9209 entries, 0 to 9208
Data columns (total 1 columns):
ADDRESS    9209 non-null object
dtypes: object(1)
memory usage: 72.0+ KB


# Subset Data & Geocoding

In [65]:
# create a subset of data to run through geocoder
subset = addresses[:100].reset_index().copy()

In [66]:
# create an instance of the geocoder
geolocator = Nominatim(user_agent="inspections_locations")

In [67]:
# create an instance or geocoder object with rate limit implemented
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, max_retries=0)

In [68]:
# create a new column in dataframe for storing the location details from geocoding
subset['LOCATION'] = subset['ADDRESS'].apply(geocode)

In [69]:
subset.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou..."
1,1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be..."
2,2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig..."
3,3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man..."
4,4,10305 37 AVENUE QUEENS NY 11368,


In [70]:
subset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
index       100 non-null int64
ADDRESS     100 non-null object
LOCATION    69 non-null object
dtypes: int64(1), object(2)
memory usage: 2.4+ KB


In [71]:
subset.tail()

Unnamed: 0,index,ADDRESS,LOCATION
95,95,7269 KISSENA BOULEVARD QUEENS NY 11367,"(Kissena Boulevard, Queens, Queens County, NYC..."
96,96,9402 SUTPHIN BOULEVARD QUEENS NY 11435,"(Sutphin Boulevard, Jamaica, Queens County, NY..."
97,97,8207 153 AVENUE QUEENS NY 11414,
98,98,9615 LIBERTY AVENUE QUEENS NY 11417,"(Liberty Avenue, Jamaica, Queens County, NYC, ..."
99,99,7720 18 AVENUE BROOKLYN NY 11214,


In [72]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset['POINT'] = subset['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [73]:
subset.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,4,10305 37 AVENUE QUEENS NY 11368,,


In [74]:
# create second subset of data from addresses dataframe of 100 addresses
subset2 = addresses[100:200].reset_index().copy()

In [75]:
subset2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
index      100 non-null int64
ADDRESS    100 non-null object
dtypes: int64(1), object(1)
memory usage: 1.6+ KB


In [76]:
subset2.head()

Unnamed: 0,index,ADDRESS
0,100,10102 103 AVENUE QUEENS NY 11417
1,101,500 FOREST AVENUE STATEN ISLAND NY 10310
2,102,500 EAST 30 STREET MANHATTAN NY 10016
3,103,345 COURT STREET BROOKLYN NY 11231
4,104,906 7 AVENUE MANHATTAN NY 10019


In [77]:
# create a new column in dataframe for storing the location details from geocoding
subset2['LOCATION'] = subset2['ADDRESS'].apply(geocode)

In [78]:
subset2.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,100,10102 103 AVENUE QUEENS NY 11417,
1,101,500 FOREST AVENUE STATEN ISLAND NY 10310,"(500, Forest Avenue, West New Brighton, Todt H..."
2,102,500 EAST 30 STREET MANHATTAN NY 10016,
3,103,345 COURT STREET BROOKLYN NY 11231,"(Marco Polo Ristorante, 345, Court Street, Car..."
4,104,906 7 AVENUE MANHATTAN NY 10019,


In [80]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset2['POINT'] = subset2['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [81]:
subset2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
index       100 non-null int64
ADDRESS     100 non-null object
LOCATION    65 non-null object
POINT       65 non-null object
dtypes: int64(1), object(3)
memory usage: 3.2+ KB


In [84]:
# create third subset of data from addresses dataframe of 100 addresses
subset3 = addresses[200:300].reset_index().copy()

In [85]:
subset3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
index      100 non-null int64
ADDRESS    100 non-null object
dtypes: int64(1), object(1)
memory usage: 1.6+ KB


In [86]:
# create a new column in dataframe for storing the location details from geocoding
subset3['LOCATION'] = subset3['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('1660 RICHMOND AVENUE STATEN ISLAND NY 10314',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/

RateLimiter swallowed an error after 0 retries. Called with (*('3519 37 AVENUE QUEENS NY 11101',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/htt

RateLimiter swallowed an error after 0 retries. Called with (*('90 BEDFORD STREET MANHATTAN NY 10014',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3

In [142]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset3['POINT'] = subset3['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [87]:
# create fourth subset of data from addresses dataframe of 100 addresses
subset4 = addresses[300:400].reset_index().copy()

In [88]:
subset4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
index      100 non-null int64
ADDRESS    100 non-null object
dtypes: int64(1), object(1)
memory usage: 1.6+ KB


In [89]:
# create a new column in dataframe for storing the location details from geocoding
subset4['LOCATION'] = subset4['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('1650 BROADWAY MANHATTAN NY 10019',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/h

RateLimiter swallowed an error after 0 retries. Called with (*('351 EAST  103 STREET MANHATTAN NY 10029',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/pyth

KeyboardInterrupt: 

In [90]:
subset4.head()

Unnamed: 0,index,ADDRESS
0,300,1650 BROADWAY MANHATTAN NY 10019
1,301,47 BOND STREET MANHATTAN NY 10012
2,302,209 EAST 5 STREET MANHATTAN NY 10003
3,303,351 EAST 103 STREET MANHATTAN NY 10029
4,304,40 EXCHANGE PLACE MANHATTAN NY 10005


In [91]:
# create fifth subset of data from addresses dataframe of 100 addresses
subset5 = addresses[400:500].reset_index().copy()

In [92]:
# create a new column in dataframe for storing the location details from geocoding
subset5['LOCATION'] = subset5['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('296 3 AVENUE MANHATTAN NY 10010',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/ht

In [94]:
subset5.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,400,2845 RICHMOND AVENUE STATEN ISLAND NY 10314,"(2845, Richmond Avenue, Greenridge, Staten Isl..."
1,401,229 EAST 84 STREET MANHATTAN NY 10028,
2,402,8905 NORTHERN BOULEVARD QUEENS NY 11372,"(Northern Boulevard, Jackson Heights, Queens C..."
3,403,171 7 AVENUE BROOKLYN NY 11215,"(171, Avenue Y, Gravesend, Kings County, NYC, ..."
4,404,29 ST MARKS PLACE MANHATTAN NY 10003,"(29, Saint Mark's Place, East Village, Manhatt..."


In [100]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset5['POINT'] = subset5['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [101]:
subset5.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,400,2845 RICHMOND AVENUE STATEN ISLAND NY 10314,"(2845, Richmond Avenue, Greenridge, Staten Isl...","(40.57588825, -74.1680247649206, 0.0)"
1,401,229 EAST 84 STREET MANHATTAN NY 10028,,
2,402,8905 NORTHERN BOULEVARD QUEENS NY 11372,"(Northern Boulevard, Jackson Heights, Queens C...","(40.755392, -73.887523, 0.0)"
3,403,171 7 AVENUE BROOKLYN NY 11215,"(171, Avenue Y, Gravesend, Kings County, NYC, ...","(40.5879432265193, -73.9734698839779, 0.0)"
4,404,29 ST MARKS PLACE MANHATTAN NY 10003,"(29, Saint Mark's Place, East Village, Manhatt...","(40.72904305, -73.988314799689, 0.0)"


In [95]:
# create a sixth subset of 100 datapoints from addresses dataframe
subset6 = addresses[500:600].reset_index().copy()

In [96]:
# create a new column in dataframe for storing the location details from geocoding
subset6['LOCATION'] = subset6['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('10016 ROCKAWAY BEACH BOULEVARD QUEENS NY 11694',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/l

In [97]:
subset6.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
index       100 non-null int64
ADDRESS     100 non-null object
LOCATION    65 non-null object
dtypes: int64(1), object(2)
memory usage: 2.4+ KB


In [98]:
subset6.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,500,12 EAST 37 STREET MANHATTAN NY 10016,
1,501,153 EAST 53 STREET MANHATTAN NY 10022,
2,502,50 COURT STREET BROOKLYN NY 11201,"(50, Court Street, Cobble Hill, Kings County, ..."
3,503,15 WEST 46 STREET MANHATTAN NY 10036,
4,504,2158 8 AVENUE MANHATTAN NY 10026,"(Manhattan, New York County, NYC, New York, US..."


In [102]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset6['POINT'] = subset6['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [103]:
subset6.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,500,12 EAST 37 STREET MANHATTAN NY 10016,,
1,501,153 EAST 53 STREET MANHATTAN NY 10022,,
2,502,50 COURT STREET BROOKLYN NY 11201,"(50, Court Street, Cobble Hill, Kings County, ...","(40.6923739, -73.9913434338016, 0.0)"
3,503,15 WEST 46 STREET MANHATTAN NY 10036,,
4,504,2158 8 AVENUE MANHATTAN NY 10026,"(Manhattan, New York County, NYC, New York, US...","(40.7900869, -73.9598295, 0.0)"


In [155]:
# create a seventh subset of 100 datapoints from addresses dataframe
subset7 = addresses[600:700].reset_index().copy()

In [156]:
# create a new column in dataframe for storing the location details from geocoding
subset7['LOCATION'] = subset7['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('26421 UNION TURNPIKE QUEENS NY 11004',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3

In [157]:
subset7.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
index       100 non-null int64
ADDRESS     100 non-null object
LOCATION    64 non-null object
dtypes: int64(1), object(2)
memory usage: 2.4+ KB


In [158]:
subset7.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,600,1359 1 AVENUE MANHATTAN NY 10021,
1,601,14 1 AVENUE MANHATTAN NY 10009,
2,602,407 51 STREET BROOKLYN NY 11220,
3,603,31 GREAT JONES ST MANHATTAN NY 10012,"(31, Great Jones Street, NoHo Historic Distric..."
4,604,666 5 AVENUE MANHATTAN NY 10103,


In [159]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset7['POINT'] = subset7['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [160]:
subset7.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,600,1359 1 AVENUE MANHATTAN NY 10021,,
1,601,14 1 AVENUE MANHATTAN NY 10009,,
2,602,407 51 STREET BROOKLYN NY 11220,,
3,603,31 GREAT JONES ST MANHATTAN NY 10012,"(31, Great Jones Street, NoHo Historic Distric...","(40.72682565, -73.9932827929892, 0.0)"
4,604,666 5 AVENUE MANHATTAN NY 10103,,


In [161]:
# create a seventh subset of 100 datapoints from addresses dataframe
subset8 = addresses[700:800].reset_index().copy()

In [162]:
# create a new column in dataframe for storing the location details from geocoding
subset8['LOCATION'] = subset8['ADDRESS'].apply(geocode)

RateLimiter swallowed an error after 0 retries. Called with (*('485 AMSTERDAM AVENUE MANHATTAN NY 10024',), **{}).
Traceback (most recent call last):
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/urllib/request.py", line 1317, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1229, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1275, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1224, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/python3.7/http/client.py", line 1016, in _send_output
    self.send(msg)
  File "/Users/A/anaconda3/envs/inspections_blog/lib/pyth

In [163]:
subset8.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
index       100 non-null int64
ADDRESS     100 non-null object
LOCATION    72 non-null object
dtypes: int64(1), object(2)
memory usage: 2.4+ KB


In [164]:
subset8.head()

Unnamed: 0,index,ADDRESS,LOCATION
0,700,4101 4 AVENUE BROOKLYN NY 11232,
1,701,2025 CHURCH AVENUE BROOKLYN NY 11226,"(2025, Church Avenue, Flatbush, BK, Kings Coun..."
2,702,175 SECOND AVENUE MANHATTAN NY 10003,
3,703,4220 WHITE PLAINS ROAD BRONX NY 10466,"(4220, White Plains Road, Wakefield, The Bronx..."
4,704,485 AMSTERDAM AVENUE MANHATTAN NY 10024,


In [165]:
# create a new column called POINT and fill column with coordinates as tuple pulled from the Location column
subset8['POINT'] = subset8['LOCATION'].apply(lambda LOC: tuple(LOC.point) if LOC else None)

In [166]:
subset8.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,700,4101 4 AVENUE BROOKLYN NY 11232,,
1,701,2025 CHURCH AVENUE BROOKLYN NY 11226,"(2025, Church Avenue, Flatbush, BK, Kings Coun...","(40.650301, -73.9601161, 0.0)"
2,702,175 SECOND AVENUE MANHATTAN NY 10003,,
3,703,4220 WHITE PLAINS ROAD BRONX NY 10466,"(4220, White Plains Road, Wakefield, The Bronx...","(40.8939948, -73.8568371, 0.0)"
4,704,485 AMSTERDAM AVENUE MANHATTAN NY 10024,,


# Concatenation of All Subsets

In [112]:
# concat first two subsets into one table
subsets12 = pd.concat([subset, subset2], axis=0).reset_index().drop(['level_0', 'index'], axis=1)

In [113]:
subsets12.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [114]:
subsets12.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
195,4018 MAIN STREET QUEENS NY 11354,"(Main Street, Linden Hill, Queens, Queens Coun...","(40.7629068, -73.8319907, 0.0)"
196,537 PARK AVENUE BROOKLYN NY 11205,"(537, Park Avenue, Marcy Houses, BK, Kings Cou...","(40.6970881, -73.9548986, 0.0)"
197,1919 HYLAN BOULEVARD STATEN ISLAND NY 10305,"(1919, Hylan Boulevard, Dongan Hills, Todt Hil...","(40.5828716, -74.0967526, 0.0)"
198,2788 BROADWAY MANHATTAN NY 10025,"(Absolute Bagels, 2788, Broadway, Manhattan Va...","(40.8024597, -73.9673698441467, 0.0)"
199,7002 34 AVENUE QUEENS NY 11372,,


In [117]:
# check shape - there should be 200 datapoints after concatenation
subsets12.shape

(200, 3)

In [144]:
# concat first concatenated dataframe with third subset of data
subsets123 = pd.concat([subsets12, subset3], axis=0, sort=False).reset_index().drop(['index', 'level_0'], axis=1)

In [145]:
# check shape - should be 300 data points after 2nd concatenation
subsets123.shape

(300, 3)

In [146]:
subsets123.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [147]:
subsets123.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
295,3519 37 AVENUE QUEENS NY 11101,,
296,1866 RALPH AVENUE BROOKLYN NY 11236,,
297,289 MANHATTAN AVENUE BROOKLYN NY 11211,,
298,90 BEDFORD STREET MANHATTAN NY 10014,,
299,236 EAST 53 STREET MANHATTAN NY 10022,,


In [148]:
# concat 2nd concatenated dataframe with fifth subset of data (skipped fourth subset because there is no data obtained through geocoding)
subsets1235 = pd.concat([subsets123, subset5], axis=0, sort=False).reset_index().drop(['index', 'level_0'], axis=1)

In [149]:
subsets1235.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [150]:
subsets1235.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
395,1295 MADISON AVENUE MANHATTAN NY 10128,"(1295, Madison Avenue, Upper East Side, Carneg...","(40.7848028, -73.955747, 0.0)"
396,337 EAST 49 STREET MANHATTAN NY 10017,,
397,1035 EAST 163 STREET BRONX NY 10459,,
398,296 3 AVENUE MANHATTAN NY 10010,,
399,260 WEST 44 STREET MANHATTAN NY 10036,,


In [167]:
# concat 3rd concatenated dataframe with sixth subset of data
subsets12356 = pd.concat([subsets1235, subset6], axis=0, sort=False).reset_index().drop(['index', 'level_0'], axis=1)

In [169]:
subsets12356.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [170]:
subsets12356.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
495,375 WEST 125 STREET MANHATTAN NY 10027,,
496,72 BEDFORD STREET MANHATTAN NY 10014,"(72, Bedford Street, West Village, Manhattan, ...","(40.7314644, -74.0048032, 0.0)"
497,1664 RICHMOND ROAD STATEN ISLAND NY 10304,"(1664, Richmond Road, Dongan Hills, Staten Isl...","(40.5893815, -74.1012747, 0.0)"
498,109 MACDOUGAL STREET MANHATTAN NY 10012,"(109, MacDougal Street, University Village, Gr...","(40.7298167272727, -74.0007718181818, 0.0)"
499,107 UNIVERSITY PLACE MANHATTAN NY 10003,"(107, University Place, Washington Square Vill...","(40.7342068, -73.9923691, 0.0)"


In [171]:
# check null values
subsets12356.isnull().sum()

ADDRESS       0
LOCATION    175
POINT       175
dtype: int64

In [172]:
# concat 4th concatenated dataframe with seventh subset of data
subsets123567 = pd.concat([subsets12356, subset7], axis=0, sort=False).reset_index().drop(['index', 'level_0'], axis=1)

In [173]:
subsets123567.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [174]:
subsets123567.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
595,1305 86 STREET BROOKLYN NY 11228,"(86 St, Gravesend, Kings County, NYC, New York...","(40.5906494, -73.975127, 0.0)"
596,556 DRIGGS AVENUE BROOKLYN NY 11211,"(556, Driggs Avenue, Greenpoint, Kings County,...","(40.7170007, -73.9566278, 0.0)"
597,709 LORIMER STREET BROOKLYN NY 11211,"(709, Lorimer Street, Greenpoint, Kings County...","(40.71809255, -73.9502882816778, 0.0)"
598,200 MADISON AVENUE MANHATTAN NY 10016,"(200 Madison Avenue, 200, Madison Avenue, Midt...","(40.7488852, -73.9827039003413, 0.0)"
599,1236 2 AVENUE MANHATTAN NY 10065,,


In [175]:
# concat last subset of data with 5th concatenated dataframe
all_subsets = pd.concat([subsets123567, subset8], axis=0, sort=False).reset_index().drop(['index', 'level_0'], axis=1)

In [176]:
all_subsets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 3 columns):
ADDRESS     700 non-null object
LOCATION    461 non-null object
POINT       461 non-null object
dtypes: object(3)
memory usage: 16.5+ KB


In [177]:
all_subsets.head()

Unnamed: 0,ADDRESS,LOCATION,POINT
0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,10305 37 AVENUE QUEENS NY 11368,,


In [178]:
all_subsets.tail()

Unnamed: 0,ADDRESS,LOCATION,POINT
695,160 BROADWAY MANHATTAN NY 10038,"(160, Broadway, Financial District, Manhattan,...","(40.70937965, -74.0100296338982, 0.0)"
696,917 MANOR ROAD STATEN ISLAND NY 10314,"(917, Manor Road, Castleton Corners, Staten Is...","(40.60439225, -74.1205629156551, 0.0)"
697,129 GATES AVENUE BROOKLYN NY 11238,"(129, Gates Avenue, Clinton Hill, BK, Kings Co...","(40.6849008, -73.9629318, 0.0)"
698,132 CROSBY STREET MANHATTAN NY 10012,"(132, Crosby Street, Crosby St 39-73, SoHo, Ma...","(40.7247777, -73.996323, 0.0)"
699,802-804 KINGS HIGHWAY BROOKLYN NY 11223,"(802, Kings Highway, Gravesend, BK, Kings Coun...","(40.6064537755102, -73.9644138979592, 0.0)"


In [179]:
# check null values
all_subsets.isnull().sum()

ADDRESS       0
LOCATION    239
POINT       239
dtype: int64

In [183]:
# drop rows with null values in Location and Point columns
all_subsets = all_subsets.dropna(subset=['LOCATION', 'POINT']).reset_index()

In [184]:
all_subsets.isnull().sum()

index       0
ADDRESS     0
LOCATION    0
POINT       0
dtype: int64

In [185]:
all_subsets.head()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
0,0,469 FLATBUSH AVENUE BROOKLYN NY 11225,"(469, Flatbush Avenue, Flatbush, BK, Kings Cou...","(40.66293035, -73.9617257983564, 0.0)"
1,1,7114 AVENUE U BROOKLYN NY 11234,"(Wilkens Fine Foods, 7114, Avenue U, Bergen Be...","(40.619892, -73.9068502510335, 0.0)"
2,2,1310 SURF AVENUE BROOKLYN NY 11224,"(Nathan's Famous, 1310, Surf Avenue, West Brig...","(40.57530105, -73.9814687031484, 0.0)"
3,3,18 WEST HOUSTON STREET MANHATTAN NY 10012,"(18, West Houston Street, NoHo, Manhattan, Man...","(40.7257247, -73.9970561, 0.0)"
4,5,60 WALL STREET MANHATTAN NY 10005,"(60 Wall Street, 60, Wall Street, Financial Di...","(40.70617305, -74.0085161961879, 0.0)"


In [186]:
all_subsets.tail()

Unnamed: 0,index,ADDRESS,LOCATION,POINT
456,695,160 BROADWAY MANHATTAN NY 10038,"(160, Broadway, Financial District, Manhattan,...","(40.70937965, -74.0100296338982, 0.0)"
457,696,917 MANOR ROAD STATEN ISLAND NY 10314,"(917, Manor Road, Castleton Corners, Staten Is...","(40.60439225, -74.1205629156551, 0.0)"
458,697,129 GATES AVENUE BROOKLYN NY 11238,"(129, Gates Avenue, Clinton Hill, BK, Kings Co...","(40.6849008, -73.9629318, 0.0)"
459,698,132 CROSBY STREET MANHATTAN NY 10012,"(132, Crosby Street, Crosby St 39-73, SoHo, Ma...","(40.7247777, -73.996323, 0.0)"
460,699,802-804 KINGS HIGHWAY BROOKLYN NY 11223,"(802, Kings Highway, Gravesend, BK, Kings Coun...","(40.6064537755102, -73.9644138979592, 0.0)"


# Save Data to CSV File

In [187]:
# save dataframe to csv
# all_subsets.to_csv('./data/Geocoded_Addresses.csv')