# New York City Department of Health Data

Augmenting the inspection addresses with OpenStreetMap data (via the Nominatim geocoder) using [``geopy`` on PyPI](https://pypi.python.org/pypi/geopy).

In [2]:
import time

import bokeh
import bokeh.plotting
import geopy
import geopy.geocoders
import jinja2
import pandas
# Send Bokeh output to the notebook, using the CDN resources configuration.
bokeh.plotting.output_notebook(resources=bokeh.resources.CDN)

In [3]:
# Load the NYC Department of Health CSV export.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk.
df = pandas.read_csv(
    '../_data/DOHNYC.csv',
    low_memory=False,
)
# Preview the first two records.
df.head(n=2)

Unnamed: 0.1,Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE
0,0,30075445,MORRIS PARK BAKE SHOP,BRONX,1007,MORRIS PARK AVE,10462,7188924968,Bakery,2015-02-09,Violations were cited in the following area(s).,06C,Food not protected from potential source of co...,Critical,6,A,2015-02-09,2015-08-14,Cycle Inspection / Initial Inspection
1,22,30112340,WENDY'S,BROOKLYN,469,FLATBUSH AVENUE ...,11225,7182875005,Hamburgers,2015-05-07,Violations were cited in the following area(s).,04A,Food Protection Certificate not held by superv...,Critical,12,A,2015-05-07,2015-08-14,Cycle Inspection / Initial Inspection


In [4]:
# Compose one free-text address per row for the OpenStreetMap (Nominatim) geocoder.
# Entries of ADDRESS_PARTS that are not DataFrame columns (here 'NY') are
# inserted as literal text.
ADDRESS_PARTS = ['BUILDING', 'STREET', 'BORO', 'NY', 'ZIPCODE']


def _address_part(value):
    """Return one address component as text, or '' when the value is missing."""
    if pandas.isnull(value):
        # str(NaN) would otherwise put the literal text "nan" into the query.
        return ''
    # A numeric column parsed as float (e.g. 10462.0) should read "10462".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value)


addresses = pandas.Series('', index=df.index)
for col in ADDRESS_PARTS:
    part = df[col].apply(_address_part) if col in df.columns else col
    addresses = addresses + ' ' + part

# split()/join() trims the ends and collapses runs of whitespace, so padded
# fields do not produce near-duplicate addresses; unique() then leaves a single
# entry per distinct address so each one is geocoded only once.
addresses = addresses.str.split().str.join(' ').unique()

In [5]:
# Request the augmented data from Nominatim (OpenStreetMap) for a small sample
# of the addresses.
GEOCODE_LIMIT = 10           # number of addresses to look up
GEOCODE_PAUSE_SECONDS = 1.0  # pause between requests to stay within the public service's rate limit

# Nominatim's usage policy requires an application-specific user agent.
geocoder = geopy.geocoders.Nominatim(user_agent='nyc-doh-notebook')

# Maps each successfully geocoded address to a flat dict: the top-level fields
# of the raw response merged with the entries of its nested 'address' dict.
addy_mapper = {}
for position, address in enumerate(addresses[:GEOCODE_LIMIT]):
    if position:
        time.sleep(GEOCODE_PAUSE_SECONDS)
    try:
        # `language` must be a language code; the original passed True.
        location = geocoder.geocode(address, addressdetails=True, language='en')
    except geopy.exc.GeocoderServiceError as error:
        # One timeout or service error should not discard the results so far.
        print('Geocoding failed for %r: %s' % (address, error))
        continue
    if location is None:
        # No match for this address.
        continue
    # Work on a copy so the geocoder's own response dict is left untouched.
    record = dict(location.raw)
    record.update(record.get('address') or {})
    record.pop('address', None)
    addy_mapper[address] = record

In [6]:
# DataFrame(addy_mapper) puts one address per column; flip it so that each
# address is a row and each geocoder field is a column.
d = pandas.DataFrame(addy_mapper).T

In [7]:
# Plot the geocoded addresses as circles on a Google map with a hover table
# listing every column of `d`.

# The original converted lat/lon to float only for the map centre, so the
# coordinate values came from the geocoder as text; the glyph needs numeric
# columns too. Convert on a copy so `d` itself is left unchanged.
plot_data = d.copy()
for axis in ('lat', 'lon'):
    plot_data[axis] = plot_data[axis].astype(float)

p = bokeh.models.GMapPlot(
    x_range=bokeh.models.DataRange1d(),
    y_range=bokeh.models.DataRange1d(),
    title='NYC Blech',
    map_options=bokeh.models.GMapOptions(
        # Centre the map on the mean coordinate of the geocoded points.
        lat=plot_data['lat'].mean(),
        lng=plot_data['lon'].mean(),
        map_type="roadmap",
        zoom=11,
    ),
)
source = bokeh.plotting.ColumnDataSource(plot_data)
renderer = p.add_glyph(source, bokeh.models.Circle(x='lon', y='lat', size=20))

# One table row per column; '@name' is replaced by that column's value for the
# hovered point.
tooltip_rows = ''.join(
    '<tr><th>' + c + '</th><td>@' + c + '</td></tr>' for c in plot_data.columns
)
p.add_tools(bokeh.models.HoverTool(
    renderers=[renderer],
    tooltips='<table>' + tooltip_rows + '</table>',
))
bokeh.plotting.show(p)

<bokeh.io._CommsHandle at 0x109abee80>