In [1]:
# Widen the notebook container so wide tables and maps can use (almost) the
# full browser width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
# Uncomment to silence library warnings:
# import warnings
# warnings.filterwarnings('ignore')

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import cufflinks as cf
from sodapy import Socrata
import folium
import json

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's notebook mode and set the cufflinks defaults used for the
# rest of the notebook (offline, not world-readable, 'pearl' theme).
init_notebook_mode(connected=True)
cf.set_config_file(offline=True, world_readable=False, theme='pearl')

# NOTE(review): qgrid.enable() presumably switches DataFrame rendering to the
# interactive qgrid widget for every later cell — confirm.
import qgrid
qgrid.enable()

In [6]:
# Pull inspection rows from NYC Open Data (dataset 'xx67-kt59') using the SoQL
# filter below: violation code 04L, Manhattan, grade date in 2018 or later.
# No app token is passed (second argument is None).
# NOTE(review): `client.get` is called without `limit`; presumably the API's
# default page size applies and larger result sets would be truncated — confirm.
client = Socrata('data.cityofnewyork.us', None)
results = client.get('xx67-kt59',
                     where="violation_code = '04L' and boro='manhattan' and grade_date >= '2018'")

# Columns that are discarded right after loading.
unused_columns = ['boro', 'action', 'camis', 'cuisine_description', 'grade_date',
                  'violation_description', 'inspection_date', 'record_date',
                  'phone', 'critical_flag']
# Use the `columns=` keyword: the positional axis argument (`.drop(labels, 1)`)
# is no longer accepted by pandas 2.x.
df = pd.DataFrame(results).drop(columns=unused_columns)

# Placeholder coordinate columns; filled in by the geocoding cell.
df['lat'] = 0
df['long'] = 0

df['score'] = df['score'].astype(int)

# Normalise the street spelling BEFORE the address is assembled. Doing it
# afterwards (as the cell originally did) leaves `address` — the string that is
# actually geocoded — with the unfixed spelling.
df.loc[df.street=='WEST    4 STREET','street'] = 'West 4th STREET'

df['address'] = df['building'].astype(str) + ' ' + df['street'].astype(str) + ' NYC'

# Hand-corrected addresses, matched on the full assembled address string.
df.loc[df.address=='135 WEST   50 STREET NYC','address'] = '135 WEST   50th STREET NYC'
df.loc[df.address=='2082 FREDERICK DOUGLAS BOULEVARD NYC','address'] = '2082 Frederick Douglass Blvd, New York, NY 10026'
df.shape



(457, 11)

In [13]:
from geopy.geocoders import Nominatim
# Nominatim's usage policy requires an application-specific User-Agent, and
# current geopy releases refuse to build the geocoder without one.
geolocator = Nominatim(user_agent='nyc-restaurant-violations-notebook')

def lat_long(address, mode=None):
    """Geocode ``address`` with the module-level ``geolocator``.

    Parameters
    ----------
    address : str
        Free-form address handed to ``geolocator.geocode``.
    mode : {None, 'lat', 'long'}, optional
        ``None`` (default) returns the ``(latitude, longitude)`` pair.
        ``'lat'`` or ``'long'`` returns only that coordinate, which is the
        form used by callers that fill one column at a time.

    Returns
    -------
    tuple of float, or float
        The coordinates of the match. When the geocoder finds nothing, a
        message naming the address is printed and ``np.nan`` is returned in
        place of each coordinate.

    Raises
    ------
    ValueError
        If ``mode`` is not ``None``, ``'lat'`` or ``'long'``.

    Any exception raised by ``geolocator.geocode`` itself propagates to the
    caller.
    """
    # Validate before issuing the (slow) geocoding request.
    if mode not in (None, 'lat', 'long'):
        raise ValueError("mode must be None, 'lat' or 'long', got %r" % (mode,))

    location = geolocator.geocode(address)
    # `geocode` yields None for an unknown address; test for that explicitly
    # instead of hiding every possible error behind a bare `except`.
    if location is None:
        print('could not find address: ', address)
        latitude, longitude = np.nan, np.nan
    else:
        latitude, longitude = location.latitude, location.longitude

    if mode == 'lat':
        return latitude
    if mode == 'long':
        return longitude
    return latitude, longitude

In [None]:
%time df['lat'] = df.address.apply(lambda x: lat_long(x, mode='lat'))
%time df['long'] = df.address.apply(lambda x: lat_long(x, mode='long'))

df['violation_description'] = df.violation_description.str.replace("'",'')
df['dba'] = df.dba.str.replace("'",'')

In [14]:
# Persist the per-record table; the row index is left out of the file.
df.to_csv(path_or_buf='11222_2018.csv', index=False)

In [14]:
%%time
transformed = df.groupby('zipcode').agg({'dba':'count','address':'last'})
transformed['lat'] = 0
transformed['long'] = 0

lats_longs = []
for k, row in transformed.iterrows():
    lats_longs.append(lat_long(row.address))
    
lats_longs = pd.DataFrame(lats_longs)

transformed['lat'] = lats_longs[0].values
transformed['long'] = lats_longs[1].values

CPU times: user 500 ms, sys: 18.2 ms, total: 519 ms
Wall time: 18.5 s


In [15]:
# Display the per-zipcode aggregate (record count, last address, lat/long).
transformed

In [16]:
# `transformed` is indexed by zipcode (the groupby key), so the index has to be
# written out — with index=False the file would contain no zipcode column.
# The index is only written while it is a named index; once it has been reset
# into a regular column, the unnamed positional index is left out.
transformed.to_csv('group_by_zip.csv', index=transformed.index.name is not None)

## maps

In [18]:
# Geocode a fixed reference address; its coordinates centre the maps below.
(lat_init, long_init) = lat_long(address='3 washington square village NYC')

In [109]:
# Bubble map: one circle per zipcode, placed at that zipcode's geocoded address
# and sized by its record count (`dba`).
# NOTE(review): the 'Stamen Toner' tile set is presumably unavailable as a
# built-in in newer folium releases — confirm against the installed version.
m = folium.Map(location=[lat_init, long_init],
               tiles='Stamen Toner',
               zoom_start=16)

colors = {'Critical':'red', 'Not Critical':'blue'}  # not referenced in this cell

feature_group = folium.FeatureGroup("Locations")
# Rows whose address could not be geocoded carry NaN coordinates and cannot be
# placed on the map, so they are skipped.
for index, row in transformed.dropna(subset=['lat', 'long']).iterrows():
    # Popup label = the zipcode. It is the frame's index until a later cell
    # turns it into the 'zcta' column, so both layouts are handled.
    # (`row.index` holds the column labels, so `row.index.tolist()[0]` would
    # always be the string 'dba'.)
    zipcode = row.get('zcta', index)
    feature_group.add_child(folium.CircleMarker(location=[row.lat, row.long],
                                                popup=str(zipcode),
                                                radius=row.dba,
                                                color='blue',
                                                fill_color='blue'))

m.add_child(feature_group)

display(m)

#### rat migrations

In [9]:
# Load the GeoJSON file and collect the `zcta` property of every feature, in
# file order.
with open('/scratch/rag394/data/geospatial/nyu.json') as data_file:
    data_json_nyc = json.load(data_file)

NYC_zctas = [feature['properties']['zcta']
             for feature in data_json_nyc['features']]

In [21]:
# Turn the zipcode index into a regular column named 'zcta' so it can be used
# as the join key for the choropleth.
# Guarded so the cell can be re-run safely: a second in-place reset_index would
# add an extra column and make the five-name column assignment fail.
if 'zcta' not in transformed.columns:
    transformed = transformed.reset_index()
    transformed.columns = ['zcta','dba','address', 'lat', 'long']

In [37]:
# Distinct per-zipcode record counts, in ascending order.
np.sort(transformed['dba'].unique())

array([ 1,  2,  3,  4,  5,  8,  9, 10, 12, 14, 15, 16, 17, 18, 22, 23, 24,
       25, 27, 30, 41])

In [42]:
# Scratch check of the values produced by range(0, 41, 10) — the same range
# that appears in the commented-out `threshold_scale` argument of the
# choropleth cell.
list(range(0,41,10))

[0, 10, 20, 30, 40]

In [44]:
# Choropleth: shade each ZCTA polygon of the GeoJSON file by the per-zipcode
# record count (`dba`), joined on `transformed['zcta']` ==
# feature.properties.zcta.
ny_map = folium.Map(location=[lat_init, long_init],
                    width='90%', height='100%',
                    tiles='Stamen Toner',
                    zoom_start=14)

choropleth_options = dict(geo_data='/scratch/rag394/data/geospatial/nyu.json',
                          data=transformed,
                          columns=['zcta', 'dba'],
                          #threshold_scale=range(0,41,10),  # named `bins` on folium.Choropleth
                          key_on='feature.properties.zcta',
                          fill_color='YlGnBu',
                          legend_name='reported rat health violations')

# `Map.choropleth` was deprecated in favour of the `folium.Choropleth` class and
# is absent from current folium releases. Prefer the class and fall back to the
# old method only on folium versions that predate it.
if hasattr(folium, 'Choropleth'):
    folium.Choropleth(**choropleth_options).add_to(ny_map)
else:
    ny_map.choropleth(**choropleth_options)

ny_map