In [1]:
import pandas as pd
import folium
from folium import plugins
import json
from shapely.geometry import shape, Point

In [2]:
# Zillow neighborhood boundaries for DC, parsed from GeoJSON into a plain dict
with open('zillow_nb_dc.geojson', 'r') as geojson_fh:
    dcnb_zil = json.load(geojson_fh)

# Liquor license records, plus two derived columns:
#   op_yr    - years in operation as of each record (year - firstYr)
#   new_hood - placeholder; filled in later from the dcnb_zil polygons
df_liq = pd.read_csv("nightlife0816.csv")
df_liq = df_liq.assign(
    op_yr=df_liq['year'] - df_liq['firstYr'],
    new_hood='',
)

In [3]:
# Inspect one GeoJSON Feature to see its structure: each feature carries a
# 'geometry' (polygon coordinates) and 'properties' (including the neighborhood name).
# Feature 0 is the Catholic University neighborhood in DC.
dcnb_zil['features'][0]

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-77.00433078657653, 38.94064135955279],
    [-77.0042257126969, 38.94064000000015],
    [-77.00343344158188, 38.94064000000015],
    [-77.00306371506144, 38.94064000000015],
    [-77.0026412056817, 38.94064223871585],
    [-77.0020600370666, 38.940646743899016],
    [-77.00112265082952, 38.94065],
    [-77.00052844749331, 38.94065],
    [-76.99998706223136, 38.94065],
    [-76.99957999999987, 38.94065000000054],
    [-76.99918849426867, 38.94066204633078],
    [-76.99847540140462, 38.94065432418696],
    [-76.99752526687807, 38.940640000000506],
    [-76.9970685603916, 38.94064955808621],
    [-76.99637900580295, 38.940657329849174],
    [-76.99589326586322, 38.94065216240297],
    [-76.99522928060622, 38.94064438147115],
    [-76.99456570334776, 38.940635318513394],
    [-76.99400357330178, 38.940626670358846],
    [-76.99310986604085, 38.94062464781805],
    [-76.992318147589, 38.94063137817085],
    [-76.991347

In [4]:
### List the neighborhood name of every feature in the GeoJSON file.
### The list is in file order and is NOT de-duplicated: a name shows up once
### per feature that carries it.
hoods = [feature['properties']['name'] for feature in dcnb_zil['features']]

print(hoods)

['Catholic University', 'McLean Gardens', 'Benning Ridge', 'Southwest Federal Center', 'Civic Betterment', 'Forest Hills', 'Takoma', 'Chinatown', 'Gateway', 'Congress Heights', 'Saint Elizabeths', 'Lincoln Heights', 'Kenilworth', 'Bellevue', 'Kalorama', 'Barnaby Woods', 'Gallaudet', 'Pleasant Plains', 'National Mall - West Potomac Park', 'Fort Davis', 'Benning', 'Brightwood', 'Potomac Heights', 'Columbia Heights', 'U Street Corridor', 'Chevy Chase', 'Buena Vista', 'Anacostia Naval Station - Boiling Air Force Base', 'Capitol Hill', 'Glover Park', 'Theodore Roosevelt Island', 'Shipley Terrace', 'Manor Park', 'The Palisades', 'Sixteenth Street Heights', 'Dupont Circle', 'Ledroit Park', 'Woodlands', 'Mahaning Heights', 'Brightwood Park', 'Woodley Park', 'Deanwood', 'Navy Yard', 'Adams Morgan', 'Brentwood', 'Shepherd Park', 'Douglas', 'Bloomingdale', 'Tenleytown', 'Petworth', 'Mount Pleasant', 'Foggy Bottom', 'Shipley Terrace', 'Knox Hill', 'Trinidad', 'Hillcrest', 'Carver', 'Colonial Villa

In [5]:
## Use shapely to check which Zillow neighborhood polygon contains each lon/lat.
## The liquor license data uses WaPo neighborhood names, and I have had issues
## working with its GeoJSON file, so the neighborhoods are re-encoded here using
## the Zillow neighborhood file.

# Build every polygon once up front; constructing the shape inside the row loop
# repeats identical work for every license record.
hood_polygons = [
    (feature['properties']['name'], shape(feature['geometry']))
    for feature in dcnb_zil['features']
]

# Start from the current column values so rows that fall inside no polygon keep
# whatever they already had (the empty-string placeholder on a fresh run).
hood_names = df_liq['new_hood'].tolist()

# Address the coordinates by column name rather than by position so the lookup
# does not silently break if the CSV column order changes.
for i, (lon, lat) in enumerate(zip(df_liq['lon'], df_liq['lat'])):
    point = Point(lon, lat)  # shapely points are (x, y) == (lon, lat)

    for hood_name, polygon in hood_polygons:
        # No early exit: if several polygons contain the point, the last one
        # in file order wins, matching the previous behaviour of this cell.
        if polygon.contains(point):
            hood_names[i] = hood_name

# One column assignment instead of a scalar write per row
df_liq['new_hood'] = hood_names

df_liq.to_csv("nightlife0816_newconde.csv", index = False) ## write the data so we don't have to re-run this every time

df_liq.head() ## making sure new_hood (neighborhoods) is populated

Unnamed: 0,license,year,name,address,lon,lat,quadrant,subhood,firstYr,op_yr,new_hood
0,ABRA001881,2008,Loeb's Restaurant,832 15TH ST NW,-77.033806,38.900575,NW,Downtown,2008,0,Downtown
1,ABRA001881,2009,Loeb's Restaurant,832 15TH ST NW,-77.033806,38.900575,NW,Downtown,2008,1,Downtown
2,ABRA001881,2010,Loeb's Restaurant,832 15TH ST NW,-77.033806,38.900575,NW,Downtown,2008,2,Downtown
3,ABRA005053,2008,Ivys Place,3520 CONNECTICUT AVE NW,-77.059302,38.936214,NW,Cleveland Park,2008,0,Cleveland Park
4,ABRA005053,2009,Ivys Place,3520 CONNECTICUT AVE NW,-77.059302,38.936214,NW,Cleveland Park,2008,1,Cleveland Park


In [39]:
# Reload the cached result of the neighborhood matching step
df_liq = pd.read_csv("nightlife0816_newconde.csv")

# Keep only the 2016 license records
df_liq_16 = df_liq[df_liq['year'].eq(2016)]

In [46]:
# Aggregate the license records to one row per (year, neighborhood)
hood_year_keys = ['year', 'new_hood']

# df_liq_1: count of records per neighborhood per year, used to map the total
# number of bars by neighborhood
df_liq_1 = df_liq.groupby(hood_year_keys, as_index=False).count()

# df_liq_2: same roll-up restricted to 2016 records of bars that have been
# open for less than 2 years (op_yr of 0 or 1)
recently_opened_2016 = df_liq[(df_liq['year'] == 2016) & (df_liq['op_yr'] <= 1)]
df_liq_2 = recently_opened_2016.groupby(hood_year_keys, as_index=False).count()

In [13]:
## Yearly record counts for Adams Morgan.
## Adams Morgan had its greatest number of operating restaurants/bars in 2012.
df_liq_1[df_liq_1['new_hood'] == 'Adams Morgan']

Unnamed: 0,year,new_hood,license,name,address,lon,lat,quadrant,subhood,firstYr,op_yr
0,2008,Adams Morgan,71,71,71,71,71,71,71,71,71
58,2009,Adams Morgan,71,71,71,71,71,71,71,71,71
117,2010,Adams Morgan,67,67,67,67,67,67,67,67,67
177,2011,Adams Morgan,73,73,73,73,73,73,73,73,73
241,2012,Adams Morgan,82,82,82,82,82,82,82,82,82
304,2013,Adams Morgan,79,79,79,79,79,79,79,79,79
368,2014,Adams Morgan,77,77,77,77,77,77,77,77,77
433,2015,Adams Morgan,75,75,75,75,75,75,75,75,75
500,2016,Adams Morgan,72,72,72,72,72,72,72,72,72


In [14]:
## Counts of recently opened bars (op_yr <= 1) for Adams Morgan.
## Since license data begins in 2008, the liq_2 dataset is only useful for years 2010 and later.
## NOTE(review): df_liq_2 is currently built with a `year == 2016` filter, so this
## lookup returns only the 2016 row; the multi-year output shown with this cell
## appears to come from an earlier run without that filter - confirm and re-run.
df_liq_2[df_liq_2['new_hood'] == 'Adams Morgan']

Unnamed: 0,year,new_hood,license,name,address,lon,lat,quadrant,subhood,firstYr,op_yr
0,2008,Adams Morgan,71,71,71,71,71,71,71,71,71
58,2009,Adams Morgan,71,71,71,71,71,71,71,71,71
117,2010,Adams Morgan,7,7,7,7,7,7,7,7,7
154,2011,Adams Morgan,15,15,15,15,15,15,15,15,15
200,2012,Adams Morgan,23,23,23,23,23,23,23,23,23
247,2013,Adams Morgan,18,18,18,18,18,18,18,18,18
286,2014,Adams Morgan,9,9,9,9,9,9,9,9,9
330,2015,Adams Morgan,9,9,9,9,9,9,9,9,9
380,2016,Adams Morgan,15,15,15,15,15,15,15,15,15


In [17]:
## Distribution of years in operation (op_yr) among the 2016 license records.
## Our data only exists from 2008, so op_yr tops out at 8 and that bucket is the largest.
## There are very few bars with op_yr == 7, i.e. that opened in 2009, the year of the financial crisis.
df_liq_16['op_yr'].value_counts()

8    467
0    164
2    159
1    112
5    112
4     87
6     80
3     69
7      9
Name: op_yr, dtype: int64

In [28]:
### Render a bare base map at the [lat, lon] used for DC throughout this notebook.
### The map is not assigned to a variable; this cell only previews it.
folium.Map(location = [38.9072,-77.0369], zoom_start = 12)

In [27]:
# Sanity check: draw the neighborhood polygons alone, with no data bound to them.
dc = folium.Map(location = [38.9072,-77.0369], zoom_start = 12)

# folium.Choropleth replaces Map.choropleth, which is deprecated and no longer
# available in current folium releases.
folium.Choropleth(geo_data=dcnb_zil).add_to(dc)
dc

In [49]:
### Choropleth of the df_liq_2 counts (2016 records with op_yr <= 1) by neighborhood
dc = folium.Map(location = [38.9072,-77.0369], zoom_start = 12)

### Add the neighborhood polygons, shaded by the 'license' count in df_liq_2.
### folium.Choropleth replaces Map.choropleth, which is deprecated and no longer
### available in current folium releases; the arguments are unchanged.
folium.Choropleth(
    geo_data = dcnb_zil,
    data = df_liq_2,
    key_on = 'feature.properties.name',   # GeoJSON field matched against the key column
    columns = ['new_hood','license'],     # [key column, value column]
    fill_color = 'RdYlGn',
    fill_opacity = 0.5,
    line_opacity = 1,
    legend_name = 'Number of businesses'
).add_to(dc)
folium.LayerControl().add_to(dc)

dc

In [50]:
# Pull the heat map inputs out of the license data as plain Python lists:
# latitude, longitude, and years in operation.
lats, lons, yrs = (df_liq[col].tolist() for col in ('lat', 'lon', 'op_yr'))



In [53]:
### Fresh base map for the heat map
dc = folium.Map(location = [38.9072,-77.0369], zoom_start = 12)

# Each heat map entry is (lat, lon, op_yr); the third value is passed as the point's weight.
# NOTE(review): lats/lons/yrs come from df_liq, which spans every year, so a
# license appears once per year it was active - confirm that is intended.
heat_layer = plugins.HeatMap(zip(lats, lons, yrs), radius = 8)

# add_child returns the map, so the cell displays it
dc.add_child(heat_layer)