This notebook takes the merged Yelp and Census Tract data frame and calculates the frequency of restaurant counts for each price range (including those Missing) for each Census tract. 
Using GEOID (census tract identifier), I merged household income distribution data for each census tract (in percentages), then calculate the actual number of households for each income range.


In [2]:
# Setting up modules
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd
from shapely.geometry import Point
import matplotlib.pylab as plt

# Data path 
path = '/Users/andrewnorris/restaurant-scene-ads/'

In [14]:
bk = gpd.read_file('/Users/andrewnorris/restaurant-scene-ads/data/Yelp/BK/BK_Yelp_CensusTract_NTA.shp')
bk.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,NTAName,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k,geometry
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95497 40.73320)
1,Swjm9no7DRqhThLlf0EHng,sama-street-brooklyn-2,Sama Street,0,58,4.5,$$,cocktailbars|panasian|tapasmallplates,40.73287,-73.95448,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95448 40.73287)
2,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95798 40.73311)
3,L9SuMN3UsGipopWOe3pr9w,chiko-brooklyn-2,Chiko,0,36,5.0,MISSING,japanese|sushi,40.7319,-73.95422,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95422 40.73190)
4,vyKBwzRdNX4yiJDIFv37iw,oxomoco-brooklyn-2,Oxomoco,0,247,4.0,$$$,mexican,40.72991,-73.95548,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95548 40.72991)


In [15]:
bk = pd.DataFrame(bk)

In [16]:
bk.columns

Index(['id', 'alias', 'name', 'is_closed', 'review_cou', 'rating', 'price',
       'categories', 'latitude', 'longitude', 'address', 'city', 'zipcode',
       'state', 'country', 'index_righ', 'NTACode', 'NTAName', 'Households',
       '0-25k', '25k-50k', '50k-75k', '75k-100k', '100k-125k', '125k-150k',
       '> 150k', 'geometry'],
      dtype='object')

In [17]:
bkcounts = pd.DataFrame(bk.groupby(['NTACode', 'price'])['price'].count())

In [18]:
bkcounts.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
NTACode,price,Unnamed: 2_level_1
BK09,$,29
BK09,$$,46
BK09,$$$,6
BK09,MISSING,16
BK17,$,35
BK17,$$,54
BK17,$$$,10
BK17,$$$$,2
BK17,MISSING,21
BK19,$,22


In [19]:
bkcounts = bkcounts.unstack(level='price', fill_value=0).reset_index()
bkcounts.head(10)

Unnamed: 0_level_0,NTACode,price,price,price,price,price
price,Unnamed: 1_level_1,$,$$,$$$,$$$$,MISSING
0,BK09,29,46,6,0,16
1,BK17,35,54,10,2,21
2,BK19,22,17,4,1,14
3,BK21,24,12,0,0,14
4,BK23,6,4,2,0,4
5,BK25,29,41,10,1,24
6,BK26,17,3,0,0,12
7,BK27,16,22,1,0,14
8,BK28,79,49,1,0,50
9,BK29,42,29,1,1,31


In [20]:
bkcounts.columns = ['_'.join(col) for col in bkcounts.columns]
bkcounts.columns = ['NTACode','price_$', 'price_$$', 'price_$$$', 'price_$$$$','MISSING']
bkcounts.head(10)

Unnamed: 0,NTACode,price_$,price_$$,price_$$$,price_$$$$,MISSING
0,BK09,29,46,6,0,16
1,BK17,35,54,10,2,21
2,BK19,22,17,4,1,14
3,BK21,24,12,0,0,14
4,BK23,6,4,2,0,4
5,BK25,29,41,10,1,24
6,BK26,17,3,0,0,12
7,BK27,16,22,1,0,14
8,BK28,79,49,1,0,50
9,BK29,42,29,1,1,31


In [22]:
bkcounts.columns

Index(['NTACode', 'price_$', 'price_$$', 'price_$$$', 'price_$$$$', 'MISSING'], dtype='object')

In [26]:
bkcounts.NTACode.dtype

dtype('O')

In [27]:
#bkcounts['GEOID'] = bkcounts['GEOID'].astype(np.int64) #bkinc data frame GEOID is int64 type - change for merging

In [28]:
#bkinc = pd.read_csv(path+'BK_hh_income_dis_count.csv')

In [29]:
#bkinc.head()

In [459]:
#bkinc = bkinc.rename(columns={'ID':'GEOID'})

In [30]:
bk_price_inc = bkcounts.merge(bk, how='inner', on='NTACode')

In [31]:
bk_price_inc.head(10)

Unnamed: 0,NTACode,price_$,price_$$,price_$$$,price_$$$$,MISSING,id,alias,name,is_closed,...,NTAName,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k,geometry
0,BK09,29,46,6,0,16,5Fz1WlY3lxy8gJFs8s_1Dw,the-binc-brooklyn,The Binc,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99231 40.69927)
1,BK09,29,46,6,0,16,_N92SH1Zi-Hbxn_bNHFz0Q,noodle-pudding-brooklyn,Noodle Pudding,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99203 40.69979)
2,BK09,29,46,6,0,16,j-a8HUcsBJXHV2IfF786jw,kogane-ramen-brooklyn-heights,Kogane Ramen,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99268 40.69861)
3,BK09,29,46,6,0,16,xhuDOqY9G5mPuSqBnMCaYw,henrys-end-brooklyn-4,Henry's End,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99257 40.69869)
4,BK09,29,46,6,0,16,OwH9eXxsBeeYl8s7TXUBiA,san-blas-brooklyn,San Blas,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99371 40.69452)
5,BK09,29,46,6,0,16,HcWm-9ZZu2_brnuRxN5Gpg,tutt-cafe-brooklyn,Tutt Cafe,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99322 40.70019)
6,BK09,29,46,6,0,16,njKwUg6oYDEEtLuunTmgbg,heights-falafel-brooklyn,Heights Falafel,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99271 40.69855)
7,BK09,29,46,6,0,16,3OKf1QQnDhjkBKVg0eMllQ,sushi-gallery-brooklyn,Sushi Gallery,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99337 40.69760)
8,BK09,29,46,6,0,16,lqkqsHe_2KN35LE5JIaYNg,henry-street-ale-house-brooklyn,Henry Street Ale House,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99235 40.69911)
9,BK09,29,46,6,0,16,wwgLgUlsM4qlSG3aA7o6Ig,b-good-brooklyn-3,B.GOOD,0,...,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,0.069906,0.110751,0.093747,0.075574,0.426901,POINT (-73.99366 40.69483)


In [32]:
bk_price_inc = bk_price_inc.rename(columns={"0-25k": "pct_0-25k", "25k-50k": "pct_25k-50k", '50k-75k':'pct_50k-75k', \
                                           '75k-100k':'pct_75k-100k', '100k-125k':'pct_100k-125k', '125k-150k':'pct_125k-150k',\
                                           '> 150k':'pct_> 150k'})

In [33]:
bk_price_inc['num_0-25k'] = bk_price_inc['Households']*bk_price_inc['pct_0-25k']
bk_price_inc['num_25k-50k'] = bk_price_inc['Households']*bk_price_inc['pct_25k-50k']
bk_price_inc['num_50k-75k'] = bk_price_inc['Households']*bk_price_inc['pct_50k-75k']
bk_price_inc['num_75k-100k'] = bk_price_inc['Households']*bk_price_inc['pct_75k-100k']
bk_price_inc['num_100k-125k'] = bk_price_inc['Households']*bk_price_inc['pct_100k-125k']
bk_price_inc['num_125k-150k'] = bk_price_inc['Households']*bk_price_inc['pct_125k-150k']
bk_price_inc['num_> 150k'] = bk_price_inc['Households']*bk_price_inc['pct_> 150k']

In [34]:
bk_price_inc.head()

Unnamed: 0,NTACode,price_$,price_$$,price_$$$,price_$$$$,MISSING,id,alias,name,is_closed,...,pct_125k-150k,pct_> 150k,geometry,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,BK09,29,46,6,0,16,5Fz1WlY3lxy8gJFs8s_1Dw,the-binc-brooklyn,The Binc,0,...,0.075574,0.426901,POINT (-73.99231 40.69927),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0
1,BK09,29,46,6,0,16,_N92SH1Zi-Hbxn_bNHFz0Q,noodle-pudding-brooklyn,Noodle Pudding,0,...,0.075574,0.426901,POINT (-73.99203 40.69979),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0
2,BK09,29,46,6,0,16,j-a8HUcsBJXHV2IfF786jw,kogane-ramen-brooklyn-heights,Kogane Ramen,0,...,0.075574,0.426901,POINT (-73.99268 40.69861),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0
3,BK09,29,46,6,0,16,xhuDOqY9G5mPuSqBnMCaYw,henrys-end-brooklyn-4,Henry's End,0,...,0.075574,0.426901,POINT (-73.99257 40.69869),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0
4,BK09,29,46,6,0,16,OwH9eXxsBeeYl8s7TXUBiA,san-blas-brooklyn,San Blas,0,...,0.075574,0.426901,POINT (-73.99371 40.69452),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0


In [36]:
bk_price_inc = pd.DataFrame(bk_price_inc.groupby(['NTACode', 'price'])['price'].count())

In [37]:
bk_price_inc.shape

(205, 1)

In [38]:
bk_price_inc.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
NTACode,price,Unnamed: 2_level_1
BK09,$,29
BK09,$$,46
BK09,$$$,6
BK09,MISSING,16
BK17,$,35
BK17,$$,54
BK17,$$$,10
BK17,$$$$,2
BK17,MISSING,21
BK19,$,22


In [465]:
bk_price_inc.to_csv('BK_incdist_pricefreq_NTA.csv')
