This notebook takes the merged Yelp and Census Tract data frame and counts the restaurants in each price range (including those whose price is missing) within each Neighborhood Tabulation Area (NTA, identified by `NTACode`).
Household income distribution data had been merged in using GEOID (census tract identifier); it is stored as the fraction of households in each income range, and this notebook converts those fractions into the actual number of households in each income range.


In [46]:
# Setting up modules
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd
from shapely.geometry import Point
import matplotlib.pylab as plt

# Data path: absolute root directory on the author's machine (note the trailing slash).
# NOTE(review): machine-specific — adjust before running elsewhere.
path = '/Users/andrewnorris/restaurant-scene-ads/'

In [47]:
# Load the Brooklyn Yelp listings shapefile (restaurant points carrying NTA and
# household-income attributes). The location is built from the `path` root set
# in the setup cell instead of repeating the absolute path; `path` already ends
# with a slash, so plain concatenation yields the same file location.
bk = gpd.read_file(path + 'data/Yelp/BK/BK_Yelp_CensusTract_NTA.shp')
bk.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,NTAName,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k,geometry
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95497 40.73320)
1,Swjm9no7DRqhThLlf0EHng,sama-street-brooklyn-2,Sama Street,0,58,4.5,$$,cocktailbars|panasian|tapasmallplates,40.73287,-73.95448,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95448 40.73287)
2,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95798 40.73311)
3,L9SuMN3UsGipopWOe3pr9w,chiko-brooklyn-2,Chiko,0,36,5.0,MISSING,japanese|sushi,40.7319,-73.95422,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95422 40.73190)
4,vyKBwzRdNX4yiJDIFv37iw,oxomoco-brooklyn-2,Oxomoco,0,247,4.0,$$$,mexican,40.72991,-73.95548,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95548 40.72991)


In [48]:
# Re-wrap the object returned by gpd.read_file as a plain pandas DataFrame.
# The 'geometry' column is still present afterwards (see the column listing that follows).
bk = pd.DataFrame(bk)

In [49]:
# List the column names of bk to see which fields are available.
bk.columns

Index(['id', 'alias', 'name', 'is_closed', 'review_cou', 'rating', 'price',
       'categories', 'latitude', 'longitude', 'address', 'city', 'zipcode',
       'state', 'country', 'index_righ', 'NTACode', 'NTAName', 'Households',
       '0-25k', '25k-50k', '50k-75k', '75k-100k', '100k-125k', '125k-150k',
       '> 150k', 'geometry'],
      dtype='object')

In [50]:
# Tally restaurants per (NTA, price tier): one row for every NTACode/price pair
# that occurs in the data, with the tally held in a single 'price' column.
bkcounts = (
    bk.groupby(['NTACode', 'price'])['price']
    .count()
    .to_frame()
)
bkcounts.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
NTACode,price,Unnamed: 2_level_1
BK09,$,29
BK09,$$,46
BK09,$$$,6
BK09,MISSING,16
BK17,$,35
BK17,$$,54
BK17,$$$,10
BK17,$$$$,2
BK17,MISSING,21
BK19,$,22


In [51]:
# Pivot the price tier out of the row index into columns (one per tier),
# writing 0 where an NTA has no restaurants in a tier ...
bkcounts = bkcounts.unstack('price', fill_value=0)
# ... then turn NTACode back into an ordinary column.
bkcounts = bkcounts.reset_index()
bkcounts.head(10)

Unnamed: 0_level_0,NTACode,price,price,price,price,price
price,Unnamed: 1_level_1,$,$$,$$$,$$$$,MISSING
0,BK09,29,46,6,0,16
1,BK17,35,54,10,2,21
2,BK19,22,17,4,1,14
3,BK21,24,12,0,0,14
4,BK23,6,4,2,0,4
5,BK25,29,41,10,1,24
6,BK26,17,3,0,0,12
7,BK27,16,22,1,0,14
8,BK28,79,49,1,0,50
9,BK29,42,29,1,1,31


In [52]:
# Flatten the two-level column header left by unstack()/reset_index() into
# single strings. Labels are derived from the header itself rather than being
# assigned by position, so an absent or reordered price tier cannot be
# mislabelled or trigger a length-mismatch error.
# Naming rule:
#   ('NTACode', '')      -> 'NTACode'   (reset_index fills the lower level with '')
#   ('price', 'MISSING') -> 'MISSING'   (kept un-prefixed, as before)
#   ('price', '$$')      -> 'price_$$'  (every other tier gets the 'price_' prefix)
# The MultiIndex guard makes the cell a no-op if it is re-run after flattening.
if isinstance(bkcounts.columns, pd.MultiIndex):
    bkcounts.columns = [
        outer if inner == '' else inner if inner == 'MISSING' else outer + '_' + inner
        for outer, inner in bkcounts.columns
    ]
bkcounts.head(10)

Unnamed: 0,NTACode,price_$,price_$$,price_$$$,price_$$$$,MISSING
0,BK09,29,46,6,0,16
1,BK17,35,54,10,2,21
2,BK19,22,17,4,1,14
3,BK21,24,12,0,0,14
4,BK23,6,4,2,0,4
5,BK25,29,41,10,1,24
6,BK26,17,3,0,0,12
7,BK27,16,22,1,0,14
8,BK28,79,49,1,0,50
9,BK29,42,29,1,1,31


In [53]:
# Number of distinct NTA codes in the wide price-count table.
bkcounts['NTACode'].nunique()

51

In [54]:
# Preview the restaurant-level frame again (first five rows).
bk.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,NTAName,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k,geometry
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95497 40.73320)
1,Swjm9no7DRqhThLlf0EHng,sama-street-brooklyn-2,Sama Street,0,58,4.5,$$,cocktailbars|panasian|tapasmallplates,40.73287,-73.95448,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95448 40.73287)
2,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95798 40.73311)
3,L9SuMN3UsGipopWOe3pr9w,chiko-brooklyn-2,Chiko,0,36,5.0,MISSING,japanese|sushi,40.7319,-73.95422,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95422 40.73190)
4,vyKBwzRdNX4yiJDIFv37iw,oxomoco-brooklyn-2,Oxomoco,0,247,4.0,$$$,mexican,40.72991,-73.95548,...,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95548 40.72991)


In [55]:
# List bk's column names again; the income-band columns shown here are renamed next.
bk.columns

Index(['id', 'alias', 'name', 'is_closed', 'review_cou', 'rating', 'price',
       'categories', 'latitude', 'longitude', 'address', 'city', 'zipcode',
       'state', 'country', 'index_righ', 'NTACode', 'NTAName', 'Households',
       '0-25k', '25k-50k', '50k-75k', '75k-100k', '100k-125k', '125k-150k',
       '> 150k', 'geometry'],
      dtype='object')

In [56]:
# Prefix each household-income band column with 'pct_' so the share columns
# are distinguishable from the household-count columns derived from them.
bk = bk.rename(columns={
    band: 'pct_' + band
    for band in ('0-25k', '25k-50k', '50k-75k', '75k-100k',
                 '100k-125k', '125k-150k', '> 150k')
})

In [57]:
# For every income band, multiply the household total by that band's share
# and store the product in a matching 'num_<band>' column.
for band in ('0-25k', '25k-50k', '50k-75k', '75k-100k',
             '100k-125k', '125k-150k', '> 150k'):
    bk['num_' + band] = bk['Households'] * bk['pct_' + band]
bk.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,pct_125k-150k,pct_> 150k,geometry,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,0.086914,0.182642,POINT (-73.95497 40.73320),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
1,Swjm9no7DRqhThLlf0EHng,sama-street-brooklyn-2,Sama Street,0,58,4.5,$$,cocktailbars|panasian|tapasmallplates,40.73287,-73.95448,...,0.086914,0.182642,POINT (-73.95448 40.73287),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
2,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,0.086914,0.182642,POINT (-73.95798 40.73311),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
3,L9SuMN3UsGipopWOe3pr9w,chiko-brooklyn-2,Chiko,0,36,5.0,MISSING,japanese|sushi,40.7319,-73.95422,...,0.086914,0.182642,POINT (-73.95422 40.73190),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
4,vyKBwzRdNX4yiJDIFv37iw,oxomoco-brooklyn-2,Oxomoco,0,247,4.0,$$$,mexican,40.72991,-73.95548,...,0.086914,0.182642,POINT (-73.95548 40.72991),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0


In [58]:
# Keep only the NTA identifiers, household total, income shares, geometry and
# income counts (in that column order) as an independent copy of bk.
_bands = ('0-25k', '25k-50k', '50k-75k', '75k-100k',
          '100k-125k', '125k-150k', '> 150k')
_keep = (
    ['NTACode', 'NTAName', 'Households']
    + ['pct_' + b for b in _bands]
    + ['geometry']
    + ['num_' + b for b in _bands]
)
bkntainc = bk.loc[:, _keep].copy()
bkntainc.head()

Unnamed: 0,NTACode,NTAName,Households,pct_0-25k,pct_25k-50k,pct_50k-75k,pct_75k-100k,pct_100k-125k,pct_125k-150k,pct_> 150k,geometry,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95497 40.73320),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
1,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95448 40.73287),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
2,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95798 40.73311),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
3,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95422 40.73190),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
4,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95548 40.72991),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0


In [59]:
# Collapse to one row per NTA by keeping the first row seen for each NTACode.
# The surviving 'geometry' value is therefore that first row's restaurant
# point, not a shape describing the NTA itself.
bkntainc = bkntainc.drop_duplicates(subset=['NTACode'], keep='first')
bkntainc.head()

Unnamed: 0,NTACode,NTAName,Households,pct_0-25k,pct_25k-50k,pct_50k-75k,pct_75k-100k,pct_100k-125k,pct_125k-150k,pct_> 150k,geometry,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,BK76,Greenpoint,16223,0.20021,0.136843,0.151883,0.13598,0.105529,0.086914,0.182642,POINT (-73.95497 40.73320),3248.0,2220.0,2464.0,2206.0,1712.0,1410.0,2963.0
183,BK90,East Williamsburg,16075,0.284044,0.165723,0.12367,0.116579,0.087403,0.06507,0.157512,POINT (-73.94074 40.72103),4566.0,2664.0,1988.0,1874.0,1405.0,1046.0,2532.0
348,BK77,Bushwick North,19367,0.290029,0.207415,0.169515,0.106108,0.084164,0.053441,0.089327,POINT (-73.93327 40.70759),5617.0,4017.0,3283.0,2055.0,1630.0,1035.0,1730.0
602,BK78,Bushwick South,26616,0.349451,0.207582,0.152991,0.101743,0.068643,0.044222,0.075368,POINT (-73.93723 40.70914),9301.0,5525.0,4072.0,2708.0,1827.0,1177.0,2006.0
738,BK73,North Side-South Side,23862,0.192566,0.167337,0.113654,0.105859,0.088718,0.070321,0.261546,POINT (-73.95700 40.72119),4595.0,3993.0,2712.0,2526.0,2117.0,1678.0,6241.0


In [62]:
# Join the per-NTA price-tier counts to the per-NTA income table on NTACode,
# keeping only NTAs that appear in both.
bk_price_inc = pd.merge(bkcounts, bkntainc, on='NTACode', how='inner')
bk_price_inc.head()

Unnamed: 0,NTACode,price_$,price_$$,price_$$$,price_$$$$,MISSING,NTAName,Households,pct_0-25k,pct_25k-50k,...,pct_125k-150k,pct_> 150k,geometry,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,BK09,29,46,6,0,16,Brooklyn Heights-Cobble Hill,11115,0.11507,0.108052,...,0.075574,0.426901,POINT (-73.99231 40.69927),1279.0,1201.0,777.0,1231.0,1042.0,840.0,4745.0
1,BK17,35,54,10,2,21,Sheepshead Bay-Gerritsen Beach-Manhattan Beach,26150,0.253805,0.2026,...,0.064704,0.127801,POINT (-73.95266 40.57709),6637.0,5298.0,3965.0,2832.0,2384.0,1692.0,3342.0
2,BK19,22,17,4,1,14,Brighton Beach,14557,0.395823,0.185272,...,0.03579,0.109569,POINT (-73.96427 40.57672),5762.0,2697.0,2311.0,990.0,681.0,521.0,1595.0
3,BK21,24,12,0,0,14,Seagate-Coney Island,11236,0.478907,0.203364,...,0.02136,0.04717,POINT (-73.98381 40.57881),5381.0,2285.0,1447.0,711.0,642.0,240.0,530.0
4,BK23,6,4,2,0,4,West Brighton,8401,0.377217,0.21307,...,0.031663,0.063802,POINT (-73.98065 40.57919),3169.0,1790.0,1250.0,962.0,428.0,266.0,536.0


In [63]:
# Dimension check: (rows, columns) of the merged table.
bk_price_inc.shape

(51, 23)

In [64]:
# Save the NTA-level price-count / income table as CSV.
# The filename is relative, so the file lands in the current working directory;
# to_csv also writes the row index as an unnamed first column by default.
bk_price_inc.to_csv('BK_YelpPriceFreq_NTAIncDist.csv')
