This notebook takes the merged Yelp and Census tract data frame and counts the restaurants in each price range (including those with a missing price) for each census tract.
Using GEOID (the census tract identifier), it then merges in the household income distribution for each census tract (given as the share of households in each income range, as a fraction between 0 and 1) and calculates the number of households in each income range.


In [440]:
# Setting up modules
import os

import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd
from shapely.geometry import Point
import matplotlib.pylab as plt

# Data path
# Base directory that holds the input data; the income CSV is read from here.
# The author's local directory remains the default, but the location can be
# overridden without editing the notebook by setting RESTAURANT_SCENE_DATA_DIR.
# os.path.join(..., '') guarantees a trailing separator, so `path + filename`
# still produces a valid file path even if the override omits the final slash.
path = os.path.join(
    os.environ.get('RESTAURANT_SCENE_DATA_DIR', '/Users/andrewnorris/restaurant-scene-ads/'),
    '',
)

In [441]:
# Load the merged Yelp / census-tract shapefile (one row per restaurant, with the
# tract's GEOID and income-distribution columns already attached).
# NOTE(review): this file is read relative to the working directory, whereas the
# income CSV is read from `path` — confirm that both locations are intended.
bk = gpd.read_file('BK_Yelp_CensusTract.shp')
# Preview the first rows.
bk.head()

Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,Geography,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k,geometry
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186,POINT (-73.95496677 40.73320339)
1,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186,POINT (-73.95798000000001 40.73311)
2,J5y7oRfiGtxFDkYGKxFZkw,anella-brooklyn,Anella,0,408,4.0,$$,newamerican,40.73327,-73.95786,...,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186,POINT (-73.95786 40.73327000000001)
3,_FICyzFLQxR7N62I6qU94A,kanahashi-brooklyn,KanaHashi,0,86,4.5,$$,sushi|lounges,40.73263,-73.95482,...,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186,POINT (-73.95482 40.73263)
4,z5EpI39omxW6aK_X2uHi8A,lobster-joint-brooklyn,Lobster Joint,0,646,4.0,$$,seafood,40.7354,-73.95528,...,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186,POINT (-73.95528 40.7354)


In [442]:
# Re-wrap the loaded frame with the pandas DataFrame constructor.
# NOTE(review): presumably done to work with a plain DataFrame instead of a
# GeoDataFrame for the tabular steps that follow — confirm.
bk = pd.DataFrame(bk)

In [443]:
# List the available columns (GEOID and price are the ones used for the counts).
bk.columns

Index(['id', 'alias', 'name', 'is_closed', 'review_cou', 'rating', 'price',
       'categories', 'latitude', 'longitude', 'address', 'city', 'zipcode',
       'state', 'country', 'index_righ', 'GEOID', 'ID_1', 'Geography',
       'Households', '0-25k', '25k-50k', '50k-75k', '75k-100k', '100k-125k',
       '125k-150k', '> 150k', 'geometry'],
      dtype='object')

In [444]:
# Count restaurants per (census tract, price level) pair and keep the result as a
# one-column frame named 'price', indexed by GEOID and price.
bkcounts = (
    bk.groupby(['GEOID', 'price'])['price']
    .count()
    .to_frame()
)

In [445]:
# Inspect the long-format counts: one row per (GEOID, price) combination.
bkcounts.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
GEOID,price,Unnamed: 2_level_1
36047000000.0,$,6
36047000000.0,$$,11
36047000000.0,$$$,2
36047000000.0,MISSING,1
36047000000.0,$,5
36047000000.0,$$,8
36047000000.0,MISSING,12
36047000000.0,$,1
36047000000.0,$$,4
36047000000.0,$,1


In [447]:
# Reshape long -> wide: move the 'price' index level into columns (one column per
# price level, 0 where a tract has no restaurant at that level), then turn GEOID
# back into an ordinary column.
bkcounts = (
    bkcounts
    .unstack(level='price', fill_value=0)
    .reset_index()
)
bkcounts.head(10)

Unnamed: 0_level_0,GEOID,price,price,price,price,price
price,Unnamed: 1_level_1,$,$$,$$$,$$$$,MISSING
0,36047000000.0,6,11,2,0,1
1,36047000000.0,5,8,0,0,12
2,36047000000.0,1,4,0,0,0
3,36047000000.0,1,8,0,0,2
4,36047000000.0,5,7,0,0,2
5,36047000000.0,3,6,2,0,2
6,36047000000.0,13,10,2,0,9
7,36047000000.0,23,16,0,0,13
8,36047000000.0,0,3,0,0,1
9,36047000000.0,4,31,1,0,12


In [448]:
# Flatten the two-level column index left by unstack()/reset_index() into plain names:
#   ('GEOID', '')        -> 'GEOID'
#   ('price', '$$')      -> 'price_$$'
#   ('price', 'MISSING') -> 'MISSING'
# The names are derived from the labels actually present instead of being assigned
# positionally from a fixed six-item list, so data with a different set of price
# levels is still labelled correctly rather than raising a length mismatch or
# silently attaching the wrong name to a column.
# The isinstance guard keeps the cell safe to re-run once the columns are flat.
if isinstance(bkcounts.columns, pd.MultiIndex):
    bkcounts.columns = [
        top if not level else level if level == 'MISSING' else '_'.join((top, level))
        for top, level in bkcounts.columns
    ]
bkcounts.head(10)

Unnamed: 0,GEOID,price_$,price_$$,price_$$$,price_$$$$,MISSING
0,36047000000.0,6,11,2,0,1
1,36047000000.0,5,8,0,0,12
2,36047000000.0,1,4,0,0,0
3,36047000000.0,1,8,0,0,2
4,36047000000.0,5,7,0,0,2
5,36047000000.0,3,6,2,0,2
6,36047000000.0,13,10,2,0,9
7,36047000000.0,23,16,0,0,13
8,36047000000.0,0,3,0,0,1
9,36047000000.0,4,31,1,0,12


In [449]:
# Column listing of the restaurant-level frame (same check as performed earlier).
bk.columns

Index(['id', 'alias', 'name', 'is_closed', 'review_cou', 'rating', 'price',
       'categories', 'latitude', 'longitude', 'address', 'city', 'zipcode',
       'state', 'country', 'index_righ', 'GEOID', 'ID_1', 'Geography',
       'Households', '0-25k', '25k-50k', '50k-75k', '75k-100k', '100k-125k',
       '125k-150k', '> 150k', 'geometry'],
      dtype='object')

In [450]:
# Check the dtype of the merge key before joining with the income table.
bkcounts.GEOID.dtype

dtype('float64')

In [456]:
# The income table stores GEOID as int64, so cast the float GEOID here to the same
# type; otherwise the merge keys would not line up.
bkcounts = bkcounts.astype({'GEOID': np.int64})

In [457]:
# Load the per-tract household income distribution table from the data directory.
bkinc = pd.read_csv(path+'BK_hh_income_dis_count.csv')

In [458]:
# Preview the income table: tract ID, household total and one share column per income range.
bkinc.head()

Unnamed: 0,ID,Geography,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k
0,36047000100,"Census Tract 1, Kings County, New York",2184,0.212,0.099,0.087,0.184,0.066,0.07,0.281
1,36047000200,"Census Tract 2, Kings County, New York",377,0.263,0.305,0.207,0.069,0.125,0.0,0.032
2,36047000301,"Census Tract 3.01, Kings County, New York",1865,0.043,0.205,0.009,0.107,0.124,0.076,0.435
3,36047000501,"Census Tract 5.01, Kings County, New York",1772,0.188,0.042,0.094,0.056,0.095,0.117,0.407
4,36047000502,"Census Tract 5.02, Kings County, New York",1560,0.132,0.115,0.074,0.107,0.099,0.072,0.399


In [459]:
# Rename the tract identifier so both tables share the same merge key name.
bkinc = bkinc.rename(columns={'ID':'GEOID'})

In [460]:
# Join the price-level counts with the income distribution on the tract identifier.
# An inner join keeps only tracts that appear in both tables.
bk_price_inc = pd.merge(bkcounts, bkinc, how='inner', on='GEOID')

In [461]:
# Inspect the merged table: restaurant counts by price level next to the income shares.
bk_price_inc.head(10)

Unnamed: 0,GEOID,price_$,price_$$,price_$$$,price_$$$$,MISSING,Geography,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k
0,36047000100,6,11,2,0,1,"Census Tract 1, Kings County, New York",2184,0.212,0.099,0.087,0.184,0.066,0.07,0.281
1,36047000200,5,8,0,0,12,"Census Tract 2, Kings County, New York",377,0.263,0.305,0.207,0.069,0.125,0.0,0.032
2,36047000301,1,4,0,0,0,"Census Tract 3.01, Kings County, New York",1865,0.043,0.205,0.009,0.107,0.124,0.076,0.435
3,36047000501,1,8,0,0,2,"Census Tract 5.01, Kings County, New York",1772,0.188,0.042,0.094,0.056,0.095,0.117,0.407
4,36047000502,5,7,0,0,2,"Census Tract 5.02, Kings County, New York",1560,0.132,0.115,0.074,0.107,0.099,0.072,0.399
5,36047000700,3,6,2,0,2,"Census Tract 7, Kings County, New York",1781,0.07,0.089,0.076,0.072,0.079,0.084,0.531
6,36047000900,13,10,2,0,9,"Census Tract 9, Kings County, New York",1953,0.036,0.097,0.078,0.12,0.102,0.038,0.527
7,36047001100,23,16,0,0,13,"Census Tract 11, Kings County, New York",655,0.096,0.015,0.067,0.145,0.154,0.087,0.435
8,36047001300,0,3,0,0,1,"Census Tract 13, Kings County, New York",914,0.061,0.171,0.109,0.089,0.128,0.106,0.336
9,36047001500,4,31,1,0,12,"Census Tract 15, Kings County, New York",3185,0.208,0.122,0.085,0.076,0.113,0.08,0.314


In [462]:
# Prefix every income-range column with 'pct_' to mark it as a share of households,
# freeing the bare range labels for the household-count columns.
income_brackets = ['0-25k', '25k-50k', '50k-75k', '75k-100k',
                   '100k-125k', '125k-150k', '> 150k']
bk_price_inc = bk_price_inc.rename(
    columns={bracket: 'pct_' + bracket for bracket in income_brackets}
)

In [463]:
# Convert each income-range share into a household count:
#   num_<range> = Households * pct_<range>
# The ranges are processed in ascending order so the new columns are appended in
# that same order.
for bracket in ('0-25k', '25k-50k', '50k-75k', '75k-100k',
                '100k-125k', '125k-150k', '> 150k'):
    bk_price_inc['num_' + bracket] = (
        bk_price_inc['Households'] * bk_price_inc['pct_' + bracket]
    )

In [464]:
# Preview the result, now including the num_* household-count columns.
bk_price_inc.head()

Unnamed: 0,GEOID,price_$,price_$$,price_$$$,price_$$$$,MISSING,Geography,Households,pct_0-25k,pct_25k-50k,...,pct_100k-125k,pct_125k-150k,pct_> 150k,num_0-25k,num_25k-50k,num_50k-75k,num_75k-100k,num_100k-125k,num_125k-150k,num_> 150k
0,36047000100,6,11,2,0,1,"Census Tract 1, Kings County, New York",2184,0.212,0.099,...,0.066,0.07,0.281,463.008,216.216,190.008,401.856,144.144,152.88,613.704
1,36047000200,5,8,0,0,12,"Census Tract 2, Kings County, New York",377,0.263,0.305,...,0.125,0.0,0.032,99.151,114.985,78.039,26.013,47.125,0.0,12.064
2,36047000301,1,4,0,0,0,"Census Tract 3.01, Kings County, New York",1865,0.043,0.205,...,0.124,0.076,0.435,80.195,382.325,16.785,199.555,231.26,141.74,811.275
3,36047000501,1,8,0,0,2,"Census Tract 5.01, Kings County, New York",1772,0.188,0.042,...,0.095,0.117,0.407,333.136,74.424,166.568,99.232,168.34,207.324,721.204
4,36047000502,5,7,0,0,2,"Census Tract 5.02, Kings County, New York",1560,0.132,0.115,...,0.099,0.072,0.399,205.92,179.4,115.44,166.92,154.44,112.32,622.44


In [427]:
# Report (rows, columns) of the final table.
# NOTE(review): this cell's execution count is out of sequence with its neighbours,
# so the displayed shape may come from an earlier run — re-run to confirm.
bk_price_inc.shape

(634, 22)

In [465]:
# Write the final per-tract table to the working directory.
# NOTE(review): the row index is written as an unnamed first column because `index`
# is left at its default; downstream readers need to account for that.
bk_price_inc.to_csv('BK_incdist_pricefreq.csv')
