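This notebook merges American Community Survey (ACS) household-income data with the 2019 TIGER/Line shapefile of census tracts in New York, and then spatially joins the georeferenced Yelp data to those tracts. This enables further merging and cross-analysis of the Yelp data with tract-level ACS data.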

In [2]:
# Setting up modules
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

# Folder holding every input and output file, relative to the notebook.
# Keep the trailing slash: file names are appended to it with `+`.
path = "./data/"

In [17]:
# Import data
# Statewide census-tract polygons (TIGER/Line 2019, state FIPS 36).
NY_state_census_tracts = gpd.read_file(path + "tl_2019_36_tract.shp")

# Georeferenced Yelp listings for Manhattan (MN) and Brooklyn (BK).
MN_Yelp = gpd.read_file(path + "MN_Yelp.shp")
BK_Yelp = gpd.read_file(path + "BK_Yelp.shp")

# Brooklyn household-income distribution by tract.
# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
BK_hh_income_dis_pct = pd.read_csv(path + 'BK_hh_income_dis_pct.csv')

In [18]:
# Preview the first five rows of the Brooklyn income table.
BK_hh_income_dis_pct.head(5)

Unnamed: 0,ID,Geography,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k
0,36047000100,"Census Tract 1, Kings County, New York",2184,0.212,0.099,0.087,0.184,0.066,0.07,0.281
1,36047000200,"Census Tract 2, Kings County, New York",377,0.263,0.305,0.207,0.069,0.125,0.0,0.032
2,36047000301,"Census Tract 3.01, Kings County, New York",1865,0.043,0.205,0.009,0.107,0.124,0.076,0.435
3,36047000501,"Census Tract 5.01, Kings County, New York",1772,0.188,0.042,0.094,0.056,0.095,0.117,0.407
4,36047000502,"Census Tract 5.02, Kings County, New York",1560,0.132,0.115,0.074,0.107,0.099,0.072,0.399


In [22]:
# Filter for NYC (Manhattan and Brooklyn) by county FIPS code and keep only
# the tract identifier and its geometry.
tract_columns = ["GEOID", "geometry"]
is_brooklyn = NY_state_census_tracts["COUNTYFP"] == '047'   # Kings County
is_manhattan = NY_state_census_tracts["COUNTYFP"] == '061'  # New York County

# .copy() detaches each subset from the statewide frame, so the column
# assignment below acts on an independent object instead of a slice
# (which would trigger pandas' SettingWithCopyWarning).
BK = NY_state_census_tracts.loc[is_brooklyn, tract_columns].copy()
MN = NY_state_census_tracts.loc[is_manhattan, tract_columns].copy()

# Converting type of column so GEOID can be matched against the numeric ACS ID.
# An explicit 64-bit integer is used: the 11-digit GEOIDs do not fit in the
# 32-bit integer that a plain `int` cast can resolve to on some platforms,
# and an integer key is safer to join on (and to write out) than a float.
BK["GEOID"] = BK["GEOID"].astype("int64")

# Merging (inner join: tracts without a matching ACS row are dropped)
BK = BK.merge(BK_hh_income_dis_pct, left_on = 'GEOID', right_on = 'ID')

In [24]:
# Are the CRSes identical? The spatial join below assumes each Yelp layer
# shares the CRS of its tract layer.
# Only the last expression of a cell is displayed, so both comparisons are
# folded into one result rather than silently discarding the Manhattan check.
(MN.crs == MN_Yelp.crs) and (BK.crs == BK_Yelp.crs)

True

In [31]:
# Spatial join: attach to every Yelp record the census tract it falls within.
# NOTE(review): `op=` is the keyword this notebook was written against; newer
# geopandas releases rename it to `predicate=` -- switch when upgrading.
MN_Yelp_joined = gpd.sjoin(MN_Yelp, MN, how = 'inner', op = 'within')
BK_Yelp_joined = gpd.sjoin(BK_Yelp, BK, how = 'inner', op = 'within')

# Only a cell's last expression is rendered automatically, so the Manhattan
# preview must be displayed explicitly or it is silently thrown away.
display(MN_Yelp_joined.head(2))
BK_Yelp_joined.head(2)


Unnamed: 0,id,alias,name,is_closed,review_cou,rating,price,categories,latitude,longitude,...,ID,Geography,Households,0-25k,25k-50k,50k-75k,75k-100k,100k-125k,125k-150k,> 150k
0,6gzQLjzJk25ePm_JS7ZAug,esme-brooklyn-2,Esme,0,328,4.5,$$,newamerican|cocktailbars,40.733203,-73.954967,...,36047056300,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186
2,utM-5navObsVA5sCRHobzA,madre-brooklyn-2,Madre,0,38,5.0,MISSING,newamerican,40.73311,-73.95798,...,36047056300,"Census Tract 563, Kings County, New York",2250,0.285,0.114,0.13,0.116,0.084,0.084,0.186


In [32]:
# Sanity check: (rows, columns) of the Brooklyn spatial-join result.
BK_Yelp_joined.shape

(5824, 28)

In [33]:
# Write both joined layers back to disk as ESRI Shapefiles.
## encoding='utf-8' must be passed explicitly, otherwise writing throws a codec error.
outputs = (
    (MN_Yelp_joined, 'MN_Yelp_CensusTract.shp'),
    (BK_Yelp_joined, 'BK_Yelp_CensusTract.shp'),
)
for joined_layer, file_name in outputs:
    joined_layer.to_file(path + file_name, driver='ESRI Shapefile', encoding='utf-8')
