# Clean Census Block Data
Step required to make the Probabilistic Housing Unit Allocation work.

Census Block Data needs to be combined with Census Place and PUMA data.

Initial Block Data provides state, county, tract, and block group information but does not identify the Census Place (City) or the PUMA (Public Use Microdata Area). 

Census Blocks are used to define both Place Boundaries and PUMA boundaries. The geographies should be "nested" without any overlap between polygons.
    

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pysal as ps
import math as math
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import descartes

import folium as fm # folium has more dynamic maps - but requires internet connection

import os # For saving output to path

  from .sqlite import head_to_sql, start_sql


In [2]:
# Display versions being used - important information for replication
import sys
print("Python Version     ", sys.version)
print("pysal version:     ", ps.__version__)
print("numpy version:     ", np.__version__)
print("geopandas version: ", gpd.__version__)
print("pandas version:    ", pd.__version__)
print("shapely version:   ", shapely.__version__)
# print("descartes version:   ", descartes.__version__)  1.1.0
print("folium version:    ", fm.__version__)

Python Version      3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]
pysal version:      2.0.0
numpy version:      1.16.4
geopandas version:  0.5.0
pandas version:     0.24.2
shapely version:    1.6.4.post1
folium version:     0.9.1


In [3]:
# Store Program Name for output files to have the same name
programname = "IN-CORE_1av2_Joplin_CleanBlockData_2019-07-10"
# Make directory to save output
if not os.path.exists(programname):
    os.mkdir(programname)

## Read in Census Block Data
Census Blocks provide an estimate of how many residiential address points (housing units) should be located in each block.

In [4]:
census_blocks_shp = '../../SourceData/www2.census.gov/geo/tiger/TIGER2010BLKPOPHU/tabblock2010_29_pophu/tabblock2010_29_pophu.shp'
census_blocks_gdf = gpd.read_file(census_blocks_shp)
census_blocks_gdf.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry
0,29,1,950100,1112,290019501001112,N,1,2,"POLYGON ((-92.504606 40.275872, -92.5046809999..."
1,29,1,950100,1071,290019501001071,N,4,13,"POLYGON ((-92.520138 40.330393, -92.520079 40...."
2,29,1,950100,1131,290019501001131,N,0,0,"POLYGON ((-92.582551 40.249861, -92.582589 40...."
3,29,1,950100,1109,290019501001109,N,4,5,"POLYGON ((-92.487843 40.258084, -92.487934 40...."
4,29,1,950100,1033,290019501001033,N,2,4,"POLYGON ((-92.62551099999999 40.3159, -92.6255..."


In [5]:
census_blocks_gdf.crs

{'init': 'epsg:4269'}

#### Note 
EPSG 4269 uses NAD 83 which will have slightly different lat lon points when compared to EPSG 4326 which uses WGS 84.

In [6]:
# Select Counties for Jasper County (29097) and Newton County (29145)
countyselect = ["145","097"]
census_blocks_gdf['CountySelect'] = np.where(census_blocks_gdf['COUNTYFP10'].isin(countyselect),1,0)
pd.crosstab(index=census_blocks_gdf['CountySelect'], columns="count")

col_0,count
CountySelect,Unnamed: 1_level_1
0,333944
1,9621


In [7]:
census_blocks_joplin_gdf = census_blocks_gdf[census_blocks_gdf['CountySelect'] == 1]
census_blocks_joplin_gdf.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,CountySelect
148552,29,97,12100,1047,290970121001047,N,2,4,"POLYGON ((-94.13775 37.32550000000001, -94.138...",1
148553,29,97,12100,1050,290970121001050,N,3,7,"POLYGON ((-94.137637 37.328675, -94.119315 37....",1
148554,29,97,12100,1094,290970121001094,N,4,13,"POLYGON ((-94.214761 37.293836, -94.230751 37....",1
148555,29,97,12100,1093,290970121001093,N,5,5,"POLYGON ((-94.197294 37.27723599999999, -94.19...",1
148556,29,97,12100,1130,290970121001130,N,4,9,"POLYGON ((-94.151792 37.276275, -94.1519139999...",1


In [8]:
# Add Representative Point
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index, 'rppnt4269'] = census_blocks_joplin_gdf['geometry'].representative_point()
census_blocks_joplin_gdf['rppnt4269'].label = "Representative Point EPSG 4269 (WKT)"
census_blocks_joplin_gdf['rppnt4269'].notes = "Internal Point within census block poly EPSG 4269"

# Add Column that Duplicates Polygon Geometry - allows for swithcing between point and polygon geometries for spatial join
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index, 'blk104269'] = census_blocks_joplin_gdf['geometry']
census_blocks_joplin_gdf['blk104269'].label = "2010 Census Block Polygon EPSG 4269 (WKT)"
census_blocks_joplin_gdf['blk104269'].notes = "Polygon Shape Points for 2010 Census Block EPSG 4269"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


## Add Address Point Count that Includes Group Quarters

In [9]:
census_addresspoints = 'IN-CORE_2bv1_Joplin_BlockAPCounts_2019-06-10/IN-CORE_2bv1_Joplin_BlockAPCounts_2019-06-10.csv'
census_addresspoints = pd.read_csv(census_addresspoints)
census_addresspoints.head()

Unnamed: 0,blockid,apcount,pop10,gqpop10
0,290970101001000,3,5,0
1,290970101001001,1,1,0
2,290970101001002,2,9,0
3,290970101001003,21,40,0
4,290970101001006,6,12,0


In [10]:
# Merge ID - Block ID - Needs to be a string
census_addresspoints['blockid'].dtype

dtype('int64')

In [11]:
# Convert blockid Parcel ID to a String
census_addresspoints['BLOCKID10'] = census_addresspoints['blockid'].apply(lambda x : str((x)))
census_addresspoints['BLOCKID10'].dtype

dtype('O')

In [12]:
census_blocks_joplin_gdf['BLOCKID10'].dtype

dtype('O')

In [13]:
# Merge Address Point Count with Block Data 
census_blocks_joplin_gdf = pd.merge(census_blocks_joplin_gdf, census_addresspoints,
                                  left_on='BLOCKID10', right_on='BLOCKID10', how='left')
census_blocks_joplin_gdf[['BLOCKID10']].describe()

Unnamed: 0,BLOCKID10
count,9621
unique,9621
top,290970117005022
freq,1


In [14]:
census_blocks_joplin_gdf.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,CountySelect,rppnt4269,blk104269,blockid,apcount,pop10,gqpop10
0,29,97,12100,1047,290970121001047,N,2,4,"POLYGON ((-94.13775 37.32550000000001, -94.138...",1,POINT (-94.14712692867573 37.320531),"POLYGON ((-94.13775 37.32550000000001, -94.138...",290970100000000.0,2.0,4.0,0.0
1,29,97,12100,1050,290970121001050,N,3,7,"POLYGON ((-94.137637 37.328675, -94.119315 37....",1,POINT (-94.12889453205761 37.316266),"POLYGON ((-94.137637 37.328675, -94.119315 37....",290970100000000.0,3.0,7.0,0.0
2,29,97,12100,1094,290970121001094,N,4,13,"POLYGON ((-94.214761 37.293836, -94.230751 37....",1,POINT (-94.22108805775474 37.302019),"POLYGON ((-94.214761 37.293836, -94.230751 37....",290970100000000.0,4.0,13.0,0.0
3,29,97,12100,1093,290970121001093,N,5,5,"POLYGON ((-94.197294 37.27723599999999, -94.19...",1,POINT (-94.20604827951779 37.2853095),"POLYGON ((-94.197294 37.27723599999999, -94.19...",290970100000000.0,5.0,5.0,0.0
4,29,97,12100,1130,290970121001130,N,4,9,"POLYGON ((-94.151792 37.276275, -94.1519139999...",1,POINT (-94.16736082693089 37.276177),"POLYGON ((-94.151792 37.276275, -94.1519139999...",290970100000000.0,4.0,9.0,0.0


In [15]:
# Compare Population Counts - they should be equal - differences come from Households with more than 7 people
census_blocks_joplin_gdf['popdiff'] = census_blocks_joplin_gdf['POP10'] - census_blocks_joplin_gdf['pop10']
census_blocks_joplin_gdf['popdiff'].describe()

count    6324.000000
mean        0.147691
std         0.828869
min         0.000000
25%         0.000000
50%         0.000000
75%         0.000000
max        35.000000
Name: popdiff, dtype: float64

## Add Place Name (Cities) To Blocks
### Read in place polygons for state and select places in study area
Place names provide link to population demographics for cities and places defined by the Census. The Census communicates with cities and updates city boundaries based on policitical boundaries set by communities.

In [16]:
# Location of Place Names Defined By US Census
census_place_shp = '../../SourceData/www2.census.gov/geo/tiger/TIGER2010/PLACE/2010/tl_2010_29_place10/tl_2010_29_place10.shp'
census_place_gdf = gpd.read_file(census_place_shp)
census_place_gdf.head()

Unnamed: 0,STATEFP10,PLACEFP10,PLACENS10,GEOID10,NAME10,NAMELSAD10,LSAD10,CLASSFP10,PCICBSA10,PCINECTA10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry
0,29,56620,2585133,2956620,Peaceful Village,Peaceful Village village,47,C1,N,N,G4110,A,442829,0,38.4676529,-90.5427132,"POLYGON ((-90.53806999999999 38.46518, -90.538..."
1,29,40214,2585135,2940214,Lake Tekakwitha,Lake Tekakwitha village,47,C1,N,N,G4110,A,634746,98224,38.4424234,-90.7177452,"POLYGON ((-90.719143 38.438113, -90.7190459999..."
2,29,66337,2396909,2966337,Scotsdale,Scotsdale town,43,C1,N,N,G4110,A,1903806,0,38.3912051,-90.5908242,"POLYGON ((-90.593825 38.384454, -90.5940689999..."
3,29,57278,2396196,2957278,Pevely,Pevely city,25,C1,N,N,G4110,A,11769121,391052,38.2873675,-90.3990884,"POLYGON ((-90.396846 38.305376, -90.396732 38...."
4,29,56226,2399623,2956226,Parkdale,Parkdale village,47,C1,N,N,G4110,A,326303,0,38.4808895,-90.527063,"POLYGON ((-90.529484 38.484091, -90.5289329999..."


In [17]:
census_place_gdf['PLACEFP10'].describe()

count      1032
unique     1032
top       24760
freq          1
Name: PLACEFP10, dtype: object

In [18]:
census_place_gdf.crs

{'init': 'epsg:4269'}

In [19]:
census_blocks_joplin_gdf.crs

{'init': 'epsg:4269'}

In [20]:
# Find the bounds of the Census Block File
minx = census_blocks_joplin_gdf.bounds.minx.min()
miny = census_blocks_joplin_gdf.bounds.miny.min()
maxx = census_blocks_joplin_gdf.bounds.maxx.max()
maxy = census_blocks_joplin_gdf.bounds.maxy.max()
census_blocks_joplin_gdf_bounds = [minx, miny, maxx, maxy]
census_blocks_joplin_gdf_bounds

[-94.61959399999999, 36.747817999999995, -94.052313, 37.364173]

In [21]:
# Select Places within Bounds of Study Area
# build the r-tree index - for Places
sindex_census_place_gdf = census_place_gdf.sindex
possible_matches_index = list(sindex_census_place_gdf.intersection(census_blocks_joplin_gdf_bounds))
joplin_area_census_place_gdf = census_place_gdf.iloc[possible_matches_index]
joplin_area_census_place_gdf['NAME10'].describe()

count            44
unique           44
top       Neck City
freq              1
Name: NAME10, dtype: object

In [22]:
# plot the intersections and the city
census_place_gdf_map = fm.Map(location=[(miny+maxy)/2,(minx+maxx)/2], zoom_start=10)
fm.GeoJson(joplin_area_census_place_gdf).add_to(census_place_gdf_map)
display(census_place_gdf_map)

### Spatial Join Place Names to Block IDS

In [23]:
# Confirm Count of Unique ID in layer to which data will be added
census_blocks_joplin_gdf['BLOCKID10'].describe()

count                9621
unique               9621
top       290970117005022
freq                    1
Name: BLOCKID10, dtype: object

In [24]:
# build the r-tree index - Using Representative Point
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index,'geometry'] = census_blocks_joplin_gdf['rppnt4269']
sindex_census_blocks_joplin_gdf = census_blocks_joplin_gdf.sindex

# find the points that intersect with each subpolygon and add ID to Point
for index, place in joplin_area_census_place_gdf.iterrows():
    # print(place['NAME10'])

    # find approximate matches with r-tree, then precise matches from those approximate ones
    possible_matches_index = list(sindex_census_blocks_joplin_gdf.intersection(place['geometry'].bounds))
    possible_matches = census_blocks_joplin_gdf.iloc[possible_matches_index]
    precise_matches = possible_matches[possible_matches.intersects(place['geometry'])]
    census_blocks_joplin_gdf.loc[precise_matches.index,'PLCGEOID10'] = place['GEOID10']
    census_blocks_joplin_gdf.loc[precise_matches.index,'PLCNAME10'] = place['NAME10']

In [25]:
# Confirm Count of Unique ID in layer to which data will be added
census_blocks_joplin_gdf['BLOCKID10'].describe()

count                9621
unique               9621
top       290970117005022
freq                    1
Name: BLOCKID10, dtype: object

In [26]:
census_blocks_joplin_gdf['PLCGEOID10'].describe()

count        5377
unique         42
top       2937592
freq         2025
Name: PLCGEOID10, dtype: object

In [27]:
# Switch Block Geography back to polygons
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index,'geometry'] = census_blocks_joplin_gdf['blk104269']
census_blocks_joplin_gdf.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,CountySelect,rppnt4269,blk104269,blockid,apcount,pop10,gqpop10,popdiff,PLCGEOID10,PLCNAME10
0,29,97,12100,1047,290970121001047,N,2,4,"POLYGON ((-94.13775 37.32550000000001, -94.138...",1,POINT (-94.14712692867573 37.320531),"POLYGON ((-94.13775 37.32550000000001, -94.138...",290970100000000.0,2.0,4.0,0.0,0.0,,
1,29,97,12100,1050,290970121001050,N,3,7,"POLYGON ((-94.137637 37.328675, -94.119315 37....",1,POINT (-94.12889453205761 37.316266),"POLYGON ((-94.137637 37.328675, -94.119315 37....",290970100000000.0,3.0,7.0,0.0,0.0,,
2,29,97,12100,1094,290970121001094,N,4,13,"POLYGON ((-94.214761 37.293836, -94.230751 37....",1,POINT (-94.22108805775474 37.302019),"POLYGON ((-94.214761 37.293836, -94.230751 37....",290970100000000.0,4.0,13.0,0.0,0.0,,
3,29,97,12100,1093,290970121001093,N,5,5,"POLYGON ((-94.197294 37.27723599999999, -94.19...",1,POINT (-94.20604827951779 37.2853095),"POLYGON ((-94.197294 37.27723599999999, -94.19...",290970100000000.0,5.0,5.0,0.0,0.0,,
4,29,97,12100,1130,290970121001130,N,4,9,"POLYGON ((-94.151792 37.276275, -94.1519139999...",1,POINT (-94.16736082693089 37.276177),"POLYGON ((-94.151792 37.276275, -94.1519139999...",290970100000000.0,4.0,9.0,0.0,0.0,,


In [28]:
# Look at One Place plot the intersections and the city
neosho_place_gdf_map = fm.Map(location=[(miny+maxy)/2,(minx+maxx)/2], zoom_start=10)
census_blocks_neosho_gdf = census_blocks_joplin_gdf[census_blocks_joplin_gdf['PLCNAME10'].notnull()]
blockstyle_function = lambda x: {'color':'green','fillColor': 'transparent' }
placetooltip=fm.features.GeoJsonTooltip(fields=['NAME10'],
                                              aliases = ['Place Name'],
                                              labels=True,
                                              sticky=False
                                             )
fm.GeoJson(census_blocks_neosho_gdf['geometry'],name='Census Blocks',style_function=blockstyle_function).add_to(neosho_place_gdf_map)
fm.GeoJson(joplin_area_census_place_gdf,name='Census Places',tooltip=placetooltip).add_to(neosho_place_gdf_map)
fm.LayerControl().add_to(neosho_place_gdf_map)
neosho_place_gdf_map.save(programname+'census_blocks_places.html')
# Error Displaying Map display(neosho_place_gdf_map)

## How many blocks do not have place names?

In [29]:
census_blocks_joplin_gdf[census_blocks_joplin_gdf['PLCNAME10'].isnull()].describe()

Unnamed: 0,HOUSING10,POP10,CountySelect,blockid,apcount,pop10,gqpop10,popdiff
count,4244.0,4244.0,4244.0,2484.0,2484.0,2484.0,2484.0,2484.0
mean,5.606503,13.801367,1.0,291229300000000.0,9.580918,23.428341,0.058374,0.151771
std,11.570734,29.076377,0.0,239328700000.0,13.817094,34.634704,1.499166,1.036542
min,0.0,0.0,1.0,290970100000000.0,1.0,0.0,0.0,0.0
25%,0.0,0.0,1.0,290970100000000.0,2.0,4.0,0.0,0.0
50%,1.0,2.0,1.0,291450200000000.0,5.0,11.0,0.0,0.0
75%,6.0,15.0,1.0,291450200000000.0,12.0,29.0,0.0,0.0
max,209.0,585.0,1.0,291450200000000.0,209.0,585.0,46.0,35.0


## How many places do not have blocks?

In [30]:
# Collapse Blocks By Place Name and Count Blocks 
census_blocks_joplin_gdf_blockcount = census_blocks_joplin_gdf[['PLCNAME10']]
census_blocks_joplin_gdf_blockcount['block_count'] = 1
census_blocks_joplin_gdf_blockcount_sum = census_blocks_joplin_gdf_blockcount.groupby(['PLCNAME10']).sum()
census_blocks_joplin_gdf_blockcount_sum.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,block_count
PLCNAME10,Unnamed: 1_level_1
Airport Drive,32
Alba,31
Asbury,32
Avilla,11
Brooklyn Heights,9


In [31]:
# Add Block Count to Place Data
joplin_area_census_place_gdf_checkcount = pd.merge(joplin_area_census_place_gdf, census_blocks_joplin_gdf_blockcount_sum,
                                  left_on='NAME10', right_on='PLCNAME10', how='left')
joplin_area_census_place_gdf_checkcount.loc[joplin_area_census_place_gdf_checkcount['block_count'].isnull()]

Unnamed: 0,STATEFP10,PLACEFP10,PLACENS10,GEOID10,NAME10,NAMELSAD10,LSAD10,CLASSFP10,PCICBSA10,PCINECTA10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry,block_count
0,29,27874,2396964,2927874,Goodman,Goodman town,43,C1,N,N,G4110,A,3436505,0,36.7388212,-94.4091673,"POLYGON ((-94.42005999999999 36.745295, -94.41...",
1,29,79126,2397296,2979126,Wheaton,Wheaton city,25,C1,N,N,G4110,A,1316218,0,36.7616133,-94.0569912,"POLYGON ((-94.06256599999999 36.76087, -94.063...",


## Add PUMA ID To Blocks
### Read in PUMA polygons for state and select places in study area

In [32]:
# Location of PUMA Polygons Defined By US Census
census_puma_shp = '../../SourceData/www2.census.gov/geo/tiger/TIGER2010/PUMA5/2010/tl_2010_29_puma10/tl_2010_29_puma10.shp'
census_puma_gdf = gpd.read_file(census_puma_shp)
census_puma_gdf.head()

Unnamed: 0,STATEFP10,PUMACE10,GEOID10,NAMELSAD10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry
0,29,100,2900100,Northwest Missouri PUMA,G6120,S,18474488878,112659093,40.2124722,-94.0205197,"POLYGON ((-94.606122 40.095371, -94.6060919999..."
1,29,200,2900200,"Buchanan, Andrew & DeKalb Counties PUMA",G6120,S,3268786601,38387846,39.8465209,-94.6752059,"POLYGON ((-94.496583 39.747664, -94.496591 39...."
2,29,300,2900300,Northeast Missouri PUMA,G6120,S,14714991008,201603060,39.9808852,-92.083595,"POLYGON ((-91.43511699999999 40.384973, -91.43..."
3,29,400,2900400,"Lincoln, Warren, Audrain, Pike & Montgomery Co...",G6120,S,7651023378,123547548,39.0942668,-91.3420637,"POLYGON ((-91.418978 39.474644, -91.4166089999..."
4,29,500,2900500,"Cole, Callaway, Moniteau & Osage Counties PUMA",G6120,S,5821505865,80052982,38.6371207,-92.0892275,"POLYGON ((-92.21836499999999 38.687004, -92.21..."


In [33]:
census_puma_gdf['GEOID10'].describe()

count          47
unique         47
top       2901801
freq            1
Name: GEOID10, dtype: object

In [34]:
census_puma_gdf.crs

{'init': 'epsg:4269'}

In [35]:
census_blocks_joplin_gdf.crs

{'init': 'epsg:4269'}

In [36]:
# Find the bounds of the Census Block File
minx = census_blocks_joplin_gdf.bounds.minx.min()
miny = census_blocks_joplin_gdf.bounds.miny.min()
maxx = census_blocks_joplin_gdf.bounds.maxx.max()
maxy = census_blocks_joplin_gdf.bounds.maxy.max()
census_blocks_joplin_gdf_bounds = [minx, miny, maxx, maxy]
census_blocks_joplin_gdf_bounds

[-94.61959399999999, 36.747817999999995, -94.052313, 37.364173]

In [37]:
# Select pumas within Bounds of Study Area
# build the r-tree index - for pumas
sindex_census_puma_gdf = census_puma_gdf.sindex
possible_matches_index = list(sindex_census_puma_gdf.intersection(census_blocks_joplin_gdf_bounds))
joplin_area_census_puma_gdf = census_puma_gdf.iloc[possible_matches_index]
joplin_area_census_puma_gdf['GEOID10'].describe()

count           3
unique          3
top       2901200
freq            1
Name: GEOID10, dtype: object

In [38]:
# plot the intersections and the city
census_puma_gdf_map = fm.Map(location=[(miny+maxy)/2,(minx+maxx)/2], zoom_start=10)
fm.GeoJson(joplin_area_census_puma_gdf).add_to(census_puma_gdf_map)
display(census_puma_gdf_map)

### Spatial Join PUMA ID to Block IDS

In [39]:
# Confirm Count of Unique ID in layer to which data will be added
census_blocks_joplin_gdf['BLOCKID10'].describe()

count                9621
unique               9621
top       290970117005022
freq                    1
Name: BLOCKID10, dtype: object

In [40]:
# build the r-tree index - Using Representative Point
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index,'geometry'] = census_blocks_joplin_gdf['rppnt4269']
sindex_census_blocks_joplin_gdf = census_blocks_joplin_gdf.sindex

# find the points that intersect with each subpolygon and add ID to Point
for index, puma in joplin_area_census_puma_gdf.iterrows():

    
    # find approximate matches with r-tree, then precise matches from those approximate ones
    possible_matches_index = list(sindex_census_blocks_joplin_gdf.intersection(puma['geometry'].bounds))
    possible_matches = census_blocks_joplin_gdf.iloc[possible_matches_index]
    precise_matches = possible_matches[possible_matches.intersects(puma['geometry'])]
    census_blocks_joplin_gdf.loc[precise_matches.index,'PUMGEOID10'] = puma['GEOID10']
    census_blocks_joplin_gdf.loc[precise_matches.index,'PUMNAME10'] = puma['NAMELSAD10']

In [41]:
# Confirm Count of Unique ID in layer to which data will be added
census_blocks_joplin_gdf['BLOCKID10'].describe()

count                9621
unique               9621
top       290970117005022
freq                    1
Name: BLOCKID10, dtype: object

In [42]:
census_blocks_joplin_gdf['PUMGEOID10'].describe()

count        9621
unique          1
top       2902800
freq         9621
Name: PUMGEOID10, dtype: object

In [43]:
# Switch Block Geography back to polygons
census_blocks_joplin_gdf.loc[census_blocks_joplin_gdf.index,'geometry'] = census_blocks_joplin_gdf['blk104269']
census_blocks_joplin_gdf.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,CountySelect,...,blk104269,blockid,apcount,pop10,gqpop10,popdiff,PLCGEOID10,PLCNAME10,PUMGEOID10,PUMNAME10
0,29,97,12100,1047,290970121001047,N,2,4,"POLYGON ((-94.13775 37.32550000000001, -94.138...",1,...,"POLYGON ((-94.13775 37.32550000000001, -94.138...",290970100000000.0,2.0,4.0,0.0,0.0,,,2902800,Jasper & Newton Counties PUMA
1,29,97,12100,1050,290970121001050,N,3,7,"POLYGON ((-94.137637 37.328675, -94.119315 37....",1,...,"POLYGON ((-94.137637 37.328675, -94.119315 37....",290970100000000.0,3.0,7.0,0.0,0.0,,,2902800,Jasper & Newton Counties PUMA
2,29,97,12100,1094,290970121001094,N,4,13,"POLYGON ((-94.214761 37.293836, -94.230751 37....",1,...,"POLYGON ((-94.214761 37.293836, -94.230751 37....",290970100000000.0,4.0,13.0,0.0,0.0,,,2902800,Jasper & Newton Counties PUMA
3,29,97,12100,1093,290970121001093,N,5,5,"POLYGON ((-94.197294 37.27723599999999, -94.19...",1,...,"POLYGON ((-94.197294 37.27723599999999, -94.19...",290970100000000.0,5.0,5.0,0.0,0.0,,,2902800,Jasper & Newton Counties PUMA
4,29,97,12100,1130,290970121001130,N,4,9,"POLYGON ((-94.151792 37.276275, -94.1519139999...",1,...,"POLYGON ((-94.151792 37.276275, -94.1519139999...",290970100000000.0,4.0,9.0,0.0,0.0,,,2902800,Jasper & Newton Counties PUMA


In [44]:
# Save Work at this point as CSV
savefile = sys.path[0]+"/"+programname+"/"+programname+"EPSG4269.csv"
census_blocks_joplin_gdf.to_csv(savefile)