<a href="https://colab.research.google.com/github/npr99/IN-CORE_notebooks/blob/main/IN_CORE_1cv2_Lumberton_CleanBuildingInventory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clean Building Inventory
Step required to make the Probabilistic Housing Unit Allocation work.

The Building Inventory
    
    1. Read in Building Inventory
    2. Check to make sure building inventory has a unique id
    3. Add representative point and polygon WKTs
    4. Add Census Block ID
    

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import math as math
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import descartes

import folium as fm # folium has more dynamic maps - but requires internet connection

import os # For saving output to path



In [None]:
# Report the interpreter and library versions in use - needed to replicate this run
import sys

version_report = [
    ("Python Version     ", sys.version),
    ("numpy version:     ", np.__version__),
    ("geopandas version: ", gpd.__version__),
    ("pandas version:    ", pd.__version__),
    ("shapely version:   ", shapely.__version__),
    # descartes is not reported here; the original note recorded version 1.1.0
    ("folium version:    ", fm.__version__),
]
for label, version in version_report:
    print(label, version)

Python Version      3.7.10 | packaged by conda-forge | (default, Feb 19 2021, 15:37:01) [MSC v.1916 64 bit (AMD64)]
numpy version:      1.20.1
geopandas version:  0.9.0
pandas version:     0.24.2
shapely version:    1.7.1
folium version:     0.9.1


In [None]:
# Program name: used as the name of the output folder and as the prefix of every output file
programname = "IN-CORE_1cv2_Lumberton_CleanBuildingInventory_2021-04-15"
# Create the output folder (relative to the working directory).
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(programname, exist_ok=True)

## Setup access to IN-CORE
https://incore.ncsa.illinois.edu/ 

In [None]:
from pyincore import IncoreClient, Dataset, FragilityService, MappingSet, DataService
from pyincore_viz.geoutil import GeoUtil as viz

In [None]:
# Connect to the IN-CORE services (prompts for a username and password when run interactively)
client = IncoreClient()
# IN-CORE caches files on the local machine; it might be necessary to clear the cache
#client.clear_cache()

Enter username: natrose
Enter password: ········
Connection successful to IN-CORE services. pyIncore version detected: 0.9.0


In [None]:
# Create the DataService object that is used below to load datasets from IN-CORE
data_service = DataService(client)

## Read in Building Inventory
The building inventory provides a basic understanding of where address points can be located.

In [None]:
# IN-CORE dataset id of the Lumberton, NC building inventory
bldg_inv_id = "6036c2a9e379f22e1658d451"
# Fetch the dataset through the data service and look up the path of its shapefile
bldg_inv = Dataset.from_data_service(bldg_inv_id, data_service)
filename = bldg_inv.get_file_path('shp')
print(f"The IN-CORE Dataservice has saved the Building Inventory on your local machine: {filename}")

The IN-CORE Dataservice has saved the Building Inventory on your local machine: C:\Users\nathanael99\.incore\cache_data\6036c2a9e379f22e1658d451\lumberton-bldg-v7\lumberton-bldg-v7.shp


In [None]:
# Read the building inventory shapefile into a GeoDataFrame
bldg_inv_gdf = gpd.read_file(filename)
# Declare the coordinate reference system as WGS 84.
# An authority string is used because the {'init': 'epsg:XXXX'} dictionary form
# is deprecated by pyproj 2+ / geopandas 0.7+.
bldg_inv_gdf.crs = "EPSG:4326"
bldg_inv_gdf.head()

Unnamed: 0,ffe_elev,archetype,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,gsq_foot,...,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,guid,age_group,geometry
0,0.0,0,,,0,,,,20,20,...,,,,,,,,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,,POINT (-78.99633 34.65436)
1,0.0,8,,,0,,,,0,0,...,,,,,,,,78e8556b-15b3-45e9-a72d-dba53a188b8d,,POINT (-79.01852 34.64057)
2,0.0,8,,,0,,,,0,0,...,,,,,,,,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,,POINT (-79.02847 34.60277)
3,36.8808,0,,,1,,,,0,0,...,,,,,,,,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,,POINT (-79.05967 34.61999)
4,31.78537,2,3715560148.0,,1,,,,1128,1128,...,,,,,1988.0,,,3928ae4d-4450-427f-8fc3-2294d36879f8,3.0,POINT (-78.94659 34.55213)


In [None]:
# Look at archetypes: non-missing value count of every column, per archetype
#pd.crosstab(index=bldg_inv_gdf.archetype, columns="count")
bldg_inv_gdf.groupby('archetype').count()

Unnamed: 0_level_0,ffe_elev,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,gsq_foot,occ_type,...,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,guid,age_group,geometry
archetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,25,23,0,25,0,0,0,25,25,24,...,0,0,0,0,23,0,0,25,23,25
1,6070,6070,0,6070,0,0,0,6070,6070,6070,...,0,0,0,0,6061,0,0,6070,6061,6070
2,10273,10273,0,10273,0,0,0,10273,10273,10273,...,0,0,0,0,10258,0,0,10273,10258,10273
3,249,249,0,249,0,0,0,249,249,249,...,0,0,0,0,249,0,0,249,249,249
4,1391,1391,0,1391,0,0,0,1391,1391,1391,...,0,0,0,0,1387,0,0,1391,1387,1391
5,1060,1060,0,1060,0,0,0,1060,1060,1060,...,0,0,0,0,1060,0,0,1060,1060,1060
6,9,9,0,9,0,0,0,9,9,9,...,0,0,0,0,9,0,0,9,9,9
7,10,10,0,10,0,0,0,10,10,10,...,0,0,0,0,10,0,0,10,10,10
8,14,12,0,14,0,0,0,14,14,12,...,0,0,0,0,12,0,0,14,12,14
9,149,149,0,149,0,0,0,149,149,149,...,0,0,0,0,149,0,0,149,149,149


## Check Unique Building ID
Building ID will be important for linking Address Point Inventory to Buildings and Critical Infrastructure Inventories to Buildings.

ID must be unique and non-missing.

In [None]:
# Summarise guid: 'count' equals 'unique' when every id is present and distinct
bldg_inv_gdf['guid'].describe()

count                                    20091
unique                                   20091
top       a80a0280-ee59-4ce7-8f07-9ed9472eaa48
freq                                         1
Name: guid, dtype: object

In [None]:
# Number of distinct guid values
bldg_inv_gdf['guid'].nunique()

20091

In [None]:
# List any buildings whose unique id is missing (an empty table means none are missing)
bldg_inv_gdf[bldg_inv_gdf['guid'].isna()]

Unnamed: 0,ffe_elev,archetype,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,gsq_foot,...,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,guid,age_group,geometry


In [None]:
# Put the primary key (guid) first, keeping all other columns in their existing order
cols = ['guid']
cols.extend(name for name in bldg_inv_gdf.columns if name != 'guid')
bldg_inv_gdf = bldg_inv_gdf[cols]
bldg_inv_gdf.head()

Unnamed: 0,guid,ffe_elev,archetype,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,...,occ_typ2,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,age_group,geometry
0,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,0.0,0,,,0,,,,20,...,,,,,,,,,,POINT (-78.99633 34.65436)
1,78e8556b-15b3-45e9-a72d-dba53a188b8d,0.0,8,,,0,,,,0,...,,,,,,,,,,POINT (-79.01852 34.64057)
2,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,0.0,8,,,0,,,,0,...,,,,,,,,,,POINT (-79.02847 34.60277)
3,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,36.8808,0,,,1,,,,0,...,,,,,,,,,,POINT (-79.05967 34.61999)
4,3928ae4d-4450-427f-8fc3-2294d36879f8,31.78537,2,3715560148.0,,1,,,,1128,...,,,,,,1988.0,,,3.0,POINT (-78.94659 34.55213)


In [None]:
# Re-check after reordering: the building id must still be unique and non-missing
bldg_inv_gdf['guid'].describe()

count                                    20091
unique                                   20091
top       a80a0280-ee59-4ce7-8f07-9ed9472eaa48
freq                                         1
Name: guid, dtype: object

## Read in Census Block Data
Census Blocks provide an estimate of how many residential address points (housing units) should be located in each block.

In [None]:
# The census block CSV lives in the folder named after the program that produced it
source_program = 'IN-CORE_1av2_Lumberton_CleanBlockData_2021-04-15'
census_blocks_csv = f"{source_program}/{source_program}EPSG4269.csv"
census_blocks_df = pd.read_csv(census_blocks_csv)
# Wrap the table in a GeoDataFrame; the geometry column is still WKT text at this point
census_blocks_gdf = gpd.GeoDataFrame(census_blocks_df)
census_blocks_gdf.head()

Unnamed: 0.1,Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,...,blockid,tothupoints,popcount,HU100,POP100,popdiff,PLCGEOID10,PLCNAME10,PUMGEOID10,PUMNAME10
0,0,37,155,961900,2028,371559619002028,N,14,52,"POLYGON ((-79.22246 34.458838, -79.222528 34.4...",...,371559619002028,14,51,14,52,1,,,3705100,Robeson County (West)--Lumberton City PUMA
1,1,37,155,961900,2054,371559619002054,N,1,3,"POLYGON ((-79.179851 34.40192, -79.180036 34.4...",...,371559619002054,1,3,1,3,0,,,3705100,Robeson County (West)--Lumberton City PUMA
2,2,37,155,961700,2069,371559617002069,N,41,99,"POLYGON ((-79.172814 34.480917, -79.172749 34....",...,371559617002069,41,99,41,99,0,,,3705100,Robeson County (West)--Lumberton City PUMA
3,3,37,155,961700,2065,371559617002065,N,6,22,"POLYGON ((-79.15764299999999 34.503279, -79.15...",...,371559617002065,6,21,6,22,1,,,3705100,Robeson County (West)--Lumberton City PUMA
4,4,37,155,961700,2058,371559617002058,N,19,55,"POLYGON ((-79.15830299999999 34.497355, -79.15...",...,371559617002058,19,55,19,55,0,,,3705100,Robeson County (West)--Lumberton City PUMA


In [None]:
# List every column available in the census block table
census_blocks_gdf.columns

Index(['Unnamed: 0', 'STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'BLOCKCE',
       'BLOCKID10', 'PARTFLG', 'HOUSING10', 'POP10', 'geometry',
       'CountySelect', 'rppnt4269', 'blk104269', 'blockid', 'tothupoints',
       'popcount', 'HU100', 'POP100', 'popdiff', 'PLCGEOID10', 'PLCNAME10',
       'PUMGEOID10', 'PUMNAME10'],
      dtype='object')

In [None]:
# Parse the WKT text in the 'geometry' column into shapely geometries
from shapely.wkt import loads

census_blocks_gdf['geometry'] = census_blocks_gdf['geometry'].apply(loads)
# Summarise the geometry types that resulted from parsing
census_blocks_gdf['geometry'].geom_type.describe()

count        5799
unique          1
top       Polygon
freq         5799
dtype: object

In [None]:
# Make the parsed 'geometry' column the active geometry of the GeoDataFrame
census_blocks_gdf = census_blocks_gdf.set_geometry(census_blocks_gdf['geometry'])
# Declare the CRS of the block data as NAD83.
# An authority string is used because the {'init': 'epsg:XXXX'} dictionary form
# is deprecated by pyproj 2+ / geopandas 0.7+.
census_blocks_gdf.crs = "EPSG:4269"
census_blocks_gdf.head()

Unnamed: 0.1,Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,...,blockid,tothupoints,popcount,HU100,POP100,popdiff,PLCGEOID10,PLCNAME10,PUMGEOID10,PUMNAME10
0,0,37,155,961900,2028,371559619002028,N,14,52,"POLYGON ((-79.22246 34.45884, -79.22253 34.458...",...,371559619002028,14,51,14,52,1,,,3705100,Robeson County (West)--Lumberton City PUMA
1,1,37,155,961900,2054,371559619002054,N,1,3,"POLYGON ((-79.17985 34.40192, -79.18004 34.401...",...,371559619002054,1,3,1,3,0,,,3705100,Robeson County (West)--Lumberton City PUMA
2,2,37,155,961700,2069,371559617002069,N,41,99,"POLYGON ((-79.17281 34.48092, -79.17275 34.480...",...,371559617002069,41,99,41,99,0,,,3705100,Robeson County (West)--Lumberton City PUMA
3,3,37,155,961700,2065,371559617002065,N,6,22,"POLYGON ((-79.15764 34.50328, -79.15784 34.502...",...,371559617002065,6,21,6,22,1,,,3705100,Robeson County (West)--Lumberton City PUMA
4,4,37,155,961700,2058,371559617002058,N,19,55,"POLYGON ((-79.15830 34.49735, -79.15664 34.498...",...,371559617002058,19,55,19,55,0,,,3705100,Robeson County (West)--Lumberton City PUMA


In [None]:
# Check the CRS of the census blocks; it is compared against the building inventory CRS
# before the two layers are combined
census_blocks_gdf.crs

<Geographic 2D CRS: +init=epsg:4269 +type=crs>
Name: NAD83
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: North America - NAD83
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [None]:
# CRS of the building inventory, for comparison with the census block CRS
bldg_inv_gdf.crs

<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# Reproject the census blocks into the CRS used by the building inventory
census_blocks_gdf = census_blocks_gdf.to_crs(crs=bldg_inv_gdf.crs)
census_blocks_gdf.crs

<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [None]:
# Did reprojection move anything? Parse the saved EPSG:4269 WKT and list the rows
# whose active geometry differs from it (an empty table means no differences)
census_blocks_gdf['blk104269'] = census_blocks_gdf['blk104269'].apply(loads)
geometry_changed = census_blocks_gdf['geometry'] != census_blocks_gdf['blk104269']
census_blocks_gdf.loc[geometry_changed, ['geometry','blk104269']]

Unnamed: 0,geometry,blk104269


In [None]:
# Show the active geometry next to the geometry parsed from the blk104269 column
census_blocks_gdf[['geometry','blk104269']].head()

Unnamed: 0,geometry,blk104269
0,"POLYGON ((-79.22246 34.45884, -79.22253 34.458...","POLYGON ((-79.22246 34.458838, -79.222528 34.4..."
1,"POLYGON ((-79.17985 34.40192, -79.18004 34.401...","POLYGON ((-79.179851 34.40192, -79.180036 34.4..."
2,"POLYGON ((-79.17281 34.48092, -79.17275 34.480...","POLYGON ((-79.172814 34.480917, -79.172749 34...."
3,"POLYGON ((-79.15764 34.50328, -79.15784 34.502...","POLYGON ((-79.15764299999999 34.503279, -79.15..."
4,"POLYGON ((-79.15830 34.49735, -79.15664 34.498...","POLYGON ((-79.15830299999999 34.497355, -79.15..."


In [None]:
# Confirm which column is the active geometry column
census_blocks_gdf.geometry.name

'geometry'

### Need to explore projection issues
It looks like NAD 83 (EPSG 4269) and WGS 84 (EPSG 4326) produce the same lat/lon coordinates. I was expecting there to be slight differences.

In [None]:
# Store BLOCKID10 as text; each value goes through int() first, then str()
census_blocks_gdf['BLOCKID10'] = [str(int(block_id)) for block_id in census_blocks_gdf['BLOCKID10']]

## Add State, County, and Census Block ID to Each Footprint

## Select Blocks within Bounding Box of Buildings

In [None]:
# Summarise the block ids: 'count' equals 'unique' when every block id is distinct
census_blocks_gdf['BLOCKID10'].describe()

count                5799
unique               5799
top       371559602012097
freq                    1
Name: BLOCKID10, dtype: object

In [None]:
# Bounding box of all buildings, used to select the census blocks of the study area.
# A small buffer widens the box on every side so that blocks on the edges are included.
buffer = 0.001
bldg_bounds = bldg_inv_gdf.bounds
minx = bldg_bounds['minx'].min() - buffer  # lower edges move down/left
miny = bldg_bounds['miny'].min() - buffer
maxx = bldg_bounds['maxx'].max() + buffer  # upper edges move up/right
maxy = bldg_bounds['maxy'].max() + buffer
building_gdf_bounds = [minx, miny, maxx, maxy]
building_gdf_bounds

[-79.15224977698432, 34.53346979490929, -78.90000338074236, 34.734480476141755]

In [None]:
# Select the census blocks within the bounds of the study area (the buffered building bounding box)
# build the r-tree index - for census blocks
sindex_census_blocks_gdf = census_blocks_gdf.sindex
# query the index with the bounding box; the result is used as row positions (.iloc) below
possible_matches_index = list(sindex_census_blocks_gdf.intersection(building_gdf_bounds))
building_census_blocks_gdf = census_blocks_gdf.iloc[possible_matches_index]
# summarise the ids of the selected blocks
building_census_blocks_gdf['BLOCKID10'].describe()

count                2155
unique               2155
top       371559612002000
freq                    1
Name: BLOCKID10, dtype: object

## Add Census Geography Details to Buildings

In [None]:
# Significant help from: https://geoffboeing.com/2016/10/r-tree-spatial-index-python/
# Significant help from: https://github.com/gboeing/urban-data-science/blob/master/19-Spatial-Analysis-and-Cartography/rtree-spatial-indexing.ipynb
# build the r-tree index over the building geometries (the inventory stores each building as a point)
sindex_bldg_inv_gdf = bldg_inv_gdf.sindex
# display the index to confirm its bounds and size
sindex_bldg_inv_gdf

rtree.index.Index(bounds=[-79.15124977698432, 34.534469794909285, -78.90100338074237, 34.73348047614176], size=20091)

In [None]:
# Copy census block attributes onto every building that intersects the block.
# Mapping: building inventory column -> census block column it is filled from.
block_columns = {
    'BLOCKID10': 'BLOCKID10',
    'STATEFP10': 'STATEFP10',
    'COUNTYFP10': 'COUNTYFP10',
    'TRACTCE10': 'TRACTCE10',
    'PUMGEOID10': 'PUMGEOID10',
    'PUMNAME10': 'PUMNAME10',
    'PLCGEOID10': 'PLCGEOID10',
    'PLCNAME10': 'PLCNAME10',
    'HOUSING10': 'HOUSING10',
    'apcount': 'tothupoints',
    'popdiff': 'popdiff',
    'POP10': 'POP10',
}

for index, block in building_census_blocks_gdf.iterrows():
    # progress indicator: one dot for every block whose index label is a multiple of 100
    if index % 100 == 0:
        print('.', end="")

    block_shape = block['geometry']
    # coarse candidates from the r-tree (bounding boxes), then the exact intersection test
    possible_matches_index = list(sindex_bldg_inv_gdf.intersection(block_shape.bounds))
    possible_matches = bldg_inv_gdf.iloc[possible_matches_index]
    precise_matches = possible_matches[possible_matches.intersects(block_shape)]
    # one assignment per column, in the order listed above
    for bldg_col, block_col in block_columns.items():
        bldg_inv_gdf.loc[precise_matches.index, bldg_col] = block[block_col]

.........................

In [None]:
# Move the key columns (building id, then block / state / county / tract / PUMA / place) to the front
first_columns = ['guid','BLOCKID10','STATEFP10','COUNTYFP10','TRACTCE10','PUMGEOID10','PUMNAME10','PLCGEOID10','PLCNAME10']
remaining_columns = [name for name in bldg_inv_gdf.columns if name not in first_columns]
cols = first_columns + remaining_columns
bldg_inv_gdf = bldg_inv_gdf[cols]
bldg_inv_gdf.head()

Unnamed: 0,guid,BLOCKID10,STATEFP10,COUNTYFP10,TRACTCE10,PUMGEOID10,PUMNAME10,PLCGEOID10,PLCNAME10,ffe_elev,...,appr_tot,year_built,lhsm_elev,g_elev,age_group,geometry,HOUSING10,apcount,popdiff,POP10
0,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,371559613011113,37.0,155.0,961301.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-78.99633 34.65436),84.0,84.0,0.0,115.0
1,78e8556b-15b3-45e9-a72d-dba53a188b8d,371559607022057,37.0,155.0,960702.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-79.01852 34.64057),0.0,0.0,0.0,0.0
2,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,371559608021059,37.0,155.0,960802.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-79.02847 34.60277),1.0,1.0,0.0,1.0
3,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,371559608012025,37.0,155.0,960801.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,36.8808,...,,,,,,POINT (-79.05967 34.61999),10.0,10.0,0.0,22.0
4,3928ae4d-4450-427f-8fc3-2294d36879f8,371559615004008,37.0,155.0,961500.0,3704900.0,"Columbus, Bladen & Robeson (East) Counties PUMA",,,31.78537,...,,1988.0,,,3.0,POINT (-78.94659 34.55213),5.0,5.0,0.0,12.0


### How many buildings do not have block id information?

In [None]:
# Buildings that received no block id (an empty table means every building was matched)
missing_block = bldg_inv_gdf['BLOCKID10'].isna()
bldg_noblock_gdf = bldg_inv_gdf[missing_block]
bldg_noblock_gdf

Unnamed: 0,guid,BLOCKID10,STATEFP10,COUNTYFP10,TRACTCE10,PUMGEOID10,PUMNAME10,PLCGEOID10,PLCNAME10,ffe_elev,...,appr_tot,year_built,lhsm_elev,g_elev,age_group,geometry,HOUSING10,apcount,popdiff,POP10


In [None]:
# If any buildings are missing block data, this map helps identify where they are -
# every building should have a block.
# Find the bounds of the building inventory; the map is centred on their midpoint
minx = bldg_inv_gdf.bounds.minx.min()
miny = bldg_inv_gdf.bounds.miny.min()
maxx = bldg_inv_gdf.bounds.maxx.max()
maxy = bldg_inv_gdf.bounds.maxy.max()

# Draw all census blocks as green outlines with no fill
blockstyle_function = lambda x: {'color':'green','fillColor': 'transparent' }

bldg_inv_gdf_map = fm.Map(location=[(miny+maxy)/2,(minx+maxx)/2], zoom_start=10)
# Layers: buildings without a block, all census blocks, and the blocks selected for the study area
fm.GeoJson(bldg_noblock_gdf).add_to(bldg_inv_gdf_map)
fm.GeoJson(census_blocks_gdf['geometry'],name='All Census Blocks',style_function=blockstyle_function).add_to(bldg_inv_gdf_map)
fm.GeoJson(building_census_blocks_gdf['geometry'],name='Selected Census Blocks').add_to(bldg_inv_gdf_map)
# Save the map inside the program's output folder, alongside the other output files
bldg_inv_gdf_map.save(os.path.join(programname, programname+'buildings_noblocks.html'))
# NOTE: the map is written to disk rather than displayed inline (inline display raised an error)

### How many blocks that have housing units do not have buildings?

In [None]:
# Count the number of buildings in each census block.
# .assign() returns a new DataFrame, so the counter column is not written onto a slice of
# bldg_inv_gdf (which is what triggered pandas' SettingWithCopyWarning).
building_gdf_blockbldgcount = bldg_inv_gdf[['BLOCKID10']].assign(blockbldsum=1)
# Summing the 1s per block id gives the building count for that block
building_gdf_blockbldsum = building_gdf_blockbldgcount.groupby(['BLOCKID10']).sum()
building_gdf_blockbldsum['blockbldsum'].describe()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



count    1572.000000
mean       12.780534
std        18.093528
min         1.000000
25%         3.000000
50%         7.000000
75%        15.000000
max       210.000000
Name: blockbldsum, dtype: float64

In [None]:
# Expose the block id (currently the index) as a text column that can be used as a merge key
building_gdf_blockbldsum['blockid10'] = [
    str(int(block_id)) for block_id in building_gdf_blockbldsum.index
]
building_gdf_blockbldsum.head()

Unnamed: 0_level_0,blockbldsum,blockid10
BLOCKID10,Unnamed: 1_level_1,Unnamed: 2_level_1
371559601024055,1,371559601024055
371559604022066,6,371559604022066
371559605031030,17,371559605031030
371559605031031,12,371559605031031
371559605031036,16,371559605031036


In [None]:
# Spot-check the building count of one example block
building_gdf_blockbldsum[building_gdf_blockbldsum['blockid10'] == '371559601024055']

Unnamed: 0_level_0,blockbldsum,blockid10
BLOCKID10,Unnamed: 1_level_1,Unnamed: 2_level_1
371559601024055,1,371559601024055


In [None]:
# Look up the same example block in the census block data
census_blocks_gdf[census_blocks_gdf['BLOCKID10'] == '371559601024055']

Unnamed: 0.1,Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,...,blockid,tothupoints,popcount,HU100,POP100,popdiff,PLCGEOID10,PLCNAME10,PUMGEOID10,PUMNAME10
644,644,37,155,960102,4055,371559601024055,N,20,56,"POLYGON ((-78.99166 34.72658, -78.99196 34.726...",...,371559601024055,20,56,20,56,0,,,3705100,Robeson County (West)--Lumberton City PUMA


In [None]:
# Attach the per-block building count to the census block data.
# A left join keeps every block; blocks with no buildings get a missing blockbldsum.
census_blocks_gdf_checkbuilding_count = pd.merge(
    census_blocks_gdf,
    building_gdf_blockbldsum,
    how='left',
    left_on='BLOCKID10',
    right_on='blockid10',
)
# Spot-check the example block after the merge
is_example_block = census_blocks_gdf_checkbuilding_count['BLOCKID10'] == '371559601024055'
census_blocks_gdf_checkbuilding_count.loc[is_example_block]

Unnamed: 0.1,Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE,BLOCKID10,PARTFLG,HOUSING10,POP10,geometry,...,popcount,HU100,POP100,popdiff,PLCGEOID10,PLCNAME10,PUMGEOID10,PUMNAME10,blockbldsum,blockid10
644,644,37,155,960102,4055,371559601024055,N,20,56,"POLYGON ((-78.99166 34.72658, -78.99196 34.726...",...,56,20,56,0,,,3705100,Robeson County (West)--Lumberton City PUMA,1.0,371559601024055


In [None]:
displaycols = ['BLOCKID10','tothupoints']
# Blocks that have housing unit points but received no building count in the merge
no_buildings = census_blocks_gdf_checkbuilding_count['blockbldsum'].isnull()
has_housing_units = census_blocks_gdf_checkbuilding_count['tothupoints'] > 0
blocks_withoutbuildings = census_blocks_gdf_checkbuilding_count.loc[no_buildings & has_housing_units]
# Tabulate those blocks by place name and county
pd.crosstab(blocks_withoutbuildings['PLCNAME10'], blocks_withoutbuildings['COUNTYFP10'], margins=True, margins_name="Total")


COUNTYFP10,155,Total
PLCNAME10,Unnamed: 1_level_1,Unnamed: 2_level_1
Elrod,10,10
Fairmont,115,115
Lumber Bridge,10,10
Lumberton,13,13
Marietta,12,12
Maxton,106,106
McDonald,9,9
Orrum,5,5
Parkton,33,33
Pembroke,74,74


### Create list of buildings to add to building inventory based on Census Block Data
The building inventory could be improved by adding building observations based on the Census Block Data. If the Census Block indicates that there should be housing units in the block then this information could be used to identify missing buildings in the existing inventory. 

In [None]:
# Expand each block without buildings into one row per expected housing unit (tothupoints).
# Row positions are repeated and selected with .iloc, which keeps the column names and
# dtypes; np.repeat on .values coerced every column to object and dropped the column names.
row_positions = np.repeat(np.arange(len(blocks_withoutbuildings)), blocks_withoutbuildings['tothupoints'])
missing_buildings_blocks_pd = pd.DataFrame(blocks_withoutbuildings.iloc[row_positions]).reset_index(drop=True)
missing_buildings_blocks_pd[['BLOCKID10','tothupoints','rppnt4269']].head(10)

Unnamed: 0,BLOCKID10,tothupoints,rppnt4269
0,371559619002028,14,POINT (-79.22459088452018 34.4587885)
1,371559619002028,14,POINT (-79.22459088452018 34.4587885)
2,371559619002028,14,POINT (-79.22459088452018 34.4587885)
3,371559619002028,14,POINT (-79.22459088452018 34.4587885)
4,371559619002028,14,POINT (-79.22459088452018 34.4587885)
5,371559619002028,14,POINT (-79.22459088452018 34.4587885)
6,371559619002028,14,POINT (-79.22459088452018 34.4587885)
7,371559619002028,14,POINT (-79.22459088452018 34.4587885)
8,371559619002028,14,POINT (-79.22459088452018 34.4587885)
9,371559619002028,14,POINT (-79.22459088452018 34.4587885)


In [None]:
# Count the expanded rows (one per expected housing unit) by place name and county
pd.crosstab(missing_buildings_blocks_pd['PLCNAME10'], missing_buildings_blocks_pd['COUNTYFP10'], margins=True, margins_name="Total")

COUNTYFP10,155,Total
PLCNAME10,Unnamed: 1_level_1,Unnamed: 2_level_1
Elrod,192,192
Fairmont,1255,1255
Lumber Bridge,51,51
Lumberton,24,24
Marietta,79,79
Maxton,1041,1041
McDonald,49,49
Orrum,50,50
Parkton,209,209
Pembroke,1271,1271


In [None]:
# Record the data source these inferred building rows are based on
missing_buildings_blocks_pd['source'] = "2010 Census Block Data"

In [None]:
# Save work at this point as CSV inside the program's output folder.
# The path is built relative to the working directory, where the output folder is created;
# sys.path[0] is an import search-path entry and is not guaranteed to point there.
savefile = os.path.join(programname, programname+"missing_buildings_blocks_pd.csv")
missing_buildings_blocks_pd.to_csv(savefile)

### The Missing Buildings File Should be added to the address point inventory

### Save Work

In [None]:
# Preview the building inventory before saving
bldg_inv_gdf.head()

Unnamed: 0,guid,BLOCKID10,STATEFP10,COUNTYFP10,TRACTCE10,PUMGEOID10,PUMNAME10,PLCGEOID10,PLCNAME10,ffe_elev,...,appr_tot,year_built,lhsm_elev,g_elev,age_group,geometry,HOUSING10,apcount,popdiff,POP10
0,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,371559613011113,37.0,155.0,961301.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-78.99633 34.65436),84.0,84.0,0.0,115.0
1,78e8556b-15b3-45e9-a72d-dba53a188b8d,371559607022057,37.0,155.0,960702.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-79.01852 34.64057),0.0,0.0,0.0,0.0
2,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,371559608021059,37.0,155.0,960802.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,0.0,...,,,,,,POINT (-79.02847 34.60277),1.0,1.0,0.0,1.0
3,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,371559608012025,37.0,155.0,960801.0,3705100.0,Robeson County (West)--Lumberton City PUMA,3739700.0,Lumberton,36.8808,...,,,,,,POINT (-79.05967 34.61999),10.0,10.0,0.0,22.0
4,3928ae4d-4450-427f-8fc3-2294d36879f8,371559615004008,37.0,155.0,961500.0,3704900.0,"Columbus, Bladen & Robeson (East) Counties PUMA",,,31.78537,...,,1988.0,,,3.0,POINT (-78.94659 34.55213),5.0,5.0,0.0,12.0


In [None]:
# List every column that will be written to the output file
cols = list(bldg_inv_gdf.columns)
cols

['guid',
 'BLOCKID10',
 'STATEFP10',
 'COUNTYFP10',
 'TRACTCE10',
 'PUMGEOID10',
 'PUMNAME10',
 'PLCGEOID10',
 'PLCNAME10',
 'ffe_elev',
 'archetype',
 'parid',
 'struct_typ',
 'no_stories',
 'a_stories',
 'b_stories',
 'bsmt_type',
 'sq_foot',
 'gsq_foot',
 'occ_type',
 'occ_detail',
 'major_occ',
 'broad_occ',
 'repl_cst',
 'str_cst',
 'nstra_cst',
 'nstrd_cst',
 'dgn_lvl',
 'cont_val',
 'efacility',
 'dwell_unit',
 'str_typ2',
 'occ_typ2',
 'strctid',
 'appr_bldg',
 'appr_land',
 'appr_tot',
 'year_built',
 'lhsm_elev',
 'g_elev',
 'age_group',
 'geometry',
 'HOUSING10',
 'apcount',
 'popdiff',
 'POP10']

In [None]:
# Save the cleaned building inventory as CSV inside the program's output folder.
# The path is built relative to the working directory, where the output folder is created;
# sys.path[0] is an import search-path entry and is not guaranteed to point there.
savefile = os.path.join(programname, programname+"_EPSG4326.csv")
bldg_inv_gdf.to_csv(savefile)