<a href="https://colab.research.google.com/github/npr99/Archive/blob/master/IN_CORE_2ev1_Lumberton_AddStructureID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Add Structure ID to Address Point and Building Inventory
The current version of IN-CORE uses Structure ID to connect the Address Point and Building Inventory. The variable structure ID needs to be non-missing in both files.

The current solution is to use the GUID as the Structure ID and then fill in any missing structure IDs using the Block ID in the address point inventory.
    

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import math as math
import numpy as np
import geopandas as gpd
import pandas as pd
import shapely
import descartes

import folium as fm # folium has more dynamic maps - but requires internet connection

import os # For saving output to path



In [None]:
# Report the interpreter and key package versions - important information for replication
import sys

# Each entry is (label, version); the labels are padded so the versions line up.
version_report = [
    ("Python Version     ", sys.version),
    ("numpy version:     ", np.__version__),
    ("geopandas version: ", gpd.__version__),
    ("pandas version:    ", pd.__version__),
    ("shapely version:   ", shapely.__version__),
    # descartes is left out of the report; the original note records 1.1.0
    # next to a disabled descartes.__version__ lookup.
    ("folium version:    ", fm.__version__),
]
for label, version in version_report:
    print(label, version)

Python Version      3.7.10 | packaged by conda-forge | (default, Feb 19 2021, 15:37:01) [MSC v.1916 64 bit (AMD64)]
numpy version:      1.20.2
geopandas version:  0.9.0
pandas version:     0.24.2
shapely version:    1.7.1
folium version:     0.9.1


In [None]:
# Store the program name so that every output file shares the same prefix/folder
programname = "IN-CORE_2ev1_Lumberton_AddStructureID_2021-04-27"
# Make the directory that holds the output. exist_ok=True makes the cell safe to
# re-run and removes the check-then-create race of os.path.exists + os.mkdir;
# it still raises if the name exists but is a regular file rather than a folder.
os.makedirs(programname, exist_ok=True)

## Setup access to IN-CORE
https://incore.ncsa.illinois.edu/ 

In [None]:
from pyincore import IncoreClient, Dataset, FragilityService, MappingSet, DataService
from pyincore_viz.geoutil import GeoUtil as viz

In [None]:
# Create the IN-CORE client used by the service objects below
client = IncoreClient()
# IN-CORE caches files on the local machine; it might be necessary to clear the cache.
# Uncomment the next line to do so:
#client.clear_cache()

Connection successful to IN-CORE services. pyIncore version detected: 0.9.2


In [None]:
# Create the DataService object (bound to the client) that is used to load datasets by id
data_service = DataService(client)

## Read in Building Inventory
The building inventory provides a basic understanding of where address points can be located.

In [None]:
# Dataset id of the Lumberton, NC building inventory on the IN-CORE data service
bldg_inv_id = "6036c2a9e379f22e1658d451"

# Fetch the dataset through the data service and locate its shapefile on disk
bldg_inv = Dataset.from_data_service(bldg_inv_id, data_service)
filename = bldg_inv.get_file_path('shp')

# Tell the user where the shapefile was saved
print(f"The IN-CORE Dataservice has saved the Building Inventory on your local machine: {filename}")

Dataset already exists locally. Reading from local cached zip.
Unzipped folder found in the local cache. Reading from it...
The IN-CORE Dataservice has saved the Building Inventory on your local machine: C:\Users\nathanael99\.incore\cache_data\6036c2a9e379f22e1658d451\lumberton-bldg-v7\lumberton-bldg-v7.shp


In [None]:
# Read the cached shapefile into a GeoDataFrame.
bldg_inv_gdf = gpd.read_file(filename)
# Label the coordinates as WGS 84 (EPSG:4326) without reprojecting them.
# The authority string replaces the {'init': 'epsg:4326'} dict, which is
# deprecated by pyproj 2+; allow_override=True keeps the original behavior of
# replacing whatever CRS read_file may already have attached.
bldg_inv_gdf = bldg_inv_gdf.set_crs("EPSG:4326", allow_override=True)
bldg_inv_gdf.head()

Unnamed: 0,ffe_elev,archetype,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,gsq_foot,...,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,guid,age_group,geometry
0,0.0,0,,,0,,,,20,20,...,,,,,,,,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,,POINT (-78.99633 34.65436)
1,0.0,8,,,0,,,,0,0,...,,,,,,,,78e8556b-15b3-45e9-a72d-dba53a188b8d,,POINT (-79.01852 34.64057)
2,0.0,8,,,0,,,,0,0,...,,,,,,,,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,,POINT (-79.02847 34.60277)
3,36.8808,0,,,1,,,,0,0,...,,,,,,,,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,,POINT (-79.05967 34.61999)
4,31.78537,2,3715560148.0,,1,,,,1128,1128,...,,,,,1988.0,,,3928ae4d-4450-427f-8fc3-2294d36879f8,3.0,POINT (-78.94659 34.55213)


In [None]:
# Look at the archetypes: non-missing value counts of every column, per archetype.
# (Alternative kept for reference, a single count column per archetype:
#  pd.crosstab(index=bldg_inv_gdf.archetype, columns="count"))
bldg_inv_gdf.groupby('archetype').count()

Unnamed: 0_level_0,ffe_elev,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,sq_foot,gsq_foot,occ_type,...,strctid,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,guid,age_group,geometry
archetype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,25,23,0,25,0,0,0,25,25,24,...,0,0,0,0,23,0,0,25,23,25
1,6070,6070,0,6070,0,0,0,6070,6070,6070,...,0,0,0,0,6061,0,0,6070,6061,6070
2,10273,10273,0,10273,0,0,0,10273,10273,10273,...,0,0,0,0,10258,0,0,10273,10258,10273
3,249,249,0,249,0,0,0,249,249,249,...,0,0,0,0,249,0,0,249,249,249
4,1391,1391,0,1391,0,0,0,1391,1391,1391,...,0,0,0,0,1387,0,0,1391,1387,1391
5,1060,1060,0,1060,0,0,0,1060,1060,1060,...,0,0,0,0,1060,0,0,1060,1060,1060
6,9,9,0,9,0,0,0,9,9,9,...,0,0,0,0,9,0,0,9,9,9
7,10,10,0,10,0,0,0,10,10,10,...,0,0,0,0,10,0,0,10,10,10
8,14,12,0,14,0,0,0,14,14,12,...,0,0,0,0,12,0,0,14,12,14
9,149,149,0,149,0,0,0,149,149,149,...,0,0,0,0,149,0,0,149,149,149


In [None]:
# Print every column name of the building inventory, one per line
for column_name in list(bldg_inv_gdf.columns):
    print(column_name)

ffe_elev
archetype
parid
struct_typ
no_stories
a_stories
b_stories
bsmt_type
sq_foot
gsq_foot
occ_type
occ_detail
major_occ
broad_occ
repl_cst
str_cst
nstra_cst
nstrd_cst
dgn_lvl
cont_val
efacility
dwell_unit
str_typ2
occ_typ2
strctid
appr_bldg
appr_land
appr_tot
year_built
lhsm_elev
g_elev
guid
age_group
geometry


In [None]:
# Summarize the structure id column as delivered, before it is filled in
bldg_inv_gdf['strctid'].describe()

count     0
unique    0
Name: strctid, dtype: int64

In [None]:
# Make strctid = "ST" + guid for every row that has a guid.
# Vectorized string operations replace the row-wise DataFrame.apply, which ran a
# Python lambda on every row (including rows without a guid that the mask then
# discarded). zfill(36) is kept from the original; it only left-pads values
# shorter than 36 characters.
has_guid = bldg_inv_gdf['guid'].notna()
bldg_inv_gdf.loc[has_guid, 'strctid'] = (
    "ST" + bldg_inv_gdf.loc[has_guid, 'guid'].astype(str).str.zfill(36)
)
# Preview the new primary key (this only shows the first rows; it does not by
# itself verify uniqueness or completeness)
bldg_inv_gdf[['strctid']].head(10)

Unnamed: 0,strctid
0,ST2d32aeff-7b75-47e6-b7a5-4f4adca4b021
1,ST78e8556b-15b3-45e9-a72d-dba53a188b8d
2,ST6b481629-e0c6-48f6-b1ce-d57f65d35cb6
3,STa6875194-ad6b-4061-9855-fe8a8b0f5ba6
4,ST3928ae4d-4450-427f-8fc3-2294d36879f8
5,ST00624632-6917-4ffc-a597-bcb30eda00c8
6,STb234aa50-36bb-40e1-b470-146fe5a06111
7,ST33d858f3-9ebc-4065-8688-af69fb1026ad
8,ST57f6075f-69ba-4033-af32-fae6467e6f21
9,ST89003bde-787c-486a-ba67-eb2f1a5efe3f


In [None]:
# Count rows whose structure id is still missing (expected result: 0)
bldg_inv_gdf['strctid'].isnull().sum()

0

In [None]:
# Put the identifier columns first (guid, then strctid) and keep every other
# column in its existing relative order
id_columns = ['guid', 'strctid']
cols = id_columns + [name for name in bldg_inv_gdf.columns if name not in id_columns]
bldg_inv_gdf = bldg_inv_gdf[cols]
bldg_inv_gdf.head()

Unnamed: 0,guid,strctid,ffe_elev,archetype,parid,struct_typ,no_stories,a_stories,b_stories,bsmt_type,...,str_typ2,occ_typ2,appr_bldg,appr_land,appr_tot,year_built,lhsm_elev,g_elev,age_group,geometry
0,2d32aeff-7b75-47e6-b7a5-4f4adca4b021,ST2d32aeff-7b75-47e6-b7a5-4f4adca4b021,0.0,0,,,0,,,,...,,,,,,,,,,POINT (-78.99633 34.65436)
1,78e8556b-15b3-45e9-a72d-dba53a188b8d,ST78e8556b-15b3-45e9-a72d-dba53a188b8d,0.0,8,,,0,,,,...,,,,,,,,,,POINT (-79.01852 34.64057)
2,6b481629-e0c6-48f6-b1ce-d57f65d35cb6,ST6b481629-e0c6-48f6-b1ce-d57f65d35cb6,0.0,8,,,0,,,,...,,,,,,,,,,POINT (-79.02847 34.60277)
3,a6875194-ad6b-4061-9855-fe8a8b0f5ba6,STa6875194-ad6b-4061-9855-fe8a8b0f5ba6,36.8808,0,,,1,,,,...,,,,,,,,,,POINT (-79.05967 34.61999)
4,3928ae4d-4450-427f-8fc3-2294d36879f8,ST3928ae4d-4450-427f-8fc3-2294d36879f8,31.78537,2,3715560148.0,,1,,,,...,,,,,,1988.0,,,3.0,POINT (-78.94659 34.55213)


# Clean merged data to match the Building Inventory schema

The latest Building Inventory schema for a shapefile is version 6, so we use the type
`ergo:buildingInventoryVer6`.

https://opensource.ncsa.illinois.edu/confluence/display/INCORE1/Building+Inventory+Datatype+Schema

Variables to include (v6):

guid

strctid 

struct_typ

archetype

occ_type

appr_bldg

sq_foot

dwell_unit

no_stories

cont_val

str_typ2

efacility

parid

year_built

a_stories

b_stories

bsmt_type

gsq_foot

occ_detail

major_occ

broad_occ

repl_cst

str_cst

nstra_cst

nstrd_cst

dgn_lvl

occ_typ2

appr_land

appr_tot

In [None]:
# List of all required ergo:buildingInventoryVer6 columns
incore_columns = ['guid',
                  'strctid',
                  'struct_typ',
                  'archetype',
                  'occ_type',
                  'appr_bldg',
                  'sq_foot',
                  'dwell_unit',
                  'no_stories',
                  'cont_val',
                  'str_typ2',
                  'efacility',
                  'parid',
                  'year_built',
                  'a_stories',
                  'b_stories',
                  'bsmt_type',
                  'gsq_foot',
                  'occ_detail',
                  'major_occ',
                  'broad_occ',
                  'repl_cst',
                  'str_cst',
                  'nstra_cst',
                  'nstrd_cst',
                  'dgn_lvl',
                  'occ_typ2',
                  'appr_land',
                  'appr_tot']
# One-column frame of the required names, sorted alphabetically so it can be
# merged against the inventory's own column list. (The earlier pd.Series
# assignment was overwritten immediately and has been dropped as dead code.)
incore_columns_df = pd.DataFrame(incore_columns, columns=['varname']).sort_values(by=['varname'])
incore_columns_df.head()

Unnamed: 0,varname
14,a_stories
5,appr_bldg
27,appr_land
28,appr_tot
3,archetype


In [None]:
# One-column frame of the inventory's column names, sorted alphabetically
columnlist = (
    pd.DataFrame({'varname': bldg_inv_gdf.columns})
    .sort_values('varname')
)
columnlist.head()

Unnamed: 0,varname
7,a_stories
32,age_group
26,appr_bldg
27,appr_land
28,appr_tot


In [None]:
# Outer-join the inventory's columns with the required schema columns; the
# _merge indicator shows whether a name is in both lists or only in one of them
columnlist_merge = columnlist.merge(
    incore_columns_df,
    on='varname',
    how='outer',
    indicator=True,
)
columnlist_merge

Unnamed: 0,varname,_merge
0,a_stories,both
1,age_group,left_only
2,appr_bldg,both
3,appr_land,both
4,appr_tot,both
5,archetype,both
6,b_stories,both
7,broad_occ,both
8,bsmt_type,both
9,cont_val,both


In [None]:
# Columns present in the inventory but not in the required schema list
is_left_only = columnlist_merge['_merge'] == 'left_only'
left_only_cols = columnlist_merge[is_left_only]
left_only_cols

Unnamed: 0,varname,_merge
1,age_group,left_only
13,ffe_elev,left_only
14,g_elev,left_only
15,geometry,left_only
18,lhsm_elev,left_only


### Check the Coordinate Reference System

In [None]:
# Show the Python type of the CRS object attached to the GeoDataFrame
type(bldg_inv_gdf.crs)

pyproj.crs.CRS

In [None]:
# Display the CRS description to confirm the inventory is labelled as EPSG:4326 (WGS 84)
bldg_inv_gdf.crs

<Geographic 2D CRS: +init=epsg:4326 +type=crs>
Name: WGS 84
Axis Info [ellipsoidal]:
- lon[east]: Longitude (degree)
- lat[north]: Latitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### Save as Shapefile

In [None]:
# Write the updated inventory as a shapefile inside the program's output folder.
# os.path.join builds the path with the platform's separator instead of a
# hard-coded "/".
savefile = os.path.join(programname, "IN-CORE_Lumberton_BuildingInventory_2021-04-27.shp")
bldg_inv_gdf.to_file(savefile)