# Run Housing Unit Allocation (HUA)

In [2]:
# Switch for the IN-CORE Dataservice.
# Set to True to import pyincore and fetch/upload inventories through IN-CORE;
# leave as False to keep the workflow on the locally generated data frames.
use_incore = False
# (alternative setting: use_incore = True)

In [3]:
import pandas as pd
import geopandas as gpd # For reading in shapefiles
import numpy as np
import sys # For displaying package versions
import os # For managing directories and file paths if drive is mounted
import scooby # Reports Python environment
import time

# Set up the IN-CORE Dataservice only when it is requested.
if use_incore:
    from pyincore import IncoreClient, Dataset, DataService
    # The workflow loop calls Dataset.from_data_service(<id>, data_service),
    # so the client and data service objects must exist before it runs.
    # Creating the client may prompt for IN-CORE credentials.
    client = IncoreClient()
    data_service = DataService(client)

In [4]:
# To reload submodules need to use this magic command to set autoreload on
%load_ext autoreload
%autoreload 2
# open, read, and execute python program with reusable commands
from pyncoda.ncoda_00b_directory_design import directory_design
from pyncoda.ncoda_00e_geoutilities import spatial_join_points_to_poly
from pyncoda.ncoda_00h_bldg_archetype_structure import *
from pyncoda.ncoda_07a_generate_hui import generate_hui_functions
from pyncoda.ncoda_07c_generate_addpt import generate_addpt_functions
from pyncoda.ncoda_07d_run_hua_workflow import hua_workflow_functions
from pyncoda.CommunitySourceData.nsi_sec_usace_army_mil.nsi_01a_downloadfiles import download_nsi_files

In [5]:
# Print a scooby report of the Python environment, listing these packages
# in addition to the ones scooby reports by default.
extra_packages = ['pandas','pyincore','pyincore_viz','ipyleaflet','seaborn']
environment_report = scooby.Report(additional=extra_packages)
print(environment_report)


--------------------------------------------------------------------------------
  Date: Mon Oct 16 21:57:14 2023 Central Daylight Time

                OS : Windows
            CPU(s) : 8
           Machine : AMD64
      Architecture : 64bit
               RAM : 15.8 GiB
       Environment : Jupyter

  Python 3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:01:59)
  [MSC v.1929 64 bit (AMD64)]

            pandas : 2.0.3
          pyincore : 1.8.0
      pyincore_viz : 1.8.1
        ipyleaflet : 0.17.4
           seaborn : 0.12.2
             numpy : 1.24.4
             scipy : 1.10.1
           IPython : 8.12.0
        matplotlib : 3.7.3
            scooby : 0.7.3
--------------------------------------------------------------------------------


In [6]:
# Show the current working directory - relative paths used later in the
# notebook (for example the output folder) resolve against it.
os.getcwd()

'D:\\Equity Metric Trial'

In [7]:
# Residential archetype lookups: archetype code -> description and the
# estimated number of housing units for a building of that type.
# For more examples see the file pyncoda\ncoda_00h_bldg_archetype_structure.py
basic_residential_archetypes = {
    1: {'Description': 'Residential', 'HU estimate': 1},
}

# Nofal residential types: codes 1-4 in the order listed, each with a
# housing unit estimate of 1.
_nofal_descriptions = (
    'One-story sf residential building on a crawlspace foundation',
    'One-story mf residential building on a slab-on-grade foundation',
    'Two-story sf residential building on a crawlspace foundation',
    'Two-story mf residential building on a slab-on-grade foundation',
)
Nofal_residential_archetypesv2 = {
    code: {'Description': description, 'HU estimate': 1}
    for code, description in enumerate(_nofal_descriptions, start=1)
}

In [8]:
# Data dictionary for the communities to process - edit for your community.
#
# This example holds one community made up of one county.
# Check how to capitalize the state name at
## https://www2.census.gov/geo/tiger/TIGER2020PL/STATE/
#
# NOTE on file path length: WINDOWS limits file paths to 260 characters.
# Keep the community key short - it is used when building output file names.

galveston_counties = {
    1: {'FIPS Code': '48167', 'Name': 'Galveston County, TX'},
}

galveston_building_inventory = {
    'id': '63ff6b135c35c0353d5ed3ac',
    'note': 'Building inventory for Galveston Island, TX',
    'archetype_var': 'arch_flood',
    'residential_archetypes': Nofal_residential_archetypesv2,
    'building_area_var': 'sq_foot',
    'bldg_uniqueid': 'guid',
    'filename': r"D:\Equity Metric Trial\63ff6b135c35c0353d5ed3ac\galveston_bldg_island2.shp",
    'building_area_cutoff': 300,
}

communities = {
    'Galveston_TX': {
        'community_name': 'Galveston, TX',
        'focalplace_name': 'Galveston',
        'STATE': 'TEXAS',
        'years': ['2010'],
        'counties': galveston_counties,
        'building_inventory': galveston_building_inventory,
    },
}

# Version labels passed to the pyncoda functions and used in output file names
version = '2.0.0'
version_text = 'v2-0-0'

# Output is saved to a folder with a short name because long folder paths
# cause problems; files are saved with the program name, which helps to
# follow the overall workflow.
outputfolder = "OutputData"
basevintage = 2010

## Start MC From Here

In [10]:
# Random seeds for reproducibility.
#
# The list of seeds is generated once and cached in randseeds.npy so every
# batch of runs indexes into the same array. If the file is missing (for
# example on a fresh checkout) it is created here instead of failing.
# NOTE: a regenerated list will not match the seeds behind output files that
# were produced from an earlier randseeds.npy.
RANDSEEDS_FILE = 'randseeds.npy'
if os.path.exists(RANDSEEDS_FILE):
    randseeds = np.load(RANDSEEDS_FILE)
else:
    randseeds = np.random.randint(1000000, size=10000)
    np.save(RANDSEEDS_FILE, randseeds)  # save the array once

In [24]:
# Bookkeeping for which subset of randseeds to run.
# Slices noted so far (presumably batches that were already run - TODO confirm):
# [542:700] [492:542] [462:492] [312:462] [252:312] [72:252] [70:72] [55:70] [35:55] [33:35] [32:33] [17:32] [15:17] [12:15] [6:12] [4:6] [0:4]
# NOTE(review): the expressions below are scratch look-ups that assign nothing;
# only the last one is shown as the cell output.
randseeds[253]
np.where(randseeds == 544183)[0] # scratch: position(s) where the seed value 544183 occurs
randseeds[1000] # scratch: seed stored at index 1000

1000

In [None]:
# Run the Housing Unit Inventory (HUI) -> Address Point -> Housing Unit
# Allocation (HUA) workflow once per random seed in the selected batch.
# For every seed/community pair one CSV is written to `outputfolder`, named
# hua_{version_text}_{community}_{basevintage}_rs{seed}_{bldg_inv_id}.csv.
# `total_time` holds the elapsed wall-clock seconds for the whole batch.
t0 = time.time()

# Loop-invariant: the CSV target folder must exist before the first save
os.makedirs(outputfolder, exist_ok=True)

for seed in randseeds[800:1000]:
    # --- Housing Unit Inventory for this seed ---
    generate_hui_df = generate_hui_functions(
                        communities =   communities,
                        seed =          seed,
                        version =       version,
                        version_text=   version_text,
                        basevintage=    basevintage,
                        outputfolder=   outputfolder,
                        use_incore=     use_incore)
    hui_dataset_id = generate_hui_df.generate_hui_v2_for_incore()

    if use_incore:
        # The returned value is treated as an IN-CORE dataset id:
        # fetch the CSV through the data service and read it with pandas
        housing_unit_inv_id = hui_dataset_id
        housing_unit_inv = Dataset.from_data_service(housing_unit_inv_id, data_service)
        filename = housing_unit_inv.get_file_path('csv')
        print("The IN-CORE Dataservice has saved the Housing Unit Inventory on your local machine: "+filename)
        housing_unit_inv_df = pd.read_csv(filename, header="infer")
    else:
        # The returned value is used directly as the data frame; the id is
        # replaced by a label that is only used in the progress messages
        housing_unit_inv_df = hui_dataset_id
        hui_dataset_id = 'local'

    # --- Address Point Inventory and Housing Unit Allocation per community ---
    for community in communities.keys():
        # Parameters for address point generation
        bldg_inv_info = communities[community]['building_inventory']
        bldg_inv_id = bldg_inv_info['id']
        archetype_var = bldg_inv_info['archetype_var']
        building_area_var = bldg_inv_info['building_area_var']
        building_area_cutoff = bldg_inv_info['building_area_cutoff']
        residential_archetypes = bldg_inv_info['residential_archetypes']

        # Load the building inventory from IN-CORE, a local file, or NSI.
        # NOTE(review): the inventory is re-read for every seed; it could be
        # cached outside the seed loop if the downstream functions do not
        # modify it in place - confirm before changing.
        if use_incore:
            bldg_uniqueid = 'guid'
            bldg_inv = Dataset.from_data_service(bldg_inv_id, data_service)
            filename = bldg_inv.get_file_path('shp')
            print("The IN-CORE Dataservice has saved the Building Inventory on your local machine: "+filename)
            bldg_inv_gdf = gpd.read_file(filename)
        elif 'filename' in bldg_inv_info:
            # Building inventory comes from the file named in the dictionary
            bldg_uniqueid = bldg_inv_info['bldg_uniqueid']
            print("Building inventory is from a file")
            bldg_filename = bldg_inv_info['filename']
            bldg_inv_gdf = gpd.read_file(bldg_filename)
        elif 'NSI' in bldg_inv_id:
            # Building inventory comes from NSI: download one file per county
            bldg_uniqueid = bldg_inv_info['bldg_uniqueid']
            county_nsi_gdf = {}
            for county in communities[community]['counties'].keys():
                county_fips = communities[community]['counties'][county]['FIPS Code']
                state_county_name  = communities[community]['counties'][county]['Name']
                print("Downloading NSI files for:")
                print(state_county_name,': county FIPS Code',county_fips)
                county_nsi_gdf[county_fips] = download_nsi_files(county_fips=county_fips)
            # merge all counties into one geodataframe
            bldg_inv_gdf = pd.concat(county_nsi_gdf.values(),
                                        ignore_index=True, axis=0)
        else:
            # Without this guard the loop would continue with an undefined
            # inventory or one left over from a previous community
            raise ValueError(
                "No building inventory source for "+community+
                ": set use_incore, add a 'filename' key, or use an NSI id")

        print("Generate Address point inventory for: "+community)
        print("Based on building inventory: "+bldg_inv_id)
        generate_addpt_df = generate_addpt_functions(
                            community =   community,
                            communities = communities,
                            hui_df = housing_unit_inv_df,
                            bldg_inv_gdf = bldg_inv_gdf,
                            bldg_inv_id = bldg_inv_id,
                            residential_archetypes = residential_archetypes,
                            bldg_uniqueid = bldg_uniqueid,
                            archetype_var = archetype_var,
                            building_area_var = building_area_var,
                            building_area_cutoff = building_area_cutoff,
                            seed =          seed,
                            version =       version,
                            version_text=   version_text,
                            basevintage=    basevintage,
                            outputfolder=   outputfolder,
                            use_incore=     use_incore
                            )

        addpt_dataset_id = generate_addpt_df.generate_addpt_v2_for_incore()

        # Read in the Address Point Inventory.
        # The address point inventory is an intermediate file based on the
        # building inventory. It acts as the bridge between the building
        # inventory and the housing unit inventory.
        if isinstance(addpt_dataset_id, str):
            # A string is treated as an IN-CORE dataset id
            print("The Address Point Inventory ID is a string")
            addpt_inv_id = addpt_dataset_id
            addpt_inv = Dataset.from_data_service(addpt_inv_id, data_service)
            filename = addpt_inv.get_file_path('csv')
            print("The IN-CORE Dataservice has saved the Address Point Inventory on your local machine: "+filename)
            addpt_inv_df = pd.read_csv(filename, header="infer")
        elif isinstance(addpt_dataset_id, pd.DataFrame):
            addpt_inv_df = addpt_dataset_id
            print("The Address Point Inventory ID contains a pandas dataframe")
        else:
            # Stop here: continuing would use an undefined address point
            # inventory or one left over from a previous iteration
            raise TypeError(
                "The Address Point Inventory is not a string or pandas dataframe: "
                + type(addpt_dataset_id).__name__)

        # --- Run Housing Unit Allocation ---
        print("Housing Unit Allocation for: "+community)
        print("Based on housing unit inventory: "+hui_dataset_id)
        print("Based on building inventory: "+bldg_inv_id)

        outputfolders = directory_design(state_county_name = community,
                                                outputfolder = outputfolder)

        run_hua_gdf = hua_workflow_functions(
                            community =   community,
                            hui_df = housing_unit_inv_df,
                            bldg_gdf = bldg_inv_gdf,
                            bldg_inv_id = bldg_inv_id,
                            addpt_df = addpt_inv_df,
                            bldg_uniqueid = bldg_uniqueid,
                            archetype_var = archetype_var,
                            seed =          seed,
                            version =       version,
                            version_text=   version_text,
                            basevintage=    basevintage,
                            outputfolder=   outputfolder,
                            outputfolders = outputfolders,
                            use_incore=     use_incore
                            )

        hua_gdf = run_hua_gdf.housing_unit_allocation_workflow()

        # --- Merge Housing Unit Allocation with Housing Unit Inventory ---
        # Left merge keeps every housing unit record, allocated or not
        hua_cols = ['huid',bldg_uniqueid,'placeNAME10','huestimate','x','y']
        hua_hui_df = pd.merge(left = housing_unit_inv_df,
                            right = hua_gdf[hua_cols],
                            on='huid',
                            how='left')

        # Replace missing bldg_uniqueid
        hua_hui_df[bldg_uniqueid] = hua_hui_df[bldg_uniqueid].fillna('missing building id')

        # Keep if huid is not missing
        hua_hui_df = hua_hui_df[hua_hui_df['huid'].notna()]

        # --- Save Housing Unit Allocation to CSV ---
        output_filename = f'hua_{version_text}_{community}_{basevintage}_rs{seed}_{bldg_inv_id}'
        csv_filepath = outputfolder+"/"+output_filename+'.csv'
        # Resolve against the working directory: the upload below is given the
        # relative csv_filepath, and sys.path[0] is not guaranteed to be the
        # working directory (it can be '' or another directory).
        savefile = os.path.join(os.getcwd(), csv_filepath)
        hua_hui_df.to_csv(savefile, index=False)

        # Describe the counties of the community, one entry per county,
        # separated by "; " so multi-county communities stay readable
        county_descriptions = []
        for county in communities[community]['counties'].keys():
            state_county = communities[community]['counties'][county]['FIPS Code']
            state_county_name  = communities[community]['counties'][county]['Name']
            print(state_county_name,': county FIPS Code',state_county)
            county_descriptions.append(state_county_name+': county FIPS Code '+state_county)
        county_list = '; '.join(county_descriptions)

        # Title follows the configured version instead of a hardcoded label
        title = f"Housing Unit Allocation v{version} data for {community} {basevintage}"

        if use_incore:
            # Upload to IN-CORE Dataservice
            run_hua_gdf.upload_hua_file_to_incore(title =title,
                                county_list = county_list,
                                csv_filepath = csv_filepath,
                                output_filename = output_filename)
t1 = time.time()
total_time = t1-t0

Generating Housing Unit Inventory v2.0.0 data for Galveston, TX
Galveston County, TX : county FIPS Code 48167

***************************************
    Version control - list of installed packages
***************************************

# packages in environment at C:\Users\abbyb\anaconda3\envs\hua:
#
# Name                    Version                   Build  Channel
affine                    2.4.0              pyhd8ed1ab_0    conda-forge
anyio                     4.0.0              pyhd8ed1ab_0    conda-forge
argon2-cffi               23.1.0             pyhd8ed1ab_0    conda-forge
argon2-cffi-bindings      21.2.0           py38h91455d4_4    conda-forge
arrow                     1.2.3              pyhd8ed1ab_0    conda-forge
asttokens                 2.4.0              pyhd8ed1ab_0    conda-forge
async-lru                 2.0.4              pyhd8ed1ab_0    conda-forge
attrs                     23.1.0             pyh71513ae_1    conda-forge
babel                     2.12.1          

  fig = plt.figure(figsize=figsize)


Housing Unit Inventory Codebook for Galveston, TX, 2010
Adding first figure to cover page: randincome_by_race48167.png
OutputData/GalvestonCounty_TX/06_Explore/randincome_by_race48167.png

huid
huid: Housing Unit ID
blockid
blockid: Block ID
bgid
bgid: 2010 Census Block Group ID
tractid
tractid: 2010 Census Tract ID
FIPScounty
FIPScounty: County FIPS Code
numprec
numprec: Number of Person Records
ownershp
ownershp: Tenure Status
6
ownershp: Tenure Status - Categorical codes, labels and frequencies
race
race: Race of Householder
6
race: Race of Householder - Categorical codes, labels and frequencies
hispan
hispan: Hispanic Householder
6
hispan: Hispanic Householder - Categorical codes, labels and frequencies
family
family: Family Household
6
family: Family Household - Categorical codes, labels and frequencies
vacancy
vacancy: Vacancy Type
6
vacancy: Vacancy Type - Categorical codes, labels and frequencies
gqtype
gqtype: Group Quarters Type
6
gqtype: Group Quarters Type - Categorical cod

In [21]:
# Elapsed wall-clock time of the workflow loop in seconds (t1 - t0 from time.time())
total_time

55352.11421561241