# LIBRARY

In [53]:
import geopandas as gpd
import pandas as pd
import numpy as np
import fiona
import pyogrio
import os

import requests
import zipfile
import os
from io import BytesIO
from bs4 import BeautifulSoup

# READ EVENT DATA

In [54]:
def read_event_data(eventid = 'nc72282711', layer = "tract_shakemap_pga"):
    """
    Read one layer of an event's GeoPackage and keep only its populated rows.

    The file is looked up at
    ``<parent of cwd>/ShakeMaps/<eventid>/eqmodel_outputs.gpkg``.

    Args:
        eventid (str): Name of the event folder under ``ShakeMaps``.
        layer (str): Layer to read. Defaults to ``"tract_shakemap_pga"``.
            NOTE(review): ``tract_shakemap_mmi`` and ``tract_shakemap_pgv``
            are presumed to share the same column layout -- confirm.

    Returns:
        GeoDataFrame: The first four columns plus the last column of the
        layer, restricted to rows whose second column is not NaN.
    """
    parent_dir = os.path.dirname(os.getcwd())
    gpkg_path = os.path.join(parent_dir, 'ShakeMaps', eventid, "eqmodel_outputs.gpkg")

    gdf = gpd.read_file(gpkg_path, layer=layer)

    # Columns are picked by position, not by name: the first four and the
    # last one. Any columns in between are dropped.
    columns = gdf.columns
    keep = [columns[0], columns[1], columns[2], columns[3], columns[-1]]
    gdf = gdf[keep]

    # Keep only rows that carry a value in the second column; return a copy so
    # callers can modify the result without touching the frame read here.
    return gdf.loc[gdf[columns[1]].notna()].copy()

In [55]:
# Load the event layer for the default event id and list its columns.
eventdata = read_event_data()
eventdata.columns

Index(['GEOID', 'max_intensity', 'min_intensity', 'mean_intensity',
       'geometry'],
      dtype='object')

# READ BUILDING DATA

In [56]:
# In this module, assume that the CSV files already exist in the directory.

In [57]:
# Check whether a CSV file for a state exists:
#   - if it exists, read it;
#   - if not, check whether the GDB file exists and, if it does, read that.
def read_building_count_by_tract():
    """
    Read the aggregated per-tract building counts from a CSV file.

    The file is looked up at
    ``<parent of cwd>/Data/building_data_csv/aggregated_building_data.csv``.

    Returns:
        pandas.DataFrame or None: The table with ``CENSUSCODE`` read as a
        string and left-padded with zeros to 11 characters, or ``None``
        (after printing a message) when the CSV file does not exist.
    """
    parent_dir = os.path.dirname(os.getcwd())
    CSV_PATH = os.path.join(parent_dir, 'Data', 'building_data_csv', "aggregated_building_data.csv")

    if not os.path.exists(CSV_PATH):
        print("CSV file for Building count data is not available.")
        return None

    # Read CENSUSCODE as text so that any leading zeros still present in the
    # file are not stripped by numeric parsing.
    building_count = pd.read_csv(CSV_PATH, dtype={'CENSUSCODE': str})

    # Restore leading zeros that were lost upstream. zfill pads to the full
    # 11 characters and leaves codes that are already long enough untouched.
    building_count['CENSUSCODE'] = building_count['CENSUSCODE'].str.zfill(11)
    return building_count

In [58]:
# (state name, two-letter abbreviation) pairs for the 50 US states.
# NOTE(review): no other cell visible in this notebook references this list --
# confirm it is still needed.
states_data = [
    ("Alabama", "AL"), ("Alaska", "AK"), ("Arizona", "AZ"), ("Arkansas", "AR"),
    ("California", "CA"), ("Colorado", "CO"), ("Connecticut", "CT"), ("Delaware", "DE"),
    ("Florida", "FL"), ("Georgia", "GA"), ("Hawaii", "HI"), ("Idaho", "ID"),
    ("Illinois", "IL"), ("Indiana", "IN"), ("Iowa", "IA"), ("Kansas", "KS"),
    ("Kentucky", "KY"), ("Louisiana", "LA"), ("Maine", "ME"), ("Maryland", "MD"),
    ("Massachusetts", "MA"), ("Michigan", "MI"), ("Minnesota", "MN"), ("Mississippi", "MS"),
    ("Missouri", "MO"), ("Montana", "MT"), ("Nebraska", "NE"), ("Nevada", "NV"),
    ("New Hampshire", "NH"), ("New Jersey", "NJ"), ("New Mexico", "NM"), ("New York", "NY"),
    ("North Carolina", "NC"), ("North Dakota", "ND"), ("Ohio", "OH"), ("Oklahoma", "OK"),
    ("Oregon", "OR"), ("Pennsylvania", "PA"), ("Rhode Island", "RI"), ("South Carolina", "SC"),
    ("South Dakota", "SD"), ("Tennessee", "TN"), ("Texas", "TX"), ("Utah", "UT"),
    ("Vermont", "VT"), ("Virginia", "VA"), ("Washington", "WA"), ("West Virginia", "WV"),
    ("Wisconsin", "WI"), ("Wyoming", "WY")
]

In [59]:
# Sanity check: column dtypes of the building-count table.
# This raises AttributeError if the CSV is missing, because the reader then returns None.
read_building_count_by_tract().dtypes

CENSUSCODE                   object
OTHER_OTHER                   int64
RESIDENTIAL_MULTI FAMILY      int64
RESIDENTIAL_OTHER             int64
RESIDENTIAL_SINGLE FAMILY     int64
STATE_ID                     object
dtype: object

# INTERSECT WITH BUILDING STOCKS

In [60]:
def get_building_stock_data():
    """
    Read the per-tract building-stock percentage table from a CSV file.

    The file is looked up at ``<parent of cwd>/Data/building_stock_data/
    Building_Percentages_Per_Tract_ALLSTATES.csv``.

    Returns:
        DataFrame or None: The table with an added ``CENSUSCODE`` column
        (``Tract`` left-padded with zeros to 11 characters), or ``None``
        (after printing a message) when the CSV file does not exist.
        Creating or downloading a missing file is not implemented yet.
    """
    parent_dir = os.path.dirname(os.getcwd())
    CSV_PATH = os.path.join(parent_dir, 'Data', 'building_stock_data', 'Building_Percentages_Per_Tract_ALLSTATES.csv')

    if not os.path.exists(CSV_PATH):
        print(f"Building stock data does not exist at {CSV_PATH}")
        # TODO: create or download the files.
        # Return None explicitly, as read_building_count_by_tract does,
        # instead of falling through to an unassigned local variable.
        return None

    print(f"Building stock data exists at {CSV_PATH}")

    # NOTE(review): the dtypes shown in this notebook indicate that reading
    # the CSV through gpd.read_file yields text (object) columns only, plus a
    # `field_1` column that count_building_proportion drops. Switching to
    # pd.read_csv would change both, so the reader is left unchanged here.
    gdf = gpd.read_file(CSV_PATH)

    # Restore leading zeros lost from the tract code; zfill pads to the full
    # 11 characters and leaves codes that are already long enough untouched.
    gdf['CENSUSCODE'] = gdf['Tract'].str.zfill(11)
    return gdf

In [61]:
# Load the building-stock table and inspect its column dtypes.
building_stock = get_building_stock_data()
building_stock.dtypes

Building stock data exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv


field_1       object
Tract         object
W1            object
W2            object
S1L           object
S1M           object
S1H           object
S2L           object
S2M           object
S2H           object
S3            object
S4L           object
S4M           object
S4H           object
S5L           object
S5M           object
S5H           object
C1L           object
C1M           object
C1H           object
C2L           object
C2M           object
C2H           object
C3L           object
C3M           object
C3H           object
PC1           object
PC2L          object
PC2M          object
PC2H          object
RM1L          object
RM1M          object
RM2L          object
RM2M          object
RM2H          object
URML          object
URMM          object
MH            object
Total         object
CENSUSCODE    object
dtype: object

# JOIN COUNT BUILDING DATA AND BUILDING STOCK DATA

In [62]:
# Merge the per-tract building counts with the building-stock proportions.
def count_building_proportion(building_count, building_stock):
    """
    Left-join the building-stock table onto the building-count table.

    Args:
        building_count (DataFrame): Table with a ``CENSUSCODE`` column; every
            one of its rows is kept.
        building_stock (DataFrame): Table with ``CENSUSCODE``, ``Tract`` and
            ``field_1`` columns; the last two are dropped after the join.

    Returns:
        DataFrame: The joined table with missing values back-filled.
    """
    joined = pd.merge(building_count, building_stock, on='CENSUSCODE', how='left')
    # NOTE(review): bfill copies values from the next row into any row that is
    # missing them (for example tracts without a building-stock match) --
    # confirm that this imputation is intended.
    return (
        joined
        .drop(columns=['Tract'])
        .drop(columns=['field_1'])
        .bfill()
    )

In [63]:

# Build the per-tract table: building counts joined with building-stock proportions.
# NOTE(review): this reads the building-stock CSV again rather than reusing `building_stock`.
df_output = count_building_proportion(read_building_count_by_tract(), get_building_stock_data())
df_output.dtypes

Building stock data exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv


CENSUSCODE                   object
OTHER_OTHER                   int64
RESIDENTIAL_MULTI FAMILY      int64
RESIDENTIAL_OTHER             int64
RESIDENTIAL_SINGLE FAMILY     int64
STATE_ID                     object
W1                           object
W2                           object
S1L                          object
S1M                          object
S1H                          object
S2L                          object
S2M                          object
S2H                          object
S3                           object
S4L                          object
S4M                          object
S4H                          object
S5L                          object
S5M                          object
S5H                          object
C1L                          object
C1M                          object
C1H                          object
C2L                          object
C2M                          object
C2H                          object
C3L                         

In [64]:
# Spot-check the joined row for a single census code.
df_output.loc[df_output['CENSUSCODE'] == '06001400100']

Unnamed: 0,CENSUSCODE,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,STATE_ID,W1,W2,S1L,S1M,...,PC2H,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total
54207,6001400100,104,22,10,1162,CA,0.9312280701754386,0.0112280701754385,0.0049122807017543,0.0,...,0.0,0.0210526315789473,0.0,0.0014035087719298,0.0,0.0,0.0035087719298245,0.0,0.0,1.0007017543859649


# JOIN WITH EVENT DATA


In [65]:
# Check the event-table dtypes before joining on GEOID.
eventdata.dtypes

GEOID               object
max_intensity      float64
min_intensity      float64
mean_intensity     float64
geometry          geometry
dtype: object

In [67]:
# Attach building counts and building-stock proportions to each event tract,
# matching GEOID against CENSUSCODE and keeping every event row.
# NOTE(review): ffill copies the previous row's values into any row that is
# missing them (for example tracts with no match in df_output) -- confirm that
# this imputation is intended.
final_output = (
    pd.merge(eventdata, df_output, left_on='GEOID', right_on='CENSUSCODE', how='left')
    .ffill()
    .drop(columns=['CENSUSCODE'])
)
final_output.head()

Unnamed: 0,GEOID,max_intensity,min_intensity,mean_intensity,geometry,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,STATE_ID,...,PC2H,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total
0,6001400100,0.02,0.02,0.02,"MULTIPOLYGON (((-122.24692 37.88544, -122.2466...",104.0,22.0,10.0,1162.0,CA,...,0.0,0.0210526315789473,0.0,0.0014035087719298,0.0,0.0,0.0035087719298245,0.0,0.0,1.0007017543859649
1,6001400200,0.02,0.02,0.02,"MULTIPOLYGON (((-122.25792 37.84261, -122.2577...",46.0,110.0,2.0,538.0,CA,...,0.0,0.0323785803237858,0.0,0.0024906600249066,0.0,0.0,0.0062266500622665,0.0,0.0,1.0024906600249066
2,6001400300,0.02,0.02,0.02,"MULTIPOLYGON (((-122.26563 37.83764, -122.2655...",67.0,416.0,7.0,1139.0,CA,...,0.0,0.0369267421083978,0.0,0.0023823704586063,0.0,0.0,0.0089338892197736,0.0,0.0,1.0000000000000002
3,6001400400,0.02,0.02,0.02,"MULTIPOLYGON (((-122.26183 37.84162, -122.2618...",57.0,391.0,3.0,777.0,CA,...,0.0,0.0316319194823867,0.0,0.0014378145219266,0.0,0.0,0.0071890726096333,0.0,0.0,0.9985621854780734
4,6001400500,0.02,0.02,0.02,"MULTIPOLYGON (((-122.26951 37.84858, -122.2693...",56.0,342.0,6.0,614.0,CA,...,0.0,0.0352177942539388,0.0,0.0018535681186283,0.0,0.0,0.0092678405931417,0.0,0.0,1.0009267840593143


# SAVE OUTPUT TO EVENT DIR


In [68]:
# Function to save GeoDataFrame to GeoPackage (Overwriting mode)
def save_to_geopackage(gdf, layer_name="tract_shakemap_pga", eventid = 'nc72282711'):
    """
    Write a GeoDataFrame into an event's ``eqmodel_outputs.gpkg`` as one layer.

    The target file is
    ``<parent of cwd>/ShakeMaps/<eventid>/eqmodel_outputs.gpkg`` and the layer
    is written with ``mode="w"``.

    Args:
        gdf (GeoDataFrame): The GeoDataFrame to save.
        layer_name (str): Name of the layer to write in the GeoPackage.
        eventid (str): Name of the event folder under ``ShakeMaps``.

    Returns:
        None. A confirmation line is printed once the write has finished.
    """
    shakemaps_root = os.path.join(os.path.dirname(os.getcwd()), 'ShakeMaps')
    target = os.path.join(shakemaps_root, eventid, "eqmodel_outputs.gpkg")

    gdf.to_file(target, layer=layer_name, driver="GPKG", mode="w")
    print(f"Saved {layer_name} to {target} (overwritten).")

# Write the joined table back to the event GeoPackage, under the same layer
# name that read_event_data() reads from.
save_to_geopackage(final_output, layer_name="tract_shakemap_pga", eventid = 'nc72282711')

Saved tract_shakemap_pga to /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/ShakeMaps/nc72282711/eqmodel_outputs.gpkg (overwritten).


In [69]:
# Re-open the event GeoPackage and display the layer written by the save cell above.
parent_dir = os.path.dirname(os.getcwd())
event_dir = os.path.join(parent_dir, 'ShakeMaps', 'nc72282711')

# Path of the GeoPackage inside the event folder.
GPKG_PATH = os.path.join(event_dir, "eqmodel_outputs.gpkg")
gpd.read_file(GPKG_PATH, layer="tract_shakemap_pga")

Unnamed: 0,GEOID,max_intensity,min_intensity,mean_intensity,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,STATE_ID,W1,...,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total,geometry
0,06001400100,0.02,0.02,0.02,104.0,22.0,10.0,1162.0,CA,0.9312280701754386,...,0.021052631578947368,0.0,0.0014035087719298245,0.0,0.0,0.0035087719298245615,0.0,0.0,1.0007017543859649,"MULTIPOLYGON (((-122.24692 37.88544, -122.2466..."
1,06001400200,0.02,0.02,0.02,46.0,110.0,2.0,538.0,CA,0.8804483188044832,...,0.0323785803237858,0.0,0.0024906600249066002,0.0,0.0,0.0062266500622665,0.0,0.0,1.0024906600249066,"MULTIPOLYGON (((-122.25792 37.84261, -122.2577..."
2,06001400300,0.02,0.02,0.02,67.0,416.0,7.0,1139.0,CA,0.8618225134008338,...,0.03692674210839786,0.0,0.0023823704586063135,0.0,0.0,0.008933889219773675,0.0,0.0,1.0000000000000002,"MULTIPOLYGON (((-122.26563 37.83764, -122.2655..."
3,06001400400,0.02,0.02,0.02,57.0,391.0,3.0,777.0,CA,0.8871315600287563,...,0.03163191948238677,0.0,0.0014378145219266715,0.0,0.0,0.007189072609633357,0.0,0.0,0.9985621854780734,"MULTIPOLYGON (((-122.26183 37.84162, -122.2618..."
4,06001400500,0.02,0.02,0.02,56.0,342.0,6.0,614.0,CA,0.8730305838739574,...,0.03521779425393883,0.0,0.0018535681186283596,0.0,0.0,0.009267840593141797,0.0,0.0,1.0009267840593143,"MULTIPOLYGON (((-122.26951 37.84858, -122.2693..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600,06115040800,0.01,0.01,0.01,454.0,128.0,136.0,1256.0,CA,0.8649350649350649,...,0.024025974025974027,0.0,0.001948051948051948,0.0,0.0,0.005844155844155844,0.0,0.04935064935064935,1.0006493506493503,"MULTIPOLYGON (((-121.51553 39.03064, -121.5153..."
2601,06115040901,0.01,0.01,0.01,388.0,109.0,491.0,1025.0,CA,0.5838454784899034,...,0.016681299385425813,0.0,0.001755926251097454,0.0,0.0,0.003511852502194908,0.0,0.3520632133450395,1.0008779631255487,"MULTIPOLYGON (((-121.58338 39.13621, -121.5831..."
2602,06115040902,0.01,0.01,0.01,759.0,0.0,0.0,0.0,CA,0.8811013767209012,...,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861,"MULTIPOLYGON (((-121.47722 39.13334, -121.4770..."
2603,06115041000,0.01,0.01,0.01,759.0,0.0,0.0,0.0,CA,0.8811013767209012,...,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861,"MULTIPOLYGON (((-121.63637 39.24608, -121.6362..."
