# LIBRARY

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import fiona
import pyogrio
import os

import requests
import zipfile
import os
from io import BytesIO
from bs4 import BeautifulSoup

# READ EVENT DATA

In [2]:
def read_event_data(eventid = 'nc72282711', layer = 'tract_shakemap_mmi'):
    """
    Read one tract-level ShakeMap layer of an event from its GeoPackage.

    The file is looked up at
    ``<parent of cwd>/ShakeMaps/<eventid>/eqmodel_outputs.gpkg``.

    Parameters
    ----------
    eventid : str
        Name of the event folder under ``ShakeMaps``.
    layer : str
        GeoPackage layer to read. Defaults to ``"tract_shakemap_mmi"``, the
        layer that used to be hard-coded; ``"tract_shakemap_pga"`` and
        ``"tract_shakemap_pgv"`` are meant to be read the same way.

    Returns
    -------
    The rows of the layer whose second column is not NaN, as returned by
    ``gpd.read_file`` and filtered with ``.loc``.
    """
    parent_dir = os.path.dirname(os.getcwd())
    gpkg_path = os.path.join(parent_dir, 'ShakeMaps', eventid, 'eqmodel_outputs.gpkg')

    gdf = gpd.read_file(gpkg_path, layer=layer)

    # The second column is the first value column after the tract id
    # (max_intensity in the MMI output shown in this notebook); rows where it
    # is NaN are dropped.
    value_column = gdf.columns[1]
    return gdf.loc[gdf[value_column].notna()]

In [3]:
# Load the default event's tract-level layer and preview its first rows.
eventdata = read_event_data()
eventdata.head()

Unnamed: 0,GEOID,max_intensity,min_intensity,mean_intensity,geometry
3560,6001400100,3.8,3.4,3.5625,"POLYGON ((-122.24692 37.88544, -122.24667 37.8..."
3561,6001400200,3.6,3.4,3.511111,"POLYGON ((-122.25792 37.84261, -122.25773 37.8..."
3562,6001400300,3.6,3.4,3.538462,"POLYGON ((-122.26563 37.83764, -122.26557 37.8..."
3563,6001400400,3.6,3.6,3.6,"POLYGON ((-122.26183 37.84162, -122.26181 37.8..."
3564,6001400500,3.6,3.6,3.6,"POLYGON ((-122.26951 37.84858, -122.26937 37.8..."


# READ BUILDING DATA

In [None]:
def get_building_data_directory(stateid="CA"):
    """
    Build the path of a state's structures geodatabase.

    Looks inside ``<parent of cwd>/Data/building_data_gdb`` for the first
    sub-folder (in ``os.listdir`` order) whose name ends with ``stateid`` and
    returns ``<that folder>/<stateid>_Structures.gdb``.

    Raises
    ------
    IndexError
        If no sub-folder name ends with ``stateid``.
    """
    gdb_root = os.path.join(os.path.dirname(os.getcwd()), 'Data', 'building_data_gdb')
    suffix = f'{stateid}'

    matches = []
    for entry in os.listdir(gdb_root):
        # plain files are ignored; only sub-folders can be a state folder
        if not os.path.isdir(os.path.join(gdb_root, entry)):
            continue
        if entry.endswith(suffix):
            matches.append(entry)

    # indexing an empty list is what signals "no folder for this state"
    state_folder = matches[0]
    return os.path.join(gdb_root, state_folder, f'{stateid}_Structures.gdb')

def get_building_data_csv(stateid):
    """
    Return the path of a state's CSV export, ``<stateid>.csv``.

    The file name is joined onto the path returned by
    ``get_building_data_directory(stateid)``, so the CSV is expected to sit
    inside that ``.gdb`` location.
    """
    gdb_location = get_building_data_directory(stateid)
    csv_name = '{}.csv'.format(stateid)
    return os.path.join(gdb_location, csv_name)

In [None]:
# Make the directory (with any missing parents) unless the path is already there
def create_directory_if_not_exists(directory):
    """
    Create ``directory`` when nothing exists at that path.

    When the path already exists, nothing is created and a message saying so
    is printed instead. Returns None in both cases.
    """
    if os.path.exists(directory):
        print(f"Directory {directory} already exists.")
        return
    os.makedirs(directory)

In [169]:
# Lookup order for a state's building data:
#   1. the state's csv file, if it exists
#   2. otherwise the state's gdb, if it exists
#   3. otherwise print a hint and return nothing
def read_building_data(stateid):
    """
    Read a state's building data, preferring the CSV over the geodatabase.

    Both paths come from ``get_building_data_csv`` and
    ``get_building_data_directory``. Whichever source exists is read with
    ``gpd.read_file``; progress is reported with ``print``.

    Returns
    -------
    The object returned by ``gpd.read_file``, or None when neither the CSV nor
    the geodatabase exists.
    """
    gdb_path = get_building_data_directory(stateid)
    csv_path = get_building_data_csv(stateid)

    if os.path.exists(csv_path):
        print(f"Reading {csv_path}")
        return gpd.read_file(csv_path)

    print(f"{csv_path} does not exist.")
    if os.path.exists(gdb_path):
        print(f"Reading {gdb_path}")
        return gpd.read_file(gdb_path)

    print(f"{gdb_path} does not exist.")
    print("Please download the gdb file from the USGS website.")
    return None

In [4]:
# (state name, two-letter abbreviation) pairs for the 50 US states.
# The abbreviations have the same form as the `stateid` values used by the
# building-data helpers (e.g. "CA").
states_data = [
    ("Alabama", "AL"), ("Alaska", "AK"), ("Arizona", "AZ"), ("Arkansas", "AR"),
    ("California", "CA"), ("Colorado", "CO"), ("Connecticut", "CT"), ("Delaware", "DE"),
    ("Florida", "FL"), ("Georgia", "GA"), ("Hawaii", "HI"), ("Idaho", "ID"),
    ("Illinois", "IL"), ("Indiana", "IN"), ("Iowa", "IA"), ("Kansas", "KS"),
    ("Kentucky", "KY"), ("Louisiana", "LA"), ("Maine", "ME"), ("Maryland", "MD"),
    ("Massachusetts", "MA"), ("Michigan", "MI"), ("Minnesota", "MN"), ("Mississippi", "MS"),
    ("Missouri", "MO"), ("Montana", "MT"), ("Nebraska", "NE"), ("Nevada", "NV"),
    ("New Hampshire", "NH"), ("New Jersey", "NJ"), ("New Mexico", "NM"), ("New York", "NY"),
    ("North Carolina", "NC"), ("North Dakota", "ND"), ("Ohio", "OH"), ("Oklahoma", "OK"),
    ("Oregon", "OR"), ("Pennsylvania", "PA"), ("Rhode Island", "RI"), ("South Carolina", "SC"),
    ("South Dakota", "SD"), ("Tennessee", "TN"), ("Texas", "TX"), ("Utah", "UT"),
    ("Vermont", "VT"), ("Virginia", "VA"), ("Washington", "WA"), ("West Virginia", "WV"),
    ("Wisconsin", "WI"), ("Wyoming", "WY")
]

# INTERSECT WITH BUILDING STOCKS

In [46]:
# Locate the shared building stock folder, creating it when it is missing
def check_building_stock_folder_exists():
    """
    Return the path of the building stock data folder, creating it if needed.

    The folder is ``<parent of cwd>/Data/building_data_gdb/building_stock_data``,
    which is the location shown in this notebook's recorded outputs.

    The path is built from the ``building_data_gdb`` root directly rather than
    from ``get_building_data_directory()``: that helper returns a state's
    ``<state>_Structures.gdb`` path, so joining onto it would place the
    all-states folder inside one state's geodatabase and would not match the
    recorded location.

    Returns
    -------
    str
        The folder path; the folder exists when the function returns.
    """
    building_data_root = os.path.join(os.path.dirname(os.getcwd()), 'Data', 'building_data_gdb')
    folder_path = os.path.join(building_data_root, 'building_stock_data')

    if os.path.exists(folder_path):
        print(f"Building stock data folder exists at {folder_path}")
    else:
        print(f"Building stock data folder does not exist at {folder_path}")
        # create the directory (and any missing parents)
        create_directory_if_not_exists(folder_path)
    return folder_path

In [47]:
# Resolve the building stock folder (created when missing); the cell displays the returned path.
check_building_stock_folder_exists()

Building stock data folder exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data


'/Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data'

In [58]:
def check_building_stock_data_exists():
    """
    Report whether the all-states building stock CSV is available.

    The folder is obtained from ``check_building_stock_folder_exists()`` and
    the file ``Building_Percentages_Per_Tract_ALLSTATES.csv`` is looked for
    inside it.

    Returns
    -------
    list
        ``[True, csv_path]`` when the file exists, ``[False, None]`` otherwise.
    """
    stock_dir = check_building_stock_folder_exists()
    csv_path = os.path.join(stock_dir, 'Building_Percentages_Per_Tract_ALLSTATES.csv')

    if not os.path.exists(csv_path):
        print(f"Building stock data does not exist at {csv_path}")
        # the file still has to be created or downloaded by the user
        return [False, None]

    print(f"Building stock data exists at {csv_path}")
    return [True, csv_path]

In [57]:
# Check for the building stock CSV; the cell displays the function's return value.
# NOTE(review): the recorded output below is a bare path string, while the
# definition above returns a [bool, path] list — the output looks like it came
# from an earlier definition; re-run this cell to refresh it.
check_building_stock_data_exists()

Building stock data folder exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data
Building stock data exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv


'/Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv'

In [72]:
def get_building_stock_csv():
    """
    Load the all-states building stock table and add a zero-padded tract key.

    Steps:
    1. Ask ``check_building_stock_data_exists()`` whether the CSV is present.
    2. If it is missing, fail with a clear error (previously the function
       crashed on an unbound local variable).
    3. Otherwise read it with ``gpd.read_file`` and add ``CENSUSCODE``.

    ``CENSUSCODE`` is the ``Tract`` text left-padded with zeros to 11
    characters, restoring the leading zero that is missing from
    10-character tract ids in the file.

    Returns
    -------
    The table returned by ``gpd.read_file`` with the extra ``CENSUSCODE`` column.

    Raises
    ------
    FileNotFoundError
        If the building stock CSV does not exist.
    """
    exist, csv_path = check_building_stock_data_exists()
    if not exist:
        raise FileNotFoundError(
            "Building stock CSV 'Building_Percentages_Per_Tract_ALLSTATES.csv' "
            "was not found in the building stock data folder."
        )

    print(f"Reading {csv_path}")
    gdf = gpd.read_file(csv_path)

    # Tract is read as text; zfill pads short ids and leaves 11-character ids as they are
    gdf['CENSUSCODE'] = gdf['Tract'].str.zfill(11)
    return gdf

In [73]:
# Load the building stock table once; it is reused by the merge further down.
building_stock = get_building_stock_csv()
building_stock

Building stock data folder exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data
Building stock data exists at /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv
Reading /Users/yusufpradana/Library/CloudStorage/OneDrive-Personal/Gradschool/4_SPRING25/Capstone/github/EarthquakeDamageModel_Heinz/Data/building_data_gdb/building_stock_data/Building_Percentages_Per_Tract_ALLSTATES.csv


Unnamed: 0,field_1,Tract,W1,W2,S1L,S1M,S1H,S2L,S2M,S2H,...,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total,CENSUSCODE
0,0,35001000107,0.7042389210019268,0.010597302504816955,0.0009633911368015414,0.0,0.0,0.004816955684007707,0.0,0.0,...,0.22928709055876687,0.0,0.0009633911368015414,0.0,0.0,0.025048169556840076,0.0,0.0009633911368015414,1.0,35001000107
1,1,35001000108,0.6978260869565217,0.010869565217391304,0.0010869565217391304,0.0,0.0,0.007608695652173913,0.0,0.0,...,0.225,0.0,0.0010869565217391304,0.0,0.0,0.025,0.0,0.003260869565217391,1.0,35001000108
2,2,35001000109,0.6781157998037292,0.017664376840039256,0.0009813542688910696,0.0,0.0,0.007850834151128557,0.0,0.0,...,0.22767419038272815,0.0,0.001962708537782139,0.0,0.0,0.02551521099116781,0.0,0.0009813542688910696,1.0,35001000109
3,3,35001000110,0.6922398589065256,0.012345679012345678,0.0008818342151675485,0.0,0.0,0.008818342151675485,0.0,0.0,...,0.22134038800705466,0.0,0.001763668430335097,0.0,0.0,0.025573192239858905,0.0,0.003527336860670194,1.0,35001000110
4,4,35001000111,0.7141693811074918,0.007328990228013029,0.0008143322475570033,0.0,0.0,0.0024429967426710096,0.0,0.0,...,0.23452768729641693,0.0,0.0008143322475570033,0.0,0.0,0.024429967426710098,0.0,0.0,1.0,35001000111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69918,69918,5149952302,0.7565632458233891,0.01909307875894988,0.002386634844868735,0.0,0.0,0.003977724741447892,0.0,0.0,...,0.002386634844868735,0.0,0.0,0.0,0.0,0.14081145584725538,0.0,0.045346062052505964,1.0007955449482897,05149952302
69919,69919,5149952401,0.6774647887323944,0.004225352112676056,0.0007042253521126761,0.0,0.0,0.0014084507042253522,0.0,0.0,...,0.0007042253521126761,0.0,0.0,0.0,0.0,0.10774647887323943,0.0,0.19788732394366196,0.9985915492957745,05149952401
69920,69920,5149952402,0.654020618556701,0.006185567010309278,0.0008247422680412372,0.0,0.0,0.0012371134020618556,0.0,0.0,...,0.0008247422680412372,0.0,0.0,0.0,0.0,0.10721649484536082,0.0,0.2177319587628866,1.000824742268041,05149952402
69921,69921,5149952500,0.6979107848673066,0.011857707509881422,0.002258610954263128,0.0,0.0,0.003952569169960474,0.0,0.0,...,0.002258610954263128,0.0,0.0,0.0,0.0,0.12761151891586675,0.0,0.12987012987012986,1.0005646527385659,05149952500


# JOIN COUNT BUILDING DATA AND BUILDING STOCK DATA

In [90]:
# Combine the per-tract building counts with the building stock shares
def merge_building_data(df_pivot, building_stock):
    """
    Left-join ``building_stock`` onto ``df_pivot`` by ``CENSUSCODE``.

    Every row of ``df_pivot`` is kept; rows without a match get missing values
    in the building stock columns. The helper columns ``Tract`` and
    ``field_1`` are removed from the result.

    Returns
    -------
    pandas.DataFrame
        A new frame; neither input is modified.
    """
    helper_columns = ['Tract', 'field_1']
    return (
        df_pivot
        .merge(building_stock, on='CENSUSCODE', how='left')
        .drop(columns=helper_columns)
    )

In [91]:
# NOTE(review): df_pivot is not created by any cell visible in this notebook —
# add the cell that builds it (or load it) so Restart & Run All works.
df_output = merge_building_data(df_pivot, building_stock)
df_output

Unnamed: 0,CENSUSCODE,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,W1,W2,S1L,S1M,S1H,...,PC2H,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total
0,06001400100,104,22,10,1162,0.9312280701754386,0.011228070175438596,0.004912280701754386,0.0,0.0,...,0.0,0.021052631578947368,0.0,0.0014035087719298245,0.0,0.0,0.0035087719298245615,0.0,0.0,1.0007017543859649
1,06001400200,46,110,2,538,0.8804483188044832,0.0186799501867995,0.009962640099626401,0.0,0.0,...,0.0,0.0323785803237858,0.0,0.0024906600249066002,0.0,0.0,0.0062266500622665,0.0,0.0,1.0024906600249066
2,06001400300,67,416,7,1139,0.8618225134008338,0.02084574151280524,0.009529481834425254,0.0,0.0,...,0.0,0.03692674210839786,0.0,0.0023823704586063135,0.0,0.0,0.008933889219773675,0.0,0.0,1.0000000000000002
3,06001400400,57,391,3,777,0.8871315600287563,0.015815959741193385,0.005751258087706686,0.0,0.0,...,0.0,0.03163191948238677,0.0,0.0014378145219266715,0.0,0.0,0.007189072609633357,0.0,0.0,0.9985621854780734
4,06001400500,56,342,6,614,0.8730305838739574,0.014828544949026877,0.005560704355885079,0.0,0.0,...,0.0,0.03521779425393883,0.0,0.0018535681186283596,0.0,0.0,0.009267840593141797,0.0,0.0,1.0009267840593143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9108,06115040902,759,0,0,0,0.8811013767209012,0.011264080100125156,0.007509386733416771,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861
9109,06115041001,579,90,360,1807,,,,,,...,,,,,,,,,,
9110,06115041002,270,155,628,1771,,,,,,...,,,,,,,,,,
9111,06115041101,373,86,596,903,,,,,,...,,,,,,,,,,


In [107]:
# Fill missing values by carrying the previous row's value forward.
# (No dtype conversion happens here.)
def clean_df(df):
    """
    Return a copy of ``df`` with missing values forward-filled.

    Uses ``DataFrame.ffill()`` in place of the deprecated
    ``fillna(method='ffill')``; the result is the same, without the
    deprecation warning.

    NOTE(review): forward fill copies values from the preceding row, so tracts
    with no building stock match (the trailing rows of the merged table)
    receive the shares of the tract listed just before them — confirm that
    this is the intended imputation.
    """
    return df.ffill()
# Rebind df_output to its forward-filled version (the unfilled merge result is
# no longer reachable under this name afterwards).
df_output = clean_df(df_output)

  df = df.fillna(method='ffill')


In [108]:
# Display the forward-filled table.
df_output

Unnamed: 0,CENSUSCODE,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,W1,W2,S1L,S1M,S1H,...,PC2H,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total
0,06001400100,104.0,22.0,10.0,1162.0,0.9312280701754386,0.011228070175438596,0.004912280701754386,0.0,0.0,...,0.0,0.021052631578947368,0.0,0.0014035087719298245,0.0,0.0,0.0035087719298245615,0.0,0.0,1.0007017543859649
1,06001400200,46.0,110.0,2.0,538.0,0.8804483188044832,0.0186799501867995,0.009962640099626401,0.0,0.0,...,0.0,0.0323785803237858,0.0,0.0024906600249066002,0.0,0.0,0.0062266500622665,0.0,0.0,1.0024906600249066
2,06001400300,67.0,416.0,7.0,1139.0,0.8618225134008338,0.02084574151280524,0.009529481834425254,0.0,0.0,...,0.0,0.03692674210839786,0.0,0.0023823704586063135,0.0,0.0,0.008933889219773675,0.0,0.0,1.0000000000000002
3,06001400400,57.0,391.0,3.0,777.0,0.8871315600287563,0.015815959741193385,0.005751258087706686,0.0,0.0,...,0.0,0.03163191948238677,0.0,0.0014378145219266715,0.0,0.0,0.007189072609633357,0.0,0.0,0.9985621854780734
4,06001400500,56.0,342.0,6.0,614.0,0.8730305838739574,0.014828544949026877,0.005560704355885079,0.0,0.0,...,0.0,0.03521779425393883,0.0,0.0018535681186283596,0.0,0.0,0.009267840593141797,0.0,0.0,1.0009267840593143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9108,06115040902,759.0,0.0,0.0,0.0,0.8811013767209012,0.011264080100125156,0.007509386733416771,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861
9109,06115041001,579.0,90.0,360.0,1807.0,0.8811013767209012,0.011264080100125156,0.007509386733416771,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861
9110,06115041002,270.0,155.0,628.0,1771.0,0.8811013767209012,0.011264080100125156,0.007509386733416771,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861
9111,06115041101,373.0,86.0,596.0,903.0,0.8811013767209012,0.011264080100125156,0.007509386733416771,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861


# JOIN WITH EVENT DATA


In [121]:
# Attach building counts and building-type shares to each event tract;
# the left join keeps every row of eventdata.
final_output = pd.merge(eventdata, df_output, left_on='GEOID', right_on='CENSUSCODE', how='left')
# NOTE(review): fillna(0) is applied to every column, so event tracts without a
# match end up with CENSUSCODE 0 and 0 in the count/share columns (see the last
# row of the output below) — confirm that 0, rather than "missing", is what
# downstream steps expect.
final_output.fillna(0, inplace=True)
final_output

Unnamed: 0,GEOID,max_intensity,min_intensity,mean_intensity,geometry,CENSUSCODE,OTHER_OTHER,RESIDENTIAL_MULTI FAMILY,RESIDENTIAL_OTHER,RESIDENTIAL_SINGLE FAMILY,...,PC2H,RM1L,RM1M,RM2L,RM2M,RM2H,URML,URMM,MH,Total
0,06001400100,3.8,3.4,3.562500,"POLYGON ((-122.24692 37.88544, -122.24667 37.8...",06001400100,104.0,22.0,10.0,1162.0,...,0.0,0.021052631578947368,0.0,0.0014035087719298245,0.0,0.0,0.0035087719298245615,0.0,0.0,1.0007017543859649
1,06001400200,3.6,3.4,3.511111,"POLYGON ((-122.25792 37.84261, -122.25773 37.8...",06001400200,46.0,110.0,2.0,538.0,...,0.0,0.0323785803237858,0.0,0.0024906600249066002,0.0,0.0,0.0062266500622665,0.0,0.0,1.0024906600249066
2,06001400300,3.6,3.4,3.538462,"POLYGON ((-122.26563 37.83764, -122.26557 37.8...",06001400300,67.0,416.0,7.0,1139.0,...,0.0,0.03692674210839786,0.0,0.0023823704586063135,0.0,0.0,0.008933889219773675,0.0,0.0,1.0000000000000002
3,06001400400,3.6,3.6,3.600000,"POLYGON ((-122.26183 37.84162, -122.26181 37.8...",06001400400,57.0,391.0,3.0,777.0,...,0.0,0.03163191948238677,0.0,0.0014378145219266715,0.0,0.0,0.007189072609633357,0.0,0.0,0.9985621854780734
4,06001400500,3.6,3.6,3.600000,"POLYGON ((-122.26951 37.84858, -122.26937 37.8...",06001400500,56.0,342.0,6.0,614.0,...,0.0,0.03521779425393883,0.0,0.0018535681186283596,0.0,0.0,0.009267840593141797,0.0,0.0,1.0009267840593143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600,06115040800,3.6,3.0,3.326316,"POLYGON ((-121.51553 39.03064, -121.51533 39.0...",06115040800,454.0,128.0,136.0,1256.0,...,0.0,0.024025974025974027,0.0,0.001948051948051948,0.0,0.0,0.005844155844155844,0.0,0.04935064935064935,1.0006493506493503
2601,06115040901,3.6,3.0,3.272000,"POLYGON ((-121.58338 39.13621, -121.58319 39.1...",06115040901,388.0,109.0,491.0,1025.0,...,0.0,0.016681299385425813,0.0,0.001755926251097454,0.0,0.0,0.003511852502194908,0.0,0.3520632133450395,1.0008779631255487
2602,06115040902,3.4,3.0,3.200000,"POLYGON ((-121.47722 39.13334, -121.47709 39.1...",06115040902,759.0,0.0,0.0,0.0,...,0.0,0.03128911138923655,0.0,0.0025031289111389237,0.0,0.0,0.007509386733416771,0.0,0.0050062578222778474,0.997496871088861
2603,06115041000,3.4,2.8,3.140000,"POLYGON ((-121.63637 39.24608, -121.63629 39.2...",0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


# SAVE OUTPUT TO EVENT DIR


In [125]:
def save_output(final_output, eventid = 'nc72282711'):
    """
    Write the joined table to the event's ShakeMaps folder as a CSV file.

    The file is written to
    ``<parent of cwd>/ShakeMaps/<eventid>/o3_output_<eventid>.csv`` via
    ``final_output.to_csv(..., index=False)``, i.e. without the index column.

    Parameters
    ----------
    final_output
        Table to save; it must provide ``to_csv``.
    eventid : str
        Name of the event folder under ``ShakeMaps``; also used in the file name.
    """
    csv_name = f'o3_output_{eventid}.csv'
    shakemaps_root = os.path.join(os.path.dirname(os.getcwd()), 'ShakeMaps')
    target_path = os.path.join(shakemaps_root, eventid, csv_name)

    final_output.to_csv(target_path, index=False)

# Write the joined table for the default event id.
save_output(final_output)