Historical Flood Data

In [1]:
import pandas as pd
# historical flood data
def clean_flood_data(path='FloodArchive.xlsx', country='USA'):
    """Load the flood archive workbook and keep a single country's events.

    Parameters
    ----------
    path : str, default 'FloodArchive.xlsx'
        Location of the flood archive Excel workbook (read with openpyxl).
    country : str, default 'USA'
        Value of the 'Country' column that rows must match to be kept.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows. The original row labels
        from the workbook are preserved (the index is not reset).
    """
    flood_df = pd.read_excel(path, engine='openpyxl')

    # Keep only the requested country. The explicit .copy() detaches the
    # result from the full archive frame, so columns added to it later (the
    # notebook adds a 'zipcode' column) do not raise SettingWithCopyWarning.
    return flood_df[flood_df['Country'] == country].copy()

# Build the filtered flood table once; later cells print, map and extend `flood_df`.
flood_df = clean_flood_data()

In [2]:
# Report how many flood events remain after the country filter.
print(f'USA number of flood incidents: {flood_df.shape[0]}')
# Plain-text preview of the first five rows.
print(flood_df.head(5))
# Persist the filtered events; the row index is not written to the file.
flood_df.to_csv(path_or_buf='united_states_floods.csv', index=False)


USA number of flood incidents: 477
    ID GlideNumber Country OtherCountry      long      lat       Area  \
8    9           0     USA            0  -85.1742  40.6691  210527.96   
10  11           0     USA            0  -89.5537  40.6814   26266.14   
11  12           0     USA            0 -108.0930  35.3824   26527.13   
12  13           0     USA            0  -96.7845  29.6044  141508.00   
13  14           0     USA            0  -83.5377  42.0122   16883.54   

        Began      Ended Validation  Dead  Displaced          MainCause  \
8  1985-02-22 1985-03-01       News     7       2250  Rain and snowmelt   
10 1985-03-03 1985-03-08       News     4       2400  Rain and snowmelt   
11 1985-03-13 1985-03-14       News     0         80  Rain and snowmelt   
12 1985-03-14 1985-03-15       News     0          0         Heavy rain   
13 1985-03-30 1985-03-31       News     0        300         Heavy rain   

    Severity  
8        2.0  
10       2.0  
11       1.0  
12       1.0  


In [3]:
# !pip install folium
import folium
import math

#visual representation of FLOOD DATA
# Base map; the initial view is positioned at [39.8283, -98.5795] with zoom level 5.
map_usa = folium.Map(location=[39.8283, -98.5795], zoom_start=5)
severity_colors = {
    1.0: "yellow", #large flood events: 1-2 decades-long reported interval since the last similar event
    1.5: "orange", #very large events: greater than 2 decades but less than 100 year estimated recurrence interval
    2.0: "red" #Extreme events: with an estimated recurrence interval greater than 100 years.
}
# Colour used when a row's Severity is missing or is not one of the classes
# above, so one unexpected value cannot abort the whole map with a KeyError.
unknown_severity_color = "gray"

# Add a marker for each flood occurrence
for index, row in flood_df.iterrows():
    # Add a circle marker at each flood location (lat, long); the popup
    # summarises the event and the outline colour encodes its severity class.
    folium.CircleMarker(
        location=[row['lat'], row['long']],
        popup=f"ID: {row['ID']} | Severity: {row['Severity']} | Displaced: {row['Displaced']} | Date: {row['Began']}",
        color=severity_colors.get(row['Severity'], unknown_severity_color),
        fill=True,
        fill_opacity=0.6
    ).add_to(map_usa)

# Save map to HTML file
map_usa.save('flood_map.html')


In [None]:
# !pip install uszipcode

#add zipcode column to flood data
from uszipcode import SearchEngine

# Single shared uszipcode SearchEngine instance; get_zipcode() below uses it for every lookup.
search = SearchEngine()

def get_zipcode(lat, lon):
    """Look up a zip code for a latitude/longitude pair.

    Parameters
    ----------
    lat, lon : float
        Coordinates passed to ``search.by_coordinates``.

    Returns
    -------
    str or None
        The ``zipcode`` attribute of the first match returned by
        ``search.by_coordinates``, or None when either coordinate is missing
        (None/NaN) or the search yields no match.
    """
    # A missing coordinate cannot be geocoded; skip the lookup entirely.
    if pd.isna(lat) or pd.isna(lon):
        return None
    result = search.by_coordinates(lat, lon)
    if result:
        return result[0].zipcode
    return None

# Resolve a zip code for every flood event from its coordinates and store
# the results in a new 'zipcode' column.
def _zipcode_for_row(row):
    """Return get_zipcode() for one row's 'lat' and 'long' values."""
    return get_zipcode(row['lat'], row['long'])

flood_df['zipcode'] = flood_df.apply(_zipcode_for_row, axis=1)


In [13]:
# Lift the column limit so printed frames show every column; this is a
# global pandas display option and stays in effect for later cells.
pd.options.display.max_columns = None
# Plain-text preview of the first five rows, now including 'zipcode'.
print(flood_df.head(5))
# Overwrite the flood CSV so it carries the new column; index is not written.
flood_df.to_csv(path_or_buf='united_states_floods.csv', index=False)

    ID GlideNumber Country OtherCountry      long      lat       Area  \
8    9           0     USA            0  -85.1742  40.6691  210527.96   
10  11           0     USA            0  -89.5537  40.6814   26266.14   
11  12           0     USA            0 -108.0930  35.3824   26527.13   
12  13           0     USA            0  -96.7845  29.6044  141508.00   
13  14           0     USA            0  -83.5377  42.0122   16883.54   

        Began      Ended Validation  Dead  Displaced          MainCause  \
8  1985-02-22 1985-03-01       News     7       2250  Rain and snowmelt   
10 1985-03-03 1985-03-08       News     4       2400  Rain and snowmelt   
11 1985-03-13 1985-03-14       News     0         80  Rain and snowmelt   
12 1985-03-14 1985-03-15       News     0          0         Heavy rain   
13 1985-03-30 1985-03-31       News     0        300         Heavy rain   

    Severity zipcode  
8        2.0   46781  
10       2.0   61611  
11       1.0   87045  
12       1.0   789

Historical Housing Data

In [22]:
# Distinct, non-missing zip codes that were resolved for the flood events.
valid_zipcodes = flood_df['zipcode'].dropna().unique()
def clean_housing_data():
    """Load the zip-level HPI table and keep only flood-affected zip codes.

    Reads 'hpi_at_bdl_zip5.csv' with the zip code column parsed as text,
    drops columns whose name starts with 'Unnamed', and keeps the rows whose
    'Five-Digit ZIP Code' appears in the module-level ``valid_zipcodes``.

    Returns
    -------
    pandas.DataFrame
        The filtered housing rows, with their original row labels.
    """
    housing = pd.read_csv('hpi_at_bdl_zip5.csv', dtype={'Five-Digit ZIP Code': str})

    # Discard the unnamed placeholder columns.
    is_named = ~housing.columns.str.startswith('Unnamed')
    housing = housing.loc[:, is_named]

    # Restrict to zip codes that also occur in the flood data.
    in_flood_zips = housing['Five-Digit ZIP Code'].isin(valid_zipcodes)
    return housing[in_flood_zips]

# Housing price index rows restricted to zip codes found in the flood data.
df_housing = clean_housing_data()

In [23]:
# Plain-text preview of the first ten housing rows.
print(df_housing.iloc[:10])
# Persist the filtered housing table; the row index is not written.
df_housing.to_csv(path_or_buf='united_states_housing.csv', index=False)


     Five-Digit ZIP Code  Year  Annual Change (%)     HPI  HPI with 1990 base  \
2401               01253  2001                NaN  100.00                 NaN   
2402               01253  2002               9.38  109.38                 NaN   
2403               01253  2003               2.17  111.75                 NaN   
2404               01253  2004              21.80  136.12                 NaN   
2405               01253  2005               8.09  147.13                 NaN   
2406               01253  2006              12.92  166.14                 NaN   
2407               01253  2007              -2.73  161.60                 NaN   
2408               01253  2008              -1.61  159.00                 NaN   
2409               01253  2009              -2.66  154.77                 NaN   
2410               01253  2010               0.33  155.28                 NaN   

      HPI with 2000 base  
2401                 NaN  
2402                 NaN  
2403                 NaN  


Random Forest Regressor

In [None]:
#code for predicting HPI based on flood data