In [1]:
import pandas as pd
import warnings # Suppress all warnings 

In [2]:
import warnings # Suppress all warnings 
# NOTE(review): "ignore" with no category silences every warning for the rest
# of the session, including pandas' SettingWithCopyWarning.
warnings.filterwarnings("ignore")
# Path to the raw disaster-declarations CSV, relative to the notebook's
# working directory.
data = '../data/raw/DisasterDeclarationsSummaries.csv'
# Load the CSV with pandas' default parsing options.
raw_df = pd.read_csv(data)

In [3]:
# Work on a copy so raw_df keeps the data exactly as loaded.
initial_df = raw_df.copy()

In [4]:
# Remove the columns that are not used later in the notebook.
# The result is derived from raw_df instead of dropping in place, so the cell
# is idempotent: an in-place drop raises KeyError when the cell is re-run
# because the columns are already gone.
initial_df = raw_df.drop(columns = ['femaDeclarationString', 'declarationType', 'declarationDate', 'declarationTitle', 'ihProgramDeclared', 
                           'iaProgramDeclared', 'placeCode', 'declarationRequestNumber', 'lastIAFilingDate', 'incidentId', 
                           'region', 'designatedIncidentTypes', 'lastRefresh', 'hash', 'id', 'paProgramDeclared', 'hmProgramDeclared',
                          'incidentBeginDate', 'incidentEndDate', 'disasterCloseoutDate', 'tribalRequest', 'fipsStateCode', 'fipsCountyCode'])

In [5]:
# Keep only declarations whose fiscal year is 2000 or later.
# The explicit .copy() makes filtered_df an independent frame: later cells
# rename and add columns on it, which must not operate on a slice of
# initial_df. (The previous leading `initial_df.copy()` was overwritten
# immediately and never protected anything.)
filtered_df = initial_df[initial_df['fyDeclared'] >= 2000].copy()
filtered_df

Unnamed: 0,disasterNumber,state,fyDeclared,incidentType,designatedArea
0,5530,NV,2024,Fire,Washoe (County)
1,5529,OR,2024,Fire,Washington (County)
2,5528,OR,2024,Fire,Jefferson (County)
3,5527,OR,2024,Fire,Deschutes (County)
4,5526,CO,2024,Fire,Jefferson (County)
...,...,...,...,...,...
51574,1305,NH,2000,Hurricane,Belknap (County)
51575,1305,NH,2000,Hurricane,Cheshire (County)
51576,1305,NH,2000,Hurricane,Grafton (County)
51577,1304,AZ,2000,Severe Storm,Cochise (County)


In [6]:
# Count rows per incident type in the filtered frame (most frequent first).
filtered_df['incidentType'].value_counts()

incidentType
Severe Storm           14190
Hurricane              12051
Biological              7857
Flood                   3775
Fire                    2930
Severe Ice Storm        2746
Snowstorm               1408
Tropical Storm          1046
Coastal Storm            551
Tornado                  434
Other                    298
Earthquake               186
Winter Storm             117
Freezing                  77
Typhoon                   58
Mud/Landslide             40
Dam/Levee Break           11
Chemical                   9
Tsunami                    9
Terrorist                  5
Straight-Line Winds        2
Volcanic Eruption          2
Toxic Substances           1
Drought                    1
Name: count, dtype: int64

In [7]:
# Expose the fiscal-year column under the shorter name 'year'.
filtered_df = filtered_df.rename(columns={'fyDeclared': 'year'})

In [8]:
# Display the frame to confirm the fyDeclared -> year rename.
filtered_df

Unnamed: 0,disasterNumber,state,year,incidentType,designatedArea
0,5530,NV,2024,Fire,Washoe (County)
1,5529,OR,2024,Fire,Washington (County)
2,5528,OR,2024,Fire,Jefferson (County)
3,5527,OR,2024,Fire,Deschutes (County)
4,5526,CO,2024,Fire,Jefferson (County)
...,...,...,...,...,...
51574,1305,NH,2000,Hurricane,Belknap (County)
51575,1305,NH,2000,Hurricane,Cheshire (County)
51576,1305,NH,2000,Hurricane,Grafton (County)
51577,1304,AZ,2000,Severe Storm,Cochise (County)


In [9]:
# List the distinct year values present, in order of first appearance.
filtered_df['year'].unique()

array([2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014,
       2013, 2012, 2009, 2006, 2011, 2010, 2003, 2008, 2025, 2007, 2005,
       2004, 2002, 2001, 2000])

In [10]:
def clean_designated_area(area_name, state_name):
    """Return ``"<area>, <state>"`` with the parentheses removed from the area.

    Example: ``("Washoe (County)", "NV")`` gives ``"Washoe County, NV"``.

    Args:
        area_name: Area label as a string, e.g. ``"Washoe (County)"``.
        state_name: Value appended after the comma (taken from the state column).

    Returns:
        The combined label as a string.
    """
    # Delete both parenthesis characters in one pass, then trim the ends.
    without_parens = area_name.translate(str.maketrans('', '', '()'))
    return ", ".join((without_parens.strip(), f"{state_name}"))

# Build the 'disaster_area' label for each row from its 'designatedArea' and
# 'state' values.
def _row_to_disaster_area(row):
    """Format one DataFrame row as "<area>, <state>"."""
    return clean_designated_area(row['designatedArea'], row['state'])

filtered_df['disaster_area'] = filtered_df.apply(_row_to_disaster_area, axis=1)

filtered_df

Unnamed: 0,disasterNumber,state,year,incidentType,designatedArea,disaster_area
0,5530,NV,2024,Fire,Washoe (County),"Washoe County, NV"
1,5529,OR,2024,Fire,Washington (County),"Washington County, OR"
2,5528,OR,2024,Fire,Jefferson (County),"Jefferson County, OR"
3,5527,OR,2024,Fire,Deschutes (County),"Deschutes County, OR"
4,5526,CO,2024,Fire,Jefferson (County),"Jefferson County, CO"
...,...,...,...,...,...,...
51574,1305,NH,2000,Hurricane,Belknap (County),"Belknap County, NH"
51575,1305,NH,2000,Hurricane,Cheshire (County),"Cheshire County, NH"
51576,1305,NH,2000,Hurricane,Grafton (County),"Grafton County, NH"
51577,1304,AZ,2000,Severe Storm,Cochise (County),"Cochise County, AZ"


In [11]:
# Count missing values per column.
filtered_df.isnull().sum()

disasterNumber    0
state             0
year              0
incidentType      0
designatedArea    0
disaster_area     0
dtype: int64

In [12]:
# Collect rows that exactly repeat an earlier row in every column
# (DataFrame.duplicated's default leaves the first occurrence unflagged).
duplicates = filtered_df[filtered_df.duplicated()]
print(duplicates)

       disasterNumber state  year incidentType  \
5235             4529    NM  2020   Biological   
5256             4529    NM  2020   Biological   
5262             4529    NM  2020   Biological   
5264             4529    NM  2020   Biological   
5404             4527    SD  2020   Biological   
5433             4527    SD  2020   Biological   
5450             4527    SD  2020   Biological   
5453             4527    SD  2020   Biological   
5457             4527    SD  2020   Biological   
5459             4527    SD  2020   Biological   
5464             4527    SD  2020   Biological   
5468             4527    SD  2020   Biological   
5471             4527    SD  2020   Biological   
5484             4527    SD  2020   Biological   
5488             4527    SD  2020   Biological   
5507             4527    SD  2020   Biological   
5512             4527    SD  2020   Biological   
5857             4522    ME  2020   Biological   
5868             4522    ME  2020   Biological   


In [13]:
# Drop exact duplicate rows, keeping the first occurrence of each, then
# display the result. Index labels are not reset.
filtered_df = filtered_df.drop_duplicates()
filtered_df

Unnamed: 0,disasterNumber,state,year,incidentType,designatedArea,disaster_area
0,5530,NV,2024,Fire,Washoe (County),"Washoe County, NV"
1,5529,OR,2024,Fire,Washington (County),"Washington County, OR"
2,5528,OR,2024,Fire,Jefferson (County),"Jefferson County, OR"
3,5527,OR,2024,Fire,Deschutes (County),"Deschutes County, OR"
4,5526,CO,2024,Fire,Jefferson (County),"Jefferson County, CO"
...,...,...,...,...,...,...
51574,1305,NH,2000,Hurricane,Belknap (County),"Belknap County, NH"
51575,1305,NH,2000,Hurricane,Cheshire (County),"Cheshire County, NH"
51576,1305,NH,2000,Hurricane,Grafton (County),"Grafton County, NH"
51577,1304,AZ,2000,Severe Storm,Cochise (County),"Cochise County, AZ"


In [14]:
# Number of distinct disasterNumber values remaining.
filtered_df['disasterNumber'].nunique()

3282

In [15]:
import os
import time

import pandas as pd
import requests
from geopy.distance import geodesic

# Work on an independent copy of the de-duplicated frame before adding the
# EOC / coordinate columns below.
filtered_df = filtered_df.copy()

# Google Maps API key. Read it from the GOOGLE_MAPS_API_KEY environment
# variable so a real key never has to be saved in the notebook; the original
# placeholder is kept as the fallback when the variable is not set.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "your-api-key-here")

# Function to get coordinates from Google Maps Geocoding API
def get_coordinates(address, api_key):
    """Geocode ``address`` with the Google Maps Geocoding API.

    Args:
        address: Free-form address or place name to look up.
        api_key: Google Maps API key sent with the request.

    Returns:
        ``(lat, lng)`` taken from the first result when the response status is
        ``'OK'``; otherwise ``(None, None)``. Failures (non-OK status, network
        errors, malformed responses) are printed rather than raised so a batch
        run can continue.
    """
    try:
        # Let requests build the query string. Interpolating the address into
        # the URL leaves reserved characters such as '&', '#' and '+'
        # unescaped, which truncates or splits the address parameter.
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"address": address, "key": api_key},
            timeout=10,  # Do not hang forever on a stalled connection
        )
        data = response.json()
        if data['status'] == 'OK':
            location = data['results'][0]['geometry']['location']
            return location['lat'], location['lng']
        print(f"Geocode API error for address {address}: {data.get('error_message', 'No error message')}")
    except Exception as e:
        # Deliberately broad: this is a best-effort lookup inside a long loop.
        print(f"Exception occurred while fetching coordinates for address {address}: {e}")
    return None, None

# Function to get distance and travel time from Google Maps Directions API
def get_distance_time(origin, destination, api_key):
    """Look up the route from ``origin`` to ``destination`` via the Directions API.

    Args:
        origin: Start point as accepted by the API (this notebook passes
            ``"lat,lng"`` strings).
        destination: End point, in the same form as ``origin``.
        api_key: Google Maps API key sent with the request.

    Returns:
        ``(distance_km, duration_minutes)`` for the first leg of the first
        route when the response status is ``'OK'``; otherwise ``(None, None)``.
        Failures are printed rather than raised so a batch run can continue.
    """
    try:
        # Let requests build and escape the query string instead of
        # interpolating caller-supplied text into the URL.
        response = requests.get(
            "https://maps.googleapis.com/maps/api/directions/json",
            params={"origin": origin, "destination": destination, "key": api_key},
            timeout=10,  # Do not hang forever on a stalled connection
        )
        data = response.json()
        if data['status'] == 'OK':
            first_leg = data['routes'][0]['legs'][0]
            distance = first_leg['distance']['value'] / 1000  # Convert to km
            duration = first_leg['duration']['value'] / 60  # Convert to minutes
            return distance, duration
        print(f"Directions API error for origin {origin} and destination {destination}: {data.get('error_message', 'No error message')}")
    except Exception as e:
        # Deliberately broad: this is a best-effort lookup inside a long loop.
        print(f"Exception occurred while fetching distance and time for origin {origin} and destination {destination}: {e}")
    return None, None

# EOC hub names and addresses.
# (EOC presumably stands for Emergency Operations Center; confirm with the author.)
#
# Keys are display labels of the form "<City> Hub, <ST>"; values are the street
# addresses that are geocoded when hub coordinates are pre-fetched. The label
# city is not always the address city (e.g. the Chicago hub's address is in
# Oak Brook and the Sacramento hub's is in Mather).
# NOTE(review): the addresses are hard-coded and their source is not recorded here.

eoc_addresses = {
    # California
    "Los Angeles Hub, CA": "500 E. Temple St., Los Angeles, CA 90012", 
    "San Francisco Hub, CA": "698 2nd St., San Francisco, CA 94107", 
    "Sacramento Hub, CA": "3650 Schriever Ave., Mather, CA 95655", 
    "San Diego Hub, CA": "5555 Overland Ave., San Diego, CA 92123", 
    "Fresno Hub, CA": "1221 Fulton Mall, Fresno, CA 93721", 
    "Oakland Hub, CA": "150 Frank H. Ogawa Plaza, Oakland, CA 94612", 
    "Riverside Hub, CA": "4080 Lemon St., Riverside, CA 92501", 
    "Long Beach Hub, CA": "2950 Redondo Ave., Long Beach, CA 90806",

    # Texas
    "Austin Hub, TX": "1033 La Posada Dr., Austin, TX 78752", 
    "Dallas Hub, TX": "1500 Marilla St., Dallas, TX 75201", 
    "Houston Hub, TX": "1001 Fannin St., Houston, TX 77002", 
    "San Antonio Hub, TX": "1619 San Pedro Ave., San Antonio, TX 78212", 

    # Florida
    "Tallahassee Hub, FL": "2555 Shumard Oak Blvd., Tallahassee, FL 32399", 
    "Miami Hub, FL": "4000 W Flagler St., Miami, FL 33134", 
    "Orlando Hub, FL": "400 W Robinson St., Orlando, FL 32801", 

    # New York
    "Albany Hub, NY": "1220 Washington Ave., Albany, NY 12226", 
    "New York City Hub, NY": "9 Metrotech Center, Brooklyn, NY 11201", 

    # Illinois
    "Springfield Hub, IL": "2200 S Dirksen Pkwy, Springfield, IL 62703", 
    "Chicago Hub, IL": "1300 W 22nd St., Oak Brook, IL 60523",

    # Pennsylvania
    "Harrisburg Hub, PA": "101 S. 7th St., Harrisburg, PA 17104", 
    "Philadelphia Hub, PA": "3600 Roosevelt Blvd, Philadelphia, PA 19149",

    # Georgia
    "Atlanta Hub, GA": "935 E Confederate Ave, Atlanta, GA 30316", 

    # Ohio
    "Columbus Hub, OH": "2855 W Dublin Granville Rd., Columbus, OH 43235",

    # Washington
    "Olympia Hub, WA": "106 11th Ave SW, Olympia, WA 98504", 
    "Seattle Hub, WA": "711 2nd Ave., Seattle, WA 98104",

    # Michigan
    "Lansing Hub, MI": "4000 Collins Rd, Lansing, MI 48910", 

    # Virginia
    "Richmond Hub, VA": "10501 Trade Ct, Richmond, VA 23236",

    # Arizona
    "Phoenix Hub, AZ": "5636 E McKinley St., Phoenix, AZ 85008",

    # North Carolina
    "Raleigh Hub, NC": "1636 Gold Star Dr., Raleigh, NC 27607",

    # Minnesota
    "St. Paul Hub, MN": "444 Cedar St, St. Paul, MN 55101",

    # Maine
    "Augusta Hub, ME": "45 Commerce Dr, Augusta, ME 04330",

    # Colorado
    "Denver Hub, CO": "745 W Colfax Ave, Denver, CO 80204",

    # Nevada
    "Carson City Hub, NV": "2478 Fairview Dr., Carson City, NV 89701",

    # Louisiana
    "Baton Rouge Hub, LA": "3500 N. Acadian Thruway, Baton Rouge, LA 70805",

    # Tennessee
    "Nashville Hub, TN": "600 W. Main St., Nashville, TN 37208",

    # Maryland
    "Baltimore Hub, MD": "2930 4th St., Baltimore, MD 21225",

    # Arkansas
    "Little Rock Hub, AR": "10800 Financial Center Parkway, Little Rock, AR 72211",

    # Oklahoma
    "Oklahoma City Hub, OK": "1700 SW 7th St., Oklahoma City, OK 73108",

    # Wisconsin
    "Madison Hub, WI": "2400 Wright St., Madison, WI 53704",

    # South Carolina
    "Columbia Hub, SC": "2920 Broad River Rd., Columbia, SC 29210",

    # Kansas
    "Topeka Hub, KS": "2800 SW Topeka Blvd, Topeka, KS 66611",

    # Alabama
    "Montgomery Hub, AL": "5095 Rev. Abraham Woods Jr. Blvd, Montgomery, AL 36108",

    # Connecticut
    "Hartford Hub, CT": "25 Sigourney St., Hartford, CT 06105",

    # New Jersey
    "Trenton Hub, NJ": "4001 S. Broad St., Trenton, NJ 08648",

    # South Dakota
    "Pierre Hub, SD": "500 E. Capitol Ave., Pierre, SD 57501",

    # New Mexico
    "Santa Fe Hub, NM": "6200 Brown Rd., Santa Fe, NM 87507",

    # Kentucky
    "Frankfort Hub, KY": "1311 US Hwy 127 S, Frankfort, KY 40601",

    # Nebraska
    "Lincoln Hub, NE": "1300 15th St, Lincoln, NE 68508",

    # Montana
    "Helena Hub, MT": "1201 E 6th Ave, Helena, MT 59620",

    # Wyoming
    "Cheyenne Hub, WY": "5500 Bishop Blvd., Cheyenne, WY 82006",

    # Idaho
    "Boise Hub, ID": "4040 W. Guard St., Boise, ID 83705",

    # Mississippi
    "Jackson Hub, MS": "570 E. Woodrow Wilson Dr., Jackson, MS 39216",

    # Rhode Island
    "Providence Hub, RI": "1600 Division St., Cranston, RI 02920",

    # Delaware
    "Dover Hub, DE": "165 Brick Store Landing Rd., Dover, DE 19901",

    # Vermont
    "Montpelier Hub, VT": "10 Baldwin St., Montpelier, VT 05602",

    # Hawaii
    "Honolulu Hub, HI": "925 Dillingham Blvd, Honolulu, HI 96817",

    # Alaska
    "Anchorage Hub, AK": "550 W 7th Ave, Anchorage, AK 99501",

    # Indiana
    "Indianapolis Hub, IN": "302 W Washington St., Indianapolis, IN 46204",

    # Missouri
    "Jefferson City Hub, MO": "2302 E McCarty St, Jefferson City, MO 65101",

    # Utah
    "Salt Lake City Hub, UT": "1600 W North Temple, Salt Lake City, UT 84116",

    # North Dakota
    "Bismarck Hub, ND": "4201 Coleman St, Bismarck, ND 58503",

    # West Virginia
    "Charleston Hub, WV": "1900 Kanawha Blvd. E, Charleston, WV 25305",

}


# Geocode every EOC hub once, up front, keeping only the hubs that resolved.
eoc_coordinates = {}
for hub, address in eoc_addresses.items():
    hub_lat, hub_lng = get_coordinates(address, api_key)
    if hub_lat is None and hub_lng is None:
        # get_coordinates signals failure with (None, None).
        print(f"Skipping {hub} due to missing coordinates.")
        continue
    eoc_coordinates[hub] = (hub_lat, hub_lng)

# Create the result columns up front, filled with None until a lookup
# succeeds for the row.
for new_column in (
    'EOC',
    'Disaster Lat',
    'Disaster Lng',
    'EOC Lat',
    'EOC Lng',
    'Distance (km)',
    'Duration (minutes)',
):
    filtered_df[new_column] = None

# Calculate closest EOC for each disaster area.
#
# Every value written below depends only on the row's 'disaster_area' string,
# and the same area occurs on many rows (one county under several
# declarations). Complete results are therefore memoised per area, so repeated
# rows reuse them instead of re-issuing identical Geocoding and Directions
# requests.
resolved_areas = {}  # disaster_area -> {column name: value}

for idx, row in filtered_df.iterrows():
    disaster_area = row['disaster_area']
    print(f"Processing row {idx}: {disaster_area}")

    cached_values = resolved_areas.get(disaster_area)
    if cached_values is not None:
        for column, value in cached_values.items():
            filtered_df.at[idx, column] = value
        # No API call was made, so no rate-limit delay is needed.
        continue

    disaster_coords = get_coordinates(disaster_area, api_key)
    if disaster_coords != (None, None):
        # Pick the hub with the smallest straight-line (geodesic) distance.
        closest_eoc = None
        min_distance = float('inf')
        for eoc, eoc_coords in eoc_coordinates.items():
            try:
                distance = geodesic(disaster_coords, eoc_coords).kilometers
                if distance < min_distance:
                    min_distance = distance
                    closest_eoc = eoc
            except Exception as e:
                print(f"Error calculating geodesic distance for {disaster_area} and {eoc}: {e}")

        if closest_eoc:
            eoc_lat, eoc_lng = eoc_coordinates[closest_eoc]
            # Driving distance and time are requested only for the nearest
            # hub, travelling from the hub to the disaster area.
            travel_distance, travel_duration = get_distance_time(
                origin=f"{eoc_lat},{eoc_lng}",
                destination=f"{disaster_coords[0]},{disaster_coords[1]}",
                api_key=api_key
            )
            row_values = {
                'EOC': closest_eoc,
                'Disaster Lat': disaster_coords[0],
                'Disaster Lng': disaster_coords[1],
                'EOC Lat': eoc_lat,
                'EOC Lng': eoc_lng,
                'Distance (km)': travel_distance,
                'Duration (minutes)': travel_duration,
            }
            for column, value in row_values.items():
                filtered_df.at[idx, column] = value
            # Cache only complete results, so an area whose Directions lookup
            # failed is still retried on its next occurrence.
            if travel_distance is not None and travel_duration is not None:
                resolved_areas[disaster_area] = row_values
        else:
            print(f"No valid EOC found for {disaster_area}.")
    else:
        print(f"Skipping disaster area {disaster_area} due to missing coordinates.")

    # Introduce a small delay to avoid exceeding API rate limits
    time.sleep(0.2)

print(filtered_df.head())

Processing row 0: Washoe County, NV
Processing row 1: Washington County, OR
Processing row 2: Jefferson County, OR
Processing row 3: Deschutes County, OR
Processing row 4: Jefferson County, CO
Processing row 5: Boulder County, CO
Processing row 6: Larimer County, CO
Processing row 7: Larimer County, CO
Processing row 8: Platte County, WY
Processing row 9: Kern County, CA
Processing row 10: Yakima County, WA
Processing row 11: Nez Perce Indian Reservation, ID
Processing row 12: Latah County, ID
Processing row 13: Nez Perce County, ID
Processing row 14: Butte County, CA
Processing row 15: Tehama County, CA
Processing row 16: Colville Indian Reservation, WA
Processing row 17: Ferry County, WA
Processing row 18: Okanogan County, WA
Processing row 19: Yakama Reservation, WA
Processing row 20: Yakima County, WA
Processing row 21: Baker County, OR
Processing row 22: Malheur County, OR
Processing row 23: Riverside County, CA
Processing row 24: Grant County, OR
Processing row 25: Morrow County,

In [32]:
# Inspect the frame with the EOC, coordinate and travel columns filled in.
filtered_df

Unnamed: 0,disasterNumber,state,year,incidentType,designatedArea,disaster_area,EOC,Disaster Lat,Disaster Lng,EOC Lat,EOC Lng,Distance (km),Duration (minutes)
0,5530,NV,2024,Fire,Washoe (County),"Washoe County, NV","Carson City Hub, NV",40.560839,-119.603549,39.15134,-119.741247,250.342,221.866667
1,5529,OR,2024,Fire,Washington (County),"Washington County, OR","Olympia Hub, WA",45.546962,-123.138602,47.038257,-122.901219,223.795,133.516667
2,5528,OR,2024,Fire,Jefferson (County),"Jefferson County, OR","Olympia Hub, WA",44.667332,-121.178579,47.038257,-122.901219,359.142,229.633333
3,5527,OR,2024,Fire,Deschutes (County),"Deschutes County, OR","Olympia Hub, WA",43.83251,-121.261654,47.038257,-122.901219,470.411,313.866667
4,5526,CO,2024,Fire,Jefferson (County),"Jefferson County, CO","Denver Hub, CO",39.58003,-105.266293,39.740402,-104.997531,43.354,35.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51574,1305,NH,2000,Hurricane,Belknap (County),"Belknap County, NH","Montpelier Hub, VT",43.502425,-71.407394,44.262814,-72.582793,195.488,135.233333
51575,1305,NH,2000,Hurricane,Cheshire (County),"Cheshire County, NH","Albany Hub, NY",42.925938,-72.236379,42.684315,-73.816271,167.051,137.633333
51576,1305,NH,2000,Hurricane,Grafton (County),"Grafton County, NH","Montpelier Hub, VT",43.908793,-71.825994,44.262814,-72.582793,115.936,104.0
51577,1304,AZ,2000,Severe Storm,Cochise (County),"Cochise County, AZ","Phoenix Hub, AZ",31.828458,-109.949686,33.456182,-111.979376,309.532,193.95


In [42]:
# Count missing values per column. Nulls in the EOC / coordinate / travel
# columns mark rows whose lookup returned nothing, since those columns start
# as None and are only filled on success.
filtered_df.isnull().sum()

disasterNumber           0
state                    0
year                     0
incidentType             0
designatedArea           0
disaster_area            0
EOC                    381
Disaster Lat           381
Disaster Lng           381
EOC Lat                381
EOC Lng                381
Distance (km)         2796
Duration (minutes)    2796
dtype: int64

In [64]:
# Drop every row that has a null in any column. This removes rows with no
# geocode/EOC match and rows that matched an EOC but have no distance/duration.
filtered_df = filtered_df.dropna()

In [71]:
# Save the cleaned frame. index=False is not passed, so the DataFrame index is
# written as the first, unnamed CSV column.
filtered_df.to_csv('../data/clean/fema_df_coords.csv')