In [2]:
from dotenv import load_dotenv
from sqlalchemy import create_engine
import os
import pandas as pd

load_dotenv(r"C:\Projects\storm_risk\.env")

engine = create_engine(
    f"mysql+mysqlconnector://{os.getenv('MYSQL_USER')}:{os.getenv('MYSQL_PASSWORD')}@{os.getenv('MYSQL_HOST')}/{os.getenv('MYSQL_DATABASE')}"
)

print("✅ Connected!")

✅ Connected!


In [3]:
import requests
import pandas as pd

# FEMA API base URL
base_url = "https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries"

all_records = []
skip = 0
page_size = 1000

print("Fetching FEMA data...")

while True:
    # Build the URL with pagination parameters
    url = f"{base_url}?$top={page_size}&$skip={skip}&$format=json"
    
    response = requests.get(url)
    data = response.json()
    
    # FEMA wraps records in a key called 'DisasterDeclarationsSummaries'
    records = data.get('DisasterDeclarationsSummaries', [])
    
    if not records:
        break  # no more pages, we're done
        
    all_records.extend(records)
    skip += page_size
    print(f"  Fetched {len(all_records)} records so far...")

# Convert to dataframe
fema_raw = pd.DataFrame(all_records)

print(f"\n✅ Done! Total records: {len(fema_raw)}")
print(f"Columns: {fema_raw.columns.tolist()}")
fema_raw.head()

Fetching FEMA data...
  Fetched 1000 records so far...
  Fetched 2000 records so far...
  Fetched 3000 records so far...
  Fetched 4000 records so far...
  Fetched 5000 records so far...
  Fetched 6000 records so far...
  Fetched 7000 records so far...
  Fetched 8000 records so far...
  Fetched 9000 records so far...
  Fetched 10000 records so far...
  Fetched 11000 records so far...
  Fetched 12000 records so far...
  Fetched 13000 records so far...
  Fetched 14000 records so far...
  Fetched 15000 records so far...
  Fetched 16000 records so far...
  Fetched 17000 records so far...
  Fetched 18000 records so far...
  Fetched 19000 records so far...
  Fetched 20000 records so far...
  Fetched 21000 records so far...
  Fetched 22000 records so far...
  Fetched 23000 records so far...
  Fetched 24000 records so far...
  Fetched 25000 records so far...
  Fetched 26000 records so far...
  Fetched 27000 records so far...
  Fetched 28000 records so far...
  Fetched 29000 records so far...
 

Unnamed: 0,femaDeclarationString,disasterNumber,state,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,iaProgramDeclared,...,placeCode,designatedArea,declarationRequestNumber,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash,id
0,FM-5529-OR,5529,OR,FM,2024-08-09T00:00:00.000Z,2024,Fire,LEE FALLS FIRE,False,False,...,99067,Washington (County),24122,,2024081001,10,R,2024-08-27T18:22:14.800Z,ae87cf3c6ed795015b714af7166c7c295b2b67c7,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
1,FM-5528-OR,5528,OR,FM,2024-08-06T00:00:00.000Z,2024,Fire,ELK LANE FIRE,False,False,...,99031,Jefferson (County),24116,,2024080701,10,R,2024-08-27T18:22:14.800Z,432cf0995c47e3895cea696ede5621b810460501,59983f89-30bf-4888-b21b-62e8d57d9aac
2,FM-5527-OR,5527,OR,FM,2024-08-02T00:00:00.000Z,2024,Fire,MILE MARKER 132 FIRE,False,False,...,99017,Deschutes (County),24111,,2024080301,10,R,2024-08-27T18:22:14.800Z,2f21d90cb6bc64b0d4121aa3f18d852bbb4b11fa,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
3,DR-4312-CA,4312,CA,DR,2017-05-02T00:00:00.000Z,2017,Severe Storm,FLOODING,False,False,...,60347,Resighini Rancheria (Indian Reservation),17035,,2017041001,9,,2025-03-26T20:21:32.579Z,432a3a64bdbb291ae26cf5a27a33deeabb380481,98a7c5bb-2346-45aa-a1ca-0399440d4f0b
4,DR-4251-AL,4251,AL,DR,2016-01-21T00:00:00.000Z,2016,Severe Storm,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",False,False,...,99001,Autauga (County),16003,,2015122301,4,,2025-03-27T12:21:46.559Z,dcd4ce6b37ee49875b3f1e32e9a8a16cd6a803d3,5229bbae-eee6-42b8-b277-edbafa8d6cb2


In [4]:
print(fema_raw.columns.tolist())

['femaDeclarationString', 'disasterNumber', 'state', 'declarationType', 'declarationDate', 'fyDeclared', 'incidentType', 'declarationTitle', 'ihProgramDeclared', 'iaProgramDeclared', 'paProgramDeclared', 'hmProgramDeclared', 'incidentBeginDate', 'incidentEndDate', 'disasterCloseoutDate', 'tribalRequest', 'fipsStateCode', 'fipsCountyCode', 'placeCode', 'designatedArea', 'declarationRequestNumber', 'lastIAFilingDate', 'incidentId', 'region', 'designatedIncidentTypes', 'lastRefresh', 'hash', 'id']


In [5]:
fema_raw.to_parquet(
    r"C:\Projects\storm_risk\data\raw\fema_disasters.parquet",
    index=False
)

print("✅ Raw data saved!")

✅ Raw data saved!


In [6]:
fema_clean = pd.DataFrame({
    'fips_code'        : (fema_raw['fipsStateCode'] + fema_raw['fipsCountyCode']).str.zfill(5),
    'disaster_number'  : fema_raw['disasterNumber'].astype(str),
    'declaration_date' : pd.to_datetime(fema_raw['declarationDate'], errors='coerce').dt.date,
    'disaster_type'    : fema_raw['declarationType'],
    'incident_type'    : fema_raw['incidentType'],
    'state'            : fema_raw['state'],
    'program_declared' : fema_raw['ihProgramDeclared'].astype(str)
})

# Drop rows where fips_code is null or malformed
fema_clean = fema_clean.dropna(subset=['fips_code', 'declaration_date'])
fema_clean = fema_clean[fema_clean['fips_code'].str.len() == 5]

print(f"Clean records: {len(fema_clean)}")
print(f"Date range: {fema_clean['declaration_date'].min()} to {fema_clean['declaration_date'].max()}")
fema_clean.head()

Clean records: 69634
Date range: 1953-05-02 to 2026-02-20


Unnamed: 0,fips_code,disaster_number,declaration_date,disaster_type,incident_type,state,program_declared
0,41067,5529,2024-08-09,FM,Fire,OR,False
1,41031,5528,2024-08-06,FM,Fire,OR,False
2,41017,5527,2024-08-02,FM,Fire,OR,False
3,6000,4312,2017-05-02,DR,Severe Storm,CA,False
4,1001,4251,2016-01-21,DR,Severe Storm,AL,False


In [7]:
print(fema_clean['incident_type'].value_counts().head(15))

incident_type
Severe Storm        19299
Hurricane           13721
Flood               11234
Biological           7857
Fire                 3853
Snowstorm            3707
Severe Ice Storm     2956
Tornado              1623
Drought              1292
Winter Storm         1224
Tropical Storm       1059
Coastal Storm         637
Other                 314
Freezing              301
Earthquake            228
Name: count, dtype: int64


In [8]:
counties = pd.DataFrame({
    'fips_code'   : fema_clean['fips_code'].unique()
})

# Split back into state and county components
counties['state_fips']  = counties['fips_code'].str[:2]
counties['county_fips'] = counties['fips_code'].str[2:]
counties['state_name']  = fema_clean.groupby('fips_code')['state'].first().values
counties['county_name'] = 'Unknown'  # we'll enrich this in a moment

counties = counties.dropna(subset=['fips_code'])
counties = counties[counties['fips_code'].str.len() == 5]

print(f"Unique counties: {len(counties)}")
counties.head()

Unique counties: 3324


Unnamed: 0,fips_code,state_fips,county_fips,state_name,county_name
0,41067,41,67,AL,Unknown
1,41031,41,31,AL,Unknown
2,41017,41,17,AL,Unknown
3,6000,6,0,AL,Unknown
4,1001,1,1,AL,Unknown


In [9]:
# Census Bureau county reference file
county_names_url = "https://www2.census.gov/geo/docs/reference/codes2020/national_county2020.txt"

county_ref = pd.read_csv(
    county_names_url,
    sep='|',
    dtype=str
)

print(county_ref.columns.tolist())
county_ref.head()

['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNS', 'COUNTYNAME', 'CLASSFP', 'FUNCSTAT']


Unnamed: 0,STATE,STATEFP,COUNTYFP,COUNTYNS,COUNTYNAME,CLASSFP,FUNCSTAT
0,AL,1,1,161526,Autauga County,H1,A
1,AL,1,3,161527,Baldwin County,H1,A
2,AL,1,5,161528,Barbour County,H1,A
3,AL,1,7,161529,Bibb County,H1,A
4,AL,1,9,161530,Blount County,H1,A


In [10]:
# Build fips_code from Census file the same way we did for FEMA
county_ref['fips_code'] = county_ref['STATEFP'].str.zfill(2) + county_ref['COUNTYFP'].str.zfill(3)

# Merge real county names into our counties dataframe
counties = counties.merge(
    county_ref[['fips_code', 'COUNTYNAME', 'STATE']],
    on='fips_code',
    how='left'
)

# Use the real name where available, keep 'Unknown' where not
counties['county_name'] = counties['COUNTYNAME'].fillna('Unknown')
counties['state_name']  = counties['STATE'].fillna(counties['state_name'])

# Drop the extra columns we don't need
counties = counties.drop(columns=['COUNTYNAME', 'STATE'])

print(f"Counties with real names: {counties['county_name'].ne('Unknown').sum()}")
print(f"Counties still unknown: {counties['county_name'].eq('Unknown').sum()}")
counties.head(10)

Counties with real names: 3231
Counties still unknown: 93


Unnamed: 0,fips_code,state_fips,county_fips,state_name,county_name
0,41067,41,67,OR,Washington County
1,41031,41,31,OR,Jefferson County
2,41017,41,17,OR,Deschutes County
3,6000,6,0,AL,Unknown
4,1001,1,1,AL,Autauga County
5,1005,1,5,AL,Barbour County
6,1009,1,9,AL,Blount County
7,1011,1,11,AL,Bullock County
8,1013,1,13,AL,Butler County
9,41001,41,1,OR,Baker County


In [14]:
from dotenv import load_dotenv
from sqlalchemy import create_engine
import os

load_dotenv(r"C:\Projects\storm_risk\.env")

engine = create_engine(
    "mysql+mysqlconnector://",
    connect_args={
        "host"    : os.getenv('MYSQL_HOST'),
        "user"    : os.getenv('MYSQL_USER'),
        "password": os.getenv('MYSQL_PASSWORD'),
        "database": os.getenv('MYSQL_DATABASE')
    }
)

print("✅ Connected!")

✅ Connected!


In [15]:
# Load counties first
counties.to_sql(
    name='counties',
    con=engine,
    if_exists='append',
    index=False,
    chunksize=500
)
print(f"✅ {len(counties)} counties loaded into MySQL!")

# Filter FEMA to valid FIPS only
valid_fips = set(counties['fips_code'])
fema_clean = fema_clean[fema_clean['fips_code'].isin(valid_fips)]
print(f"FEMA records after FIPS validation: {len(fema_clean)}")

# Load FEMA
fema_clean.to_sql(
    name='fema_disasters',
    con=engine,
    if_exists='append',
    index=False,
    chunksize=500
)
print(f"✅ {len(fema_clean)} FEMA disaster records loaded into MySQL!")

✅ 3324 counties loaded into MySQL!
FEMA records after FIPS validation: 69634
✅ 69634 FEMA disaster records loaded into MySQL!


In [16]:
counties_check = pd.read_sql("SELECT COUNT(*) as total FROM counties", engine)
fema_check     = pd.read_sql("SELECT COUNT(*) as total FROM fema_disasters", engine)

print(f"Counties in MySQL: {counties_check['total'].values[0]}")
print(f"FEMA records in MySQL: {fema_check['total'].values[0]}")

Counties in MySQL: 3324
FEMA records in MySQL: 69634
