# FEMA Disaster Declarations Summaries
Author: Mark Bauer

In [1]:
# import packages
from datetime import datetime
from pathlib import Path

import duckdb

In [2]:
# reproducibility: (re)load the watermark extension and print the Python/IPython
# versions (-v) together with the installed duckdb version (-p duckdb)
%reload_ext watermark
%watermark -v -p duckdb

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.6.0

duckdb: 1.0.0



In [3]:
# data retrieved: stamp the run with the date on which this cell executes.
# The value comes from datetime.now(), i.e. the clock at execution time,
# not from any field in the dataset.
current_date = datetime.now()
print(f"The data was retrieved on {current_date.strftime('%Y-%m-%d')}.")

The data was retrieved on 2024-10-07.


# OpenFEMA Dataset: Disaster Declarations Summaries - v2

## Dataset
Federal Emergency Management Agency (FEMA), OpenFEMA Dataset: Disaster Declarations Summaries - v2. Retrieved from https://www.fema.gov/openfema-data-page/disaster-declarations-summaries-v2. This product uses the FEMA OpenFEMA API, but is not endorsed by FEMA. The Federal Government or FEMA cannot vouch for the data or analyses derived from these data after the data have been retrieved from the Agency's website(s).

Read more about [OpenFEMA Terms and Conditions](https://www.fema.gov/about/openfema/terms-conditions).

## Dataset Description
>Disaster Declarations Summaries is a summarized dataset describing all federally declared disasters. This dataset lists all official FEMA Disaster Declarations, beginning with the first disaster declaration in 1953 and features all three disaster declaration types: major disaster, emergency, and fire management assistance. The dataset includes declared recovery programs and geographic areas (county not available before 1964; Fire Management records are considered partial due to historical nature of the dataset).

Source: [OpenFEMA Dataset: Disaster Declarations Summaries - v2](https://www.fema.gov/openfema-data-page/disaster-declarations-summaries-v2)

## Disaster Declarations
For more information on the disaster declaration process:
- Information about disasters: https://www.fema.gov/disasters
- How a disaster is declared: https://www.fema.gov/disasters/how-declared

# Additional Resources
To examine other cool data visualizations about this data, visit FEMA's [Disaster Declarations for States and Counties](https://www.fema.gov/data-visualization/disaster-declarations-states-and-counties) page.

# Read In Data

In [4]:
# open a DuckDB connection that every later cell reuses through `con`
con = duckdb.connect()

# materialize the OpenFEMA parquet file as the disaster_declarations table
create_table_sql = """
    CREATE TABLE disaster_declarations
    AS FROM read_parquet('https://www.fema.gov/api/open/v2/DisasterDeclarationsSummaries.parquet')
"""
con.execute(create_table_sql)

# quick look at the first ten rows to confirm the load worked
sql = """
    SELECT *
    FROM disaster_declarations
    LIMIT 10
"""

loaded_preview = con.sql(sql)
loaded_preview

┌──────────────────────┬────────────────┬─────────┬───┬──────────────────────┬──────────────────────┐
│          id          │ disasterNumber │  state  │ … │     lastRefresh      │         hash         │
│       varchar        │     int16      │ varchar │   │      timestamp       │       varchar        │
├──────────────────────┼────────────────┼─────────┼───┼──────────────────────┼──────────────────────┤
│ f15a7a79-f1c3-41bb…  │           5530 │ NV      │ … │ 2024-08-27 18:22:1…  │ 5d07e7c51bb300bfbe…  │
│ 09e3f81a-5e16-4b72…  │           5529 │ OR      │ … │ 2024-08-27 18:22:1…  │ ae87cf3c6ed795015b…  │
│ 59983f89-30bf-4888…  │           5528 │ OR      │ … │ 2024-08-27 18:22:1…  │ 432cf0995c47e3895c…  │
│ 8d13ecf0-bc2f-496b…  │           5527 │ OR      │ … │ 2024-08-27 18:22:1…  │ 2f21d90cb6bc64b0d4…  │
│ 17c24d4a-49a9-4cac…  │           5526 │ CO      │ … │ 2024-08-27 18:22:1…  │ e753ba692156f389db…  │
│ f1140a27-cb85-404c…  │           5525 │ CO      │ … │ 2024-08-27 18:22:1…  │ b1f

# Describe and Summarize Data
Basic information and summary statistics of `disaster_declarations` table.

In [5]:
# count rows: COUNT(*) counts every row in the table, whereas COUNT(id)
# would silently skip any row whose id is NULL
con.sql("SELECT COUNT(*) AS count_rows FROM disaster_declarations")

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│      67040 │
└────────────┘

In [6]:
# column datatypes: DESCRIBE the full table and keep the result as a relation
sql = "DESCRIBE SELECT * FROM disaster_declarations"
describe_relation = con.sql(sql)

# narrow the DESCRIBE output to name and type; the query below refers to the
# `describe_relation` variable by name, so that name must stay as is
name_and_type = con.sql("SELECT column_name, column_type FROM describe_relation")
name_and_type.show(max_rows=50)

┌──────────────────────────┬─────────────┐
│       column_name        │ column_type │
│         varchar          │   varchar   │
├──────────────────────────┼─────────────┤
│ id                       │ VARCHAR     │
│ disasterNumber           │ SMALLINT    │
│ state                    │ VARCHAR     │
│ femaDeclarationString    │ VARCHAR     │
│ declarationType          │ VARCHAR     │
│ declarationDate          │ DATE        │
│ fyDeclared               │ SMALLINT    │
│ incidentType             │ VARCHAR     │
│ declarationTitle         │ VARCHAR     │
│ ihProgramDeclared        │ BOOLEAN     │
│ iaProgramDeclared        │ BOOLEAN     │
│ paProgramDeclared        │ BOOLEAN     │
│ hmProgramDeclared        │ BOOLEAN     │
│ incidentBeginDate        │ DATE        │
│ incidentEndDate          │ DATE        │
│ disasterCloseoutDate     │ DATE        │
│ tribalRequest            │ BOOLEAN     │
│ fipsStateCode            │ VARCHAR     │
│ fipsCountyCode           │ VARCHAR     │
│ placeCode

In [7]:
# summary statistics of columns (SUMMARIZE returns one row per column, with
# min/max, approx_unique, quantiles, count and null_percentage as shown below)
sql = "SUMMARIZE SELECT * FROM disaster_declarations"
# keep the relation in a variable: the NULL-percentage query in the following
# cell selects FROM summarize_relation, so this name must not change
summarize_relation = con.sql(sql)

# fetch rows as a pandas df for readability
summarize_relation.df()

Unnamed: 0,column_name,column_type,min,max,approx_unique,avg,std,q25,q50,q75,count,null_percentage
0,id,VARCHAR,00002fed-4c4b-49c0-8031-6e9749819fde,ffffdc28-ff85-43c7-91bb-f40fc4932471,65567,,,,,,67040,0.0
1,disasterNumber,SMALLINT,1,5538,5085,2656.4217183770884,1428.577729000742,1358.0,3133.0,3702.0,67040,0.0
2,state,VARCHAR,AK,WY,59,,,,,,67040,0.0
3,femaDeclarationString,VARCHAR,DR-1-GA,FM-5538-CA,5024,,,,,,67040,0.0
4,declarationType,VARCHAR,DR,FM,3,,,,,,67040,0.0
5,declarationDate,DATE,1953-05-02,2024-10-07,3518,,,,,,67040,0.0
6,fyDeclared,SMALLINT,1953,2025,74,2005.193675417661,14.99484505880495,1997.0,2008.0,2019.0,67040,0.0
7,incidentType,VARCHAR,Biological,Winter Storm,26,,,,,,67040,0.0
8,declarationTitle,VARCHAR,HURRICANE EARL,ZOGG FIRE,2350,,,,,,67040,0.0
9,ihProgramDeclared,BOOLEAN,false,true,2,,,,,,67040,0.0


In [8]:
# rank columns by their share of NULL values, highest first
sql = """
    SELECT column_name, column_type, null_percentage
    FROM summarize_relation
    ORDER BY null_percentage DESC
"""

null_share = con.sql(sql)
null_share.show(max_rows=50)

┌──────────────────────────┬─────────────┬─────────────────┐
│       column_name        │ column_type │ null_percentage │
│         varchar          │   varchar   │  decimal(9,2)   │
├──────────────────────────┼─────────────┼─────────────────┤
│ lastIAFilingDate         │ DATE        │           71.90 │
│ designatedIncidentTypes  │ VARCHAR     │           71.32 │
│ disasterCloseoutDate     │ DATE        │           23.61 │
│ incidentEndDate          │ DATE        │            1.79 │
│ id                       │ VARCHAR     │            0.00 │
│ disasterNumber           │ SMALLINT    │            0.00 │
│ state                    │ VARCHAR     │            0.00 │
│ femaDeclarationString    │ VARCHAR     │            0.00 │
│ declarationType          │ VARCHAR     │            0.00 │
│ declarationDate          │ DATE        │            0.00 │
│ fyDeclared               │ SMALLINT    │            0.00 │
│ incidentType             │ VARCHAR     │            0.00 │
│ declarationTitle      

# Preview Values
Query the table twice to preview all columns: the first 15 columns, then the remaining columns.

In [9]:
# five-row sample of the table; `sql` is reused by the next cell
sql = """
    SELECT *
    FROM disaster_declarations
    LIMIT 5   
"""

# pull the sample into pandas, then show the columns at positions 0-14
sample_df = con.sql(sql).df()
sample_df.iloc[:, :15]

Unnamed: 0,id,disasterNumber,state,femaDeclarationString,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,incidentBeginDate,incidentEndDate
0,f15a7a79-f1c3-41bb-8a5c-c05fbae34423,5530,NV,FM-5530-NV,FM,2024-08-12,2024,Fire,GOLD RANCH FIRE,False,False,True,True,2024-08-11,NaT
1,09e3f81a-5e16-4b72-b317-1c64e0cfa59c,5529,OR,FM-5529-OR,FM,2024-08-09,2024,Fire,LEE FALLS FIRE,False,False,True,True,2024-08-08,NaT
2,59983f89-30bf-4888-b21b-62e8d57d9aac,5528,OR,FM-5528-OR,FM,2024-08-06,2024,Fire,ELK LANE FIRE,False,False,True,True,2024-08-04,NaT
3,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0,5527,OR,FM-5527-OR,FM,2024-08-02,2024,Fire,MILE MARKER 132 FIRE,False,False,True,True,2024-08-02,NaT
4,17c24d4a-49a9-4cac-9322-e5427c4cdfeb,5526,CO,FM-5526-CO,FM,2024-08-01,2024,Fire,QUARRY FIRE,False,False,True,True,2024-07-30,NaT


In [10]:
# examine the remaining columns (position 15 onward; 13 columns in the output
# below), re-running the `sql` sample query defined in the previous cell
con.sql(sql).df().iloc[:, 15:]

Unnamed: 0,disasterCloseoutDate,tribalRequest,fipsStateCode,fipsCountyCode,placeCode,designatedArea,declarationRequestNumber,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash
0,NaT,False,32,31,99031,Washoe (County),24123,NaT,2024081201,9,R,2024-08-27 18:22:14.800,5d07e7c51bb300bfbec94a699a1e1ab1d61a97cd
1,NaT,False,41,67,99067,Washington (County),24122,NaT,2024081001,10,R,2024-08-27 18:22:14.800,ae87cf3c6ed795015b714af7166c7c295b2b67c7
2,NaT,False,41,31,99031,Jefferson (County),24116,NaT,2024080701,10,R,2024-08-27 18:22:14.800,432cf0995c47e3895cea696ede5621b810460501
3,NaT,False,41,17,99017,Deschutes (County),24111,NaT,2024080301,10,R,2024-08-27 18:22:14.800,2f21d90cb6bc64b0d4121aa3f18d852bbb4b11fa
4,NaT,False,8,59,99059,Jefferson (County),24106,NaT,2024080102,8,R,2024-08-27 18:22:14.800,e753ba692156f389dbe19f7a1c332d04ae145f74


In [11]:
# first declaration on record: sort by declaration date ascending, keep the top row
sql = """
    SELECT
        disasterNumber,
        state,
        femaDeclarationString,
        declarationType,
        declarationDate,
        incidentBeginDate,
        incidentType,
        declarationTitle
    FROM disaster_declarations
    ORDER BY declarationDate ASC
    LIMIT 1
"""

earliest_declared = con.sql(sql)
earliest_declared.df()

Unnamed: 0,disasterNumber,state,femaDeclarationString,declarationType,declarationDate,incidentBeginDate,incidentType,declarationTitle
0,1,GA,DR-1-GA,DR,1953-05-02,1953-05-02,Tornado,TORNADO


In [12]:
# most recent declaration: sort by declaration date descending, keep the top row
sql = """
    SELECT
        disasterNumber,
        state,
        femaDeclarationString,
        declarationType,
        declarationDate,
        incidentBeginDate,
        incidentType,
        declarationTitle
    FROM disaster_declarations
    ORDER BY declarationDate DESC
    LIMIT 1
"""

latest_declared = con.sql(sql)
latest_declared.df()

Unnamed: 0,disasterNumber,state,femaDeclarationString,declarationType,declarationDate,incidentBeginDate,incidentType,declarationTitle
0,3622,FL,EM-3622-FL,EM,2024-10-07,2024-10-05,Hurricane,HURRICANE MILTON


In [13]:
# record whose incident started most recently (incidentBeginDate descending)
sql = """
    SELECT
        lastRefresh,
        disasterNumber,
        state,
        femaDeclarationString,
        declarationType,
        declarationDate,
        incidentBeginDate,
        incidentType,
        declarationTitle
    FROM disaster_declarations
    ORDER BY incidentBeginDate DESC
    LIMIT 1
"""

latest_incident = con.sql(sql)
latest_incident.df()

Unnamed: 0,lastRefresh,disasterNumber,state,femaDeclarationString,declarationType,declarationDate,incidentBeginDate,incidentType,declarationTitle
0,2024-10-07 14:01:33.043,3622,FL,EM-3622-FL,EM,2024-10-07,2024-10-05,Hurricane,HURRICANE MILTON


In [14]:
# record with the newest lastRefresh timestamp
sql = """
    SELECT
        lastRefresh,
        disasterNumber,
        state,
        femaDeclarationString,
        declarationType,
        declarationDate,
        incidentBeginDate,
        incidentType,
        declarationTitle
    FROM disaster_declarations
    ORDER BY lastRefresh DESC
    LIMIT 1
"""

most_recent_refresh = con.sql(sql)
most_recent_refresh.df()

Unnamed: 0,lastRefresh,disasterNumber,state,femaDeclarationString,declarationType,declarationDate,incidentBeginDate,incidentType,declarationTitle
0,2024-10-07 14:01:33.043,3622,FL,EM-3622-FL,EM,2024-10-07,2024-10-05,Hurricane,HURRICANE MILTON


In [15]:
# how many records each declaration string has; show the 20 largest
sql = """
    SELECT
       femaDeclarationString,
       count(id) AS count
    FROM
        disaster_declarations
    GROUP BY femaDeclarationString
    ORDER BY count DESC   
    LIMIT 20
"""

records_per_declaration = con.sql(sql)
records_per_declaration

┌───────────────────────┬───────┐
│ femaDeclarationString │ count │
│        varchar        │ int64 │
├───────────────────────┼───────┤
│ DR-4522-ME            │   443 │
│ DR-4485-TX            │   257 │
│ DR-1239-TX            │   257 │
│ EM-3458-TX            │   257 │
│ EM-3261-TX            │   255 │
│ DR-4586-TX            │   254 │
│ EM-3554-TX            │   254 │
│ DR-1624-TX            │   254 │
│ EM-3216-TX            │   254 │
│ DR-1606-TX            │   254 │
│ EM-3284-TX            │   229 │
│ EM-3142-TX            │   227 │
│ DR-4527-SD            │   207 │
│ EM-3444-ME            │   179 │
│ EM-3464-GA            │   160 │
│ DR-4501-GA            │   160 │
│ EM-3616-GA            │   159 │
│ DR-4338-GA            │   159 │
│ EM-3387-GA            │   159 │
│ EM-3218-GA            │   159 │
├───────────────────────┴───────┤
│ 20 rows             2 columns │
└───────────────────────────────┘

One declaration has many records. Examine further.

In [16]:
# pull every record of declaration 'DR-4522-ME' into pandas
me_records = con.sql("""
    SELECT *
    FROM disaster_declarations
    WHERE femaDeclarationString = 'DR-4522-ME'
""").df()

# number of distinct values in each object-dtype column of those records
me_records.describe(include='object').loc['unique']

id                          443
state                         1
femaDeclarationString         1
declarationType               1
incidentType                  1
declarationTitle              1
fipsStateCode                 1
fipsCountyCode               17
placeCode                   443
designatedArea              438
declarationRequestNumber      1
incidentId                    1
designatedIncidentTypes       0
hash                        443
Name: unique, dtype: object

The duplicate records for the same declaration suggest that each row in this dataset represents one **declaration** paired with one **place** (i.e. placeCode) that received the declaration. Let's examine only unique declarations.

**Table xx.** Count of Disasters Declared

In [17]:
# Count unique declarations and the span of declaration dates they cover.
# Both ends of the range come from declarationDate so that the printed
# sentence ("declared between ... and ...") describes when disasters were
# declared. lastRefresh appears to be a record-refresh timestamp rather than a
# declaration date, so it is not used for the upper bound.
sql = """
    SELECT
        COUNT(DISTINCT femaDeclarationString) AS disasters_declared,
        strftime(MIN(declarationDate), '%Y-%m-%d') AS date_min,
        strftime(MAX(declarationDate), '%Y-%m-%d') AS date_max
    FROM disaster_declarations
"""

# push to pandas df (the aggregate query returns a single row)
df = con.sql(sql).df()

# capture values from that single row
count = df['disasters_declared'].values[0]
date_min = df['date_min'].values[0]
date_max = df['date_max'].values[0]

print(f"{count:,} disasters declared between {date_min} and {date_max}.")

4,985 disasters declared between 1953-05-02 and 2024-10-07.


**Table xx.** Number of Disasters Declared by Declaration Type

In [18]:
# keep one row per declaration (DISTINCT ON), then tally declarations by type
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            declarationType,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
       declarationType,
       count(id) AS count
    FROM
        declarations
    GROUP BY declarationType
    ORDER BY count DESC   
"""

by_declaration_type = con.sql(sql)
by_declaration_type

┌─────────────────┬───────┐
│ declarationType │ count │
│     varchar     │ int64 │
├─────────────────┼───────┤
│ DR              │  2826 │
│ FM              │  1537 │
│ EM              │   622 │
└─────────────────┴───────┘

Declaration Type: Two character code that defines if the disaster is a Major Disaster Declaration (DR), Emergency Declaration (EM), or Fire Management Assistance Declaration (FM).

# Analysis

**Table xx.** Number of Disaster Declarations by Incident Type

In [19]:
# keep one row per declaration (DISTINCT ON), then tally declarations by incident type
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            incidentType,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
       incidentType,
       count(id) AS count
    FROM
        declarations
    GROUP BY incidentType
    ORDER BY count DESC  
"""

by_incident_type = con.sql(sql)
by_incident_type.df()

Unnamed: 0,incidentType,count
0,Fire,1636
1,Severe Storm,1093
2,Flood,898
3,Hurricane,450
4,Tornado,182
5,Snowstorm,171
6,Biological,167
7,Severe Ice Storm,74
8,Typhoon,58
9,Drought,46


**Table xx.** Top 20 Fiscal Years (`fyDeclared`) with the Highest Number of Disaster Declarations

In [20]:
# keep one row per declaration (DISTINCT ON), then tally declarations by
# fyDeclared (exposed as `year`) and keep the 20 largest
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            fyDeclared,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
        fyDeclared AS year,
        count(id) AS count
    FROM
        declarations
    GROUP BY fyDeclared
    ORDER BY count DESC
    LIMIT 20
"""

by_fiscal_year = con.sql(sql)
by_fiscal_year.df()

Unnamed: 0,year,count
0,2020,315
1,2011,236
2,2024,160
3,2006,151
4,1996,149
5,2008,141
6,2005,139
7,2017,135
8,2007,133
9,2018,132


**Table xx.** Top 20 States with the Highest Number of Disaster Declarations

In [21]:
# keep one row per declaration (DISTINCT ON), then tally declarations by state
# and keep the 20 largest
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            state,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
       state,
       count(id) AS count
    FROM
        declarations
    GROUP BY state
    ORDER BY count DESC   
    LIMIT 20
"""

by_state = con.sql(sql)
by_state.df()

Unnamed: 0,state,count
0,CA,384
1,TX,376
2,OK,230
3,WA,208
4,FL,183
5,OR,158
6,NM,118
7,AZ,116
8,NY,116
9,LA,108


**Table xx.** Top 20 States and Incident Types with the Highest Number of Disaster Declarations

In [22]:
# keep one row per declaration (DISTINCT ON), then tally declarations by
# (state, incidentType) pair and keep the 20 largest
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            state,
            incidentType,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
       state,
       incidentType,
       count(id) AS count
    FROM
        declarations
    GROUP BY state, incidentType
    ORDER BY count DESC   
    LIMIT 20
"""

by_state_incident = con.sql(sql)
by_state_incident.df()

Unnamed: 0,state,incidentType,count
0,CA,Fire,287
1,TX,Fire,260
2,WA,Fire,142
3,OR,Fire,115
4,OK,Fire,111
5,NV,Fire,80
6,AZ,Fire,80
7,CO,Fire,79
8,NM,Fire,71
9,FL,Fire,67


**Table xx.** Top 20 States, Places, Designated Areas, and Incident Types with the Highest Number of Disaster Declarations

In [23]:
# tally records per (state, placeCode, designatedArea, incidentType) straight
# from the table (no de-duplication step here); a placeCode of 0 is turned
# into NULL by NULLIF, and only the 20 largest groups are kept
sql = """
    SELECT
        state,
        NULLIF(placeCode, 0) AS placeCode,
        designatedArea,
        incidentType,
        count(id) AS count
    FROM
        disaster_declarations
    GROUP BY ALL
    ORDER BY count DESC  
    LIMIT 20
"""

by_place = con.sql(sql)
by_place.df()

Unnamed: 0,state,placeCode,designatedArea,incidentType,count
0,TX,,Statewide,Fire,92
1,CA,99037.0,Los Angeles (County),Fire,55
2,CA,99065.0,Riverside (County),Fire,37
3,NV,99031.0,Washoe (County),Fire,34
4,CA,99071.0,San Bernardino (County),Fire,33
5,LA,99075.0,Plaquemines (Parish),Hurricane,32
6,WA,99007.0,Chelan (County),Fire,32
7,LA,99089.0,St. Charles (Parish),Hurricane,31
8,LA,99057.0,Lafourche (Parish),Hurricane,31
9,LA,99109.0,Terrebonne (Parish),Hurricane,31


**Table xx.** Number of Disaster Declarations requested by a Tribal Nation

In [24]:
# keep one row per declaration (DISTINCT ON), then tally declarations by the
# tribalRequest flag
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            tribalRequest,
            id           
        FROM
            disaster_declarations
    )
    
    SELECT
       tribalRequest,
       count(id) AS count
    FROM
        declarations
    GROUP BY tribalRequest
    ORDER BY count DESC  
"""

by_tribal_request = con.sql(sql)
by_tribal_request

┌───────────────┬───────┐
│ tribalRequest │ count │
│    boolean    │ int64 │
├───────────────┼───────┤
│ false         │  4878 │
│ true          │   107 │
└───────────────┴───────┘

**Table xx.** Top 20 States and Incident Types with the Highest Number of Disaster Declarations requested by a Tribal Nation

In [25]:
# restrict to records with tribalRequest = true, keep one row per declaration
# (DISTINCT ON), then tally by (state, incidentType) and keep the 20 largest
sql = """
    WITH declarations AS (
    
        SELECT
            DISTINCT ON (femaDeclarationString) femaDeclarationString,
            state,
            incidentType,
            id           
        FROM
            disaster_declarations
        WHERE
            tribalRequest = true
    )
    
    SELECT
       state,
       incidentType,
       count(id) AS count
    FROM
        declarations
    GROUP BY state, incidentType
    ORDER BY count DESC  
    LIMIT 20
"""

tribal_by_state_incident = con.sql(sql)
tribal_by_state_incident.df()

Unnamed: 0,state,incidentType,count
0,OK,Biological,15
1,NM,Biological,11
2,CA,Severe Storm,9
3,AZ,Severe Storm,4
4,FL,Hurricane,4
5,OK,Severe Storm,4
6,NE,Biological,4
7,SD,Biological,3
8,KS,Biological,3
9,NM,Flood,2


# Export Data
Export the data so that the analysis can be reproduced.

In [26]:
# export to parquet
# Make sure the target folder exists first: on a fresh checkout data/ may be
# missing, and the COPY below would then fail because it cannot open the file.
Path("data").mkdir(parents=True, exist_ok=True)
con.sql("COPY disaster_declarations TO 'data/disaster-declarations.parquet' (FORMAT PARQUET);")

In [27]:
# sanity check: list the contents of data/ to confirm the exported parquet file is there
%ls data/

disaster-declarations.parquet  public-assistance.parquet


In [28]:
# read the exported parquet file back and show its first ten rows
exported_preview = con.sql("SELECT * FROM read_parquet('data/disaster-declarations.parquet') LIMIT 10;")
exported_preview

┌──────────────────────┬────────────────┬─────────┬───┬──────────────────────┬──────────────────────┐
│          id          │ disasterNumber │  state  │ … │     lastRefresh      │         hash         │
│       varchar        │     int16      │ varchar │   │      timestamp       │       varchar        │
├──────────────────────┼────────────────┼─────────┼───┼──────────────────────┼──────────────────────┤
│ f15a7a79-f1c3-41bb…  │           5530 │ NV      │ … │ 2024-08-27 18:22:1…  │ 5d07e7c51bb300bfbe…  │
│ 09e3f81a-5e16-4b72…  │           5529 │ OR      │ … │ 2024-08-27 18:22:1…  │ ae87cf3c6ed795015b…  │
│ 59983f89-30bf-4888…  │           5528 │ OR      │ … │ 2024-08-27 18:22:1…  │ 432cf0995c47e3895c…  │
│ 8d13ecf0-bc2f-496b…  │           5527 │ OR      │ … │ 2024-08-27 18:22:1…  │ 2f21d90cb6bc64b0d4…  │
│ 17c24d4a-49a9-4cac…  │           5526 │ CO      │ … │ 2024-08-27 18:22:1…  │ e753ba692156f389db…  │
│ f1140a27-cb85-404c…  │           5525 │ CO      │ … │ 2024-08-27 18:22:1…  │ b1f