# Downloading the National Flood Insurance Program (NFIP) Claims and Policies Datasets
Author: Mark Bauer

In [1]:
# import libraries
import os
from datetime import datetime

import duckdb
import requests

In [2]:
# reproducibility: load (or reload) the watermark extension, then print the
# Python/IPython versions (-v) and the versions of the listed packages (-p)
%reload_ext watermark
%watermark -v -p requests,duckdb

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.6.0

requests: 2.28.1
duckdb  : 1.0.0



In [3]:
# date retrieved: the day this notebook was run, taken from the local clock;
# not to be confused with the date of the data itself
current_date = datetime.now()
print(f"The data was retrieved on {current_date.strftime('%Y-%m-%d')}.")

The data was retrieved on 2025-05-18.


# NFIP Claims Dataset
Dataset page: https://www.fema.gov/openfema-data-page/fima-nfip-redacted-claims-v2

In [4]:
%%time

# URL of the Parquet file to download
url = "https://www.fema.gov/about/reports-and-data/openfema/FimaNfipClaims.parquet"

# path of saved file
file_path = "data/FimaNfipClaims.parquet"

# send an HTTP GET request to the URL
response = requests.get(url)

# check if the request was successful (status code 200)
if response.status_code == 200:
    
    # write the content of the response to a file
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print("File downloaded successfully.")
    
else:
    print("Failed to download file.")

File downloaded successfully.
CPU times: user 1.39 s, sys: 960 ms, total: 2.35 s
Wall time: 30.9 s


In [5]:
# list the contents of the data/ folder to confirm the downloaded file is there
%ls data/

FimaNfipClaims.parquet       nfip.db
FimaNfipPolicies.parquet     policies-nyc-year.parquet
claims-nyc-year.parquet      policies-state-year.parquet
claims-state-year.parquet


In [6]:
# show the disk usage of each item in data/ in human-readable units,
# sorted from largest to smallest
!du -sh data/* | sort -rh

5.8G	data/nfip.db
3.2G	data/FimaNfipPolicies.parquet
194M	data/FimaNfipClaims.parquet
8.0K	data/policies-state-year.parquet
4.0K	data/policies-nyc-year.parquet
4.0K	data/claims-state-year.parquet
4.0K	data/claims-nyc-year.parquet


# Sanity check

In [7]:
# sanity check: total number of rows in the downloaded claims file
claims_count_query = """
    SELECT COUNT(*) AS count_rows
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""

# last expression in the cell, so the one-row result is displayed
duckdb.sql(claims_count_query)

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│    2712269 │
└────────────┘

In [8]:
# peek at the claims file: 10 rows, all columns
claims_preview_query = """
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
    LIMIT 10
"""

# last expression in the cell, so the result table is displayed
duckdb.sql(claims_preview_query)

┌──────────────────────┬──────────────────────┬───┬──────────────┬──────────────┬──────────────────────┐
│ agricultureStructu…  │       asOfDate       │ … │   latitude   │  longitude   │          id          │
│       boolean        │ timestamp with tim…  │   │ decimal(9,1) │ decimal(9,1) │         uuid         │
├──────────────────────┼──────────────────────┼───┼──────────────┼──────────────┼──────────────────────┤
│ false                │ NULL                 │ … │         39.2 │        -74.6 │ a4edd1e3-a2cc-4ea7…  │
│ false                │ NULL                 │ … │         29.9 │        -95.3 │ 5fa56e50-7923-44f3…  │
│ false                │ NULL                 │ … │         40.0 │        -74.1 │ ee43a296-bc2b-4b49…  │
│ false                │ NULL                 │ … │         29.9 │        -95.4 │ 2d96f6b6-d33b-4eda…  │
│ false                │ NULL                 │ … │         26.4 │        -81.9 │ 37577287-ba9f-4cea…  │
│ false                │ NULL                 │ … │    

In [9]:
# DESCRIBE the claims file to get its column names and types
claims_describe_query = """
    DESCRIBE
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""
describe_rel = duckdb.sql(claims_describe_query)

# keep only name and type; `describe_rel` inside the SQL text refers to the
# Python variable assigned just above
claims_columns_rel = duckdb.sql("""
    SELECT column_name, column_type
    FROM describe_rel
""")

# print up to 80 rows so the column list is not cut off
claims_columns_rel.show(max_rows=80)

┌────────────────────────────────────────────┬──────────────────────────┐
│                column_name                 │       column_type        │
│                  varchar                   │         varchar          │
├────────────────────────────────────────────┼──────────────────────────┤
│ agricultureStructureIndicator              │ BOOLEAN                  │
│ asOfDate                                   │ TIMESTAMP WITH TIME ZONE │
│ basementEnclosureCrawlspaceType            │ SMALLINT                 │
│ policyCount                                │ SMALLINT                 │
│ crsClassificationCode                      │ SMALLINT                 │
│ dateOfLoss                                 │ DATE                     │
│ elevatedBuildingIndicator                  │ BOOLEAN                  │
│ elevationCertificateIndicator              │ VARCHAR                  │
│ elevationDifference                        │ DECIMAL(6,1)             │
│ baseFloodElevation                  

In [10]:
# SUMMARIZE the claims file; the result includes a null_percentage per column
claims_summarize_query = """
    SUMMARIZE
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""
summarize_rel = duckdb.sql(claims_summarize_query)

# rank columns from most to least null; `summarize_rel` inside the SQL text
# refers to the Python variable assigned just above
claims_nulls_rel = duckdb.sql("""
    SELECT
        column_name,
        null_percentage
    FROM summarize_rel
    ORDER BY null_percentage DESC
""")

# print up to 80 rows so the column list is not cut off
claims_nulls_rel.show(max_rows=80)

┌────────────────────────────────────────────┬─────────────────┐
│                column_name                 │ null_percentage │
│                  varchar                   │  decimal(9,2)   │
├────────────────────────────────────────────┼─────────────────┤
│ asOfDate                                   │           99.38 │
│ floodCharacteristicsIndicator              │           98.53 │
│ crsClassificationCode                      │           95.09 │
│ eventDesignationNumber                     │           93.23 │
│ lowestAdjacentGrade                        │           81.09 │
│ elevationCertificateIndicator              │           77.93 │
│ nonPaymentReasonBuilding                   │           77.93 │
│ lowestFloorElevation                       │           76.37 │
│ baseFloodElevation                         │           75.68 │
│ elevationDifference                        │           72.96 │
│ floodZoneCurrent                           │           71.68 │
│ nfipCommunityNumberCurr

# NFIP Policies Dataset
Dataset page: https://www.fema.gov/openfema-data-page/fima-nfip-redacted-policies-v2

In [11]:
%%time

# URL of the Parquet file to download
url = "https://www.fema.gov/about/reports-and-data/openfema/FimaNfipPolicies.parquet"

# path of saved file
file_path = "data/FimaNfipPolicies.parquet"

# send an HTTP GET request to the URL
response = requests.get(url)

# check if the request was successful (status code 200)
if response.status_code == 200:
    
    # write the content of the response to a file
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print("File downloaded successfully.")
    
else:
    print("Failed to download file.")

File downloaded successfully.
CPU times: user 21.4 s, sys: 17.4 s, total: 38.8 s
Wall time: 4min 44s


In [12]:
# list the contents of the data/ folder to confirm the downloaded file is there
%ls data/

FimaNfipClaims.parquet       nfip.db
FimaNfipPolicies.parquet     policies-nyc-year.parquet
claims-nyc-year.parquet      policies-state-year.parquet
claims-state-year.parquet


In [13]:
# show the disk usage of each item in data/ in human-readable units,
# sorted from largest to smallest
!du -sh data/* | sort -rh

5.8G	data/nfip.db
3.2G	data/FimaNfipPolicies.parquet
194M	data/FimaNfipClaims.parquet
8.0K	data/policies-state-year.parquet
4.0K	data/policies-nyc-year.parquet
4.0K	data/claims-state-year.parquet
4.0K	data/claims-nyc-year.parquet


# Sanity check

In [14]:
# sanity check: total number of rows in the downloaded policies file
policies_count_query = """
    SELECT COUNT(*) AS count_rows
    FROM read_parquet('data/FimaNfipPolicies.parquet')
"""

# last expression in the cell, so the one-row result is displayed
duckdb.sql(policies_count_query)

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│   69489458 │
└────────────┘

In [15]:
# peek at the policies file: 10 rows, all columns
policies_preview_query = """
    SELECT *
    FROM read_parquet('data/FimaNfipPolicies.parquet')
    LIMIT 10
"""

# last expression in the cell, so the result table is displayed
duckdb.sql(policies_preview_query)

┌──────────────────────┬────────────────────┬───┬──────────────┬──────────────┬──────────────────────┐
│ agricultureStructu…  │ baseFloodElevation │ … │   latitude   │  longitude   │          id          │
│       boolean        │    decimal(8,2)    │   │ decimal(9,1) │ decimal(9,1) │         uuid         │
├──────────────────────┼────────────────────┼───┼──────────────┼──────────────┼──────────────────────┤
│ false                │               NULL │ … │         32.2 │       -110.9 │ c3c498e0-39ee-4642…  │
│ false                │               NULL │ … │         37.6 │       -121.0 │ 6daee4b7-308b-453c…  │
│ false                │               NULL │ … │         35.4 │       -118.9 │ d4191676-0f6d-47bf…  │
│ false                │               NULL │ … │         39.0 │       -121.4 │ 9dac717a-9a1f-4323…  │
│ false                │               NULL │ … │         34.4 │       -119.6 │ e11197ee-65ef-4630…  │
│ false                │               NULL │ … │         36.6 │       -1

In [16]:
# preview columns
describe_rel = duckdb.sql("""
    DESCRIBE
    SELECT *
    FROM read_parquet('data/FimaNfipPolicies.parquet')
""")

# select columns
duckdb.sql("""
    SELECT column_name, column_type
    FROM describe_rel
""").show(max_rows=80)

┌────────────────────────────────────────┬──────────────┐
│              column_name               │ column_type  │
│                varchar                 │   varchar    │
├────────────────────────────────────────┼──────────────┤
│ agricultureStructureIndicator          │ BOOLEAN      │
│ baseFloodElevation                     │ DECIMAL(8,2) │
│ basementEnclosureCrawlspaceType        │ SMALLINT     │
│ cancellationDateOfFloodPolicy          │ DATE         │
│ condominiumCoverageTypeCode            │ VARCHAR      │
│ construction                           │ BOOLEAN      │
│ crsClassCode                           │ SMALLINT     │
│ buildingDeductibleCode                 │ VARCHAR      │
│ contentsDeductibleCode                 │ VARCHAR      │
│ elevatedBuildingIndicator              │ BOOLEAN      │
│ elevationCertificateIndicator          │ VARCHAR      │
│ elevationDifference                    │ INTEGER      │
│ federalPolicyFee                       │ SMALLINT     │
│ ratedFloodZo

In [17]:
# SUMMARIZE the policies file; the result includes a null_percentage per column
policies_summarize_query = """
    SUMMARIZE
    SELECT *
    FROM read_parquet('data/FimaNfipPolicies.parquet')
"""
summarize_rel = duckdb.sql(policies_summarize_query)

# rank columns from most to least null; `summarize_rel` inside the SQL text
# refers to the Python variable assigned just above
policies_nulls_rel = duckdb.sql("""
    SELECT
        column_name,
        null_percentage
    FROM summarize_rel
    ORDER BY null_percentage DESC
""")

# print up to 80 rows so the column list is not cut off
policies_nulls_rel.show(max_rows=80)

┌────────────────────────────────────────┬─────────────────┐
│              column_name               │ null_percentage │
│                varchar                 │  decimal(9,2)   │
├────────────────────────────────────────┼─────────────────┤
│ cancellationDateOfFloodPolicy          │           95.70 │
│ cancellationVoidanceReasonCode         │           95.70 │
│ obstructionType                        │           81.43 │
│ elevationCertificateIndicator          │           77.96 │
│ enclosureTypeCode                      │           74.92 │
│ insuranceToValueCode                   │           69.99 │
│ lowestAdjacentGrade                    │           68.84 │
│ lowestFloorElevation                   │           67.10 │
│ baseFloodElevation                     │           66.54 │
│ elevationDifference                    │           66.12 │
│ propertyPurchaseDate                   │           59.77 │
│ basementEnclosureCrawlspaceType        │           59.02 │
│ subsidizedRateType    