# Download NFIP Claims File
Author: Mark Bauer

In [13]:
# import libraries
from datetime import datetime
from pathlib import Path

import duckdb
import requests

In [14]:
# reproducibility
%reload_ext watermark
%watermark -v -p requests,duckdb

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.6.0

requests: 2.28.1
duckdb  : 1.0.0



In [15]:
# provenance: stamp the notebook with the date on which this run was executed
current_date = datetime.now()
retrieval_message = f"The data was retrieved on {current_date.strftime('%Y-%m-%d')}."
print(retrieval_message)

The data was retrieved on 2025-05-04.


In [16]:
# URL of the Parquet file to download
url = "https://www.fema.gov/about/reports-and-data/openfema/FimaNfipClaims.parquet"

# Specify the file path where you want to save the downloaded file
file_path = "data/FimaNfipClaims.parquet"

# Download into a temporary sibling file first, so that an interrupted
# transfer never replaces a previously downloaded file with a truncated one
target_path = Path(file_path)
partial_path = target_path.with_name(target_path.name + ".part")

# Send an HTTP GET request to the URL.
# stream=True defers fetching the body so it can be written in chunks instead
# of being held in memory all at once; timeout=(connect, read) in seconds
# keeps the cell from hanging forever on an unresponsive server.
with requests.get(url, stream=True, timeout=(10, 60)) as response:

    # Check if the request was successful (status code 200)
    if response.status_code == 200:

        # Create the output folder if it does not exist yet (e.g. fresh clone)
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # Write the content of the response to disk, 1 MiB at a time
        with open(partial_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)

        # Move the finished download into place
        partial_path.replace(target_path)
        print("File downloaded successfully.")

    else:
        # Report the status code so the failure can be diagnosed
        print(f"Failed to download file (HTTP {response.status_code}).")

File downloaded successfully.


In [17]:
# confirm file in data/ folder: list the folder's contents via IPython's
# `ls` alias; the downloaded Parquet file should appear in the listing
%ls data/

FimaNfipClaims.parquet


In [18]:
# preview size of file: shell out to `du` for one human-readable total per
# entry in data/, then sort the entries largest first
# NOTE(review): relies on a POSIX shell with `du` and `sort -h` being available
!du -sh data/* | sort -rh

151M	data/FimaNfipClaims.parquet


# Sanity check

In [51]:
# sanity check on file: count the records stored in the downloaded Parquet file
row_count_query = """
    SELECT COUNT(*) AS count_rows
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""
duckdb.sql(row_count_query)

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│    2709121 │
└────────────┘

In [52]:
# preview file: show the first ten records with every column
preview_query = """
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
    LIMIT 10
"""
duckdb.sql(preview_query)

┌──────────────────────┬──────────────────────┬───┬──────────────┬──────────────┬──────────────────────┐
│ agricultureStructu…  │       asOfDate       │ … │   latitude   │  longitude   │          id          │
│       boolean        │ timestamp with tim…  │   │ decimal(9,1) │ decimal(9,1) │         uuid         │
├──────────────────────┼──────────────────────┼───┼──────────────┼──────────────┼──────────────────────┤
│ false                │ 2020-12-11 11:25:4…  │ … │         30.3 │        -87.7 │ ac2b2b72-4b71-4415…  │
│ false                │ 2020-11-13 09:50:3…  │ … │         30.3 │        -87.7 │ dd95560b-afd1-46cb…  │
│ false                │ 2022-04-21 15:56:4…  │ … │         32.5 │        -86.4 │ e434f7e3-eeff-42e4…  │
│ false                │ 2020-03-09 15:28:2…  │ … │         32.5 │        -86.4 │ 5ddd1b02-a847-4b20…  │
│ false                │ 2020-01-22 11:55:5…  │ … │         32.5 │        -86.5 │ a6a39c3c-8ee5-47e9…  │
│ false                │ 2020-01-22 11:55:5…  │ … │    

In [54]:
# preview columns: capture the file's schema as a relation
describe_query = """
    DESCRIBE
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""
describe_rel = duckdb.sql(describe_query)

# the query below refers to the Python variable `describe_rel` by name,
# so that variable name has to stay in sync with the SQL text
column_types_query = """
    SELECT column_name, column_type
    FROM describe_rel
"""
column_types = duckdb.sql(column_types_query)

# raise the row cap to 80 so the printed column list is not cut short
column_types.show(max_rows=80)

┌────────────────────────────────────────────┬──────────────────────────┐
│                column_name                 │       column_type        │
│                  varchar                   │         varchar          │
├────────────────────────────────────────────┼──────────────────────────┤
│ agricultureStructureIndicator              │ BOOLEAN                  │
│ asOfDate                                   │ TIMESTAMP WITH TIME ZONE │
│ basementEnclosureCrawlspaceType            │ SMALLINT                 │
│ policyCount                                │ SMALLINT                 │
│ crsClassificationCode                      │ SMALLINT                 │
│ dateOfLoss                                 │ DATE                     │
│ elevatedBuildingIndicator                  │ BOOLEAN                  │
│ elevationCertificateIndicator              │ VARCHAR                  │
│ elevationDifference                        │ DECIMAL(6,1)             │
│ baseFloodElevation                  

In [56]:
# summarize null statistics: compute per-column summary statistics for the file
summarize_query = """
    SUMMARIZE
    SELECT *
    FROM read_parquet('data/FimaNfipClaims.parquet')
"""
summarize_rel = duckdb.sql(summarize_query)

# order by percentage: columns with the largest share of nulls come first;
# the SQL refers to the Python variable `summarize_rel` by name
null_share_query = """
    SELECT
        column_name,
        null_percentage
    FROM
        summarize_rel
    ORDER BY
        null_percentage DESC
"""
null_share = duckdb.sql(null_share_query)

# raise the row cap to 80 so the printed column list is not cut short
null_share.show(max_rows=80)

┌────────────────────────────────────────────┬─────────────────┐
│                column_name                 │ null_percentage │
│                  varchar                   │  decimal(9,2)   │
├────────────────────────────────────────────┼─────────────────┤
│ floodCharacteristicsIndicator              │           98.53 │
│ eventDesignationNumber                     │           93.23 │
│ lowestAdjacentGrade                        │           81.10 │
│ crsClassificationCode                      │           79.69 │
│ nonPaymentReasonBuilding                   │           77.93 │
│ elevationCertificateIndicator              │           77.92 │
│ lowestFloorElevation                       │           76.36 │
│ baseFloodElevation                         │           75.67 │
│ elevationDifference                        │           72.95 │
│ floodZoneCurrent                           │           71.72 │
│ nfipCommunityNumberCurrent                 │           71.68 │
│ basementEnclosureCrawls