# Download National Flood Insurance Program (NFIP) Policies Data

Exploring the NYC Building Elevation and Subgrade (BES) Data Set in Python  

Author: Mark Bauer

In [1]:
import os
import glob
import pandas as pd
import duckdb
import requests
import json
import time

In [2]:
# Print the versions of Python modules and packages with **watermark**, the IPython magic extension.
%reload_ext watermark
%watermark -v -p duckdb

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.4.0

duckdb: 0.10.0



# Data Set: The National Flood Insurance Program (NFIP)
The National Flood Insurance Program (NFIP) Policies Data for NYC.

Retrieve data using the FEMA API.

In [3]:
# Fixed pieces of the FEMA API query string. The `{}` slots in FILTER_PARAM and
# SKIP_PARAM are filled in, in that order, by get_api_url.
BASE_URL = 'https://www.fema.gov/api/open/v2/'
FORMAT_PARAM = '$format=json'
METADATA_PARAM = '&$metadata=off'
FILTER_PARAM = '&$filter=countyCode%20eq%20%27{}%27'  # %20 = space, %27 = single quote
SKIP_PARAM = '&$skip={}'
TOP_PARAM = '&$top=10000'

def get_api_url(dataset, county_fips, skip):
    """
    Build the request URL for one page of an NFIP dataset.

    Parameters:
    - dataset (str): Dataset name; it is capitalized and appended to 'FimaNfip'
      to form the endpoint (e.g. 'policies' -> 'FimaNfipPolicies').
    - county_fips (str): County FIPS code substituted into the countyCode filter.
    - skip (int): Number of records to skip, substituted into the `$skip` parameter.

    Returns:
    - str: The complete URL including format, metadata, filter, skip and top parameters.
    """
    endpoint = 'FimaNfip' + dataset.capitalize()
    # assemble the full template first, then fill both placeholders in one pass
    template = ''.join([
        BASE_URL,
        endpoint,
        '?',
        FORMAT_PARAM,
        METADATA_PARAM,
        FILTER_PARAM,
        SKIP_PARAM,
        TOP_PARAM,
    ])
    return template.format(county_fips, skip)

def make_request(url, timeout=120):
    """
    Make an HTTP GET request and return the decoded JSON body.

    Parameters:
    - url (str): The URL to request.
    - timeout (float or tuple): Seconds to wait for the server before giving up,
      passed straight to `requests.get`. Without a timeout a stalled connection
      would block the download indefinitely.

    Returns:
    - The parsed JSON response, or None if the request failed, the server
      returned an error status, or the body was not valid JSON.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # ValueError covers JSON decoding failures on requests versions whose
    # decode error is not a RequestException subclass
    except (requests.RequestException, ValueError) as e:
        print(f"Error making request: {e}")
        return None

def download_data(dataset, county_fips):
    """
    Download FEMA NFIP policies or claims data for a specific county and save it to a JSON file.

    The API is requested page by page (10,000 records per page), increasing the
    skip value until a page with fewer than 10,000 records is returned. All
    records are then written to `data/{dataset}-{county_fips}.json`. If a request
    fails part-way through, a warning is printed and the records fetched so far
    are still written.

    Parameters:
    - dataset (str): Either 'policies' or 'claims'.
    - county_fips (str): County FIPS code.

    Returns:
    - None

    Raises:
    - ValueError: If `dataset` is not 'policies' or 'claims', or if
      `county_fips` is not a string.
    """
    if dataset not in {'policies', 'claims'}:
        raise ValueError("Invalid dataset. Pass either 'policies' or 'claims' to the dataset argument")

    if not isinstance(county_fips, str):
        raise ValueError("County FIPS code must be passed as a string")

    # records per page; must match the `$top` value sent in the request URL
    page_size = 10000

    # key under which the API returns the list of records
    dataset_name = f"FimaNfip{dataset.capitalize()}"

    # create the output directory up front so a missing folder does not
    # discard a long download at the very end
    output_filename = f"data/{dataset}-{county_fips}.json"
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    result_list = []
    skip = 0

    # we'll add print statements to see what's happening
    print(f"Dataset: {dataset}")
    print(f'County FIPS: {county_fips}\n------------')

    while True:
        print(f'Skip number: {skip:,}')

        # make HTTP request and handle JSON response
        url = get_api_url(dataset, county_fips, skip)
        data = make_request(url)

        # a failed request or a payload without the expected key ends the loop;
        # say so explicitly, because the saved file may then be missing records
        if not data or dataset_name not in data:
            print(
                f'WARNING: request failed or returned an unexpected response at skip={skip:,}; '
                f'the saved file may be incomplete'
            )
            break

        # process JSON data and extend the result list
        page = data[dataset_name]
        result_list.extend(page)

        rows = len(page)
        print(f'Number of rows: {rows:,}')

        # a short page means there is nothing left to fetch
        if rows < page_size:
            break

        print(f'Result list length: {len(result_list):,}')
        skip += page_size

        # pause between requests to avoid hammering the API
        time.sleep(5)

    print(f'\nLength of the full result list: {len(result_list):,}\n')

    # write the result list to a JSON file
    with open(output_filename, 'w') as json_file:
        json.dump(result_list, json_file, indent=2)

Retrieve the data for each of the five counties in NYC.

In [4]:
# NYC's five counties by FIPS code:
# 36005 Bronx, 36047 Kings (Brooklyn), 36061 New York (Manhattan),
# 36081 Queens, 36085 Richmond (Staten Island)
for county_fips in ('36005', '36047', '36061', '36081', '36085'):
    download_data('policies', county_fips)

Dataset: policies
County FIPS: 36005
------------
Skip number: 0
Number of rows: 10,000
Result list length: 10,000
Skip number: 10,000
Number of rows: 10,000
Result list length: 20,000
Skip number: 20,000
Number of rows: 10,000
Result list length: 30,000
Skip number: 30,000
Number of rows: 3,526

Length of the full result list: 33,526

Dataset: policies
County FIPS: 36047
------------
Skip number: 0
Number of rows: 10,000
Result list length: 10,000
Skip number: 10,000
Number of rows: 10,000
Result list length: 20,000
Skip number: 20,000
Number of rows: 10,000
Result list length: 30,000
Skip number: 30,000
Number of rows: 10,000
Result list length: 40,000
Skip number: 40,000
Number of rows: 10,000
Result list length: 50,000
Skip number: 50,000
Number of rows: 10,000
Result list length: 60,000
Skip number: 60,000
Number of rows: 10,000
Result list length: 70,000
Skip number: 70,000
Number of rows: 10,000
Result list length: 80,000
Skip number: 80,000
Number of rows: 10,000
Result list le

In [5]:
ls data/

bes-data.parquet     policies-36047.json  policies-36081.json
policies-36005.json  policies-36061.json  policies-36085.json


In [6]:
# create a DuckDB database instance (no path given, so the database is in-memory)
con = duckdb.connect()

# create a table from the downloaded NFIP policies JSON files;
# the glob is limited to the policies files so that other JSON in data/
# (e.g. claims downloads) is not mixed into this table
con.sql(
    """
    CREATE TABLE nfip_data AS
    FROM read_json('data/policies-*.json')
    """
)

# examine count of rows
con.sql("SELECT COUNT(*) AS count_rows FROM nfip_data").show()

┌────────────┐
│ count_rows │
│   int64    │
├────────────┤
│     522387 │
└────────────┘



In [12]:
# write the nfip_data table to a single Parquet file
con.sql("COPY nfip_data TO 'data/nfip-data.parquet' (FORMAT PARQUET)")

In [9]:
!du -h data/*

 96M	data/bes-data.parquet
 36M	data/nfip-data.parquet
 97M	data/policies-36005.json
401M	data/policies-36047.json
112M	data/policies-36061.json
593M	data/policies-36081.json
289M	data/policies-36085.json


In [10]:
!rm data/*.json

In [11]:
ls data/

bes-data.parquet   nfip-data.parquet
