# Downloading FEMA NFIP Data for New York City

In [1]:
# importing libraries
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import urllib
import urllib.request
import requests
import matplotlib.pyplot as plt
import json
import time
import seaborn as sns

# use a white background for figures written with savefig
plt.rcParams['savefig.facecolor'] = 'white'
# render matplotlib figures inline in the notebook output
%matplotlib inline

In [2]:
print('printing packages and versions:\n')

# (re)load the watermark extension, then report the Python/IPython versions (-v)
# and the versions of the listed packages (-p)
%reload_ext watermark
%watermark -v -p numpy,pandas,geopandas,matplotlib

printing packages and versions:

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.4.0

numpy     : 1.23.1
pandas    : 1.4.3
geopandas : 0.11.1
matplotlib: 3.5.2



# County FIPS Codes for New York City

- The Bronx is Bronx County - FIPS 36005  
- Brooklyn is Kings County - FIPS 36047  
- Manhattan is New York County - FIPS 36061  
- Queens is Queens County - FIPS 36081  
- Staten Island is Richmond County - FIPS 36085

# Download Data by County FIPS

In [3]:
def county_download(county_fips, dataset=None):
    """
    Download FIMA NFIP policies or claims data for one county and save it to a JSON file.

    The FEMA open API (v2) is queried page by page (10,000 records per
    request, filtered on ``countyCode``) until a short page is returned.
    All records are then written to ``data/{dataset}-{county_fips}.json``.

    Parameters:
    - county_fips (str): County FIPS code, e.g. '36005'.
    - dataset (str): Either 'policies' or 'claims'.

    Returns:
    - None

    Raises:
    - ValueError: if dataset is neither 'policies' nor 'claims'.
    - requests.HTTPError: if the API responds with an HTTP error status.
    """

    if dataset == 'policies':
        url_base = 'https://www.fema.gov/api/open/v2/FimaNfipPolicies?'
    elif dataset == 'claims':
        url_base = 'https://www.fema.gov/api/open/v2/FimaNfipClaims?'
    else:
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError("pass either 'policies' or 'claims' to dataset argument")

    # number of records requested per call; also the step used for $skip
    page_size = 10000

    format_param = '$format=json'
    metadata_param = '&$metadata=off'
    filter_param = '&$filter=countyCode%20eq%20%27{}%27'
    skip_param = '&$skip={}'
    top_param = '&$top={}'.format(page_size)

    # template with two placeholders: county FIPS code, then skip offset
    url = url_base + format_param + metadata_param + filter_param + skip_param + top_param

    # key under which the records are returned, e.g. 'FimaNfipPolicies'
    dataset_name = f"FimaNfip{dataset.title()}"

    result_list = []
    skip = 0

    print(f"dataset: {dataset}")
    print('county fips: {}\n------------'.format(county_fips))

    while True:
        print('skip number: {:,}'.format(skip))

        # Make HTTP request; the timeout keeps a stalled connection from
        # hanging the notebook, and HTTP errors are raised instead of being
        # parsed as if they were data
        response = requests.get(url.format(county_fips, skip), timeout=120)
        response.raise_for_status()
        payload = response.json()

        # Stop if the response is empty or does not contain the records key
        # NOTE(review): records collected so far are still written below
        if not payload or dataset_name not in payload:
            break

        # Process JSON data and extend the result list
        page = payload[dataset_name]
        result_list.extend(page)

        rows = len(page)
        print('number of rows: {:,}'.format(rows))
        # a short page means this was the last one
        if rows < page_size:
            break

        print('result list length: {:,}'.format(len(result_list)))
        skip += page_size
        # pause between pages
        time.sleep(5)

    print('\nlength of full result list: {:,}\n'.format(len(result_list)))

    # Write the result list to a JSON file, creating the output folder first
    # so a missing directory does not discard the finished download
    output_filename = f"data/{dataset}-{county_fips}.json"
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)
    with open(output_filename, 'w') as json_file:
        json.dump(result_list, json_file, indent=2)

In [4]:
%%time

# Bronx County (FIPS 36005): NFIP policies
county_download('36005', dataset='policies')

dataset: policies
county fips: 36005
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 10,000
result list length: 20,000
skip number: 20,000
number of rows: 10,000
result list length: 30,000
skip number: 30,000
number of rows: 2,487

length of full result list: 32,487

CPU times: user 4.89 s, sys: 596 ms, total: 5.49 s
Wall time: 36.1 s


In [5]:
%%time

# Bronx County (FIPS 36005): NFIP claims
county_download('36005', dataset='claims')

dataset: claims
county fips: 36005
------------
skip number: 0
number of rows: 1,922

length of full result list: 1,922

CPU times: user 259 ms, sys: 24.8 ms, total: 283 ms
Wall time: 2.17 s


In [6]:
%%time

# Kings County / Brooklyn (FIPS 36047): NFIP policies
county_download('36047', dataset='policies')

dataset: policies
county fips: 36047
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 10,000
result list length: 20,000
skip number: 20,000
number of rows: 10,000
result list length: 30,000
skip number: 30,000
number of rows: 10,000
result list length: 40,000
skip number: 40,000
number of rows: 10,000
result list length: 50,000
skip number: 50,000
number of rows: 10,000
result list length: 60,000
skip number: 60,000
number of rows: 10,000
result list length: 70,000
skip number: 70,000
number of rows: 10,000
result list length: 80,000
skip number: 80,000
number of rows: 10,000
result list length: 90,000
skip number: 90,000
number of rows: 10,000
result list length: 100,000
skip number: 100,000
number of rows: 10,000
result list length: 110,000
skip number: 110,000
number of rows: 10,000
result list length: 120,000
skip number: 120,000
number of rows: 10,000
result list length: 130,000
skip number: 130,000
number of rows: 9

In [7]:
%%time

# Kings County / Brooklyn (FIPS 36047): NFIP claims
county_download('36047', dataset='claims')

dataset: claims
county fips: 36047
------------
skip number: 0
number of rows: 6,239

length of full result list: 6,239

CPU times: user 845 ms, sys: 93.2 ms, total: 938 ms
Wall time: 4.56 s


In [8]:
%%time

# New York County / Manhattan (FIPS 36061): NFIP policies
county_download('36061', dataset='policies')

dataset: policies
county fips: 36061
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 10,000
result list length: 20,000
skip number: 20,000
number of rows: 10,000
result list length: 30,000
skip number: 30,000
number of rows: 4,061

length of full result list: 34,061

CPU times: user 4.97 s, sys: 588 ms, total: 5.56 s
Wall time: 35.6 s


In [9]:
%%time

# New York County / Manhattan (FIPS 36061): NFIP claims
county_download('36061', dataset='claims')

dataset: claims
county fips: 36061
------------
skip number: 0
number of rows: 1,475

length of full result list: 1,475

CPU times: user 210 ms, sys: 21.9 ms, total: 232 ms
Wall time: 3.07 s


In [10]:
%%time

# Queens County (FIPS 36081): NFIP policies
county_download('36081', dataset='policies')

dataset: policies
county fips: 36081
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 10,000
result list length: 20,000
skip number: 20,000
number of rows: 10,000
result list length: 30,000
skip number: 30,000
number of rows: 10,000
result list length: 40,000
skip number: 40,000
number of rows: 10,000
result list length: 50,000
skip number: 50,000
number of rows: 10,000
result list length: 60,000
skip number: 60,000
number of rows: 10,000
result list length: 70,000
skip number: 70,000
number of rows: 10,000
result list length: 80,000
skip number: 80,000
number of rows: 10,000
result list length: 90,000
skip number: 90,000
number of rows: 10,000
result list length: 100,000
skip number: 100,000
number of rows: 10,000
result list length: 110,000
skip number: 110,000
number of rows: 10,000
result list length: 120,000
skip number: 120,000
number of rows: 10,000
result list length: 130,000
skip number: 130,000
number of rows: 1

In [11]:
%%time

# Queens County (FIPS 36081): NFIP claims
county_download('36081', dataset='claims')

dataset: claims
county fips: 36081
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 4,787

length of full result list: 14,787

CPU times: user 1.98 s, sys: 222 ms, total: 2.2 s
Wall time: 14.2 s


In [12]:
%%time

# Richmond County / Staten Island (FIPS 36085): NFIP policies
county_download('36085', dataset='policies')

dataset: policies
county fips: 36085
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 10,000
result list length: 20,000
skip number: 20,000
number of rows: 10,000
result list length: 30,000
skip number: 30,000
number of rows: 10,000
result list length: 40,000
skip number: 40,000
number of rows: 10,000
result list length: 50,000
skip number: 50,000
number of rows: 10,000
result list length: 60,000
skip number: 60,000
number of rows: 10,000
result list length: 70,000
skip number: 70,000
number of rows: 10,000
result list length: 80,000
skip number: 80,000
number of rows: 10,000
result list length: 90,000
skip number: 90,000
number of rows: 6,688

length of full result list: 96,688

CPU times: user 14.1 s, sys: 1.66 s, total: 15.7 s
Wall time: 1min 44s


In [13]:
%%time

# Richmond County / Staten Island (FIPS 36085): NFIP claims
county_download('36085', dataset='claims')

dataset: claims
county fips: 36085
------------
skip number: 0
number of rows: 10,000
result list length: 10,000
skip number: 10,000
number of rows: 9,410

length of full result list: 19,410

CPU times: user 2.56 s, sys: 281 ms, total: 2.84 s
Wall time: 17.8 s


# Preview Full Data

## Policies

In [14]:
%%time

path = 'data/'
all_files = glob.glob(os.path.join(path, "policies-*.json"))

# concat all counties into one dataframe
df = pd.concat((pd.read_json(file) for file in all_files), ignore_index=True)
# reverse column order
df = df.loc[:, df.columns[::-1]]

# examine data
print('shape of data: {}'.format(df.shape))
df.head()

shape of data: (506278, 81)
CPU times: user 29.7 s, sys: 5.25 s, total: 35 s
Wall time: 35.4 s


Unnamed: 0,id,longitude,latitude,censusBlockGroupFips,censusTract,countyCode,reportedZipCode,reportedCity,propertyState,femaRegion,...,elevatedBuildingIndicator,contentsDeductibleCode,buildingDeductibleCode,crsClassCode,construction,condominiumCoverageTypeCode,cancellationDateOfFloodPolicy,basementEnclosureCrawlspaceType,baseFloodElevation,agricultureStructureIndicator
0,6cdba47d-1e40-4680-8ca5-f2f4a61cb174,-74.0,40.7,360610100000.0,36061010000.0,36061,10011.0,Currently Unavailable,NY,2.0,...,False,0,0,,False,N,,,,False
1,f07086ce-f925-431e-ab70-fe1d9ab4e653,-74.0,40.7,360610100000.0,36061010000.0,36061,10011.0,Currently Unavailable,NY,2.0,...,False,0,0,,False,N,,,,False
2,1ded539c-1bdd-4f20-bb3d-314a92a9f9f1,-74.0,40.7,360610100000.0,36061010000.0,36061,10012.0,Currently Unavailable,NY,2.0,...,False,0,0,,False,N,,2.0,,False
3,b3eb3d0a-446d-4493-bcba-b48dabc701aa,-73.9,40.8,,,36061,,Currently Unavailable,MS,4.0,...,False,1,1,,False,N,,,,False
4,0b1069b8-6b2b-4cb1-9ecd-af9aa8bd2170,-74.0,40.7,360610100000.0,36061010000.0,36061,10001.0,Currently Unavailable,NY,2.0,...,False,1,0,,False,N,,2.0,,False


In [15]:
# list each column of the combined policies data with its non-null count and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506278 entries, 0 to 506277
Data columns (total 81 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   id                                      506278 non-null  object 
 1   longitude                               506278 non-null  float64
 2   latitude                                506278 non-null  float64
 3   censusBlockGroupFips                    503883 non-null  float64
 4   censusTract                             503883 non-null  float64
 5   countyCode                              506278 non-null  int64  
 6   reportedZipCode                         506278 non-null  object 
 7   reportedCity                            506278 non-null  object 
 8   propertyState                           506278 non-null  object 
 9   femaRegion                              506277 non-null  float64
 10  floodZoneCurrent                        3753

## Claims

In [16]:
%%time

path = 'data/'
all_files = glob.glob(os.path.join(path, "claims-*.json"))

# concat all counties into one dataframe
df = pd.concat((pd.read_json(file) for file in all_files), ignore_index=True)
# reverse column order
df = df.loc[:, df.columns[::-1]]

# examine data
print('shape of data: {}'.format(df.shape))
df.head()

shape of data: (43833, 73)
CPU times: user 2.38 s, sys: 309 ms, total: 2.69 s
Wall time: 2.71 s


Unnamed: 0,id,longitude,latitude,censusBlockGroupFips,censusTract,countyCode,reportedZipCode,reportedCity,state,rentalPropertyIndicator,...,baseFloodElevation,elevationDifference,elevationCertificateIndicator,elevatedBuildingIndicator,dateOfLoss,crsClassificationCode,policyCount,basementEnclosureCrawlspaceType,asOfDate,agricultureStructureIndicator
0,7279caf2-e180-474f-9072-ce5d6382278a,-73.8,40.8,360050100000.0,36005010000.0,36005,10465,Currently Unavailable,NY,False,...,,,,False,2021-09-02T00:00:00.000Z,,1,0.0,2022-05-19T15:56:42.000Z,False
1,e5f8240c-06d7-48f9-b148-82040a1e5261,-73.8,40.8,360050100000.0,36005010000.0,36005,10465,Currently Unavailable,NY,False,...,,,,False,2012-10-29T00:00:00.000Z,,1,1.0,2020-01-22T16:55:53.000Z,False
2,abc053a4-1d37-420c-8fba-a2c97ef52dda,-73.8,40.8,360050200000.0,36005020000.0,36005,10465,Currently Unavailable,NY,False,...,,,1.0,False,2017-11-22T00:00:00.000Z,,1,1.0,2020-01-22T16:55:53.000Z,False
3,7764e429-a818-48e4-ba0a-99bb50a67f5c,-73.8,40.8,360050100000.0,36005010000.0,36005,10465,Currently Unavailable,NY,False,...,,,,False,2012-10-29T00:00:00.000Z,,1,2.0,2020-01-22T16:55:53.000Z,False
4,a5e6a2e2-4798-47b1-bfcd-cdae0cc1d11b,-73.8,40.9,360050500000.0,36005050000.0,36005,10464,Currently Unavailable,NY,False,...,,,,False,2012-10-29T00:00:00.000Z,,1,1.0,2020-01-22T16:55:53.000Z,False


In [17]:
# list each column of the combined claims data with its non-null count and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43833 entries, 0 to 43832
Data columns (total 73 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   id                                          43833 non-null  object 
 1   longitude                                   43833 non-null  float64
 2   latitude                                    43833 non-null  float64
 3   censusBlockGroupFips                        43416 non-null  float64
 4   censusTract                                 43416 non-null  float64
 5   countyCode                                  43833 non-null  int64  
 6   reportedZipCode                             43833 non-null  object 
 7   reportedCity                                43833 non-null  object 
 8   state                                       43833 non-null  object 
 9   rentalPropertyIndicator                     43833 non-null  bool   
 10  buildingDe