In [1]:
# importing libraries
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import urllib
import urllib.request
import requests
import matplotlib.pyplot as plt
import time

plt.rcParams['savefig.facecolor'] = 'white'
%matplotlib inline

In [2]:
print('printing packages and versions:\n')

%reload_ext watermark
%watermark -v -p numpy,pandas,geopandas,matplotlib

printing packages and versions:

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.4.0

numpy     : 1.23.1
pandas    : 1.4.3
geopandas : 0.11.1
matplotlib: 3.5.2



In [3]:
url = 'https://www.fema.gov/api/open/v2/FimaNfipPolicies?$format=csv'
df = pd.read_csv(url, nrows=100)

print('shape of data: {}'.format(df.shape))
df.head()

shape of data: (100, 81)


Unnamed: 0,agricultureStructureIndicator,baseFloodElevation,basementEnclosureCrawlspaceType,cancellationDateOfFloodPolicy,condominiumCoverageTypeCode,construction,crsClassCode,buildingDeductibleCode,contentsDeductibleCode,elevatedBuildingIndicator,...,femaRegion,propertyState,reportedCity,reportedZipCode,countyCode,censusTract,censusBlockGroupFips,latitude,longitude,id
0,0,,,,N,0,5,F,F,0,...,4,FL,Currently Unavailable,32164,12035,12035060211,120350602112,29.5,-81.2,eb6e3349-0352-4308-abff-10c71a2f459f
1,0,,,,N,0,4,F,F,0,...,4,FL,Currently Unavailable,32164,12035,12035060211,120350602112,29.5,-81.2,15c9a948-05df-4905-ab2e-faf80ac2152e
2,0,,,,N,0,4,F,F,0,...,4,FL,Currently Unavailable,32164,12035,12035060211,120350602112,29.5,-81.2,d2d506a1-b3c7-48c5-85ad-c74778a7fba8
3,0,,,2019-10-28T00:00:00.000Z,N,0,4,F,F,0,...,4,FL,Currently Unavailable,32164,12035,12035060211,120350602112,29.5,-81.2,e9e77396-9250-46fb-8f11-3d222937f345
4,0,11.0,,,A,0,7,1,0,1,...,4,FL,Currently Unavailable,33767,12103,12103026002,121030260021,28.0,-82.8,f0db5ca7-d1c5-450e-a8b9-2c7b82137f51


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 81 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   agricultureStructureIndicator           100 non-null    int64  
 1   baseFloodElevation                      65 non-null     float64
 2   basementEnclosureCrawlspaceType         13 non-null     float64
 3   cancellationDateOfFloodPolicy           1 non-null      object 
 4   condominiumCoverageTypeCode             100 non-null    object 
 5   construction                            100 non-null    int64  
 6   crsClassCode                            100 non-null    int64  
 7   buildingDeductibleCode                  100 non-null    object 
 8   contentsDeductibleCode                  92 non-null     object 
 9   elevatedBuildingIndicator               100 non-null    int64  
 10  elevationCertificateIndicator           4 non-null      float64

# County FIPS Codes for New York City

- The Bronx is Bronx County - FIPS 36005  
- Brooklyn is Kings County - FIPS 36047  
- Manhattan is New York County - FIPS 36061  
- Queens is Queens County - FIPS 36081  
- Staten Island is Richmond County - FIPS 36085

In [8]:
%%time

url_base = 'https://www.fema.gov/api/open/v2/FimaNfipPolicies?'
format_param = '$format=csv'
filter_param = '&$filter=countyCode%20eq%20%27{}%27'
skip_param = '&$skip={}'
top_param = '&$top=10000'

url = url_base + format_param + filter_param + skip_param + top_param
counties = [36061, 36005, 36047, 36081, 36085]

for county in counties:
    
    print('county fips: {}\n------------'.format(county))
    df = pd.DataFrame()

    for skip in range(0, 1000000, 10000):

        print('skip number: {}'.format(skip))
        page_df = pd.read_csv(url.format(county, skip), low_memory=False)
        df = pd.concat([page_df, df]).reset_index(drop=True)

        rows = page_df.shape[0]
        print('number of rows: {}'.format(rows))
        if rows < 10000:
            break

        print('dataframe shape: {}'.format(df.shape))   
        time.sleep(5) 

    print('shape of data: {}\n'.format(df.shape))
    df.to_csv('data/policies-{}.csv'.format(county), index=False)
    
    time.sleep(5)

county fips: 36061
------------
skip number: 0
number of rows: 10000
dataframe shape: (10000, 81)
skip number: 10000
number of rows: 10000
dataframe shape: (20000, 81)
skip number: 20000
number of rows: 10000
dataframe shape: (30000, 81)
skip number: 30000
number of rows: 3858
shape of data: (33858, 81)

county fips: 36005
------------
skip number: 0
number of rows: 10000
dataframe shape: (10000, 81)
skip number: 10000
number of rows: 10000
dataframe shape: (20000, 81)
skip number: 20000
number of rows: 10000
dataframe shape: (30000, 81)
skip number: 30000
number of rows: 2314
shape of data: (32314, 81)

county fips: 36047
------------
skip number: 0
number of rows: 10000
dataframe shape: (10000, 81)
skip number: 10000
number of rows: 10000
dataframe shape: (20000, 81)
skip number: 20000
number of rows: 10000
dataframe shape: (30000, 81)
skip number: 30000
number of rows: 10000
dataframe shape: (40000, 81)
skip number: 40000
number of rows: 10000
dataframe shape: (50000, 81)
skip numbe

In [9]:
path = 'data'
all_files = glob.glob(os.path.join(path, "*.csv"))

df = pd.concat((pd.read_csv(f, low_memory=False) for f in all_files), ignore_index=True)

print(df.shape)
df.head()

(503873, 81)


Unnamed: 0,agricultureStructureIndicator,baseFloodElevation,basementEnclosureCrawlspaceType,cancellationDateOfFloodPolicy,condominiumCoverageTypeCode,construction,crsClassCode,buildingDeductibleCode,contentsDeductibleCode,elevatedBuildingIndicator,...,femaRegion,propertyState,reportedCity,reportedZipCode,countyCode,censusTract,censusBlockGroupFips,latitude,longitude,id
0,0,,1.0,,N,0,,E,,0,...,2.0,NY,Currently Unavailable,10004.0,36061,36061000000.0,360610000000.0,40.7,-74.0,73a08fb7-5e37-44ae-87f7-655aeabd4bc5
1,0,,1.0,,N,0,,F,F,0,...,2.0,NY,Currently Unavailable,10009.0,36061,36061000000.0,360610000000.0,40.7,-74.0,d1e0fcc4-d9e3-4c3e-a346-18a4ce134a0a
2,0,,1.0,,N,0,,2,1,0,...,2.0,NY,Currently Unavailable,10011.0,36061,36061010000.0,360610100000.0,40.7,-74.0,02b25b8a-fa69-4ae5-91d1-06b3f82ca602
3,0,,1.0,,U,0,,F,1,0,...,2.0,NY,Currently Unavailable,10014.0,36061,36061010000.0,360610100000.0,40.7,-74.0,409a2bca-9133-4c42-9604-4affdd7f8f53
4,0,,2.0,,U,0,,2,1,0,...,2.0,NY,Currently Unavailable,10013.0,36061,36061000000.0,360610000000.0,40.7,-74.0,04118909-710c-4f5c-82bd-d3a4110a292a


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503873 entries, 0 to 503872
Data columns (total 81 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   agricultureStructureIndicator           503873 non-null  int64  
 1   baseFloodElevation                      110546 non-null  float64
 2   basementEnclosureCrawlspaceType         399204 non-null  float64
 3   cancellationDateOfFloodPolicy           13588 non-null   object 
 4   condominiumCoverageTypeCode             503732 non-null  object 
 5   construction                            503873 non-null  int64  
 6   crsClassCode                            113 non-null     float64
 7   buildingDeductibleCode                  488877 non-null  object 
 8   contentsDeductibleCode                  415800 non-null  object 
 9   elevatedBuildingIndicator               503873 non-null  int64  
 10  elevationCertificateIndicator           1855