## Prerequisites

In [42]:
!pip install pandas openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


In [1]:
import pandas as pd
import os

## Global Inputs

In [26]:
# Change FILEPATH_BASE env var in docker-compose.yaml
FILEPATH_BASE = '/home/jovyan/data-laptop'
print(FILEPATH_BASE)
# FimaNfipClaimsV1 is v1
# FimaNfipClaims is v2
FILEPATH_FIMA_CLAIMS_CSV = os.path.join(FILEPATH_BASE, "fima-claims/FimaNfipClaims.csv")
print(FILEPATH_FIMA_CLAIMS_CSV)
FILEPATH_FILTERED_STATE_XLS = os.path.join(FILEPATH_BASE, "states-io-filtered/states-io-filtered.xlsx")
print(FILEPATH_FILTERED_STATE_XLS)

/home/jovyan/data-laptop
/home/jovyan/data-laptop/fima-claims/FimaNfipClaims.csv
/home/jovyan/data-laptop/states-io-filtered/states-io-filtered.xlsx


## Helper funcs

In [36]:
def print_full(x):
    pd.set_option('display.max_rows', len(x), 'display.max_columns', len(x.columns))
    print(x)
    pd.reset_option('display.max_rows', None, 'display.max_columns', None)
    
def print_full_rows(x):
    pd.set_option('display.max_rows', len(x))
    print(x)
    pd.reset_option('display.max_rows', None)
    
def print_full_columns(x):
    pd.set_option('display.max_columns', len(x.columns))
    print(x)
    pd.reset_option('display.max_columns', None)

## Load in FIM NFIP Claims + State Identifiers

In [4]:
%%time
claims = pd.read_csv(FILEPATH_FIMA_CLAIMS_CSV)



CPU times: user 58.1 s, sys: 34.1 s, total: 1min 32s
Wall time: 2min 11s


In [29]:
claims.columns

Index(['agricultureStructureIndicator', 'asOfDate',
       'basementEnclosureCrawlspaceType', 'policyCount',
       'crsClassificationCode', 'dateOfLoss', 'elevatedBuildingIndicator',
       'elevationCertificateIndicator', 'elevationDifference',
       'baseFloodElevation', 'ratedFloodZone', 'houseWorship',
       'locationOfContents', 'lowestAdjacentGrade', 'lowestFloorElevation',
       'numberOfFloorsInTheInsuredBuilding', 'nonProfitIndicator',
       'obstructionType', 'occupancyType', 'originalConstructionDate',
       'originalNBDate', 'amountPaidOnBuildingClaim',
       'amountPaidOnContentsClaim',
       'amountPaidOnIncreasedCostOfComplianceClaim',
       'postFIRMConstructionIndicator', 'rateMethod',
       'smallBusinessIndicatorBuilding', 'totalBuildingInsuranceCoverage',
       'totalContentsInsuranceCoverage', 'yearOfLoss',
       'primaryResidenceIndicator', 'buildingDamageAmount',
       'buildingDeductibleCode', 'netBuildingPaymentAmount',
       'buildingPropertyValu

In [38]:
print_full_rows(claims.dtypes)

agricultureStructureIndicator                   int64
asOfDate                                       object
basementEnclosureCrawlspaceType               float64
policyCount                                     int64
crsClassificationCode                         float64
dateOfLoss                                     object
elevatedBuildingIndicator                       int64
elevationCertificateIndicator                  object
elevationDifference                           float64
baseFloodElevation                            float64
ratedFloodZone                                 object
houseWorship                                    int64
locationOfContents                            float64
lowestAdjacentGrade                           float64
lowestFloorElevation                          float64
numberOfFloorsInTheInsuredBuilding            float64
nonProfitIndicator                              int64
obstructionType                               float64
occupancyType               

In [40]:
pd.set_option('display.max_columns', None)
claims.head()

Unnamed: 0,agricultureStructureIndicator,asOfDate,basementEnclosureCrawlspaceType,policyCount,crsClassificationCode,dateOfLoss,elevatedBuildingIndicator,elevationCertificateIndicator,elevationDifference,baseFloodElevation,ratedFloodZone,houseWorship,locationOfContents,lowestAdjacentGrade,lowestFloorElevation,numberOfFloorsInTheInsuredBuilding,nonProfitIndicator,obstructionType,occupancyType,originalConstructionDate,originalNBDate,amountPaidOnBuildingClaim,amountPaidOnContentsClaim,amountPaidOnIncreasedCostOfComplianceClaim,postFIRMConstructionIndicator,rateMethod,smallBusinessIndicatorBuilding,totalBuildingInsuranceCoverage,totalContentsInsuranceCoverage,yearOfLoss,primaryResidenceIndicator,buildingDamageAmount,buildingDeductibleCode,netBuildingPaymentAmount,buildingPropertyValue,causeOfDamage,condominiumCoverageTypeCode,contentsDamageAmount,contentsDeductibleCode,netContentsPaymentAmount,contentsPropertyValue,disasterAssistanceCoverageRequired,eventDesignationNumber,ficoNumber,floodCharacteristicsIndicator,floodWaterDuration,floodproofedIndicator,floodEvent,iccCoverage,netIccPaymentAmount,nfipRatedCommunityNumber,nfipCommunityNumberCurrent,nfipCommunityName,nonPaymentReasonContents,nonPaymentReasonBuilding,numberOfUnits,buildingReplacementCost,contentsReplacementCost,replacementCostBasis,stateOwnedIndicator,waterDepth,floodZoneCurrent,buildingDescriptionCode,rentalPropertyIndicator,state,reportedCity,reportedZipCode,countyCode,censusTract,censusBlockGroupFips,latitude,longitude,id
0,0,2020-01-22T16:55:53.194Z,,1,8.0,1998-02-07T00:00:00.000Z,0,,,,X,0,,,,4.0,0,10.0,1.0,1963-01-01T00:00:00.000Z,1997-01-11T00:00:00.000Z,,,,0,7,0,200000.0,50000.0,1998,0,382.0,0,0.0,937.0,1,N,,0,0.0,,0.0,,612.0,,0.0,0,Pineapple Express - Southern,15000.0,0.0,60294.0,,"OCEANSIDE, CITY OF",97.0,1.0,1.0,937.0,,A,0,0.0,,,0,CA,Currently Unavailable,92056.0,6073.0,6073019000.0,60730190000.0,33.2,-117.3,23dcb0d8-3e61-45bf-899f-b951946ce2ff
1,0,2020-01-22T16:55:53.194Z,,1,8.0,2005-08-29T00:00:00.000Z,0,,,,X,0,,,,2.0,0,,1.0,1967-07-01T00:00:00.000Z,1990-07-12T00:00:00.000Z,,,,0,7,0,100000.0,40000.0,2005,1,,0,0.0,,1,N,,0,0.0,,0.0,,654.0,,0.0,0,Hurricane Katrina,30000.0,0.0,225203.0,,NEW ORLEANS/ORLEANS PARISH*,6.0,6.0,1.0,,,A,0,0.0,,,0,LA,Currently Unavailable,70131.0,22071.0,22071000000.0,220710000000.0,29.9,-90.0,55783cdd-ccbd-4b19-930b-072def248507
2,0,2020-01-22T16:55:53.194Z,,1,9.0,1998-09-28T00:00:00.000Z,0,,,,X,0,,,,1.0,0,10.0,1.0,1972-01-01T00:00:00.000Z,1997-07-24T00:00:00.000Z,8813.21,1720.0,0.0,0,1,0,100000.0,50000.0,1998,1,9313.0,0,8813.21,80000.0,1,N,2220.0,0,1720.0,0.0,0.0,,133.0,,0.0,0,Hurricane Georges (Panhandle),15000.0,0.0,120274.0,,SANTA ROSA COUNTY *,,,1.0,100000.0,0.0,A,0,0.0,,,0,FL,Currently Unavailable,32566.0,12113.0,12113010000.0,121130100000.0,30.4,-86.9,bfb5922b-1b21-4882-b1d4-b3825ff53e37
3,0,2019-09-19T13:45:58.425Z,1.0,1,9.0,1994-10-07T00:00:00.000Z,0,,,,X,0,,,,2.0,0,10.0,1.0,1960-01-01T00:00:00.000Z,1993-10-01T00:00:00.000Z,2906.0,0.0,0.0,0,7,0,100000.0,25000.0,1994,0,4428.0,0,2906.0,100000.0,1,N,,0,0.0,,0.0,,,,0.0,0,,,0.0,450026.0,,"BEAUFORT, CITY OF",97.0,,1.0,0.0,,A,0,0.0,,,0,SC,Currently Unavailable,29902.0,45013.0,45013000000.0,450130000000.0,32.4,-80.7,c1cf6e00-1e6d-4493-93fc-eb430ef15495
4,0,2019-09-19T13:45:58.425Z,,1,8.0,1996-03-11T00:00:00.000Z,0,,,,X,0,,,,1.0,0,,1.0,1988-01-01T00:00:00.000Z,1996-01-11T00:00:00.000Z,3875.53,1545.0,0.0,1,7,0,100000.0,25000.0,1996,1,5252.0,0,3875.53,100000.0,1,N,3115.0,0,1545.0,0.0,0.0,,,,0.0,0,,,0.0,125092.0,,BREVARD COUNTY *,,,1.0,0.0,0.0,A,0,0.0,,1.0,0,FL,Currently Unavailable,32940.0,12009.0,12009060000.0,120090600000.0,28.3,-80.7,cad8334c-13f2-4837-bdcf-e09591197ff7


In [43]:
## Load in state list
states = pd.read_excel(FILEPATH_FILTERED_STATE_XLS)

In [44]:
states.head()

Unnamed: 0,wkt_geom,fid,STATEFP,STATENS,AFFGEOID,GEOID,STUSPS,NAME,LSAD,ALAND,AWATER
0,MultiPolygon (((-88.05337500000000261 30.50698...,8,1,1779775,0400000US01,1,AL,Alabama,0,131174048583,4593327154
1,MultiPolygon (((-72.76142699999999763 41.24233...,18,9,1779780,0400000US09,9,CT,Connecticut,0,12542497068,1815617571
2,MultiPolygon (((-75.56554599999999766 39.51484...,16,10,1779781,0400000US10,10,DE,Delaware,0,5045925646,1399985648
3,MultiPolygon (((-80.17627600000000143 25.52505...,6,12,294478,0400000US12,12,FL,Florida,0,138949136250,31361101223
4,MultiPolygon (((-81.27939099999998973 31.30791...,9,13,1705317,0400000US13,13,GA,Georgia,0,149482048342,4422936154


In [49]:
states_arr = states["STUSPS"].values

In [50]:
claims_in_coastal_states = claims[claims["state"].isin(states_arr)]

In [54]:
print(f'{len(claims_in_coastal_states)}/{len(claims)}')


2039059/2584242


In [28]:
claims_in_coastal_states[['policyCount', 'countyCode', 'communityRatingSystemDiscount',
       'dateOfLoss', 'elevatedBuildingIndicator',
       'elevationCertificateIndicator', 'elevationDifference', 'censusTract',
       'floodZone', 'houseWorship', 'latitude', 'locationOfContents',
       'longitude', 'lowestAdjacentGrade', 'lowestFloorElevation',
       'numberOfFloorsInTheInsuredBuilding', 'nonProfitIndicator',
       'obstructionType', 'occupancyType', 'originalConstructionDate',
       'originalNBDate', 'amountPaidOnBuildingClaim',
       'amountPaidOnContentsClaim',
       'amountPaidOnIncreasedCostOfComplianceClaim',
       'postFIRMConstructionIndicator', 'rateMethod',
       'smallBusinessIndicatorBuilding', 'state',
       'totalBuildingInsuranceCoverage', 'totalContentsInsuranceCoverage',
       'yearOfLoss', 'reportedZipcode', 'primaryResidence', 'id']]

Unnamed: 0,agricultureStructureIndicator,asOfDate,baseFloodElevation,basementEnclosureCrawlspace,reportedCity,condominiumIndicator,policyCount,countyCode,communityRatingSystemDiscount,dateOfLoss,...,postFIRMConstructionIndicator,rateMethod,smallBusinessIndicatorBuilding,state,totalBuildingInsuranceCoverage,totalContentsInsuranceCoverage,yearOfLoss,reportedZipcode,primaryResidence,id
0,0.0,2021-07-24T00:00:00.000Z,,1.0,Temporarily Unavailable,N,1.0,24033.0,5.0,2007-01-01T00:00:00.000Z,...,0.0,7,0.0,MD,100000.0,40000.0,2007,20745.0,1.0,babe8abc-eae6-49f4-a5d6-b892b1087a45
2,0.0,2021-11-20T00:00:00.000Z,50.0,,Temporarily Unavailable,N,1.0,48201.0,5.0,2001-06-09T00:00:00.000Z,...,1.0,1,0.0,TX,25000.0,25000.0,2001,77025.0,1.0,547d731b-b35f-4d60-a903-15cc8a4d12df
3,0.0,2021-11-20T00:00:00.000Z,9.0,,Temporarily Unavailable,N,1.0,12103.0,5.0,1996-10-07T00:00:00.000Z,...,0.0,1,0.0,FL,150000.0,35000.0,1996,33702.0,1.0,18e7a2e8-5b52-4cdd-82a2-9112dbbc953b
4,0.0,2021-11-20T00:00:00.000Z,,,Temporarily Unavailable,N,1.0,24029.0,,2003-09-19T00:00:00.000Z,...,0.0,1,0.0,MD,75000.0,10000.0,2003,21620.0,0.0,40c5393a-03a4-41ed-8fb3-4b3de1e28534
5,0.0,2021-11-20T00:00:00.000Z,,,Temporarily Unavailable,N,1.0,48201.0,10.0,2001-06-05T00:00:00.000Z,...,0.0,7,0.0,TX,200000.0,50000.0,2001,77401.0,1.0,6e91e76c-d1eb-4bc1-809f-d7f4583968c9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2570083,0.0,2022-04-06T00:00:00.000Z,8.0,,Temporarily Unavailable,N,1.0,37055.0,6.0,2021-11-06T00:00:00.000Z,...,1.0,1,0.0,NC,250000.0,70400.0,2021,27968.0,0.0,eb3fb78f-ee47-484f-a540-ffc76a7c12f3
2570084,0.0,2022-04-06T00:00:00.000Z,8.0,,Temporarily Unavailable,N,1.0,37055.0,6.0,2021-11-06T00:00:00.000Z,...,1.0,1,0.0,NC,250000.0,70400.0,2021,27968.0,0.0,cdd80c93-31a6-44c7-b173-4d8e27cf1d03
2570086,0.0,2022-04-02T00:00:00.000Z,,,Temporarily Unavailable,N,1.0,37191.0,8.0,2018-09-18T00:00:00.000Z,...,0.0,1,0.0,NC,177800.0,31500.0,2018,27530.0,1.0,70d56b85-759a-4642-883e-815a131b8169
2570087,0.0,2022-04-02T00:00:00.000Z,14.0,2.0,Temporarily Unavailable,N,1.0,37141.0,7.0,2018-09-14T00:00:00.000Z,...,1.0,1,0.0,NC,250000.0,100000.0,2018,28445.0,0.0,60ba5718-70bd-4d44-b7fa-af66ab67d02f


In [26]:
claims_in_coastal_states.groupby(["state", "yearOfLoss"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,agricultureStructureIndicator,asOfDate,baseFloodElevation,basementEnclosureCrawlspace,reportedCity,condominiumIndicator,policyCount,countyCode,communityRatingSystemDiscount,dateOfLoss,...,amountPaidOnContentsClaim,amountPaidOnIncreasedCostOfComplianceClaim,postFIRMConstructionIndicator,rateMethod,smallBusinessIndicatorBuilding,totalBuildingInsuranceCoverage,totalContentsInsuranceCoverage,reportedZipcode,primaryResidence,id
state,yearOfLoss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AL,1974,0.0,2021-07-24T00:00:00.000Z,0.0,4.0,Temporarily Unavailable,0,1.0,1097.0,10.0,1974-09-07T00:00:00.000Z,...,0.00,0.0,0.0,0,0.0,10000.0,0.0,36607.0,0.0,e40e9ff1-05ad-4c98-8d09-3d0be31bc963
AL,1975,0.0,2021-09-29T00:00:00.000Z,0.0,0.0,Temporarily Unavailable,0,1.0,1053.0,0.0,1975-04-10T00:00:00.000Z,...,5000.00,0.0,0.0,0,0.0,0.0,10000.0,36426.0,0.0,7e73bc45-36b9-4ad4-b9d6-cc9c0e5ce244
AL,1977,0.0,2021-09-29T00:00:00.000Z2021-09-29T00:00:00.00...,64.0,32.0,Temporarily UnavailableTemporarily Unavailable...,NNNNN,18.0,19514.0,76.0,1977-09-07T00:00:00.000Z1977-09-07T00:00:00.00...,...,19725.31,0.0,0.0,0,0.0,409100.0,129500.0,611509.0,0.0,7e346108-f109-4064-8398-26b2a26ee3c3d3394929-d...
AL,1978,0.0,2021-09-01T00:00:00.000Z2021-09-29T00:00:00.00...,243.0,187.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,150.0,144182.0,714.0,1978-05-08T00:00:00.000Z1978-06-03T00:00:00.00...,...,77937.79,0.0,6.0,11111,0.0,3622700.0,1023800.0,4910430.0,0.0,6020d820-9f80-4895-93fd-a3d2df81ee8b5a80f831-8...
AL,1979,0.0,2021-09-01T00:00:00.000Z2021-09-29T00:00:00.00...,17104.0,248.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,3578.0,3545876.0,16879.0,1979-07-09T00:00:00.000Z1979-04-12T00:00:00.00...,...,8661796.20,0.0,404.0,1111111111111111111111111111111111111111111111...,0.0,109465800.0,25922700.0,107904776.0,0.0,eed78993-d18d-4405-97bb-4d5c3052c57bcc0c5665-d...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VA,2018,0.0,2021-07-24T00:00:00.000Z2021-09-01T00:00:00.00...,58883.0,740.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,892.0,44260177.0,3200.0,2018-08-03T00:00:00.000Z2018-07-21T00:00:00.00...,...,1984116.84,20800.0,256.0,7117B1127SR7777717BB77B171777777711RRB1771112B...,13.0,163185400.0,44665200.0,20120639.0,610.0,5570bd7d-f271-41c7-ba10-0154d387f31cbdaefd57-4...
VA,2019,0.0,2022-04-16T00:00:00.000Z2021-09-01T00:00:00.00...,11894.0,449.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNUNNNNNNNNNNNN...,486.0,24476849.0,2638.0,2019-07-08T00:00:00.000Z2019-06-28T00:00:00.00...,...,836206.19,60000.0,133.0,RR177717177711771WRRRWW212777771R77777B77R7177...,16.0,105791900.0,30774500.0,10812544.0,366.0,aafd9bb9-8e09-4119-b832-e3dfb60b929009b46ebd-5...
VA,2020,0.0,2021-10-13T00:00:00.000Z2021-08-03T00:00:00.00...,44237.0,855.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...,875.0,44862852.0,3339.0,2020-08-06T00:00:00.000Z2020-05-20T00:00:00.00...,...,1141674.76,0.0,214.0,71117711RRR111771WWWW171777111B1BR77777RRR1111...,18.0,169756400.0,49558000.0,20390924.0,630.0,92418b00-a2fe-4215-b51f-114893f15aa63b7c5bfe-4...
VA,2021,0.0,2021-12-08T00:00:00.000Z2021-12-01T00:00:00.00...,10430.0,276.0,Temporarily UnavailableTemporarily Unavailable...,NNNNNNNNNNNNNNNNNNUNNNNNNNNNNNUNNNNNNNNNNNNNNN...,329.0,15655953.0,1318.0,2021-08-15T00:00:00.000Z2021-02-16T00:00:00.00...,...,194684.81,0.0,81.0,7W711111177777177B77711WWRB7R11177717777777717...,1.0,61155700.0,17975800.0,7013178.0,240.0,935d9c40-1e91-4216-b44b-46e803fa2ab8455939ed-a...
