In [1]:
# import packages
import pandas as pd
import numpy as np

In [2]:
# Load the input policy table. countyCode is read as text so that codes such as
# '01001' keep their leading zero.
avg_tot_vol_ins_pur = pd.read_csv(
    '../data/input/avg_tot_vol_ins_pur.csv',
    dtype={'countyCode': str},
)

In [3]:
# Load the county characteristics table (read with the parquet reader even though the
# file name ends in '.gzip').
county_chars_all = pd.read_parquet('../data/output/county_chars_all.gzip')

In [4]:
# Inspect column names, dtypes and non-null counts of the county characteristics table.
county_chars_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3118 entries, 0 to 3117
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   countyCode            3118 non-null   object 
 1   countyName            3118 non-null   object 
 2   stateName             3118 non-null   object 
 3   pop_2021              3118 non-null   float64
 4   no_hh_2021            3118 non-null   float64
 5   med_house_price_2021  3118 non-null   float64
 6   med_HH_inc_2020       3118 non-null   float64
 7   long                  3118 non-null   float64
 8   lat                   3118 non-null   float64
dtypes: float64(6), object(3)
memory usage: 219.4+ KB


In [5]:
# Subset the policy table to the columns used for the adoption-rate map analysis.
adopt_rate = avg_tot_vol_ins_pur.loc[
    :,
    ['countyCode', 'propertyState', 'year', 'buildingType', 'tot_pol_cnt'],
]

In [6]:
# Keep only 2021 rows whose building type mentions 'Residential'.
# regex=False makes the match a literal substring test, and na=False excludes rows with a
# missing buildingType explicitly instead of relying on how NaN propagates through `&`.
# NOTE(review): this is a substring match, so any label that merely contains
# 'Residential' is retained, whereas the ins_rate subset later in this notebook uses an
# exact `== 'Residential'` comparison - confirm which of the two is intended.
adopt_rate = adopt_rate.loc[
    adopt_rate['year'].eq(2021)
    & adopt_rate['buildingType'].str.contains('Residential', regex=False, na=False)
].reset_index(drop=True)

In [7]:
# Total policy count per county; the grouping key is kept as a regular column.
gb_adopt = adopt_rate.groupby('countyCode', as_index=False)['tot_pol_cnt'].sum()

In [8]:
# Attach county name, state and household count to the per-county policy totals.
# The inner join keeps only counties present in both tables.
gb_adopt2 = gb_adopt.merge(
    county_chars_all[['countyCode', 'countyName', 'stateName', 'no_hh_2021']],
    how='inner',
    on='countyCode',
)

In [9]:
# Reorder the columns for output. An explicit copy is taken because a new column is
# assigned onto this frame in the following cell; copying makes it unambiguous that the
# assignment targets an independent frame rather than a selection of the merge result.
gb_adopt2 = gb_adopt2[
    ['countyCode', 'countyName', 'stateName', 'tot_pol_cnt', 'no_hh_2021']
].copy()

In [10]:
# Policies per household, expressed as a percentage rounded to one decimal place.
gb_adopt2['adopt_rate'] = (
    gb_adopt2['tot_pol_cnt'].div(gb_adopt2['no_hh_2021']).mul(100).round(1)
)

In [11]:
# Display the county-level adoption table.
gb_adopt2

Unnamed: 0,countyCode,countyName,stateName,tot_pol_cnt,no_hh_2021,adopt_rate
0,01001,Autauga County,AL,187,18880.191693,1.0
1,01003,Baldwin County,AL,20138,76451.757188,26.3
2,01005,Barbour County,AL,31,7975.718850,0.4
3,01007,Bibb County,AL,12,7181.150160,0.2
4,01009,Blount County,AL,28,18862.939297,0.1
...,...,...,...,...,...,...
2886,56037,Sweetwater County,WY,10,13295.207668,0.1
2887,56039,Teton County,WY,405,7531.948882,5.4
2888,56041,Uinta County,WY,32,6592.651757,0.5
2889,56043,Washakie County,WY,5,2461.661342,0.2


In [13]:
# Summary statistics for the numeric columns of the adoption table.
# NOTE(review): the recorded output shows a maximum adopt_rate above 100 - confirm
# whether more policies than households in a county is expected.
gb_adopt2.describe()

Unnamed: 0,tot_pol_cnt,no_hh_2021,adopt_rate
count,2891.0,2891.0,2891.0
mean,1301.600484,36309.39,2.171844
std,9943.715026,110467.0,7.497022
min,1.0,194.2492,0.0
25%,14.0,4266.454,0.2
50%,53.0,9235.144,0.5
75%,212.0,24743.13,1.0
max,309539.0,3140429.0,123.8


In [12]:
# Persist the county-level adoption table.
# NOTE(review): no `index=` argument is passed, so pandas' default applies and the row
# index is written as an extra leading unnamed column - confirm downstream readers
# expect that column.
gb_adopt2.to_csv('../data/output/adopt_vol_ins_county.csv')

In [13]:
# Subset the policy table to the columns used for the over-/under-insurance map analysis.
ins_rate = avg_tot_vol_ins_pur.loc[
    :,
    [
        'countyCode',
        'propertyState',
        'year',
        'buildingType',
        'sfha_flood_risk',
        'bld_ins_per_pol',
        'con_ins_per_pol',
    ],
]

In [14]:
# Restrict to 2021 rows whose building type is exactly 'Residential'.
# NOTE(review): the adopt_rate subset earlier in this notebook uses a substring match on
# 'Residential' instead of equality - confirm the two filters are meant to differ.
ins_rate = ins_rate.loc[
    lambda df: df['year'].eq(2021) & df['buildingType'].eq('Residential')
].reset_index(drop=True)

In [15]:
# Mean building and contents coverage per policy for each (county, SFHA flag) pair.
gb_ins = (
    ins_rate
    .groupby(['countyCode', 'sfha_flood_risk'], as_index=False)[
        ['bld_ins_per_pol', 'con_ins_per_pol']
    ]
    .mean()
)

In [16]:
# Attach county name, state and median house price to the per-county coverage means.
# The inner join keeps only counties present in both tables.
gb_ins2 = gb_ins.merge(
    county_chars_all[['countyCode', 'countyName', 'stateName', 'med_house_price_2021']],
    how='inner',
    on='countyCode',
)

In [17]:
# analysis on voluntary over-insurance vs. under-insurance rates

def insurance(bld_ins_per_pol, con_ins_per_pol, sfha_flood_risk, med_house_price_2021,
              max_bld_cov=250000, max_con_cov=100000):
    """Return purchased coverage as a percentage of the insurable amount.

    Parameters
    ----------
    bld_ins_per_pol : number
        Building coverage per policy.
    con_ins_per_pol : number
        Contents coverage per policy.
    sfha_flood_risk : number
        SFHA flag; a value of 1 selects the contents-only calculation.
    med_house_price_2021 : number
        Median house price, used as the insurable building value up to ``max_bld_cov``.
    max_bld_cov : number, optional
        Cap on the insurable building value (default 250000, the per-policy building
        maximum for single residential homes used throughout this notebook).
    max_con_cov : number, optional
        Insurable contents value (default 100000, the per-policy contents maximum used
        throughout this notebook).

    Returns
    -------
    float
        Coverage ratio multiplied by 100. Values above 100 mean the purchased coverage
        exceeds the insurable amount used as the denominator.
    """
    if sfha_flood_risk == 1:
        # sfha=1 means building insurance is mandatory, so only the non-mandatory
        # contents coverage is examined.
        coverage_ratio = con_ins_per_pol / max_con_cov
    else:
        # Insurable building value: the median house price, capped at the building maximum.
        # The comparison is written so that a NaN price falls back to the cap.
        if med_house_price_2021 < max_bld_cov:
            insurable_bld = med_house_price_2021
        else:
            insurable_bld = max_bld_cov

        if con_ins_per_pol > 0:
            # sfha=0 with contents coverage: voluntary purchase of both building and
            # contents insurance, compared against the combined insurable amount.
            coverage_ratio = (bld_ins_per_pol + con_ins_per_pol) / (insurable_bld + max_con_cov)
        else:
            # No (or missing) contents coverage: building coverage only.
            coverage_ratio = bld_ins_per_pol / insurable_bld
    return coverage_ratio * 100
  
# Compute the coverage percentage row by row from the per-county coverage means.
gb_ins2['ins_rate'] = gb_ins2.apply(
    lambda row: insurance(
        row['bld_ins_per_pol'],
        row['con_ins_per_pol'],
        row['sfha_flood_risk'],
        row['med_house_price_2021'],
    ),
    axis=1,
)

In [18]:
# Final aggregation for the map: one row per county, averaging ins_rate over that
# county's rows and rounding to one decimal place.
gb_ins3 = (
    gb_ins2
    .groupby(['countyCode', 'countyName', 'stateName'])['ins_rate']
    .mean()
    .round(1)
    .reset_index()
)

In [19]:
# Summary statistics for the county-level ins_rate.
# NOTE(review): the recorded output shows a maximum above 100, i.e. coverage exceeding
# the denominator used in insurance() - confirm this is the intended reading.
gb_ins3.describe()

Unnamed: 0,ins_rate
count,2889.0
mean,70.170024
std,24.860624
min,1.4
25%,55.5
50%,66.4
75%,79.5
max,223.6


In [20]:
# Persist the county-level ins_rate table.
# NOTE(review): no `index=` argument is passed, so pandas' default applies and the row
# index is written as an extra leading unnamed column - confirm downstream readers
# expect that column.
gb_ins3.to_csv('../data/output/ins_rate_vol_ins_county.csv')