## Hypothesis: Insurance take-up has a positive effect on preparedness

In [1]:
# standard library
import os
import warnings
# analytics
import pandas as pd 
import numpy as np
import scipy.stats as stats
import statsmodels.formula.api as smf
#spatial 
import osmnx as ox
import geopandas as gpd
import contextily as cx
# plotting 
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

In [2]:
# import data
# The CSV location can be overridden with the FLOOD_DATA_PATH environment
# variable so the notebook also runs on machines other than the author's;
# the original absolute path is kept as the fallback, so default behaviour
# is unchanged.
path = os.environ.get(
    'FLOOD_DATA_PATH',
    '/Users/philip/Documents/ESE/ESE_thesis/flood_experience/data/export/clean_n.csv',
)
df = pd.read_csv(path)
df.head()

Unnamed: 0,id,state,zipcode,geographic_division,census_region,county,experience,supplies,insured,involved,...,sex,education,race,homeownership,income,rentmortgage,rurality,hazard_weight,geometry,zip_count
0,1,Washington,98033,Pacific,West,King,0.0,0,0,1,...,0.0,4,White,1,87500,1250.0,1,0.255097,POLYGON ((-13608075.601186747 6055357.24645823...,1
1,2,Florida,33950,South Atlantic,South,Charlotte,1.0,0,1,1,...,1.0,5,White,1,175000,3000.0,1,2.836145,POLYGON ((-9139287.781401768 3112400.961508024...,1
2,3,New Jersey,7031,Middle Atlantic,Northeast,Bergen,1.0,0,0,1,...,1.0,1,White,0,47500,1250.0,1,1.165971,POLYGON ((-8254065.171859486 4980899.960122439...,1
3,5,Pennsylvania,19148,Middle Atlantic,Northeast,Philadelphia,1.0,0,1,0,...,0.0,4,White,1,125000,1250.0,1,0.62489,POLYGON ((-8368433.592186097 4851218.465580212...,1
4,7,Florida,32976,South Atlantic,South,Brevard,0.0,1,1,0,...,0.0,2,White,1,20000,250.0,1,0.695075,POLYGON ((-8971557.027329512 3234931.888104651...,1


#### We will go through these outcome variables one by one

structural adaptation: 
- made_safer

non-structural adaptation: 
- learned_routes
- supplies
- involved
- made_plan
- practiced_drills
- alerts
- family_communication


In [3]:
# define the regression model 
def model(function, data):
    """Fit a logit model and report odds ratios and marginal effects.

    Prints the fitted model's summary and BIC. When the fitted model has both
    an ``Intercept`` and an ``insured`` term, it also prints the predicted
    probability of the outcome for ``insured = 0`` and ``insured = 1``
    (inverse logit of the linear predictor with every other regressor at
    zero). Formulas lacking either term skip that printout instead of raising
    ``KeyError``.

    Parameters
    ----------
    function : str
        Model formula passed to ``smf.logit``, e.g. ``'made_safer ~ insured'``.
    data : pandas.DataFrame
        Data passed to ``smf.logit``.

    Returns
    -------
    odds_ratios : pandas.DataFrame
        One row per model term with columns ``Variable``, ``OR``,
        ``Lower CI`` and ``Upper CI`` (exponentiated coefficients and
        exponentiated bounds from ``conf_int()``).
    marginal_effects
        Whatever ``get_margeff().summary()`` returns for the fitted model.
    """
    # `result` rather than `model`: the local must not shadow this function's name
    result = smf.logit(function, data=data).fit()
    print(result.summary()) # print model summary
    print('BIC:', result.bic) # print BIC additionally to defaults model fit parameters

    params = result.params
    # predicted probabilities (not odds ratios) for insured = 0 and insured = 1;
    # only meaningful when both terms are part of the fitted model
    if 'Intercept' in params.index and 'insured' in params.index:
        baseline = params['Intercept']
        print('likelihood of adaptation if household is not insured = ', 1 / (1 + np.exp(-baseline)))
        print('likelihood of adaptation if household is insured = ', 1 / (1 + np.exp(-(baseline + params['insured']))))

    # odds ratios: exponentiate coefficients and their confidence bounds
    conf = result.conf_int()
    odds_ratios = (
        pd.DataFrame({
            "OR": np.exp(params),
            "Lower CI": np.exp(conf[0]),
            "Upper CI": np.exp(conf[1])
        })
        .rename_axis('Variable')
        .reset_index()
    )

    # marginal effects with the library defaults
    marginal_effects = result.get_margeff().summary()

    return odds_ratios, marginal_effects
    

In [4]:
def probit(function, data):
    """Fit a probit model, print its summary and BIC, return marginal effects.

    Parameters
    ----------
    function : str
        Model formula passed to ``smf.probit``.
    data : pandas.DataFrame
        Data passed to ``smf.probit``.

    Returns
    -------
    The object returned by ``get_margeff().summary()`` on the fitted model.
    """
    probit_fit = smf.probit(function, data=data).fit()
    # report the fit before computing marginal effects
    print(probit_fit.summary())
    print('BIC=', probit_fit.bic)
    return probit_fit.get_margeff().summary()

In [5]:
# Logit of structural adaptation (made_safer) on insurance take-up
odds, marg_eff = model(function='made_safer ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.435705
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             made_safer   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.08826
Time:                        18:03:35   Log-Likelihood:                -205.65
converged:                       True   LL-Null:                       -225.56
Covariance Type:            nonrobust   LLR p-value:                 2.789e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9802      0.159    -12.452      0.000      -2.292      -1.669
insured        1.6404      0.

In [6]:
# Odds ratios with confidence bounds returned by the made_safer logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.138037,0.101072,0.188521
1,insured,5.157062,3.116338,8.534147


In [7]:
# Marginal-effects summary returned by the made_safer logit call
marg_eff

0,1
Dep. Variable:,made_safer
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2227,0.031,7.249,0.0,0.162,0.283


In [8]:
# Logit of learned_routes on insurance take-up
odds, marg_eff = model(function='learned_routes ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.452746
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:         learned_routes   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1541
Time:                        18:03:35   Log-Likelihood:                -213.70
converged:                       True   LL-Null:                       -252.64
Covariance Type:            nonrobust   LLR p-value:                 1.092e-18
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.8827      0.153    -12.278      0.000      -2.183      -1.582
insured        2.1820      0.

In [9]:
# Odds ratios with confidence bounds returned by the learned_routes logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.152174,0.112672,0.205525
1,insured,8.863787,5.398363,14.553806


In [10]:
# Marginal-effects summary returned by the learned_routes logit call
marg_eff

0,1
Dep. Variable:,learned_routes
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.3108,0.025,12.307,0.0,0.261,0.36


In [11]:
# Logit of supplies on insurance take-up
odds, marg_eff = model(function='supplies ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.488242
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               supplies   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.07414
Time:                        18:03:35   Log-Likelihood:                -230.45
converged:                       True   LL-Null:                       -248.90
Covariance Type:            nonrobust   LLR p-value:                 1.240e-09
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.6858      0.143    -11.792      0.000      -1.966      -1.406
insured        1.5071      0.

In [12]:
# Odds ratios with confidence bounds returned by the supplies logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.185304,0.140023,0.245227
1,insured,4.51348,2.788629,7.305201


In [13]:
# Marginal-effects summary returned by the supplies logit call
marg_eff

0,1
Dep. Variable:,supplies
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2362,0.033,7.053,0.0,0.171,0.302


In [14]:
# Logit of involved on insurance take-up
odds, marg_eff = model(function='involved ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.269919
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:               involved   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1008
Time:                        18:03:35   Log-Likelihood:                -127.40
converged:                       True   LL-Null:                       -141.68
Covariance Type:            nonrobust   LLR p-value:                 9.061e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9761      0.242    -12.316      0.000      -3.450      -2.502
insured        1.8103      0.

In [15]:
# Odds ratios with confidence bounds returned by the involved logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.050992,0.031755,0.08188
1,insured,6.112554,3.162525,11.814393


In [16]:
# Marginal-effects summary returned by the involved logit call
marg_eff

0,1
Dep. Variable:,involved
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1359,0.026,5.141,0.0,0.084,0.188


In [17]:
# Logit of made_plan on insurance take-up
odds, marg_eff = model(function='made_plan ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.499229
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:              made_plan   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.04845
Time:                        18:03:35   Log-Likelihood:                -235.64
converged:                       True   LL-Null:                       -247.63
Covariance Type:            nonrobust   LLR p-value:                 9.666e-07
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.6062      0.139    -11.542      0.000      -1.879      -1.333
insured        1.2254      0.

In [18]:
# Odds ratios with confidence bounds returned by the made_plan logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.200647,0.15275,0.263564
1,insured,3.405645,2.103615,5.513566


In [19]:
# Marginal-effects summary returned by the made_plan logit call
marg_eff

0,1
Dep. Variable:,made_plan
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1973,0.036,5.416,0.0,0.126,0.269


In [20]:
# Logit of practiced_drills on insurance take-up
odds, marg_eff = model(function='practiced_drills ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.247886
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:       practiced_drills   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1147
Time:                        18:03:35   Log-Likelihood:                -117.00
converged:                       True   LL-Null:                       -132.16
Covariance Type:            nonrobust   LLR p-value:                 3.655e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.1669      0.264    -12.015      0.000      -3.683      -2.650
insured        1.9457      0.

In [21]:
# Odds ratios with confidence bounds returned by the practiced_drills logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.042135,0.025135,0.070632
1,insured,6.998291,3.492336,14.023873


In [22]:
# Marginal-effects summary returned by the practiced_drills logit call
marg_eff

0,1
Dep. Variable:,practiced_drills
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1326,0.026,5.123,0.0,0.082,0.183


In [23]:
# Logit of alerts on insurance take-up
odds, marg_eff = model(function='alerts ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.425405
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 alerts   No. Observations:                  472
Model:                          Logit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1213
Time:                        18:03:35   Log-Likelihood:                -200.79
converged:                       True   LL-Null:                       -228.51
Covariance Type:            nonrobust   LLR p-value:                 9.664e-14
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0584      0.164    -12.562      0.000      -2.380      -1.737
insured        1.9196      0.

In [24]:
# Odds ratios with confidence bounds returned by the alerts logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.12766,0.092593,0.176006
1,insured,6.817901,4.110631,11.308184


In [25]:
# Marginal-effects summary returned by the alerts logit call
marg_eff

0,1
Dep. Variable:,alerts
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2537,0.028,9.098,0.0,0.199,0.308


In [26]:
# Logit of family_communication on insurance take-up
odds, marg_eff = model(function='family_communication ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.287127
         Iterations 7
                            Logit Regression Results                            
Dep. Variable:     family_communication   No. Observations:                  472
Model:                            Logit   Df Residuals:                      470
Method:                             MLE   Df Model:                            1
Date:                  Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1267
Time:                          18:03:35   Log-Likelihood:                -135.52
converged:                         True   LL-Null:                       -155.19
Covariance Type:              nonrobust   LLR p-value:                 3.578e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.9192      0.236    -12.394      0.000      -3.381      -2.458
insured      

In [27]:
# Odds ratios with confidence bounds returned by the family_communication logit call
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.053977,0.03402,0.085643
1,insured,7.461988,3.967858,14.033081


In [28]:
# Marginal-effects summary returned by the family_communication logit call
marg_eff

0,1
Dep. Variable:,family_communication
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1648,0.026,6.229,0.0,0.113,0.217


## Let's repeat with probit

In [29]:
# Probit of made_safer on insurance take-up; the cell displays the returned marginal effects
probit(function='made_safer ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.435705
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:             made_safer   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.08826
Time:                        18:03:52   Log-Likelihood:                -205.65
converged:                       True   LL-Null:                       -225.56
Covariance Type:            nonrobust   LLR p-value:                 2.789e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.1685      0.084    -13.896      0.000      -1.333      -1.004
insured        0.9560      0.

0,1
Dep. Variable:,made_safer
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2312,0.033,7.007,0.0,0.167,0.296


In [30]:
# Probit of learned_routes on insurance take-up; the cell displays the returned marginal effects
probit(function='learned_routes ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.452746
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:         learned_routes   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1541
Time:                        18:04:14   Log-Likelihood:                -213.70
converged:                       True   LL-Null:                       -252.64
Covariance Type:            nonrobust   LLR p-value:                 1.092e-18
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.1166      0.082    -13.586      0.000      -1.278      -0.956
insured        1.3039      0.

0,1
Dep. Variable:,learned_routes
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.3286,0.029,11.305,0.0,0.272,0.386


In [31]:
# Probit of supplies on insurance take-up; the cell displays the returned marginal effects
probit(function='supplies ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.488242
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:               supplies   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.07414
Time:                        18:04:19   Log-Likelihood:                -230.45
converged:                       True   LL-Null:                       -248.90
Covariance Type:            nonrobust   LLR p-value:                 1.240e-09
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.0096      0.079    -12.832      0.000      -1.164      -0.855
insured        0.8977      0.

0,1
Dep. Variable:,supplies
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2453,0.036,6.789,0.0,0.174,0.316


In [32]:
# Probit of involved on insurance take-up; the cell displays the returned marginal effects
probit(function='involved ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.269919
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:               involved   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1008
Time:                        18:04:24   Log-Likelihood:                -127.40
converged:                       True   LL-Null:                       -141.68
Covariance Type:            nonrobust   LLR p-value:                 9.061e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.6594      0.111    -14.978      0.000      -1.877      -1.442
insured        0.9454      0.

0,1
Dep. Variable:,involved
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1374,0.026,5.258,0.0,0.086,0.189


In [33]:
# Probit of made_plan on insurance take-up; the cell displays the returned marginal effects
probit(function='made_plan ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.499229
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:              made_plan   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                 0.04845
Time:                        18:04:30   Log-Likelihood:                -235.64
converged:                       True   LL-Null:                       -247.63
Covariance Type:            nonrobust   LLR p-value:                 9.666e-07
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.9656      0.077    -12.478      0.000      -1.117      -0.814
insured        0.7276      0.

0,1
Dep. Variable:,made_plan
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2035,0.039,5.267,0.0,0.128,0.279


In [34]:
# Probit of practiced_drills on insurance take-up; the cell displays the returned marginal effects
probit(function='practiced_drills ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.247886
         Iterations 6
                          Probit Regression Results                           
Dep. Variable:       practiced_drills   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1147
Time:                        18:04:35   Log-Likelihood:                -117.00
converged:                       True   LL-Null:                       -132.16
Covariance Type:            nonrobust   LLR p-value:                 3.655e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.7457      0.118    -14.839      0.000      -1.976      -1.515
insured        0.9993      0.

0,1
Dep. Variable:,practiced_drills
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1329,0.025,5.296,0.0,0.084,0.182


In [35]:
# Probit of alerts on insurance take-up; the cell displays the returned marginal effects
probit(function='alerts ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.425405
         Iterations 5
                          Probit Regression Results                           
Dep. Variable:                 alerts   No. Observations:                  472
Model:                         Probit   Df Residuals:                      470
Method:                           MLE   Df Model:                            1
Date:                Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1213
Time:                        18:04:42   Log-Likelihood:                -200.79
converged:                       True   LL-Null:                       -228.51
Covariance Type:            nonrobust   LLR p-value:                 9.664e-14
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.2096      0.086    -14.115      0.000      -1.378      -1.042
insured        1.1227      0.

0,1
Dep. Variable:,alerts
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.2649,0.031,8.651,0.0,0.205,0.325


In [36]:
# Probit of family_communication on insurance take-up; the cell displays the returned marginal effects
probit(function='family_communication ~ insured', data=df)

Optimization terminated successfully.
         Current function value: 0.287127
         Iterations 6
                           Probit Regression Results                            
Dep. Variable:     family_communication   No. Observations:                  472
Model:                           Probit   Df Residuals:                      470
Method:                             MLE   Df Model:                            1
Date:                  Tue, 08 Apr 2025   Pseudo R-squ.:                  0.1267
Time:                          18:04:48   Log-Likelihood:                -135.52
converged:                         True   LL-Null:                       -155.19
Covariance Type:              nonrobust   LLR p-value:                 3.578e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.6332      0.109    -15.002      0.000      -1.847      -1.420
insured      

0,1
Dep. Variable:,family_communication
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
insured,0.1666,0.026,6.318,0.0,0.115,0.218
