## Hypothesis: Flood risk awareness has a positive effect on preparedness

In [None]:
# analytics
import pandas as pd 
import numpy as np
import scipy.stats as stats
import statsmodels.formula.api as smf
#spatial 
import osmnx as ox
import geopandas as gpd
import contextily as cx
# plotting 
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
#settings
import warnings

In [2]:
# import data
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# re-runs where the file exists at exactly this location. Consider a path relative
# to a configurable data directory — the right relative location is not visible here.
path = '/Users/philip/Documents/ESE/ESE_thesis/flood_experience/data/export/clean_k.csv'
df = pd.read_csv(path)
# show the first rows as a quick sanity check of the loaded table
df.head()

Unnamed: 0,id,state,zipcode,geographic_division,census_region,county,awareness,perception,experience,floodzone,...,sex,education,race,homeownership,income,rentmortgage,rurality,hazard_weight,geometry,zip_count
0,1,Washington,98033,Pacific,West,King,0.0,1.0,0.0,0.0,...,0.0,4,White,1,87500,1250.0,1,0.255097,POLYGON ((-13608075.601186747 6055357.24645823...,1
1,2,Florida,33950,South Atlantic,South,Charlotte,1.0,1.0,1.0,1.0,...,1.0,5,White,1,175000,3000.0,1,2.836145,POLYGON ((-9139287.781401768 3112400.961508024...,1
2,3,New Jersey,7031,Middle Atlantic,Northeast,Bergen,0.0,1.0,1.0,0.0,...,1.0,1,White,0,47500,1250.0,1,1.165971,POLYGON ((-8254065.171859486 4980899.960122439...,1
3,5,Pennsylvania,19148,Middle Atlantic,Northeast,Philadelphia,0.0,1.0,1.0,0.0,...,0.0,4,White,1,125000,1250.0,1,0.62489,POLYGON ((-8368433.592186097 4851218.465580212...,1
4,8,New York,11691,Middle Atlantic,Northeast,Queens,1.0,1.0,1.0,1.0,...,0.0,3,Black or African American,1,62500,1750.0,1,0.853837,POLYGON ((-8214183.071851451 4953007.332301349...,2


In [3]:
df.columns

Index(['id', 'state', 'zipcode', 'geographic_division', 'census_region',
       'county', 'awareness', 'perception', 'experience', 'floodzone',
       'efficacy', 'supplies', 'insured', 'involved', 'learned_routes',
       'made_plan', 'made_safer', 'planned_neighbors', 'practiced_drills',
       'documents', 'rainy_day', 'alerts', 'family_communication', 'none',
       'dont_know', 'age', 'sex', 'education', 'race', 'homeownership',
       'income', 'rentmortgage', 'rurality', 'hazard_weight', 'geometry',
       'zip_count'],
      dtype='object')

#### We will go through these outcome variables one by one

structural adaptation: 
- made_safer

non-structural adaptation: 
- insured
- learned_routes
- supplies
- involved
- made_plan
- practiced_drills
- alerts
- family_communication


In [4]:
# define the regression model
def model(function, data):
    """Fit a logistic regression and report fit statistics and effect sizes.

    Prints the statsmodels summary and the BIC. When the fitted model contains
    both an ``Intercept`` and an ``awareness`` term, it also prints the predicted
    probability of the outcome for awareness = 0 and awareness = 1.

    Parameters
    ----------
    function : str
        Patsy/statsmodels formula, e.g. ``'made_safer ~ awareness'``.
    data : pandas.DataFrame
        Table holding the columns referenced in ``function``.

    Returns
    -------
    odds_ratios : pandas.DataFrame
        One row per model term with the columns ``Variable``, ``OR``,
        ``Lower CI`` and ``Upper CI`` (exponentiated coefficients and
        exponentiated confidence bounds from ``conf_int()``).
    marginal_effects
        Summary object returned by ``get_margeff().summary()`` with the
        statsmodels default settings.
    """
    # named `result` so the fitted object does not shadow this function's own name
    result = smf.logit(function, data=data).fit()
    print(result.summary()) # print model summary
    print('BIC:', result.bic) # print BIC additionally to defaults model fit parameters

    coefs = result.params
    # Predicted probabilities (inverse logit of the linear predictor), NOT odds ratios.
    # Only printed when both terms exist, so formulas without an intercept or without
    # `awareness` no longer raise a KeyError after the model has been fitted.
    # NOTE(review): the "low"/"high" reading assumes awareness is coded 0/1 and that
    # any additional covariates are held at 0 — confirm before using multi-term formulas.
    if 'Intercept' in coefs.index and 'awareness' in coefs.index:
        print('likelihood of adaptation when there is low disaster awareness = ', 1 / (1 + np.exp(-coefs['Intercept'])))
        print('likelihood of adaptation when there is high disaster awareness = ', 1 / (1 + np.exp(-(coefs['Intercept'] + coefs['awareness']))))

    # compute odds ratios: exponentiate the coefficients and their confidence bounds
    conf = result.conf_int()
    odds_ratios = pd.DataFrame({
        "OR": np.exp(coefs),
        "Lower CI": np.exp(conf[0]),
        "Upper CI": np.exp(conf[1])
        })
    odds_ratios.index.name = 'Variable'
    # move the term names from the index into a regular 'Variable' column
    odds_ratios = odds_ratios.reset_index()
    # compute marginal effects
    marginal_effects = result.get_margeff().summary()

    return odds_ratios, marginal_effects
    

In [5]:
# structural adaptation outcome: made_safer regressed on awareness; the two returned tables are displayed in the next cells
odds, marg_eff = model('made_safer ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.465974
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             made_safer   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.04788
Time:                        17:22:44   Log-Likelihood:                -179.40
converged:                       True   LL-Null:                       -188.42
Covariance Type:            nonrobust   LLR p-value:                 2.158e-05
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0851      0.226     -9.224      0.000      -2.528      -1.642
awareness      1.1385      0.

In [6]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.124294,0.079804,0.193586
1,awareness,3.122117,1.807299,5.393471


In [7]:
marg_eff

0,1
Dep. Variable:,made_safer
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.1686,0.04,4.245,0.0,0.091,0.247


In [8]:
# non-structural adaptation outcome: insured regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('insured ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.514572
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                insured   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.06421
Time:                        17:22:44   Log-Likelihood:                -198.11
converged:                       True   LL-Null:                       -211.70
Covariance Type:            nonrobust   LLR p-value:                 1.848e-07
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.8952      0.210     -9.010      0.000      -2.307      -1.483
awareness      1.2974      0.

In [9]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.150289,0.099515,0.226969
1,awareness,3.659615,2.197433,6.09474


In [10]:
marg_eff

0,1
Dep. Variable:,insured
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.2197,0.04,5.473,0.0,0.141,0.298


In [11]:
# non-structural adaptation outcome: learned_routes regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('learned_routes ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.513889
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:         learned_routes   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.08960
Time:                        17:22:44   Log-Likelihood:                -197.85
converged:                       True   LL-Null:                       -217.32
Covariance Type:            nonrobust   LLR p-value:                 4.362e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9867      0.218     -9.127      0.000      -2.413      -1.560
awareness      1.5498      0.

In [12]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.137143,0.089514,0.210114
1,awareness,4.710546,2.805295,7.909772


In [13]:
marg_eff

0,1
Dep. Variable:,learned_routes
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.2635,0.039,6.787,0.0,0.187,0.34


In [14]:
# non-structural adaptation outcome: supplies regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('supplies ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.489824
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               supplies   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                  0.1043
Time:                        17:22:44   Log-Likelihood:                -188.58
converged:                       True   LL-Null:                       -210.54
Covariance Type:            nonrobust   LLR p-value:                 3.436e-11
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.1917      0.236     -9.296      0.000      -2.654      -1.730
awareness      1.7094      0.

In [15]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.111732,0.070387,0.177363
1,awareness,5.525652,3.192256,9.564658


In [16]:
marg_eff

0,1
Dep. Variable:,supplies
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.2748,0.039,7.102,0.0,0.199,0.351


In [17]:
# non-structural adaptation outcome: involved regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('involved ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.304811
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:               involved   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.07049
Time:                        17:22:44   Log-Likelihood:                -117.35
converged:                       True   LL-Null:                       -126.25
Covariance Type:            nonrobust   LLR p-value:                 2.455e-05
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.1728      0.361     -8.792      0.000      -3.880      -2.466
awareness      1.5634      0.

In [18]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.041885,0.020648,0.084965
1,awareness,4.775,2.133558,10.686669


In [19]:
marg_eff

0,1
Dep. Variable:,involved
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.1361,0.037,3.64,0.0,0.063,0.209


In [20]:
# non-structural adaptation outcome: made_plan regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('made_plan ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.506015
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:              made_plan   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.07468
Time:                        17:22:44   Log-Likelihood:                -194.82
converged:                       True   LL-Null:                       -210.54
Covariance Type:            nonrobust   LLR p-value:                 2.052e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9867      0.218     -9.127      0.000      -2.413      -1.560
awareness      1.4123      0.

In [21]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.137143,0.089514,0.210114
1,awareness,4.105392,2.437862,6.913535


In [22]:
marg_eff

0,1
Dep. Variable:,made_plan
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.2347,0.04,5.91,0.0,0.157,0.312


In [23]:
# non-structural adaptation outcome: practiced_drills regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('practiced_drills ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.274176
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:       practiced_drills   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.06267
Time:                        17:22:44   Log-Likelihood:                -105.56
converged:                       True   LL-Null:                       -112.62
Covariance Type:            nonrobust   LLR p-value:                 0.0001719
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.3116      0.385     -8.606      0.000      -4.066      -2.557
awareness      1.4945      0.

In [24]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.036458,0.01715,0.077506
1,awareness,4.457143,1.885058,10.538731


In [25]:
marg_eff

0,1
Dep. Variable:,practiced_drills
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.113,0.035,3.206,0.001,0.044,0.182


In [26]:
# non-structural adaptation outcome: alerts regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('alerts ~ awareness', data = df)

Optimization terminated successfully.
         Current function value: 0.462635
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 alerts   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.07547
Time:                        17:22:44   Log-Likelihood:                -178.11
converged:                       True   LL-Null:                       -192.65
Covariance Type:            nonrobust   LLR p-value:                 6.941e-08
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.2485      0.241     -9.321      0.000      -2.721      -1.776
awareness      1.4569      0.

In [27]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.105556,0.065789,0.169359
1,awareness,4.292763,2.438666,7.556514


In [28]:
marg_eff

0,1
Dep. Variable:,alerts
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.2161,0.04,5.423,0.0,0.138,0.294


In [29]:
# non-structural adaptation outcome: family_communication regressed on awareness (overwrites odds / marg_eff from the previous fit)
odds, marg_eff = model('family_communication ~ awareness', data=df)

Optimization terminated successfully.
         Current function value: 0.321052
         Iterations 7
                            Logit Regression Results                            
Dep. Variable:     family_communication   No. Observations:                  385
Model:                            Logit   Df Residuals:                      383
Method:                             MLE   Df Model:                            1
Date:                  Mon, 07 Apr 2025   Pseudo R-squ.:                 0.06836
Time:                          17:22:44   Log-Likelihood:                -123.61
converged:                         True   LL-Null:                       -132.68
Covariance Type:              nonrobust   LLR p-value:                 2.053e-05
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.0498      0.341     -8.940      0.000      -3.718      -2.381
awareness    

In [30]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.047368,0.024272,0.092441
1,awareness,4.553377,2.114231,9.806519


In [31]:
marg_eff

0,1
Dep. Variable:,family_communication
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
awareness,0.1407,0.038,3.744,0.0,0.067,0.214
