## Hypothesis: Perceived efficacy has a positive effect on preparedness

In [1]:
# analytics
import pandas as pd 
import numpy as np
import scipy.stats as stats
import statsmodels.formula.api as smf
#spatial 
import osmnx as ox
import geopandas as gpd
import contextily as cx
# plotting 
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
#settings
import warnings

In [2]:
# Read the cleaned export (clean_k.csv) into `df` and preview its first rows.
# NOTE(review): this is an absolute path tied to one user's machine; the notebook
# cannot be re-run elsewhere without editing it — consider a configurable data dir.
path = '/Users/philip/Documents/ESE/ESE_thesis/flood_experience/data/export/clean_k.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,id,state,zipcode,geographic_division,census_region,county,awareness,perception,experience,floodzone,...,sex,education,race,homeownership,income,rentmortgage,rurality,hazard_weight,geometry,zip_count
0,1,Washington,98033,Pacific,West,King,0.0,1.0,0.0,0.0,...,0.0,4,White,1,87500,1250.0,1,0.255097,POLYGON ((-13608075.601186747 6055357.24645823...,1
1,2,Florida,33950,South Atlantic,South,Charlotte,1.0,1.0,1.0,1.0,...,1.0,5,White,1,175000,3000.0,1,2.836145,POLYGON ((-9139287.781401768 3112400.961508024...,1
2,3,New Jersey,7031,Middle Atlantic,Northeast,Bergen,0.0,1.0,1.0,0.0,...,1.0,1,White,0,47500,1250.0,1,1.165971,POLYGON ((-8254065.171859486 4980899.960122439...,1
3,5,Pennsylvania,19148,Middle Atlantic,Northeast,Philadelphia,0.0,1.0,1.0,0.0,...,0.0,4,White,1,125000,1250.0,1,0.62489,POLYGON ((-8368433.592186097 4851218.465580212...,1
4,8,New York,11691,Middle Atlantic,Northeast,Queens,1.0,1.0,1.0,1.0,...,0.0,3,Black or African American,1,62500,1750.0,1,0.853837,POLYGON ((-8214183.071851451 4953007.332301349...,2


In [3]:
df.columns

Index(['id', 'state', 'zipcode', 'geographic_division', 'census_region',
       'county', 'awareness', 'perception', 'experience', 'floodzone',
       'efficacy', 'supplies', 'insured', 'involved', 'learned_routes',
       'made_plan', 'made_safer', 'planned_neighbors', 'practiced_drills',
       'documents', 'rainy_day', 'alerts', 'family_communication', 'none',
       'dont_know', 'age', 'sex', 'education', 'race', 'homeownership',
       'income', 'rentmortgage', 'rurality', 'hazard_weight', 'geometry',
       'zip_count'],
      dtype='object')

#### We will go through these outcome variables one by one

structural adaptation: 
- made_safer

non-structural adaptation: 
- insured
- learned_routes
- supplies
- involved
- made_plan
- practiced_drills
- alerts
- family_communication


In [4]:
# define the regression model 
def model(function, data):
    """Fit a logit model, print its diagnostics and return effect-size tables.

    Parameters
    ----------
    function : str
        Formula handed to ``smf.logit`` (e.g. ``'made_safer ~ efficacy'``).
        The fitted model must contain both an ``Intercept`` and an
        ``efficacy`` coefficient, because the two printed probabilities are
        built from exactly those terms.
    data : pandas.DataFrame
        Frame holding the variables referenced by the formula.

    Returns
    -------
    odds_ratios : pandas.DataFrame
        One row per model term with columns ``Variable``, ``OR``,
        ``Lower CI`` and ``Upper CI`` (exponentiated coefficients and
        exponentiated ``conf_int()`` bounds).
    marginal_effects
        The summary object produced by ``get_margeff().summary()``.

    Raises
    ------
    KeyError
        If the fitted model has no ``Intercept`` or no ``efficacy`` parameter.
    """
    # Keep the fitted result under its own name so it does not shadow this function.
    fitted = smf.logit(function, data=data).fit()
    print(fitted.summary())  # coefficient table and default fit statistics
    print('BIC:', fitted.bic)  # BIC is printed in addition to the default summary

    params = fitted.params

    # These are predicted probabilities (inverse logit of the linear predictor),
    # not odds ratios: intercept alone, then intercept plus one unit of efficacy.
    # NOTE(review): the "low"/"high" wording assumes efficacy is coded 0/1 — confirm.
    intercept = params['Intercept']
    print('likelihood of adaptation when there is low perceived efficacy = ', 1 / (1 + np.exp(-intercept)))
    print('likelihood of adaptation when there is high perceived efficacy = ', 1 / (1 + np.exp(-(intercept + params['efficacy']))))

    # Odds ratios: exponentiate the coefficients and both confidence-interval bounds.
    conf = fitted.conf_int()
    odds_ratios = pd.DataFrame({
        "OR": np.exp(params),
        "Lower CI": np.exp(conf[0]),
        "Upper CI": np.exp(conf[1])
        })
    odds_ratios.index.name = 'Variable'
    # Move the term names from the index into a regular 'Variable' column.
    odds_ratios = odds_ratios.reset_index()

    # Marginal effects, using the defaults of get_margeff().
    marginal_effects = fitted.get_margeff().summary()

    return odds_ratios, marginal_effects
    

In [5]:
# Structural adaptation outcome: logit of made_safer on efficacy.
# `odds` and `marg_eff` are overwritten by every later model call.
odds, marg_eff = model('made_safer ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.467192
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:             made_safer   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.04540
Time:                        17:20:36   Log-Likelihood:                -179.87
converged:                       True   LL-Null:                       -188.42
Covariance Type:            nonrobust   LLR p-value:                 3.533e-05
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9363      0.195     -9.915      0.000      -2.319      -1.554
efficacy       1.0858      0.

In [6]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.144231,0.09836,0.211493
1,efficacy,2.961812,1.759643,4.985292


In [7]:
marg_eff

0,1
Dep. Variable:,made_safer
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.1609,0.038,4.275,0.0,0.087,0.235


In [8]:
# Non-structural adaptation outcome: logit of insured on efficacy.
odds, marg_eff = model('insured ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.485370
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                insured   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                  0.1173
Time:                        17:20:36   Log-Likelihood:                -186.87
converged:                       True   LL-Null:                       -211.70
Covariance Type:            nonrobust   LLR p-value:                 1.817e-12
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0149      0.201    -10.015      0.000      -2.409      -1.621
efficacy       1.7549      0.

In [9]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.133333,0.089885,0.197783
1,efficacy,5.783133,3.466995,9.646572


In [10]:
marg_eff

0,1
Dep. Variable:,insured
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.2773,0.033,8.365,0.0,0.212,0.342


In [11]:
# Non-structural adaptation outcome: logit of learned_routes on efficacy.
odds, marg_eff = model('learned_routes ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.510286
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:         learned_routes   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.09598
Time:                        17:20:36   Log-Likelihood:                -196.46
converged:                       True   LL-Null:                       -217.32
Covariance Type:            nonrobust   LLR p-value:                 1.054e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.8265      0.188     -9.738      0.000      -2.194      -1.459
efficacy       1.5665      0.

In [12]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.160976,0.111456,0.232497
1,efficacy,4.790069,2.930483,7.829688


In [13]:
marg_eff

0,1
Dep. Variable:,learned_routes
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.2627,0.035,7.541,0.0,0.194,0.331


In [14]:
# Non-structural adaptation outcome: logit of supplies on efficacy.
odds, marg_eff = model('supplies ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.484659
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               supplies   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                  0.1137
Time:                        17:20:36   Log-Likelihood:                -186.59
converged:                       True   LL-Null:                       -210.54
Covariance Type:            nonrobust   LLR p-value:                 4.511e-12
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0149      0.201    -10.015      0.000      -2.409      -1.621
efficacy       1.7272      0.

In [15]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.133333,0.089885,0.197783
1,efficacy,5.625,3.370872,9.386482


In [16]:
marg_eff

0,1
Dep. Variable:,supplies
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.2723,0.034,8.128,0.0,0.207,0.338


In [17]:
# Non-structural adaptation outcome: logit of involved on efficacy.
odds, marg_eff = model('involved ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.297364
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:               involved   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.09320
Time:                        17:20:36   Log-Likelihood:                -114.48
converged:                       True   LL-Null:                       -126.25
Covariance Type:            nonrobust   LLR p-value:                 1.227e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.1268      0.323     -9.678      0.000      -3.760      -2.494
efficacy       1.7234      0.

In [18]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.04386,0.023284,0.082619
1,efficacy,5.60339,2.640672,11.890149


In [19]:
marg_eff

0,1
Dep. Variable:,involved
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.1471,0.034,4.304,0.0,0.08,0.214


In [21]:
# Non-structural adaptation outcome: logit of made_plan on efficacy.
# NOTE(review): the recorded output of this cell is identical to the supplies model
# (same function value, log-likelihood, coefficients, odds ratio and marginal effect).
# Verify that made_plan and supplies are not duplicated columns in the cleaned data.
odds, marg_eff = model('made_plan ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.484659
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:              made_plan   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                  0.1137
Time:                        17:20:44   Log-Likelihood:                -186.59
converged:                       True   LL-Null:                       -210.54
Covariance Type:            nonrobust   LLR p-value:                 4.511e-12
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0149      0.201    -10.015      0.000      -2.409      -1.621
efficacy       1.7272      0.

In [22]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.133333,0.089885,0.197783
1,efficacy,5.625,3.370872,9.386482


In [23]:
marg_eff

0,1
Dep. Variable:,made_plan
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.2723,0.034,8.128,0.0,0.207,0.338


In [24]:
# Non-structural adaptation outcome: logit of practiced_drills on efficacy.
odds, marg_eff = model('practiced_drills ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.265033
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:       practiced_drills   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.09393
Time:                        17:20:45   Log-Likelihood:                -102.04
converged:                       True   LL-Null:                       -112.62
Covariance Type:            nonrobust   LLR p-value:                 4.234e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.3586      0.360     -9.339      0.000      -4.064      -2.654
efficacy       1.7735      0.

In [25]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.034783,0.017188,0.070388
1,efficacy,5.891393,2.579618,13.454904


In [26]:
marg_eff

0,1
Dep. Variable:,practiced_drills
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.1312,0.033,3.92,0.0,0.066,0.197


In [27]:
# Non-structural adaptation outcome: logit of alerts on efficacy.
odds, marg_eff = model('alerts ~ efficacy', data = df)

Optimization terminated successfully.
         Current function value: 0.470286
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 alerts   No. Observations:                  385
Model:                          Logit   Df Residuals:                      383
Method:                           MLE   Df Model:                            1
Date:                Mon, 07 Apr 2025   Pseudo R-squ.:                 0.06018
Time:                        17:20:46   Log-Likelihood:                -181.06
converged:                       True   LL-Null:                       -192.65
Covariance Type:            nonrobust   LLR p-value:                 1.468e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.9750      0.198     -9.967      0.000      -2.363      -1.587
efficacy       1.2511      0.

In [28]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.138756,0.094097,0.20461
1,efficacy,3.494253,2.07885,5.873344


In [29]:
marg_eff

0,1
Dep. Variable:,alerts
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.1878,0.037,5.06,0.0,0.115,0.261


In [30]:
# Non-structural adaptation outcome: logit of family_communication on efficacy.
odds, marg_eff = model('family_communication ~ efficacy', data=df)

Optimization terminated successfully.
         Current function value: 0.320610
         Iterations 7
                            Logit Regression Results                            
Dep. Variable:     family_communication   No. Observations:                  385
Model:                            Logit   Df Residuals:                      383
Method:                             MLE   Df Model:                            1
Date:                  Mon, 07 Apr 2025   Pseudo R-squ.:                 0.06965
Time:                          17:20:46   Log-Likelihood:                -123.43
converged:                         True   LL-Null:                       -132.68
Covariance Type:              nonrobust   LLR p-value:                 1.716e-05
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.8512      0.285     -9.995      0.000      -3.410      -2.292
efficacy     

In [31]:
odds

Unnamed: 0,Variable,OR,Lower CI,Upper CI
0,Intercept,0.057778,0.033034,0.101057
1,efficacy,4.253585,2.131198,8.489586


In [32]:
marg_eff

0,1
Dep. Variable:,family_communication
Method:,dydx
At:,overall

Unnamed: 0,dy/dx,std err,z,P>|z|,[0.025,0.975]
efficacy,0.1338,0.033,3.994,0.0,0.068,0.199
