In [81]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col


import constants as c
import helpers as h 
from logger import setup_logger 
# Notebook-scoped logger; the name passed here tags every message this notebook logs.
log = setup_logger('added-population-coverage')
log.setLevel('INFO')
log.info("Modules loaded.")

[34m2025-03-11 12:41:54 - added-population-coverage - INFO - Modules loaded.[0m


In [82]:
# Load the current analysis frame, then run the helper steps in order:
# covariate columns -> demographic columns -> estimate columns.
analysis_df = (
    pd.read_csv(c.CURRENT_DF)
    .pipe(h.add_covariate_cols)
    .pipe(h.add_demo_cols)
    .pipe(h.add_estimate_cols)
)


[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Found 192 tracts with at least one FloodNet sensor.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Found 2171 311 requests.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Found 878 tracts with at least one 311 report.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Found 1001 tracts with no DEP flooding.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction white (frac_white) column.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction black (frac_black) column.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction hispanic (frac_hispanic) column.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction asian (frac_asian) column.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction high school graduates (frac_hs) column.[0m
[34m2025-03-11 12:41:54 - analysis-helpers - INFO - Added fraction bachelors degree (frac_bac

In [83]:
# Name of the analysis_df column used as the risk flag in the single-estimate cells below;
# the value comes from the project constants module.
EST_TO_USE = c.ESTIMATE_TO_USE
log.info(f"Using estimate: {EST_TO_USE}")

[34m2025-03-11 12:41:54 - added-population-coverage - INFO - Using estimate: confirmed_or_above_thres[0m


# basic exploratory analysis

In [84]:
# raise the column display limit so the wide frame is shown without truncation
# (pd.set_option changes the setting globally, not just for this cell)
pd.set_option('display.max_columns', 500)
analysis_df.head()


Unnamed: 0,BoroName,BoroCT2020,NTAName,CDTANAME,PUMA,empirical_estimate_ct,p_y,p_y_CI_lower,p_y_CI_upper,empirical_estimate_p_alop,at_least_one_positive_image_by_area,at_least_one_positive_image_by_area_CI_lower,at_least_one_positive_image_by_area_CI_upper,total_population,nhl_white_alone,nhl_black_alone,hispanic_alone,nhl_asian_alone,n_children,n_elderly,total_households,num_households_with_internet,num_households_with_smartphone,median_household_income,num_high_school_graduates,num_bachelors_degree,num_graduate_degree,num_limited_english_speaking_households,ft_elevation_min,ft_elevation_max,ft_elevation_mean,area,n_floodnet_sensors,dep_moderate_1_area,dep_moderate_1_frac,dep_moderate_2_area,dep_moderate_2_frac,GEOID,sewer_backup_311c,street_flooding_311c,catch_basin_clogged/flooding_311c,manhole_overflow_311c,highway_flooding_311c,any_sensors,n_311_reports,any_311_report,no_dep_flooding,frac_white,frac_black,frac_hispanic,frac_asian,frac_hs,frac_bachelors,frac_grad,frac_children,frac_elderly,frac_internet,frac_smartphone,frac_limited_english,confirmed_flooding,above_thres,confirmed_or_above_thres
0,Manhattan,1000100,The Battery-Governors Island-Ellis Island-Libe...,MN01 Financial District-Tribeca (CD 1 Equivalent),4121,,0.055761,1.02881e-313,1.0,,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,-666666666,0,0,0,0,0.0,19.0,8.157155,1842847.0,0.0,0.0,0.0,0.0,0.0,36061000100,0,0,0,0,0,False,0,False,True,,,,,,,,,,,,,False,True,True
1,Manhattan,1000201,Chinatown-Two Bridges,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.000787,5.49466e-06,0.005446,0.0,0.163558,0.001894,0.848038,2666,238,216,1747,400,1180,328,878,726,740,45582,324,120,130,155,5.0,37.0,20.221,972312.5,0.0,0.0,0.0,0.0,0.0,36061000201,0,0,0,0,0,False,0,False,True,0.089272,0.08102,0.655289,0.150038,0.12153,0.045011,0.048762,0.442611,0.123031,0.826879,0.842825,0.176538,False,False,False
2,Manhattan,1000600,Chinatown-Two Bridges,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.002203,0.008106,0.0005978109,0.026526,0.002203,1.0,1.0,1.0,10751,859,654,3301,5567,1292,3340,5191,3792,3773,25655,2074,1559,268,1777,0.0,41.0,17.414628,2582706.0,0.0,22123.775465,0.008566,28743.307693,0.011129,36061000600,0,1,0,0,0,False,1,True,False,0.0799,0.060832,0.307041,0.517812,0.192912,0.14501,0.024928,0.120175,0.310669,0.730495,0.726835,0.342323,True,True,True
3,Manhattan,1001401,Lower East Side,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.000387,4.974611e-06,0.002433,0.0,0.078911,0.001307,0.473059,3165,2224,85,314,251,393,1136,1648,1300,1287,89873,310,867,863,111,24.0,43.0,34.84834,1006117.0,0.0,0.0,0.0,0.0,0.0,36061001401,1,0,0,0,0,False,1,True,True,0.702686,0.026856,0.09921,0.079305,0.097946,0.273934,0.27267,0.124171,0.358926,0.788835,0.780947,0.067354,False,False,False
4,Manhattan,1001402,Lower East Side,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.000285,4.41793e-06,0.001847,0.0,0.142827,0.003136,0.731406,3286,881,250,860,1099,229,858,1733,1335,1452,46615,550,896,300,545,13.0,42.0,27.555402,1226207.0,0.0,3811.63265,0.003108,7439.195282,0.006067,36061001402,0,0,0,0,0,False,0,False,False,0.268107,0.07608,0.261716,0.334449,0.167377,0.272672,0.091296,0.06969,0.261108,0.77034,0.837853,0.314484,False,False,False


In [85]:
# sanity check: number of rows with a missing total_population value
print(analysis_df['total_population'].isna().sum())

0


In [86]:
# summary statistics for the two dep_moderate_*_area columns
analysis_df[['dep_moderate_1_area', 'dep_moderate_2_area']].describe()

Unnamed: 0,dep_moderate_1_area,dep_moderate_2_area
count,2325.0,2325.0
mean,34125.087468,51840.25
std,80742.531007,126533.3
min,0.0,0.0
25%,0.0,0.0
50%,5253.617806,7093.228
75%,33966.087931,46969.99
max,994791.061756,1731771.0


In [87]:
# A row has "no DEP flooding" when both dep_moderate area columns are exactly zero.
analysis_df['no_dep_flooding'] = (
    analysis_df['dep_moderate_1_area'].eq(0)
    & analysis_df['dep_moderate_2_area'].eq(0)
)
# Total population of rows flagged by the chosen estimate that also have no DEP flooding.
print(
    "Population in these locations: %2.3f"
    % analysis_df.loc[
        analysis_df[EST_TO_USE].eq(1) & analysis_df['no_dep_flooding'].eq(1),
        'total_population',
    ].sum()
)

Population in these locations: 293095.000


# 311 reports

### still, our model identifies lots of high-risk areas with no 311 reports!

In [88]:
# Total population of rows flagged by the chosen estimate that have no 311 report.
print(
    "Population in these locations: %2.3f"
    % analysis_df.loc[
        analysis_df[EST_TO_USE].eq(1) & analysis_df['any_311_report'].eq(0),
        'total_population',
    ].sum()
)

Population in these locations: 433079.000


# FloodNet sensors

In [89]:
# Total population of rows flagged by the chosen estimate that have no sensors.
# The flag is compared explicitly with 1 (as in the DEP and 311 cells) so the mask
# is always boolean, even if the estimate column is stored as 0/1 rather than bool.
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df[EST_TO_USE] == 1) & (analysis_df['any_sensors'] == 0), 'total_population'].sum())

Population in these locations: 927908.000


In [90]:
# sum of the n_floodnet_sensors column over all rows
analysis_df['n_floodnet_sensors'].sum()

253.0

### Other stats 

In [91]:
# population in tracts with no other coverage except from model risk:
# flagged by the estimate, no 311 report, no sensors, and no DEP flooding.
# Flags are compared explicitly with 1 (as in the DEP and 311 cells) so each mask
# is boolean regardless of whether the column is stored as bool or 0/1.
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df[EST_TO_USE] == 1) & (analysis_df['any_311_report'] == 0) & (analysis_df['any_sensors'] == 0) & (analysis_df['no_dep_flooding'] == 1), 'total_population'].sum())

Population in these locations: 113738.000


In [92]:
# total population in tracts classified as high risk
# (explicit == 1 keeps the mask boolean and matches the DEP and 311 cells)
print("Population in these locations: %2.3f" % analysis_df.loc[analysis_df[EST_TO_USE] == 1, 'total_population'].sum())

Population in these locations: 1167910.000


In [93]:
EST_COLS = ['above_thres', 'confirmed_flooding', 'confirmed_or_above_thres']
# generate a table of population coverage (same as above) for each estimate

def get_pop_coverage(df, est_col):
    """Sum ``total_population`` over rows flagged by ``est_col`` under several coverage filters.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``est_col`` plus the columns ``no_dep_flooding``,
        ``any_311_report``, ``any_sensors`` and ``total_population``.
    est_col : str
        Name of the flag column; a row counts as flagged when its value equals 1
        (so both ``True``/``False`` and ``1``/``0`` encodings work).

    Returns
    -------
    tuple
        Six population sums, in this order:
        (flagged,
         flagged & no DEP flooding,
         flagged & no 311 report,
         flagged & no sensors,
         flagged & no 311 report & no sensors,
         flagged & no 311 report & no sensors & no DEP flooding).
    """
    # Build each mask once. Explicit comparisons guarantee boolean masks: a bare
    # 0/1 integer Series passed to .loc would be treated as row labels instead.
    flagged = df[est_col] == 1
    no_dep = df['no_dep_flooding'] == 1
    no_311 = df['any_311_report'] == 0
    no_sensors = df['any_sensors'] == 0

    def _population(mask):
        # total population of the rows selected by a boolean mask
        return df.loc[mask, 'total_population'].sum()

    pop_in_risk = _population(flagged)
    pop_in_risk_no_dep = _population(flagged & no_dep)
    pop_in_risk_no_311 = _population(flagged & no_311)
    pop_in_risk_no_sensors = _population(flagged & no_sensors)
    pop_in_risk_no_other = _population(flagged & no_311 & no_sensors)
    pop_in_risk_no_other_no_dep = _population(flagged & no_311 & no_sensors & no_dep)
    return pop_in_risk, pop_in_risk_no_dep, pop_in_risk_no_311, pop_in_risk_no_sensors, pop_in_risk_no_other, pop_in_risk_no_other_no_dep

# Display labels for the summary table.
# `cols` maps each estimate column name to its table column header.
cols = dict(
    above_thres='Above Threshold',
    confirmed_flooding='Confirmed Flooding',
    confirmed_or_above_thres='Confirmed or Above Threshold',
)

# `rows` maps each metric name to its table row label; insertion order is the
# row order of the table, so it has to line up with the order of the values
# returned by get_pop_coverage.
rows = dict(
    pop_in_risk='Population in Flooded Tracts',
    pop_in_risk_no_dep='Population in Flooded Tracts with No DEP Flooding',
    pop_in_risk_no_311='Population in Flooded Tracts with No 311 Reports',
    pop_in_risk_no_sensors='Population in Flooded Tracts with No FloodNet Sensors',
    pop_in_risk_no_other='Population in Flooded Tracts with No Other Coverage',
    pop_in_risk_no_other_no_dep='Population in Flooded Tracts with No Other Coverage and No DEP Flooding',
)

# make a table: one row per coverage metric (labels from `rows`), one column per
# estimate (headers from `cols`). Each column is filled with the tuple returned
# by get_pop_coverage, so the order of `rows` must match that tuple's order.
pop_coverage = pd.DataFrame(index=rows.values(), columns=cols.values())
for est_col in EST_COLS:
    pop_coverage.loc[:, cols[est_col]] = get_pop_coverage(analysis_df, est_col)

# format nicely with commas
# (column-wise Series.map replaces DataFrame.applymap, which is deprecated and
# emitted a FutureWarning; Series.map behaves the same on old and new pandas)
pop_coverage = pop_coverage.apply(lambda col: col.map("{:,.0f}".format))
pop_coverage


  pop_coverage = pop_coverage.applymap(lambda x: "{:,.0f}".format(x))


Unnamed: 0,Above Threshold,Confirmed Flooding,Confirmed or Above Threshold
Population in Flooded Tracts,1003940,597223,1167910
Population in Flooded Tracts with No DEP Flooding,245379,105603,293095
Population in Flooded Tracts with No 311 Reports,324806,232209,433079
Population in Flooded Tracts with No FloodNet Sensors,784746,459167,927908
Population in Flooded Tracts with No Other Coverage,294308,214762,390355
Population in Flooded Tracts with No Other Coverage and No DEP Flooding,87180,45229,113738
