In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col


import constants as c
import helpers as h 
from logger import setup_logger 
log = setup_logger('added-population-coverage')
log.setLevel('INFO')
log.info("Modules loaded.")

[34m2025-02-05 16:43:24 - added-population-coverage - INFO - Modules loaded.[0m


In [22]:
analysis_df = pd.read_csv(c.CURRENT_DF)
analysis_df = h.add_helper_cols(analysis_df)

[34m2025-02-05 16:43:34 - analysis-helpers - INFO - Found 192 tracts with at least one FloodNet sensor.[0m
[34m2025-02-05 16:43:34 - analysis-helpers - INFO - Found 2171 311 requests.[0m
[34m2025-02-05 16:43:34 - analysis-helpers - INFO - Found 878 tracts with at least one 311 report.[0m
[34m2025-02-05 16:43:34 - analysis-helpers - INFO - Found 1001 tracts with no DEP flooding.[0m


# basic exploratory analysis

In [24]:
pd.set_option('display.max_columns', 500)
analysis_df['confirmed_flooded_image'] = analysis_df['at_least_one_positive_image_by_area'] == 1
analysis_df['p_y_percentile'] = analysis_df['p_y'].rank(pct=True) * 100
ESTIMATE_THRES = analysis_df[analysis_df['confirmed_flooded_image']]['p_y'].quantile(0.25)

analysis_df['above_thres'] = analysis_df['p_y'] > ESTIMATE_THRES
print(analysis_df[analysis_df['above_thres']].shape[0] / analysis_df.shape[0])

analysis_df['white_frac'] = analysis_df['nhl_white_alone'] / analysis_df['total_population'] 
analysis_df['black_frac'] = analysis_df['nhl_black_alone'] / analysis_df['total_population']
analysis_df['hispanic_frac'] = analysis_df['hispanic_alone'] / analysis_df['total_population']
analysis_df['asian_frac'] = analysis_df['nhl_asian_alone'] / analysis_df['total_population']


analysis_df.head()


0.11096774193548387


Unnamed: 0,BoroName,BoroCT2020,NTAName,CDTANAME,PUMA,empirical_estimate_ct,p_y,p_y_CI_lower,p_y_CI_upper,empirical_estimate_p_alop,at_least_one_positive_image_by_area,at_least_one_positive_image_by_area_CI_lower,at_least_one_positive_image_by_area_CI_upper,total_population,nhl_white_alone,nhl_black_alone,hispanic_alone,nhl_asian_alone,n_children,n_elderly,total_households,num_households_with_internet,num_households_with_smartphone,median_household_income,num_high_school_graduates,num_bachelors_degree,num_graduate_degree,num_limited_english_speaking_households,ft_elevation_min,ft_elevation_max,ft_elevation_mean,area,n_floodnet_sensors,dep_moderate_1_area,dep_moderate_1_frac,dep_moderate_2_area,dep_moderate_2_frac,GEOID,sewer_backup_311c,street_flooding_311c,catch_basin_clogged/flooding_311c,manhole_overflow_311c,highway_flooding_311c,any_sensors,n_311_reports,any_311_report,no_dep_flooding,confirmed_flooded_image,p_y_percentile,above_thres,white_frac,black_frac,hispanic_frac,asian_frac
0,Manhattan,1000100,The Battery-Governors Island-Ellis Island-Libe...,MN01 Financial District-Tribeca (CD 1 Equivalent),4121,,0.053544,1.680531e-309,1.0,,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,-666666666,0,0,0,0,0.0,19.0,8.157155,1842847.0,0.0,0.0,0.0,0.0,0.0,36061000100,0,0,0,0,0,False,0,False,True,False,97.935484,True,,,,
1,Manhattan,1000201,Chinatown-Two Bridges,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.00084,4.894745e-06,0.005778,0.0,0.170082,0.001687,0.86457,2666,238,216,1747,400,1180,328,878,726,740,45582,324,120,130,155,5.0,37.0,20.221,972312.5,0.0,0.0,0.0,0.0,0.0,36061000201,0,0,0,0,0,False,0,False,True,False,30.752688,False,0.089272,0.08102,0.655289,0.150038
2,Manhattan,1000600,Chinatown-Two Bridges,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.002203,0.009041,0.0008006037,0.030372,0.002203,1.0,1.0,1.0,10751,859,654,3301,5567,1292,3340,5191,3792,3773,25655,2074,1559,268,1777,0.0,41.0,17.414628,2582706.0,0.0,22123.775465,0.008566,28743.307693,0.011129,36061000600,0,1,0,0,0,False,1,True,False,True,91.010753,True,0.0799,0.060832,0.307041,0.517812
3,Manhattan,1001401,Lower East Side,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.000225,2.432554e-06,0.001518,0.0,0.04973,0.00064,0.329357,3165,2224,85,314,251,393,1136,1648,1300,1287,89873,310,867,863,111,24.0,43.0,34.84834,1006117.0,0.0,0.0,0.0,0.0,0.0,36061001401,1,0,0,0,0,False,1,True,True,False,2.107527,False,0.702686,0.026856,0.09921,0.079305
4,Manhattan,1001402,Lower East Side,MN03 Lower East Side-Chinatown (CD 3 Equivalent),4103,0.0,0.000312,4.590895e-06,0.001913,0.0,0.149912,0.003259,0.743719,3286,881,250,860,1099,229,858,1733,1335,1452,46615,550,896,300,545,13.0,42.0,27.555402,1226207.0,0.0,3811.63265,0.003108,7439.195282,0.006067,36061001402,0,0,0,0,0,False,0,False,False,False,5.032258,False,0.268107,0.07608,0.261716,0.334449


In [25]:
print(analysis_df['total_population'].isna().sum())

0


In [26]:
analysis_df[['dep_moderate_1_area', 'dep_moderate_2_area']].describe()

Unnamed: 0,dep_moderate_1_area,dep_moderate_2_area
count,2325.0,2325.0
mean,34125.087468,51840.25
std,80742.531007,126533.3
min,0.0,0.0
25%,0.0,0.0
50%,5253.617806,7093.228
75%,33966.087931,46969.99
max,994791.061756,1731771.0


In [27]:
analysis_df['no_dep_flooding'] = (analysis_df['dep_moderate_1_area'] == 0) & (analysis_df['dep_moderate_2_area'] == 0)
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df['above_thres'] == 1) & (analysis_df['no_dep_flooding'] == 0), 'total_population'].sum())

Population in these locations: 646613.000


# 311

### still, our model identifies lots of high-risk areas with no 311 reports!

In [28]:
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df['above_thres'] == 1) & (analysis_df['any_311_report'] == 0), 'total_population'].sum())

Population in these locations: 291702.000


# flood sensors

In [30]:
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df['above_thres']) & (analysis_df['any_sensors'] == 0), 'total_population'].sum())

Population in these locations: 670052.000


In [33]:
analysis_df['n_floodnet_sensors'].sum()

253.0

### Other stats 

In [32]:
# population in tracts with no other coverage except from model risk 
print("Population in these locations: %2.3f" % analysis_df.loc[(analysis_df['above_thres']) & (analysis_df['any_311_report'] == 0) & (analysis_df['any_sensors'] == 0) & (analysis_df['no_dep_flooding']), 'total_population'].sum())

Population in these locations: 75559.000
