In [1]:
import geopandas as gpd
import pandas as pd
import os

# Data folders, relative to the notebook's working directory.
_DATA_ROOT = os.path.join("..", "data")
DATA_RAW = os.path.join(_DATA_ROOT, "raw")              # raw inputs (e.g. the zipped NTA boundaries)
DATA_PROCESSED = os.path.join(_DATA_ROOT, "processed")  # parquet files read and written below

# EPSG code every spatial layer is expected to share (the NTA layer is reprojected to it).
CRS_NYC = 2263

In [2]:
# Building footprints and stormwater flood polygons, both read as GeoDataFrames.
buildings_path = os.path.join(DATA_PROCESSED, "building_current_2263.parquet")
stormwater_path = os.path.join(DATA_PROCESSED, "stormwater_2263.parquet")
buildings = gpd.read_parquet(buildings_path)
stormwater = gpd.read_parquet(stormwater_path)

# NTA boundaries are read straight from the zip archive and reprojected to CRS_NYC.
nta_zip_path = os.path.join(DATA_RAW, 'nta', 'nyc_nta_2020.zip')
nta = gpd.read_file("zip://" + nta_zip_path).to_crs(CRS_NYC)

# Per-NTA density metrics, loaded with plain pandas: the `geometry` column therefore
# shows up as raw bytes in the previews further down.
# NOTE(review): confirm a plain DataFrame (not a GeoDataFrame) is what downstream
# consumers of the tables built from nta_density expect.
density_path = os.path.join(DATA_PROCESSED, "nta_construction_density.parquet")
nta_density = pd.read_parquet(density_path)

# Display the CRS of each spatial layer so a mismatch is visible before any overlay.
buildings.crs, stormwater.crs, nta.crs


(<Projected CRS: EPSG:2263>
 Name: NAD83 / New York Long Island (ftUS)
 Axis Info [cartesian]:
 - X[east]: Easting (US survey foot)
 - Y[north]: Northing (US survey foot)
 Area of Use:
 - name: United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk.
 - bounds: (-74.26, 40.47, -71.8, 41.3)
 Coordinate Operation:
 - name: SPCS83 New York Long Island zone (US survey foot)
 - method: Lambert Conic Conformal (2SP)
 Datum: North American Datum 1983
 - Ellipsoid: GRS 1980
 - Prime Meridian: Greenwich,
 <Projected CRS: EPSG:2263>
 Name: NAD83 / New York Long Island (ftUS)
 Axis Info [cartesian]:
 - X[east]: Easting (US survey foot)
 - Y[north]: Northing (US survey foot)
 Area of Use:
 - name: United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk.
 - bounds: (-74.26, 40.47, -71.8, 41.3)
 Coordinate Operation:
 - name: SPCS83 New York Long Island zone (US survey foot)
 - method: Lambert Conic Conform

In [3]:
# Collapse the stormwater polygons into one row per Flooding_Category value.
stormwater_union = stormwater.dissolve(by="Flooding_Category", as_index=False)

# Working copy used for flood flagging. No category filter is applied here,
# so every Flooding_Category present in the data counts as "flooded".
stormwater_flooded = stormwater_union.copy(deep=True)

In [4]:
# Merge all flood polygons into a single geometry to test buildings against.
flood_geom = stormwater_flooded.geometry.union_all()

# Cheap pre-filter: only buildings touching the flood layer's bounding box can intersect it.
x_min, y_min, x_max, y_max = flood_geom.bounds
buildings_bb = buildings.cx[x_min:x_max, y_min:y_max].copy()

print("Buildings total:", len(buildings))
print("Buildings in flood bbox:", len(buildings_bb))

# Exact test on the pre-filtered subset only.
buildings_bb["is_flooded"] = buildings_bb.geometry.intersects(flood_geom)

# Default every building to not flooded, then write back the subset results by index label.
buildings["is_flooded"] = False
buildings.loc[buildings_bb.index, "is_flooded"] = buildings_bb["is_flooded"]

Buildings total: 1082999
Buildings in flood bbox: 1080650


In [5]:
# Tally flooded vs. non-flooded buildings.
buildings["is_flooded"].value_counts()

is_flooded
False    1061485
True       21514
Name: count, dtype: int64

In [6]:
# Share of all buildings flagged as flooded (mean of the boolean flag).
buildings["is_flooded"].mean()

np.float64(0.019865207631770666)

In [7]:
# Preview the first 10 buildings, now carrying the is_flooded flag.
buildings.head(10)

Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,height_roo,date_last_,time_last_,last_statu,mappluto_b,shape_leng,geometry,geom_area_sqft,is_flooded
0,,4451699.0,321944.0,177.746094,4075320028,507357.0,1950.0,2100.0,Other (Manual),93.0,27.0,2017-08-22,19:18:38.000,Constructed,4075327501,59.004939,"POLYGON ((1052359.953 214149.188, 1052400.668 ...",1096.759439,False
1,,4558952.0,255026.0,34.742188,4105630045,137879.0,1930.0,5110.0,Photogrammetric,72.0,13.06,2017-08-17,16:20:43.000,Constructed,4105630045,24.548387,"POLYGON ((1052764.922 201306.381, 1052748.182 ...",214.631781,False
2,,3176483.0,759005.0,180.890625,3066450044,982953.0,1915.0,2100.0,Photogrammetric,18.0,36.761589,2017-08-22,15:37:34.000,Constructed,3066450044,61.475641,"POLYGON ((988769.316 159090.345, 988746.041 15...",1121.202926,False
3,,3393369.0,949392.0,106.035156,3038010128,244121.0,1997.0,2100.0,Photogrammetric,32.0,21.95,2017-08-22,15:31:10.000,Constructed,3038010128,43.732572,"POLYGON ((1012469.24 181742.107, 1012446.631 1...",656.015069,False
4,,2019299.0,353927.0,217.175781,2033800084,229537.0,1910.0,2100.0,Photogrammetric,197.0,33.49,2017-08-22,18:57:18.000,Constructed,2033800084,63.26829,"POLYGON ((1020199.824 267310.273, 1020201.035 ...",1334.18395,False
5,,5052578.0,281098.0,240.582031,5036360058,246429.0,1960.0,2100.0,Photogrammetric,44.0,21.13,2017-08-22,18:56:39.000,Constructed,5036360058,69.030911,"POLYGON ((952462.054 148588.137, 952441.334 14...",1492.459367,False
6,,3083803.0,419689.0,98.660156,3037200028,808886.0,1901.0,2100.0,Photogrammetric,44.0,30.11,2017-08-22,15:51:01.000,Constructed,3037200028,43.248158,"POLYGON ((1013026.308 184325.289, 1012989.42 1...",610.216125,False
7,,3100941.0,533049.0,588.09375,3046410004,370632.0,1931.0,2100.0,Photogrammetric,34.0,14.31,2017-08-22,15:46:44.000,Constructed,3046410004,107.406265,"POLYGON ((1004596.249 178502.966, 1004590.431 ...",3639.355961,False
8,,2117607.0,458160.0,102.941406,2027640025,80512.0,,2100.0,Other (Manual),25.0,11.0,2017-08-10,15:05:25.000,Constructed,2027640025,47.428753,"POLYGON ((1016582.739 236282.258, 1016574.046 ...",634.043292,False
9,,5007740.0,649423.0,232.261719,5002870016,232182.0,1955.0,2100.0,Photogrammetric,220.0,17.96,2017-08-22,17:36:42.000,Constructed,5002870016,63.023228,"POLYGON ((954298.282 166992.865, 954260.635 16...",1438.693395,False


In [8]:
# Preview the first 10 buildings that were flagged as flooded.
buildings[buildings["is_flooded"]].head(10)

Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,height_roo,date_last_,time_last_,last_statu,mappluto_b,shape_leng,geometry,geom_area_sqft,is_flooded
31,,5053642.0,96057.0,238.015625,5037000017,613411.0,1935.0,2100.0,Photogrammetric,9.0,20.88,2017-08-22,17:30:13.000,Constructed,5037000017,63.75568,"POLYGON ((955341.932 148874.892, 955363.559 14...",1476.515164,True
37,,1091774.0,1300615.0,433.242188,1003930059,1087553.0,2023.0,2100.0,Other (Manual),12.0,75.0,2023-09-14,18:08:49.000,Constructed,1003930059,83.40061,"POLYGON ((990006.187 204026.649, 989963.428 20...",2675.456081,True
54,,4006916.0,170637.0,1345.359375,4005760036,844649.0,1963.0,2100.0,Photogrammetric,31.0,63.01,2017-08-22,17:40:43.000,Constructed,4005760036,172.632505,"POLYGON ((1005361.62 219044.252, 1005348.138 2...",8298.019779,True
73,,3173017.0,460535.0,323.882812,3065700074,738209.0,1930.0,2100.0,Photogrammetric,31.0,32.08,2017-08-22,15:29:18.000,Constructed,3065700074,91.249948,"POLYGON ((993376.142 163764.302, 993303.032 16...",2006.774427,True
206,,4019979.0,764256.0,128.875,4008880014,1006242.0,1901.0,2100.0,Photogrammetric,26.0,12.17,2017-08-22,19:24:03.000,Constructed,4008880014,50.737649,"POLYGON ((1004791.433 221618.283, 1004764.732 ...",794.736908,True
277,,5095461.0,310122.0,99.136719,5036730014,767563.0,1989.0,2100.0,Photogrammetric,4.0,29.21,2017-08-22,18:13:55.000,Constructed,5036730014,39.949946,"POLYGON ((956856.98 150323.871, 956875.16 1503...",614.878858,True
341,,4286890.0,527208.0,245.324219,4134850036,419769.0,1955.0,2100.0,Photogrammetric,13.0,17.507202,2017-08-22,17:34:14.000,Constructed,4134850036,70.854284,"POLYGON ((1053227.737 180422.348, 1053221.666 ...",1517.957736,True
353,,4438762.0,312200.0,1186.410156,4101760012,573208.0,1961.0,2100.0,Photogrammetric,29.0,19.64,2017-08-22,18:12:01.000,Constructed,4101760012,146.1467,"POLYGON ((1043407.557 193599.092, 1043323.715 ...",7332.990651,True
434,,5060553.0,335781.0,337.9375,5044350047,901665.0,1970.0,2100.0,Photogrammetric,38.0,21.7,2017-08-22,18:42:44.000,Constructed,5044350047,77.112411,"POLYGON ((945075.275 146869.948, 945092.881 14...",2096.721109,True
444,,3327258.0,487208.0,118.261719,3027340101,36970.0,1928.0,2100.0,Photogrammetric,11.0,33.58,2024-03-21,14:43:30.000,Constructed,3027340101,43.825604,"POLYGON ((999162.887 200881.604, 999132.516 20...",730.491551,True


In [9]:
area_col = "geom_area_sqft"

# A flooded building contributes its whole footprint area; any other building contributes 0.
# (This is the full footprint of intersecting buildings, not the overlapping area.)
flooded_area = buildings[area_col].where(buildings["is_flooded"], 0.0)

# assign() returns a new frame, so the previous `buildings` object is not mutated in place.
buildings = buildings.assign(flooded_footprint_sqft=flooded_area)

buildings[[area_col, "is_flooded", "flooded_footprint_sqft"]].head(10)


Unnamed: 0,geom_area_sqft,is_flooded,flooded_footprint_sqft
0,1096.759439,False,0.0
1,214.631781,False,0.0
2,1121.202926,False,0.0
3,656.015069,False,0.0
4,1334.18395,False,0.0
5,1492.459367,False,0.0
6,610.216125,False,0.0
7,3639.355961,False,0.0
8,634.043292,False,0.0
9,1438.693395,False,0.0


In [10]:
# Preview the per-NTA density table loaded from the processed folder.
nta_density.head(10)

Unnamed: 0,NTACode,NTAName,boroname,geometry,nta_area_sqft,nta_area_sqmi,building_count,total_footprint_sqft,median_building_sqft,built_area_ratio,buildings_per_sqmi,built_sqft_per_sqmi
0,BK0101,Greenpoint,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xdb\x01...,35321740.0,1.266993,5147.0,13400800.0,1248.021588,0.379392,4062.373817,10576850.0
1,BK0102,Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x16\x01...,28852800.0,1.034952,3646.0,10377330.0,1334.824761,0.359665,3522.869432,10026870.0
2,BK0103,South Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00f\x00\x0...,15208960.0,0.545546,2294.0,5418461.0,1408.694795,0.356268,4204.958583,9932172.0
3,BK0104,East Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xa8\x02...,52267470.0,1.874838,4939.0,17450020.0,1354.539352,0.33386,2634.361536,9307485.0
4,BK0201,Brooklyn Heights,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00~\x00\x0...,9982088.0,0.358058,1492.0,3913004.0,1358.771123,0.392003,4166.92114,10928400.0
5,BK0202,Downtown Brooklyn-DUMBO-Boerum Hill,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00*\x01\x0...,23731480.0,0.85125,2000.0,8258165.0,1152.637541,0.347984,2349.487226,9701226.0
6,BK0203,Fort Greene,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x90\x00...,17533690.0,0.628935,2130.0,5244622.0,994.499027,0.299117,3386.680069,8338900.0
7,BK0204,Clinton Hill,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00T\x00\x0...,14566610.0,0.522505,2403.0,4768999.0,947.440147,0.327392,4598.996563,9127179.0
8,BK0261,Brooklyn Navy Yard,Brooklyn,b'\x01\x06\x00\x00\x00\x02\x00\x00\x00\x01\x03...,10106870.0,0.362534,133.0,2373427.0,3920.259287,0.234833,366.861926,6546766.0
9,BK0301,Bedford-Stuyvesant (West),Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00r\x00\x0...,36906660.0,1.323844,7547.0,13014540.0,969.488934,0.352634,5700.82188,9830872.0


In [11]:
# Preview buildings again, now including flooded_footprint_sqft.
buildings.head(10)

Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,height_roo,date_last_,time_last_,last_statu,mappluto_b,shape_leng,geometry,geom_area_sqft,is_flooded,flooded_footprint_sqft
0,,4451699.0,321944.0,177.746094,4075320028,507357.0,1950.0,2100.0,Other (Manual),93.0,27.0,2017-08-22,19:18:38.000,Constructed,4075327501,59.004939,"POLYGON ((1052359.953 214149.188, 1052400.668 ...",1096.759439,False,0.0
1,,4558952.0,255026.0,34.742188,4105630045,137879.0,1930.0,5110.0,Photogrammetric,72.0,13.06,2017-08-17,16:20:43.000,Constructed,4105630045,24.548387,"POLYGON ((1052764.922 201306.381, 1052748.182 ...",214.631781,False,0.0
2,,3176483.0,759005.0,180.890625,3066450044,982953.0,1915.0,2100.0,Photogrammetric,18.0,36.761589,2017-08-22,15:37:34.000,Constructed,3066450044,61.475641,"POLYGON ((988769.316 159090.345, 988746.041 15...",1121.202926,False,0.0
3,,3393369.0,949392.0,106.035156,3038010128,244121.0,1997.0,2100.0,Photogrammetric,32.0,21.95,2017-08-22,15:31:10.000,Constructed,3038010128,43.732572,"POLYGON ((1012469.24 181742.107, 1012446.631 1...",656.015069,False,0.0
4,,2019299.0,353927.0,217.175781,2033800084,229537.0,1910.0,2100.0,Photogrammetric,197.0,33.49,2017-08-22,18:57:18.000,Constructed,2033800084,63.26829,"POLYGON ((1020199.824 267310.273, 1020201.035 ...",1334.18395,False,0.0
5,,5052578.0,281098.0,240.582031,5036360058,246429.0,1960.0,2100.0,Photogrammetric,44.0,21.13,2017-08-22,18:56:39.000,Constructed,5036360058,69.030911,"POLYGON ((952462.054 148588.137, 952441.334 14...",1492.459367,False,0.0
6,,3083803.0,419689.0,98.660156,3037200028,808886.0,1901.0,2100.0,Photogrammetric,44.0,30.11,2017-08-22,15:51:01.000,Constructed,3037200028,43.248158,"POLYGON ((1013026.308 184325.289, 1012989.42 1...",610.216125,False,0.0
7,,3100941.0,533049.0,588.09375,3046410004,370632.0,1931.0,2100.0,Photogrammetric,34.0,14.31,2017-08-22,15:46:44.000,Constructed,3046410004,107.406265,"POLYGON ((1004596.249 178502.966, 1004590.431 ...",3639.355961,False,0.0
8,,2117607.0,458160.0,102.941406,2027640025,80512.0,,2100.0,Other (Manual),25.0,11.0,2017-08-10,15:05:25.000,Constructed,2027640025,47.428753,"POLYGON ((1016582.739 236282.258, 1016574.046 ...",634.043292,False,0.0
9,,5007740.0,649423.0,232.261719,5002870016,232182.0,1955.0,2100.0,Photogrammetric,220.0,17.96,2017-08-22,17:36:42.000,Constructed,5002870016,63.023228,"POLYGON ((954298.282 166992.865, 954260.635 16...",1438.693395,False,0.0


In [12]:
# Keep only the NTA identifier columns, renamed to the NTACode / NTAName spelling
# used by nta_density.
nta_keep_cols = ["NTACode", "NTAName", "boroname", "geometry"]
nta = nta.rename(columns={"nta2020": "NTACode", "ntaname": "NTAName"})[nta_keep_cols]

# Represent each building by its centroid; the polygon stays in the `geometry` column
# while `centroid` becomes the active geometry used by the join.
bld = buildings.assign(centroid=buildings.geometry.centroid)
bld_pts = bld.set_geometry("centroid")

# Left join keeps every building; those whose centroid falls within no NTA get NaN NTA fields.
bld_pts_nta = (
    bld_pts
    .sjoin(nta, how="left", predicate="within")
    .drop(columns=["index_right"])
)

nta_match_rate = bld_pts_nta["NTACode"].notna().mean()
print("Building→NTA match rate:", nta_match_rate)
bld_pts_nta.head(10)

Building→NTA match rate: 0.9999187441539651


Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,...,mappluto_b,shape_leng,geometry,geom_area_sqft,is_flooded,flooded_footprint_sqft,centroid,NTACode,NTAName,boroname
0,,4451699.0,321944.0,177.746094,4075320028,507357.0,1950.0,2100.0,Other (Manual),93.0,...,4075327501,59.004939,"POLYGON ((1052359.953 214149.188, 1052400.668 ...",1096.759439,False,0.0,POINT (1052386.895 214157.488),QN1102,Bayside,Queens
1,,4558952.0,255026.0,34.742188,4105630045,137879.0,1930.0,5110.0,Photogrammetric,72.0,...,4105630045,24.548387,"POLYGON ((1052764.922 201306.381, 1052748.182 ...",214.631781,False,0.0,POINT (1052753.726 201306.083),QN1303,Queens Village,Queens
2,,3176483.0,759005.0,180.890625,3066450044,982953.0,1915.0,2100.0,Photogrammetric,18.0,...,3066450044,61.475641,"POLYGON ((988769.316 159090.345, 988746.041 15...",1121.202926,False,0.0,POINT (988739.603 159096.157),BK1103,Gravesend (West),Brooklyn
3,,3393369.0,949392.0,106.035156,3038010128,244121.0,1997.0,2100.0,Photogrammetric,32.0,...,3038010128,43.732572,"POLYGON ((1012469.24 181742.107, 1012446.631 1...",656.015069,False,0.0,POINT (1012449.66 181747.497),BK0503,East New York-New Lots,Brooklyn
4,,2019299.0,353927.0,217.175781,2033800084,229537.0,1910.0,2100.0,Photogrammetric,197.0,...,2033800084,63.26829,"POLYGON ((1020199.824 267310.273, 1020201.035 ...",1334.18395,False,0.0,POINT (1020192.208 267285.288),BX1203,Wakefield-Woodlawn,Bronx
5,,5052578.0,281098.0,240.582031,5036360058,246429.0,1960.0,2100.0,Photogrammetric,44.0,...,5036360058,69.030911,"POLYGON ((952462.054 148588.137, 952441.334 14...",1492.459367,False,0.0,POINT (952434.647 148591.889),SI0202,New Dorp-Midland Beach,Staten Island
6,,3083803.0,419689.0,98.660156,3037200028,808886.0,1901.0,2100.0,Photogrammetric,44.0,...,3037200028,43.248158,"POLYGON ((1013026.308 184325.289, 1012989.42 1...",610.216125,False,0.0,POINT (1013006.311 184329.664),BK0502,East New York (North),Brooklyn
7,,3100941.0,533049.0,588.09375,3046410004,370632.0,1931.0,2100.0,Photogrammetric,34.0,...,3046410004,107.406265,"POLYGON ((1004596.249 178502.966, 1004590.431 ...",3639.355961,False,0.0,POINT (1004613.443 178526.419),BK1703,East Flatbush-Rugby,Brooklyn
8,,2117607.0,458160.0,102.941406,2027640025,80512.0,,2100.0,Other (Manual),25.0,...,2027640025,47.428753,"POLYGON ((1016582.739 236282.258, 1016574.046 ...",634.043292,False,0.0,POINT (1016585.368 236305.505),BX0201,Hunts Point,Bronx
9,,5007740.0,649423.0,232.261719,5002870016,232182.0,1955.0,2100.0,Photogrammetric,220.0,...,5002870016,63.023228,"POLYGON ((954298.282 166992.865, 954260.635 16...",1438.693395,False,0.0,POINT (954283.055 167005.521),SI0104,West New Brighton-Silver Lake-Grymes Hill,Staten Island


In [13]:
import numpy as np

area_col = "geom_area_sqft"
nta_keys = ["NTACode", "NTAName", "boroname"]

# One row per NTA with building and footprint totals. dropna=False keeps buildings
# that matched no NTA as their own (NaN-keyed) group instead of dropping them.
nta_flood = (
    bld_pts_nta
    .groupby(nta_keys, dropna=False)
    .agg(**{
        "building_count": ("objectid", "size"),
        "flooded_building_count": ("is_flooded", "sum"),
        "total_footprint_sqft": (area_col, "sum"),
        "flooded_footprint_sqft": ("flooded_footprint_sqft", "sum"),
    })
    .reset_index()
)

# Shares are left as NaN when the denominator is not positive.
n_buildings = nta_flood["building_count"]
footprint_total = nta_flood["total_footprint_sqft"]

nta_flood["pct_buildings_flooded"] = np.where(
    n_buildings > 0,
    nta_flood["flooded_building_count"] / n_buildings,
    np.nan,
)
nta_flood["pct_footprint_flooded"] = np.where(
    footprint_total > 0,
    nta_flood["flooded_footprint_sqft"] / footprint_total,
    np.nan,
)

# The 15 NTAs with the largest flooded share of building footprint.
nta_flood.sort_values("pct_footprint_flooded", ascending=False).head(15)


Unnamed: 0,NTACode,NTAName,boroname,building_count,flooded_building_count,total_footprint_sqft,flooded_footprint_sqft,pct_buildings_flooded,pct_footprint_flooded
168,QN0271,Calvary & Mount Zion Cemeteries,Queens,21,10,270323.7,195960.3,0.47619,0.72491
161,QN0161,Sunnyside Yards (North),Queens,50,9,1356577.0,419818.6,0.18,0.309469
156,QN0102,Old Astoria-Hallets Point,Queens,1613,385,3268248.0,841294.2,0.238686,0.257414
257,SI9592,Miller Field,Staten Island,27,4,226616.1,57661.28,0.148148,0.254445
126,MN0401,Chelsea-Hudson Yards,Manhattan,2075,178,13187830.0,2619943.0,0.085783,0.198664
109,BX1161,Hutchinson Metro Center,Bronx,123,7,1833399.0,345546.5,0.056911,0.188473
130,MN0601,Stuyvesant Town-Peter Cooper Village,Manhattan,80,17,1137596.0,202324.7,0.2125,0.177853
2,BK0103,South Williamsburg,Brooklyn,2294,221,5418461.0,900796.0,0.096338,0.166246
224,QN1306,Springfield Gardens (South)-Brookville,Queens,5514,571,8330722.0,1122584.0,0.103555,0.134752
182,QN0601,Rego Park,Queens,3726,39,5563253.0,720711.5,0.010467,0.129549


In [14]:
# Flood metrics to attach to the per-NTA density table.
flood_cols = [
    "NTACode",
    "flooded_building_count",
    "flooded_footprint_sqft",
    "pct_buildings_flooded",
    "pct_footprint_flooded",
]

# validate="1:1" makes the merge fail loudly if NTACode is duplicated on either side,
# rather than silently duplicating NTA rows (the nta_viz merge further down applies
# the same check).
nta_risk = nta_density.merge(
    nta_flood[flood_cols],
    on="NTACode",
    how="left",
    validate="1:1",
)

# Risk score = built-area ratio x flooded footprint share.
# Missing inputs are treated as 0 here, so NTAs without flood metrics score 0 rather
# than NaN (the nta_viz risk_score computed later does not fill missing values).
nta_risk["risk_score"] = (
    nta_risk["built_area_ratio"].fillna(0) *
    nta_risk["pct_footprint_flooded"].fillna(0)
)

# Top 20 NTAs by risk score, with only the columns needed to read the ranking.
risk_cols = ["NTACode", "NTAName", "boroname", "built_area_ratio", "pct_footprint_flooded", "risk_score"]
nta_risk.sort_values("risk_score", ascending=False).head(20)[risk_cols]


Unnamed: 0,NTACode,NTAName,boroname,built_area_ratio,pct_footprint_flooded,risk_score
162,QN0161,Sunnyside Yards (North),Queens,0.301443,0.309469,0.093287
127,MN0401,Chelsea-Hudson Yards,Manhattan,0.444458,0.198664,0.088298
157,QN0102,Old Astoria-Hallets Point,Queens,0.294674,0.257414,0.075853
2,BK0103,South Williamsburg,Brooklyn,0.356268,0.166246,0.059228
215,QN1201,Jamaica,Queens,0.3134,0.116817,0.03661
131,MN0601,Stuyvesant Town-Peter Cooper Village,Manhattan,0.205679,0.177853,0.036581
183,QN0601,Rego Park,Queens,0.275499,0.129549,0.035691
84,BX0502,Mount Hope,Bronx,0.351019,0.097271,0.034144
173,QN0401,Elmhurst,Queens,0.328939,0.098904,0.032533
123,MN0203,West Village,Manhattan,0.416286,0.073578,0.03063


In [15]:
# Preview the merged density + flood table, including risk_score.
nta_risk.head(10)

Unnamed: 0,NTACode,NTAName,boroname,geometry,nta_area_sqft,nta_area_sqmi,building_count,total_footprint_sqft,median_building_sqft,built_area_ratio,buildings_per_sqmi,built_sqft_per_sqmi,flooded_building_count,flooded_footprint_sqft,pct_buildings_flooded,pct_footprint_flooded,risk_score
0,BK0101,Greenpoint,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xdb\x01...,35321740.0,1.266993,5147.0,13400800.0,1248.021588,0.379392,4062.373817,10576850.0,174.0,681959.369194,0.033813,0.050902,0.019312
1,BK0102,Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x16\x01...,28852800.0,1.034952,3646.0,10377330.0,1334.824761,0.359665,3522.869432,10026870.0,106.0,842069.864516,0.029073,0.081145,0.029185
2,BK0103,South Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00f\x00\x0...,15208960.0,0.545546,2294.0,5418461.0,1408.694795,0.356268,4204.958583,9932172.0,221.0,900796.021973,0.096338,0.166246,0.059228
3,BK0104,East Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xa8\x02...,52267470.0,1.874838,4939.0,17450020.0,1354.539352,0.33386,2634.361536,9307485.0,91.0,736213.68624,0.018436,0.042365,0.014144
4,BK0201,Brooklyn Heights,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00~\x00\x0...,9982088.0,0.358058,1492.0,3913004.0,1358.771123,0.392003,4166.92114,10928400.0,0.0,0.0,0.0,0.0,0.0
5,BK0202,Downtown Brooklyn-DUMBO-Boerum Hill,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00*\x01\x0...,23731480.0,0.85125,2000.0,8258165.0,1152.637541,0.347984,2349.487226,9701226.0,7.0,107214.617391,0.003502,0.012984,0.004518
6,BK0203,Fort Greene,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x90\x00...,17533690.0,0.628935,2130.0,5244622.0,994.499027,0.299117,3386.680069,8338900.0,11.0,109686.611062,0.005164,0.020914,0.006256
7,BK0204,Clinton Hill,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00T\x00\x0...,14566610.0,0.522505,2403.0,4768999.0,947.440147,0.327392,4598.996563,9127179.0,4.0,7765.822371,0.001665,0.001628,0.000533
8,BK0261,Brooklyn Navy Yard,Brooklyn,b'\x01\x06\x00\x00\x00\x02\x00\x00\x00\x01\x03...,10106870.0,0.362534,133.0,2373427.0,3920.259287,0.234833,366.861926,6546766.0,0.0,0.0,0.0,0.0,0.0
9,BK0301,Bedford-Stuyvesant (West),Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00r\x00\x0...,36906660.0,1.323844,7547.0,13014540.0,969.488934,0.352634,5700.82188,9830872.0,243.0,825168.625932,0.032198,0.063404,0.022358


In [16]:
# Persist the buildings with their joined NTA attributes.
# NOTE(review): at this point the active geometry column is "centroid" (set before the
# spatial join) and the polygons live in "geometry"; the construction-year cohort columns
# are only added in a later cell, so they are not part of this file. Confirm both are intended.
with_nta_path = os.path.join(DATA_PROCESSED, "building_current_2263_with_nta.parquet")
bld_pts_nta.to_parquet(with_nta_path, index=False)

In [17]:


# Preview the polygon-geometry buildings frame (it does not carry the NTA columns).
buildings.head(10)

Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,height_roo,date_last_,time_last_,last_statu,mappluto_b,shape_leng,geometry,geom_area_sqft,is_flooded,flooded_footprint_sqft
0,,4451699.0,321944.0,177.746094,4075320028,507357.0,1950.0,2100.0,Other (Manual),93.0,27.0,2017-08-22,19:18:38.000,Constructed,4075327501,59.004939,"POLYGON ((1052359.953 214149.188, 1052400.668 ...",1096.759439,False,0.0
1,,4558952.0,255026.0,34.742188,4105630045,137879.0,1930.0,5110.0,Photogrammetric,72.0,13.06,2017-08-17,16:20:43.000,Constructed,4105630045,24.548387,"POLYGON ((1052764.922 201306.381, 1052748.182 ...",214.631781,False,0.0
2,,3176483.0,759005.0,180.890625,3066450044,982953.0,1915.0,2100.0,Photogrammetric,18.0,36.761589,2017-08-22,15:37:34.000,Constructed,3066450044,61.475641,"POLYGON ((988769.316 159090.345, 988746.041 15...",1121.202926,False,0.0
3,,3393369.0,949392.0,106.035156,3038010128,244121.0,1997.0,2100.0,Photogrammetric,32.0,21.95,2017-08-22,15:31:10.000,Constructed,3038010128,43.732572,"POLYGON ((1012469.24 181742.107, 1012446.631 1...",656.015069,False,0.0
4,,2019299.0,353927.0,217.175781,2033800084,229537.0,1910.0,2100.0,Photogrammetric,197.0,33.49,2017-08-22,18:57:18.000,Constructed,2033800084,63.26829,"POLYGON ((1020199.824 267310.273, 1020201.035 ...",1334.18395,False,0.0
5,,5052578.0,281098.0,240.582031,5036360058,246429.0,1960.0,2100.0,Photogrammetric,44.0,21.13,2017-08-22,18:56:39.000,Constructed,5036360058,69.030911,"POLYGON ((952462.054 148588.137, 952441.334 14...",1492.459367,False,0.0
6,,3083803.0,419689.0,98.660156,3037200028,808886.0,1901.0,2100.0,Photogrammetric,44.0,30.11,2017-08-22,15:51:01.000,Constructed,3037200028,43.248158,"POLYGON ((1013026.308 184325.289, 1012989.42 1...",610.216125,False,0.0
7,,3100941.0,533049.0,588.09375,3046410004,370632.0,1931.0,2100.0,Photogrammetric,34.0,14.31,2017-08-22,15:46:44.000,Constructed,3046410004,107.406265,"POLYGON ((1004596.249 178502.966, 1004590.431 ...",3639.355961,False,0.0
8,,2117607.0,458160.0,102.941406,2027640025,80512.0,,2100.0,Other (Manual),25.0,11.0,2017-08-10,15:05:25.000,Constructed,2027640025,47.428753,"POLYGON ((1016582.739 236282.258, 1016574.046 ...",634.043292,False,0.0
9,,5007740.0,649423.0,232.261719,5002870016,232182.0,1955.0,2100.0,Photogrammetric,220.0,17.96,2017-08-22,17:36:42.000,Constructed,5002870016,63.023228,"POLYGON ((954298.282 166992.865, 954260.635 16...",1438.693395,False,0.0


In [18]:
import pandas as pd

# Earliest construction year (inclusive) for each "recently built" cohort.
# Named here so the thresholds can be changed in one place.
BUILT_LAST_10YRS_MIN_YEAR = 2015
BUILT_LAST_20YRS_MIN_YEAR = 2005

# `constructi` is the year-built field, stored as float. Coerce it to a nullable
# integer so unparseable/missing years become <NA> instead of raising.
bld_pts_nta = bld_pts_nta.copy()

bld_pts_nta["construction_year"] = (
    pd.to_numeric(bld_pts_nta["constructi"], errors="coerce")
    .round()
    .astype("Int64")
)

# Cohort flags; comparisons on a nullable integer yield <NA> where the year is unknown.
bld_pts_nta["built_last_10yrs"] = bld_pts_nta["construction_year"] >= BUILT_LAST_10YRS_MIN_YEAR
bld_pts_nta["built_last_20yrs"] = bld_pts_nta["construction_year"] >= BUILT_LAST_20YRS_MIN_YEAR

# Quick QA: raw value next to the derived year and flags.
bld_pts_nta[["constructi", "construction_year", "built_last_10yrs", "built_last_20yrs"]].head(10)


Unnamed: 0,constructi,construction_year,built_last_10yrs,built_last_20yrs
0,1950.0,1950.0,False,False
1,1930.0,1930.0,False,False
2,1915.0,1915.0,False,False
3,1997.0,1997.0,False,False
4,1910.0,1910.0,False,False
5,1960.0,1960.0,False,False
6,1901.0,1901.0,False,False
7,1931.0,1931.0,False,False
8,,,,
9,1955.0,1955.0,False,False


In [19]:
# Fraction of buildings with no usable construction year.
bld_pts_nta["construction_year"].isna().mean()


np.float64(0.009344422294018738)

In [21]:
# Preview the centroid-joined frame with the new construction-year cohort columns.
bld_pts_nta.head(10)

Unnamed: 0,name,bin,doitt_id,shape_area,base_bbl,objectid,constructi,feature_co,geom_sourc,ground_ele,...,geom_area_sqft,is_flooded,flooded_footprint_sqft,centroid,NTACode,NTAName,boroname,construction_year,built_last_10yrs,built_last_20yrs
0,,4451699.0,321944.0,177.746094,4075320028,507357.0,1950.0,2100.0,Other (Manual),93.0,...,1096.759439,False,0.0,POINT (1052386.895 214157.488),QN1102,Bayside,Queens,1950.0,False,False
1,,4558952.0,255026.0,34.742188,4105630045,137879.0,1930.0,5110.0,Photogrammetric,72.0,...,214.631781,False,0.0,POINT (1052753.726 201306.083),QN1303,Queens Village,Queens,1930.0,False,False
2,,3176483.0,759005.0,180.890625,3066450044,982953.0,1915.0,2100.0,Photogrammetric,18.0,...,1121.202926,False,0.0,POINT (988739.603 159096.157),BK1103,Gravesend (West),Brooklyn,1915.0,False,False
3,,3393369.0,949392.0,106.035156,3038010128,244121.0,1997.0,2100.0,Photogrammetric,32.0,...,656.015069,False,0.0,POINT (1012449.66 181747.497),BK0503,East New York-New Lots,Brooklyn,1997.0,False,False
4,,2019299.0,353927.0,217.175781,2033800084,229537.0,1910.0,2100.0,Photogrammetric,197.0,...,1334.18395,False,0.0,POINT (1020192.208 267285.288),BX1203,Wakefield-Woodlawn,Bronx,1910.0,False,False
5,,5052578.0,281098.0,240.582031,5036360058,246429.0,1960.0,2100.0,Photogrammetric,44.0,...,1492.459367,False,0.0,POINT (952434.647 148591.889),SI0202,New Dorp-Midland Beach,Staten Island,1960.0,False,False
6,,3083803.0,419689.0,98.660156,3037200028,808886.0,1901.0,2100.0,Photogrammetric,44.0,...,610.216125,False,0.0,POINT (1013006.311 184329.664),BK0502,East New York (North),Brooklyn,1901.0,False,False
7,,3100941.0,533049.0,588.09375,3046410004,370632.0,1931.0,2100.0,Photogrammetric,34.0,...,3639.355961,False,0.0,POINT (1004613.443 178526.419),BK1703,East Flatbush-Rugby,Brooklyn,1931.0,False,False
8,,2117607.0,458160.0,102.941406,2027640025,80512.0,,2100.0,Other (Manual),25.0,...,634.043292,False,0.0,POINT (1016585.368 236305.505),BX0201,Hunts Point,Bronx,,,
9,,5007740.0,649423.0,232.261719,5002870016,232182.0,1955.0,2100.0,Photogrammetric,220.0,...,1438.693395,False,0.0,POINT (954283.055 167005.521),SI0104,West New Brighton-Silver Lake-Grymes Hill,Staten Island,1955.0,False,False


In [22]:

# Attributes derived on the centroid frame that should travel back to the polygon frame.
nta_attr_cols = [
    "objectid",
    "NTACode",
    "NTAName",
    "boroname",
    "construction_year",
    "built_last_10yrs",
    "built_last_20yrs",
]
nta_attrs = bld_pts_nta[nta_attr_cols].copy()

# Attach them by objectid; validate="1:1" raises if objectid is duplicated on either side.
buildings_enriched = buildings.merge(
    nta_attrs,
    how="left",
    on="objectid",
    validate="1:1",
)



In [23]:
# QA after the merge: geometry types of the enriched frame, then a peek at the NTA columns.
# Only the last expression of a cell is displayed automatically, so the geometry-type
# tally has to be printed explicitly or it is computed and thrown away.
print(buildings_enriched.geometry.geom_type.value_counts())
buildings_enriched[["NTACode", "NTAName", "boroname"]].head()



Unnamed: 0,NTACode,NTAName,boroname
0,QN1102,Bayside,Queens
1,QN1303,Queens Village,Queens
2,BK1103,Gravesend (West),Brooklyn
3,BK0503,East New York-New Lots,Brooklyn
4,BX1203,Wakefield-Woodlawn,Bronx


In [24]:
# Slim building table for visualisation: identifier, polygon, NTA labels,
# flood metrics and construction-year cohorts.
viz_cols = [
    "objectid",
    "geometry",
    "NTACode",
    "NTAName",
    "boroname",
    "geom_area_sqft",
    "is_flooded",
    "flooded_footprint_sqft",
    "construction_year",
    "built_last_10yrs",
    "built_last_20yrs",
]
viz_buildings = buildings_enriched[viz_cols].copy()

viz_buildings_path = os.path.join(DATA_PROCESSED, "buildings_viz.parquet")
viz_buildings.to_parquet(viz_buildings_path, index=False)

print("Saved buildings_viz.parquet:", viz_buildings.shape)


Saved buildings_viz.parquet: (1082999, 11)


In [25]:
import numpy as np

b = buildings_enriched.copy()
area_col = "geom_area_sqft"
nta_group_cols = ["NTACode", "NTAName", "boroname"]

# Per-NTA totals over all buildings; dropna=False keeps buildings without an NTA
# as their own (NaN-keyed) group.
nta_flood = (
    b.groupby(nta_group_cols, dropna=False)
    .agg(**{
        "building_count": ("objectid", "size"),
        "flooded_building_count": ("is_flooded", "sum"),
        "total_footprint_sqft": (area_col, "sum"),
        "flooded_footprint_sqft": ("flooded_footprint_sqft", "sum"),
    })
    .reset_index()
)

# Flooded shares by building count and by footprint; NaN when the denominator is not positive.
count_total = nta_flood["building_count"]
sqft_total = nta_flood["total_footprint_sqft"]

nta_flood["pct_buildings_flooded"] = np.where(
    count_total > 0,
    nta_flood["flooded_building_count"] / count_total,
    np.nan,
)
nta_flood["pct_footprint_flooded"] = np.where(
    sqft_total > 0,
    nta_flood["flooded_footprint_sqft"] / sqft_total,
    np.nan,
)

nta_flood.head()


Unnamed: 0,NTACode,NTAName,boroname,building_count,flooded_building_count,total_footprint_sqft,flooded_footprint_sqft,pct_buildings_flooded,pct_footprint_flooded
0,BK0101,Greenpoint,Brooklyn,5146,174,13397600.0,681959.369194,0.033813,0.050902
1,BK0102,Williamsburg,Brooklyn,3646,106,10377330.0,842069.864516,0.029073,0.081145
2,BK0103,South Williamsburg,Brooklyn,2294,221,5418461.0,900796.021973,0.096338,0.166246
3,BK0104,East Williamsburg,Brooklyn,4936,91,17377710.0,736213.68624,0.018436,0.042365
4,BK0201,Brooklyn Heights,Brooklyn,1490,0,3907187.0,0.0,0.0,0.0


In [26]:
def flood_metrics_for_subset(df, label):
    """Compute per-NTA flooded shares for a subset of buildings.

    Parameters
    ----------
    df : DataFrame
        Building rows with the columns NTACode, NTAName, boroname, objectid,
        is_flooded, geom_area_sqft and flooded_footprint_sqft.
    label : str
        Suffix appended to the two output metric column names.

    Returns
    -------
    DataFrame
        One row per (NTACode, NTAName, boroname) group present in ``df`` (rows
        with missing keys are kept as their own group), with the columns
        ``pct_bld_flooded_<label>`` and ``pct_sqft_flooded_<label>``. A share is
        NaN when its denominator is not positive.
    """
    keys = ["NTACode", "NTAName", "boroname"]
    bld_share_col = f"pct_bld_flooded_{label}"
    sqft_share_col = f"pct_sqft_flooded_{label}"

    totals = (
        df.groupby(keys, dropna=False)
        .agg(
            bld_count=("objectid", "size"),
            bld_flooded=("is_flooded", "sum"),
            sqft_total=("geom_area_sqft", "sum"),
            sqft_flooded=("flooded_footprint_sqft", "sum"),
        )
        .reset_index()
    )

    has_buildings = totals["bld_count"] > 0
    has_area = totals["sqft_total"] > 0
    totals[bld_share_col] = np.where(
        has_buildings, totals["bld_flooded"] / totals["bld_count"], np.nan
    )
    totals[sqft_share_col] = np.where(
        has_area, totals["sqft_flooded"] / totals["sqft_total"], np.nan
    )
    return totals[keys + [bld_share_col, sqft_share_col]]

# The cohort flags are nullable booleans (<NA> when the construction year is unknown).
# Fill <NA> with False explicitly so unknown-year buildings are excluded on purpose,
# instead of relying on `== True` and the implicit NA handling of boolean indexing.
is_last10 = b["built_last_10yrs"].fillna(False)
is_last20 = b["built_last_20yrs"].fillna(False)

recent10 = flood_metrics_for_subset(b[is_last10], "last10")
recent20 = flood_metrics_for_subset(b[is_last20], "last20")


In [27]:
# Start from the density table and attach the all-buildings flood metrics.
flood_metric_cols = [
    "NTACode", "pct_buildings_flooded", "pct_footprint_flooded",
    "flooded_building_count", "flooded_footprint_sqft",
]
nta_viz = nta_density.merge(
    nta_flood[flood_metric_cols],
    on="NTACode",
    how="left",
    validate="1:1",
)

# Then attach the recent-construction cohort metrics, one cohort table at a time.
# NTAs absent from a cohort table get NaN for that cohort's columns (left join).
for cohort_metrics, cohort_label in ((recent10, "last10"), (recent20, "last20")):
    cohort_cols = [
        "NTACode",
        f"pct_bld_flooded_{cohort_label}",
        f"pct_sqft_flooded_{cohort_label}",
    ]
    nta_viz = nta_viz.merge(
        cohort_metrics[cohort_cols],
        on="NTACode",
        how="left",
        validate="1:1",
    )

# Risk score = built-area ratio x flooded footprint share; NaN if either input is missing.
nta_viz["risk_score"] = nta_viz["built_area_ratio"] * nta_viz["pct_footprint_flooded"]

nta_viz.sort_values("risk_score", ascending=False).head(10)


Unnamed: 0,NTACode,NTAName,boroname,geometry,nta_area_sqft,nta_area_sqmi,building_count,total_footprint_sqft,median_building_sqft,built_area_ratio,...,built_sqft_per_sqmi,pct_buildings_flooded,pct_footprint_flooded,flooded_building_count,flooded_footprint_sqft,pct_bld_flooded_last10,pct_sqft_flooded_last10,pct_bld_flooded_last20,pct_sqft_flooded_last20,risk_score
162,QN0161,Sunnyside Yards (North),Queens,b'\x01\x03\x00\x00\x00\x01\x00\x00\x002\x00\x0...,4500282.0,0.161425,50.0,1356577.0,13113.628467,0.301443,...,8403740.0,0.18,0.309469,9.0,419818.6,0.0,0.0,0.0,0.0,0.093287
127,MN0401,Chelsea-Hudson Yards,Manhattan,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00t\x01\x0...,29671750.0,1.064328,2075.0,13187830.0,2007.850335,0.444458,...,12390760.0,0.085783,0.198664,178.0,2619943.0,0.24,0.278196,0.232143,0.291166,0.088298
157,QN0102,Old Astoria-Hallets Point,Queens,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00U\x00\x0...,11091070.0,0.397837,1613.0,3268248.0,1023.525919,0.294674,...,8215037.0,0.238686,0.257414,385.0,841294.2,0.28169,0.282242,0.29927,0.299446,0.075853
2,BK0103,South Williamsburg,Brooklyn,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00f\x00\x0...,15208960.0,0.545546,2294.0,5418461.0,1408.694795,0.356268,...,9932172.0,0.096338,0.166246,221.0,900796.0,0.380435,0.599015,0.377778,0.541016,0.059228
215,QN1201,Jamaica,Queens,b'\x01\x03\x00\x00\x00\x01\x00\x00\x007\x01\x0...,46828850.0,1.679754,7135.0,14676160.0,946.716756,0.3134,...,8737091.0,0.055501,0.116817,396.0,1714426.0,0.055556,0.0904,0.05163,0.12453,0.03661
131,MN0601,Stuyvesant Town-Peter Cooper Village,Manhattan,b'\x01\x06\x00\x00\x00\x02\x00\x00\x00\x01\x03...,5575562.0,0.199996,82.0,1146778.0,9269.694112,0.205679,...,5734009.0,0.2125,0.177853,17.0,202324.7,0.0,0.0,0.0,0.0,0.036581
183,QN0601,Rego Park,Queens,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x80\x00...,20193350.0,0.724337,3726.0,5563253.0,816.253716,0.275499,...,7680477.0,0.010467,0.129549,39.0,720711.5,0.130435,0.722847,0.081633,0.481945,0.035691
84,BX0502,Mount Hope,Bronx,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00Q\x00\x0...,12681770.0,0.454896,1139.0,4451537.0,1711.10936,0.351019,...,9785840.0,0.062335,0.097271,71.0,433005.2,0.061224,0.194435,0.053333,0.171443,0.034144
173,QN0401,Elmhurst,Queens,b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\xca\x00...,42102140.0,1.510207,9060.0,13849030.0,989.62804,0.328939,...,9170291.0,0.036534,0.098904,331.0,1369721.0,0.123288,0.180947,0.077922,0.152092,0.032533
123,MN0203,West Village,Manhattan,b'\x01\x03\x00\x00\x00\x02\x00\x00\x00\xa3\x00...,14416860.0,0.517134,2286.0,6001532.0,1266.852671,0.416286,...,11605380.0,0.037183,0.073578,85.0,441581.8,0.095238,0.084907,0.117647,0.179436,0.03063


In [28]:
# Write the NTA-level visualisation table and report its (rows, columns) shape.
nta_viz_path = os.path.join(DATA_PROCESSED, "nta_viz.parquet")
nta_viz.to_parquet(nta_viz_path, index=False)

print("Saved nta_viz.parquet:", nta_viz.shape)


Saved nta_viz.parquet: (262, 21)
