In [None]:
import os
import pandas as pd
import geopandas as gpd

pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 160)

CRS_NYC = "EPSG:2263"

DATA_RAW = os.path.join("..", "data", "raw")
DATA_PROCESSED = os.path.join("..", "data", "processed")
os.makedirs(DATA_PROCESSED, exist_ok=True)

PATH_BUILDINGS = os.path.join(DATA_RAW, "building_current", "nyc_building_footprints_current.zip")
PATH_NTA = os.path.join(DATA_RAW, "nta", "nyc_nta_2020.zip")

In [2]:
buildings = gpd.read_file(f"zip://{PATH_BUILDINGS}")
buildings = buildings.to_crs(CRS_NYC)

buildings["footprint_sqft"] = buildings.geometry.area

buildings[["footprint_sqft"]].describe()


Unnamed: 0,footprint_sqft
count,1082999.0
mean,1609.37
std,5663.732
min,32.47651
25%,653.9524
50%,959.3253
75%,1323.959
max,1171875.0


In [3]:
nta = gpd.read_file(f"zip://{PATH_NTA}")

nta = nta.rename(columns={
    "nta2020": "NTACode",
    "ntaname": "NTAName"
})

nta = nta[["NTACode", "NTAName", "boroname", "geometry"]]
nta = nta.to_crs(CRS_NYC)

nta["nta_area_sqft"] = nta.geometry.area
nta["nta_area_sqmi"] = nta["nta_area_sqft"] / 27_878_400

nta.head()

Unnamed: 0,NTACode,NTAName,boroname,geometry,nta_area_sqft,nta_area_sqmi
0,BK0101,Greenpoint,Brooklyn,"POLYGON ((1003059.997 204572.025, 1002991.367 ...",35321740.0,1.266993
1,BK0102,Williamsburg,Brooklyn,"POLYGON ((995851.916 203199.332, 995969.193 20...",28852800.0,1.034952
2,BK0103,South Williamsburg,Brooklyn,"POLYGON ((998047.21 196303.325, 998157.901 196...",15208960.0,0.545546
3,BK0104,East Williamsburg,Brooklyn,"POLYGON ((1005302.497 199455.73, 1005307.792 1...",52267470.0,1.874838
4,BK0201,Brooklyn Heights,Brooklyn,"POLYGON ((986367.736 190549.239, 985813.836 19...",9982088.0,0.358058


In [4]:
sample = buildings.sample(10_000, random_state=42)

sample_join = gpd.sjoin(
    sample,
    nta,
    how="left",
    predicate="intersects"
)

sample_join["NTACode"].notna().mean()

np.float64(0.9999000499750125)

In [5]:
buildings_nta = gpd.sjoin(
    buildings,
    nta,
    how="left",
    predicate="intersects"
).drop(columns=["index_right"])

In [6]:
nta_density = (
    buildings_nta
    .groupby(["NTACode", "NTAName", "boroname"])
    .agg(
        building_count=("footprint_sqft", "count"),
        total_footprint_sqft=("footprint_sqft", "sum"),
        median_building_sqft=("footprint_sqft", "median")
    )
    .reset_index()
)

nta_density.head()

Unnamed: 0,NTACode,NTAName,boroname,building_count,total_footprint_sqft,median_building_sqft
0,BK0101,Greenpoint,Brooklyn,5147,13400800.0,1248.021588
1,BK0102,Williamsburg,Brooklyn,3646,10377330.0,1334.824761
2,BK0103,South Williamsburg,Brooklyn,2294,5418461.0,1408.694795
3,BK0104,East Williamsburg,Brooklyn,4939,17450020.0,1354.539352
4,BK0201,Brooklyn Heights,Brooklyn,1492,3913004.0,1358.771123


In [7]:
nta_density = nta.merge(
    nta_density,
    on=["NTACode", "NTAName", "boroname"],
    how="left"
)

nta_density[[
    "building_count",
    "total_footprint_sqft",
    "median_building_sqft"
]] = nta_density[[
    "building_count",
    "total_footprint_sqft",
    "median_building_sqft"
]].fillna(0)

nta_density["built_area_ratio"] = (
    nta_density["total_footprint_sqft"] / nta_density["nta_area_sqft"]
)

nta_density["buildings_per_sqmi"] = (
    nta_density["building_count"] / nta_density["nta_area_sqmi"]
)

nta_density["built_sqft_per_sqmi"] = (
    nta_density["total_footprint_sqft"] / nta_density["nta_area_sqmi"]
)

In [8]:
nta_density["built_area_ratio"].describe()

count    262.000000
mean       0.224841
std        0.134573
min        0.000000
25%        0.145247
50%        0.245855
75%        0.323794
max        0.549023
Name: built_area_ratio, dtype: float64

In [9]:
nta_density.sort_values("built_area_ratio", ascending=False).head(10)

Unnamed: 0,NTACode,NTAName,boroname,geometry,nta_area_sqft,nta_area_sqmi,building_count,total_footprint_sqft,median_building_sqft,built_area_ratio,buildings_per_sqmi,built_sqft_per_sqmi
130,MN0502,Midtown-Times Square,Manhattan,"POLYGON ((991725.244 217725.299, 992169.505 21...",24552540.0,0.880701,1444.0,13479900.0,5031.327741,0.549023,1639.602531,15305870.0
129,MN0501,Midtown South-Flatiron-Union Square,Manhattan,"POLYGON ((989555.974 211702.854, 989420.216 21...",14879020.0,0.533711,1306.0,7998146.0,3405.174031,0.537545,2447.015022,14985900.0
121,MN0201,SoHo-Little Italy-Hudson Square,Manhattan,"POLYGON ((983469.159 204638.902, 983496.09 204...",12916760.0,0.463325,1662.0,6500400.0,2274.76968,0.503253,3587.114384,14029890.0
134,MN0604,East Midtown-Turtle Bay,Manhattan,"POLYGON ((995598.36 215579.024, 995626.716 215...",13138090.0,0.471264,1101.0,6393684.0,2142.036303,0.486652,2336.268575,13567090.0
128,MN0402,Hell's Kitchen,Manhattan,"POLYGON ((985324.423 221001.9, 985817.17 22072...",18382320.0,0.659375,1666.0,8732165.0,1972.715628,0.475031,2526.635671,13243100.0
140,MN0802,Upper East Side-Carnegie Hill,Manhattan,"POLYGON ((998281.392 225347.968, 998145.231 22...",20065360.0,0.719746,2679.0,9498898.0,1908.545717,0.473398,3722.146907,13197570.0
122,MN0202,Greenwich Village,Manhattan,"POLYGON ((987048.27 206851.147, 987057.852 206...",10600460.0,0.380239,1224.0,4996409.0,2088.616231,0.471339,3219.02756,13140180.0
132,MN0602,Gramercy,Manhattan,"POLYGON ((990196.892 207745.371, 990187.613 20...",7526000.0,0.269958,818.0,3486676.0,1996.561202,0.463284,3030.099995,12915620.0
127,MN0401,Chelsea-Hudson Yards,Manhattan,"POLYGON ((983754.619 217001.185, 983857.657 21...",29671750.0,1.064328,2075.0,13187830.0,2007.850335,0.444458,1949.587507,12390760.0
119,MN0102,Tribeca-Civic Center,Manhattan,"POLYGON ((984440.604 200699.422, 984402.913 20...",13578250.0,0.487053,902.0,5706854.0,2960.023311,0.420294,1851.955068,11717110.0


In [10]:
borough_summary = (
    nta_density
    .groupby("boroname", as_index=False)
    .agg(
        avg_built_ratio=("built_area_ratio", "mean"),
        total_building_sqft=("total_footprint_sqft", "sum"),
        total_buildings=("building_count", "sum"),
        nta_count=("NTACode", "nunique")
    )
)

borough_summary

Unnamed: 0,boroname,avg_built_ratio,total_building_sqft,total_buildings,nta_count
0,Bronx,0.212835,231949100.0,104508.0,50
1,Brooklyn,0.253432,522567000.0,330203.0,69
2,Manhattan,0.334825,210242700.0,44981.0,38
3,Queens,0.189133,592520400.0,461139.0,82
4,Staten Island,0.110763,189362400.0,142318.0,23


In [11]:
out_path = os.path.join(DATA_PROCESSED, "nta_construction_density.parquet")
nta_density.to_parquet(out_path, index=False)

print("Saved:", out_path)

Saved: ..\data\processed\nta_construction_density.parquet
