In [None]:
!pip install geopandas osmnx folium shapely pyproj

Collecting osmnx
  Downloading osmnx-2.0.7-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-2.0.7-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.5/101.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: osmnx
Successfully installed osmnx-2.0.7


In [None]:
import pandas as pd
import geopandas as gpd
import osmnx as ox
import folium
from shapely.geometry import Point

In [None]:
# Read the three raw CSV inputs in one pass, in this order:
#   cooling -> Cool It! NYC 2020 cooling sites
#   acs     -> ACS table B11007 export (elderly population)
#   trees   -> 2015 Street Tree Census
cooling, acs, trees = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/Cool_It!_NYC_2020_-_Cooling_Sites_20260202.csv",
        "/content/ACSDT5YAIAN2021.B11007-2026-02-02T125333.csv",
        "/content/2015_Street_Tree_Census_-_Tree_Data_20260202.csv",
    )
)


In [None]:
# Canonicalise the headers (trim surrounding whitespace, lower-case) so later
# cells can refer to columns by a predictable name, then show the result.
cooling.columns = cooling.columns.map(lambda header: header.strip().lower())
cooling.columns

Index(['status', 'featuretype', 'propertyname', 'subpropertyname',
       'gispropnum', 'omppropid', 'borough', 'district', 'x', 'y'],
      dtype='object')

In [None]:
# Strip thousands separators from the coordinate columns and convert to float.
# Casting to str first makes the cell safe to re-run: once the columns are
# already float (or if pandas parsed them as numeric), the `.str` accessor
# alone would raise AttributeError.
for coord_col in ('x', 'y'):
    cooling[coord_col] = (
        cooling[coord_col]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )


In [None]:
# Build point geometries from the x/y columns. The coordinates are tagged as
# EPSG:2263 (NYC projected coordinates), not longitude/latitude.
cooling_gdf = gpd.GeoDataFrame(
    cooling,
    crs="EPSG:2263",
    geometry=gpd.points_from_xy(x=cooling['x'], y=cooling['y']),
)

In [None]:
# Reproject the cooling sites to EPSG:4326 (lon/lat), which the folium map
# cells further down read via geometry.x / geometry.y.
cooling_gdf = cooling_gdf.to_crs("EPSG:4326")

“The cooling-site coordinates were provided as New York State Plane coordinates (EPSG:2263). They were cleaned of thousands separators, loaded with that projected CRS, and then reprojected to WGS 84 (EPSG:4326) for mapping and spatial analysis.”


In [None]:
# Headers: trim whitespace and lower-case so lookups are case-insensitive.
trees.columns = trees.columns.str.strip().str.lower()

# Keep only rows that have both a latitude and a longitude.
trees = trees.loc[trees[['latitude', 'longitude']].notna().all(axis=1)]

# Point geometry from lon/lat (x = longitude, y = latitude), tagged EPSG:4326.
trees_gdf = gpd.GeoDataFrame(
    trees,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(x=trees['longitude'], y=trees['latitude']),
)

In [None]:
# Find the census-tract column by name (any header containing 'tract').
# Fail with an explicit message instead of an opaque IndexError when the
# expected column is missing.
tract_candidates = [c for c in trees_gdf.columns if 'tract' in c]
if not tract_candidates:
    raise KeyError(
        "No column containing 'tract' found in trees_gdf; "
        f"available columns: {list(trees_gdf.columns)}"
    )
# First match wins (same choice the positional [0] lookup made).
tract_col = tract_candidates[0]

# Count trees per tract and give the key the name used by the later merge.
# NOTE(review): tract numbers are presumably only unique within a borough /
# county, so grouping on the bare tract number may pool trees from different
# boroughs - confirm whether a borough-qualified key should be used instead.
tree_density = (
    trees_gdf
    .groupby(tract_col)
    .size()
    .reset_index(name="tree_count")
    .rename(columns={tract_col: 'censustract_2010'})
)
print(f"Found and used census tract column: {tract_col}")

Found and used census tract column: census tract


In [None]:
# Rename by position: first column -> GEOID, last column -> elderly_population,
# then force GEOID to string.
# NOTE(review): the preview further down shows GEOID holding row labels such
# as "Total:", so the first column of this export may not be a geography
# identifier - confirm the layout of the ACS file.
acs = acs.rename(
    columns=dict(zip(acs.columns[[0, -1]], ["GEOID", "elderly_population"]))
)
acs = acs.astype({'GEOID': str})

In [None]:
# Normalise tract IDs to zero-padded 6-character strings:
#   1. cast to str (the column may be numeric or text),
#   2. drop thousands separators,
#   3. drop a trailing ".0" left by float formatting - anchored to the end of
#      the string so a ".0" in the middle of a value is never removed,
#   4. left-pad with zeros to width 6.
tree_density['censustract_2010'] = (
    tree_density['censustract_2010']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.replace(r'\.0$', '', regex=True)
    .str.zfill(6)
)

print("Sample of cleaned tree tract IDs:", tree_density['censustract_2010'].head().tolist())

Sample of cleaned tree tract IDs: ['000001', '01,001', '01,002', '01,004', '01,006']


In [None]:
# Make sure the GEOID / elderly_population columns exist (positional rename of
# the first and last columns) so this cell also works when run on its own.
if 'GEOID' not in acs.columns:
    acs = acs.rename(columns={acs.columns[0]: 'GEOID', acs.columns[-1]: 'elderly_population'})

# The join key is the last 6 characters of GEOID.
acs['tract_2010'] = acs['GEOID'].astype(str).str[-6:]

# Sanity check: a usable key is exactly six digits. Surface anything else
# here, otherwise the merge on this key silently matches nothing.
is_tract_code = acs['tract_2010'].str.fullmatch(r'\d{6}')
if not is_tract_code.all():
    print(
        f"WARNING: {(~is_tract_code).sum()} of {len(acs)} ACS rows do not end in a "
        "6-digit tract code; check that the first ACS column really holds GEOIDs."
    )
print("Sample ACS tract IDs:", acs['tract_2010'].head().tolist())

In [None]:
# Left-join tree counts onto the ACS rows via the 6-character tract keys;
# every ACS row is kept whether or not a tree record matches.
analysis_df = pd.merge(
    acs,
    tree_density,
    how='left',
    left_on='tract_2010',
    right_on='censustract_2010',
)

# Unmatched rows get a tree count of 0; then report how the join went.
analysis_df['tree_count'] = analysis_df['tree_count'].fillna(0)
print(f"Merged data shape: {analysis_df.shape}")
print(f"Rows with successful tree matches: {(analysis_df['tree_count'] > 0).sum()}")

Merged data shape: (11, 6)
Rows with successful tree matches: 0


In [None]:
# Treat a missing tree count as zero trees (safe to re-run: no-op once filled).
analysis_df['tree_count'] = analysis_df['tree_count'].fillna(value=0)

In [None]:
# Preview the join keys next to the merged tree counts.
analysis_df.loc[:, ['GEOID', 'tract_2010', 'tree_count']].head()

Unnamed: 0,GEOID,tract_2010,tree_count
0,Total:,Total:,0.0
1,Households with one or more people 65 year...,over:,0.0
2,1-person household,sehold,0.0
3,2-or-more-person household:,ehold:,0.0
4,Family households,eholds,0.0


In [None]:
cooling_projected = cooling_gdf.to_crs(epsg=2263)  # NYC CRS
# EPSG:2263 measures distance in US survey feet (1 ft = 1200/3937 m), so an
# 800 m radius has to be converted to feet; buffer(800) would be only ~244 m.
cooling_projected['buffer'] = cooling_projected.geometry.buffer(800 * 3937 / 1200)

In [None]:
# Stand-alone GeoDataFrame holding only the buffer polygons, carrying the same
# CRS as the projected cooling sites they were derived from.
cooling_buffers = gpd.GeoDataFrame(
    geometry=cooling_projected['buffer'].copy(),
    crs=cooling_projected.crs,
)

In [None]:
# Heat-risk index = weighted sum of two percentile ranks:
#   60% - elderly population (more elderly residents -> higher rank)
#   40% - inverse tree count, 1 / (tree_count + 1) (fewer trees -> higher rank;
#         the +1 avoids division by zero)
analysis_df = analysis_df.assign(
    heat_risk_index=lambda frame: (
        0.6 * frame['elderly_population'].rank(pct=True)
        + 0.4 * (1 / (frame['tree_count'] + 1)).rank(pct=True)
    )
)

In [None]:
# Priority areas: rows whose heat-risk index is strictly above the 75th
# percentile of the index.
priority_areas = analysis_df.loc[
    analysis_df['heat_risk_index'].gt(analysis_df['heat_risk_index'].quantile(0.75))
]

In [None]:
# Base map centred on the given NYC coordinates.
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

# One small blue marker per cooling site; folium expects [lat, lon], i.e.
# [geometry.y, geometry.x].
for lat, lon in zip(cooling_gdf.geometry.y, cooling_gdf.geometry.x):
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color='blue',
        fill=True,
    )
    marker.add_to(m)

m

In [None]:
# ~10 min walk = 800 m. EPSG:2263 measures distance in US survey feet
# (1 ft = 1200/3937 m), so the radius is converted before buffering;
# buffer(800) in that CRS would be only ~244 m.
WALK_RADIUS_FT = 800 * 3937 / 1200

# Site points in lon/lat, as before.
cooling_buffer = cooling_gdf.to_crs(epsg=4326)

# Buffer in the projected CRS, then reproject the polygons themselves to
# lon/lat. GeoDataFrame.to_crs only transforms the *active* geometry column,
# so reprojecting the frame while the points were still active would leave
# the 'buffer' column in EPSG:2263.
cooling_buffer['buffer'] = (
    cooling_gdf.to_crs(epsg=2263)
    .geometry.buffer(WALK_RADIUS_FT)  # ~10 min walk
    .to_crs(epsg=4326)
)
cooling_buffer = cooling_buffer.set_geometry('buffer')
cooling_buffer = cooling_buffer[~cooling_buffer.geometry.isna()]



In [None]:
# Add the walk buffers to the map as a translucent blue layer.
# Only the active (buffer) geometry is sent, explicitly reprojected to
# EPSG:4326: web maps need lon/lat, and the frame's other columns - including
# the leftover Point geometry column - are not needed for styling.
# The trailing ';' suppresses the GeoJson object repr in the cell output.
folium.GeoJson(
    data=cooling_buffer.geometry.to_crs(epsg=4326).__geo_interface__,
    style_function=lambda _feature: {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 1,
        'fillOpacity': 0.15
    },
    name="10-min Walk Buffer"
).add_to(m);

<folium.features.GeoJson at 0x79055a51d940>

“A 10-minute walk-access buffer was generated around public cooling sites using projected coordinates and visualized to identify spatial gaps in cooling infrastructure.”