### 311 QUALITY OF LIFE BY TRACTS (SUMMER, 2025)

In [1]:
# Modules.
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
import requests, time
from pathlib import Path

In [2]:
# Paths.
# Everything produced by this notebook lives under data/nyc_311.
nyc_311_dir = Path("data/nyc_311")
nyc_311 = nyc_311_dir / "311_raw"
nyc_panel = nyc_311_dir / "panel"

# Create both output folders up front (no error if they already exist).
for _out_dir in (nyc_311, nyc_panel):
    _out_dir.mkdir(parents = True, exist_ok = True)

# NYC 2020 census tracts shapefile (input).
tracts_path = Path("data/nyc_tracts_2020/nyc_tracts_2020.shp")

# Output files: raw download, tract-by-day panel, and point-level GeoJSON.
raw_path = nyc_311 / "nyc_311_summer_2025.csv"
panel_path = nyc_panel / "nyc_311_tract_day_2025.csv"
point_path_geojson = nyc_311_dir / "nyc_311_points_2025.geojson"

In [3]:
# Complaint-type labels counted as "quality of life" (compared against the
# upper-cased, stripped complaint_type later in the notebook).
qol_complaints = [
    # Housing / building services.
    "HEAT",
    "HOT WATER",
    "WATER LEAK",
    "PLUMBING",
    "GAS",
    "ELECTRIC",
    "ELEVATOR",
    "BUILDING CONDITION",
    # Noise.
    "NOISE - RESIDENTIAL",
    "NOISE - STREET",
    "NOISE - PARK",
    # Sanitation.
    "RODENT",
    "UNSANITARY CONDITION",
    "SEWER",
    # Streets and sidewalks.
    "STREET CONDITION",
    "SIDEWALK CONDITION",
    "POTHOLE",
    # Parking.
    "ILLEGAL PARKING",
    "BLOCKED DRIVEWAY",
]

In [4]:
# Download helper.
def download_311_jfk_2025(token = None):
    """Download geocoded 311 records for 2025-06-01 .. 2025-08-22 and save them as CSV.

    Pages through the NYC Open Data endpoint ``erm2-nwe9`` in chunks of
    50,000 rows, keeping only rows with non-null latitude and longitude,
    and writes the concatenated result to the module-level ``raw_path``.

    Parameters
    ----------
    token : str or None, default None
        Optional app token, sent as the ``X-App-Token`` header when truthy.

    Returns
    -------
    pandas.DataFrame
        One row per record with the selected columns. No type conversion is
        done here; the frame is empty (columns only) if nothing matched.

    Raises
    ------
    requests.HTTPError
        If any page request returns an HTTP error status.
    requests.Timeout
        If a page request exceeds the timeout.
    """
    base = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"
    headers = {"X-App-Token": token} if token else {}
    limit = 50000
    # Seconds to wait on a single page before giving up instead of hanging.
    request_timeout = 120

    start = "2025-06-01T00:00:00"
    end = "2025-08-22T23:59:59"

    where_clause = (
        f"created_date between '{start}' and '{end}' "
        "AND latitude IS NOT NULL AND longitude IS NOT NULL"
    )

    cols = [
        "unique_key", "created_date", "complaint_type",
        "descriptor", "latitude", "longitude", "borough"
    ]

    offset = 0
    frames = []

    while True:
        params = {
            "$select": ",".join(cols),
            "$where": where_clause,
            "$limit": limit,
            "$offset": offset,
            # created_date alone is not a total order: rows sharing a timestamp
            # could shift between pages and be duplicated or skipped. The
            # unique_key tie-breaker makes offset paging stable.
            "$order": "created_date, unique_key"
        }
        r = requests.get(
            base, params = params, headers = headers, timeout = request_timeout
        )
        # Fail loudly on HTTP errors rather than feeding an error payload
        # into pd.DataFrame below.
        r.raise_for_status()
        data = r.json()
        if len(data) == 0:
            break
        frames.append(pd.DataFrame(data))
        print("Fetched:", len(data), "offset:", offset)
        offset += limit
        # Short pause between pages to be polite to the API.
        time.sleep(0.3)

    # pd.concat raises on an empty list, so fall back to an empty frame
    # with the expected columns when the query matched nothing.
    if frames:
        calls_311 = pd.concat(frames, ignore_index = True)
    else:
        calls_311 = pd.DataFrame(columns = cols)

    calls_311.to_csv(raw_path, index = False)
    print("Saved:", raw_path)

    return calls_311

In [5]:
# Download, then coerce the columns used downstream: timestamps that fail
# to parse become NaT, and coordinates are converted to numeric.
calls_311 = download_311_jfk_2025(token = None).assign(
    created_date = lambda df: pd.to_datetime(df["created_date"], errors = "coerce"),
    latitude = lambda df: pd.to_numeric(df["latitude"]),
    longitude = lambda df: pd.to_numeric(df["longitude"]),
)

Fetched: 50000 offset: 0
Fetched: 50000 offset: 50000
Fetched: 50000 offset: 100000
Fetched: 50000 offset: 150000
Fetched: 50000 offset: 200000
Fetched: 50000 offset: 250000
Fetched: 50000 offset: 300000
Fetched: 50000 offset: 350000
Fetched: 50000 offset: 400000
Fetched: 50000 offset: 450000
Fetched: 50000 offset: 500000
Fetched: 50000 offset: 550000
Fetched: 50000 offset: 600000
Fetched: 50000 offset: 650000
Fetched: 26191 offset: 700000
Saved: data\nyc_311\311_raw\nyc_311_summer_2025.csv


In [6]:
# Spatial join to tracts.
gdf_tracts = gpd.read_file(tracts_path)

# Keep only tracts whose geoid starts with one of the five county prefixes below.
nyc_prefixes = ("36005", "36047", "36061", "36081", "36085")
gdf_tracts = gdf_tracts[gdf_tracts["geoid"].str.startswith(nyc_prefixes)].copy()

# Build the point geometries in one vectorized call (instead of creating a
# Python Point object per row), then reproject from lon/lat to the tract CRS
# so both layers share a coordinate system for the join.
gdf_311 = gpd.GeoDataFrame(
    calls_311,
    geometry = gpd.points_from_xy(calls_311["longitude"], calls_311["latitude"]),
    crs = "EPSG:4326"
).to_crs(gdf_tracts.crs)

# Left join keeps every call; calls outside all tracts get a null geoid.
joined_gdf = gpd.sjoin(
    gdf_311,
    gdf_tracts[["geoid","geometry"]],
    how = "left",
    predicate = "within"
)

# Drop calls that matched no tract and expose the tract id as GEOID.
joined_gdf = (
    joined_gdf
    .dropna(subset = ["geoid"])
    .rename(columns = {"geoid": "GEOID"})
)

In [7]:
# Build tract by day.
# Calendar date of each call, a normalized complaint label, and a flag
# marking labels that appear in qol_complaints.
# NOTE(review): the match is exact after upper-casing/stripping; confirm the
# labels in qol_complaints match the dataset's complaint_type values.
joined_gdf["date"] = joined_gdf["created_date"].dt.date
joined_gdf["ct_norm"] = joined_gdf["complaint_type"].str.upper().str.strip()
joined_gdf["qol_flag"] = joined_gdf["ct_norm"].isin(qol_complaints)

# One row per (tract, day): number of calls and number of flagged calls.
panel = joined_gdf.groupby(["GEOID", "date"], as_index = False).agg(
    total_calls = ("unique_key", "count"),
    qol_calls = ("qol_flag", "sum"),
)

# Share of flagged calls; NaN where a tract-day has no counted calls.
panel["qol_pct"] = np.where(
    panel["total_calls"].gt(0),
    panel["qol_calls"].div(panel["total_calls"]),
    np.nan,
)

In [8]:
# Write the tract-by-day panel to CSV without the row index, then display it.
panel.to_csv(path_or_buf = panel_path, index = False)
print("Saved panel:", panel_path)

panel

Saved panel: data\nyc_311\panel\nyc_311_tract_day_2025.csv


Unnamed: 0,GEOID,date,total_calls,qol_calls,qol_pct
0,36005000100,2025-06-30,1,1,1.000000
1,36005000100,2025-07-23,1,0,0.000000
2,36005000100,2025-08-04,1,1,1.000000
3,36005000200,2025-06-01,8,6,0.750000
4,36005000200,2025-06-02,3,1,0.333333
...,...,...,...,...,...
162339,36085032300,2025-08-15,1,0,0.000000
162340,36085032300,2025-08-18,2,0,0.000000
162341,36085032300,2025-08-19,4,4,1.000000
162342,36085032300,2025-08-20,2,1,0.500000


In [9]:
# Show the panel's column names.
panel.columns

Index(['GEOID', 'date', 'total_calls', 'qol_calls', 'qol_pct'], dtype='object')

In [10]:
# Save the point-level records from joined_gdf (each call with its GEOID).

point_cols = [
    "unique_key",
    "created_date",
    "complaint_type",
    "descriptor",
    "latitude",
    "longitude",
    "borough",
    "GEOID",
    "ct_norm",
    "qol_flag",
    "date",
    "geometry",
]

# Keep only the export columns; the copy leaves joined_gdf untouched.
point_data_gdf = joined_gdf[point_cols].copy()

# Write the GeoDataFrame as GeoJSON.
# NOTE(review): geometry is written in joined_gdf's CRS (the tract layer's);
# confirm whether consumers expect lon/lat (EPSG:4326) instead.
point_data_gdf.to_file(filename = point_path_geojson, driver = "GeoJSON")

print("Saved point data as GeoJSON:", point_path_geojson)

Saved point data as GeoJSON: data\nyc_311\nyc_311_points_2025.geojson
