# Download NYC 311 Street Flooding Complaints
Author: Mark Bauer

In [1]:
# import libraries
import duckdb
import pandas as pd
from datetime import datetime
from sodapy import Socrata
import os

In [2]:
# reproducibility: print the interpreter and package versions used for this run
# (relies on the `watermark` IPython extension being installed in the kernel)
%reload_ext watermark
# -v reports the Python/IPython versions; -p reports the listed packages' versions
%watermark -v -p duckdb,pandas,sodapy

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.6.0

duckdb: 1.0.0
pandas: 1.5.1
sodapy: 2.2.0



In [3]:
# timestamp this run so the downloaded snapshot can be dated later
current_date = datetime.now()
retrieval_stamp = current_date.strftime('%Y-%m-%d')
print(f"The data was retrieved on {retrieval_stamp}.")

The data was retrieved on 2025-03-27.


In [4]:
# nyc open data domain and 311 dataset id
socrata_domain = 'data.cityofnewyork.us'
socrata_dataset_identifier = 'erm2-nwe9'

# optional app token, read from the environment so it is never hard-coded in
# the notebook; when SOCRATA_APP_TOKEN is unset this is None and the client
# is created without a token, exactly as before
socrata_app_token = os.environ.get('SOCRATA_APP_TOKEN')

# Socrata object to fetch data
client = Socrata(
    domain=socrata_domain,
    app_token=socrata_app_token,
    timeout=1000  # generous timeout: the whole result set is fetched in one request
)

# sanity check
print(client)



<sodapy.socrata.Socrata object at 0x168b31450>


In [5]:
# query: every 311 record whose descriptor is 'Street Flooding (SJ)'.
# The LIMIT clause caps how many rows come back, so the cap is kept well above
# the expected number of matches and verified after the download.
row_limit = 100000
query = f"""
    SELECT *
    WHERE descriptor == 'Street Flooding (SJ)'
    LIMIT {row_limit} -- manually make this a high number, much above the results
"""

# get data from client; close the session even if the request fails
try:
    results = client.get(socrata_dataset_identifier, query=query)
finally:
    client.close()

# guard against silent truncation: reaching the cap means rows may be missing
assert len(results) < row_limit, (
    f'query returned {len(results)} rows, which reaches the LIMIT of {row_limit}; '
    'raise row_limit and re-run to avoid saving a truncated download'
)

# transform into dataframe
results_df = pd.DataFrame(results)

print(f'shape of data: {results_df.shape}')
results_df.head()

shape of data: (41617, 32)


Unnamed: 0,unique_key,created_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,cross_street_1,...,latitude,longitude,location,closed_date,intersection_street_1,intersection_street_2,facility_type,resolution_description,resolution_action_updated_date,due_date
0,60700670,2024-03-27T16:32:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11236.0,1381 EAST 105 STREET,EAST 105 STREET,FLATLANDS 8 ST,...,40.63963974342525,-73.88587974249154,"{'latitude': '40.63963974342525', 'longitude':...",,,,,,,
1,18265181,2010-07-14T08:38:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),,,,PELHAM PKWY,...,,,,2010-07-14T08:38:00.000,PELHAM PKWY,STILLWELL AVE,,The Department of Environmental Protection inv...,2010-07-14T08:30:00.000,
2,21549616,2011-09-29T10:34:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),,,,,...,,,,2011-09-30T10:40:00.000,THURSBY AVE,GOVENER AVE,,The Department of Environmental Protection inv...,2011-09-30T10:40:00.000,
3,35839080,2017-03-31T20:24:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),10029.0,EAST 106 STREET,EAST 106 STREET,3 AVENUE,...,,,,2017-04-01T02:25:00.000,,,,Please call 311 for further information. If yo...,2017-04-01T02:25:00.000,
4,29443390,2014-12-06T10:23:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),,,,NAGLE AVE,...,,,,2014-12-06T11:30:00.000,NAGLE AVE,DYCKMAN ST,,The Department of Environmental Protection inv...,2014-12-06T11:30:00.000,


In [6]:
# save results as CSV file
# create the output folder first: to_csv does not create missing directories,
# so the write would otherwise fail on a fresh checkout without `data/`
output_path = 'data/complaints.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results_df.to_csv(output_path, index=False)