# Querying 311 Street Flooding Complaints Using sodapy 

Mark Bauer

In [1]:
# importing libraries
import os
import numpy as np
import pandas as pd
from sodapy import Socrata

In [2]:
# NYC Open Data portal domain and the identifier of the 311 dataset
socrata_domain = 'data.cityofnewyork.us'
socrata_dataset_identifier = 'erm2-nwe9'

# Optional app token, read from the environment. To provide one, run
# $ export SODAPY_APPTOKEN=<token>
# in the terminal (or add that line to your .bashrc). Resolves to None when unset.
socrata_token = os.getenv("SODAPY_APPTOKEN")

# Preview and Explore the Dataset
Group and count 311 complaints by `complaint_type`

Practice a query using the sodapy client and the basic query format, manually forcing the row limit to a high value that includes ~all rows.

In [3]:
# Connect to the NYC Open Data portal (the app token may be None).
client = Socrata(
    socrata_domain,
    socrata_token,
    timeout=10000,
)

# SoQL: count complaints per complaint_type, most frequent first, capped at 100 groups.
query = """
SELECT 
    complaint_type, 
    count(complaint_type)   
GROUP BY 
    complaint_type   
ORDER BY 
    count(complaint_type) DESC
LIMIT
    100
"""

# The API answers with JSON, which sodapy hands back as a list of dictionaries ...
results = client.get(socrata_dataset_identifier, query=query)

# ... and pandas turns that list into a DataFrame.
results_df = pd.DataFrame.from_records(data=results)

print('shape of data: {}'.format(results_df.shape))
results_df.head(n=10)



shape of data: (100, 2)


Unnamed: 0,complaint_type,count_complaint_type
0,Noise - Residential,3211181
1,Illegal Parking,2234269
2,HEAT/HOT WATER,2020234
3,Blocked Driveway,1483630
4,Street Condition,1226558
5,Noise - Street/Sidewalk,1153651
6,Street Light Condition,1123234
7,Request Large Bulky Item Collection,1073753
8,PLUMBING,906993
9,HEATING,887869


Group and count 311 complaints by `descriptor`.

In [4]:
# SoQL: count complaints per descriptor, most frequent first, capped at 100 groups.
query = """
SELECT 
    descriptor, 
    count(descriptor)   
GROUP BY 
    descriptor    
ORDER BY 
    count(descriptor) DESC
LIMIT
    100
"""

# Run the query and load the returned records into a DataFrame.
results = client.get(socrata_dataset_identifier, query=query)
results_df = pd.DataFrame.from_records(data=results)

print('shape of data: {}'.format(results_df.shape))
results_df.head(n=10)

shape of data: (100, 2)


Unnamed: 0,descriptor,count_descriptor
0,Loud Music/Party,3542455
1,ENTIRE BUILDING,1319346
2,No Access,1106122
3,Request Large Bulky Item Collection,1073753
4,Banging/Pounding,878006
5,HEAT,868960
6,Street Light Out,826920
7,Pothole,742923
8,APARTMENT ONLY,700888
9,Blocked Hydrant,624807


Group and count `complaint_type` values that contain the word `flood`.

In [5]:
# SoQL: same grouping as before, restricted to complaint types whose
# lower-cased name contains "flood".
query = """
SELECT 
    complaint_type, 
    count(complaint_type)
WHERE 
    LOWER(complaint_type) LIKE '%flood%'   
GROUP BY 
    complaint_type
ORDER BY 
    count(complaint_type) DESC
LIMIT
    100
"""

# Run the query and load the returned records into a DataFrame.
results = client.get(socrata_dataset_identifier, query=query)
results_df = pd.DataFrame.from_records(data=results)

print('shape of data: {}'.format(results_df.shape))
results_df

shape of data: (0, 0)


Group and count `descriptor` values that contain the word `flood`.

In [6]:
# SoQL: count complaints per descriptor, keeping only descriptors whose
# lower-cased text contains "flood".
query = """
SELECT 
    descriptor, 
    count(descriptor) 
WHERE 
    LOWER(descriptor) LIKE '%flood%' 
GROUP BY 
    descriptor  
ORDER BY 
    count(descriptor) DESC
LIMIT
    100  
"""

# Run the query and load the returned records into a DataFrame.
results = client.get(socrata_dataset_identifier, query=query)
results_df = pd.DataFrame.from_records(data=results)

print('shape of data: {}'.format(results_df.shape))
results_df

shape of data: (11, 2)


Unnamed: 0,descriptor,count_descriptor
0,Catch Basin Clogged/Flooding (Use Comments) (SC),111526
1,Street Flooding (SJ),37749
2,Flood Light Lamp Out,6456
3,Highway Flooding (SH),3113
4,Flood Light Lamp Cycling,2584
5,Ready NY - Flooding,271
6,Flood Light Lamp Dayburning,223
7,Flood Light Lamp Missing,211
8,Flood Light Lamp Dim,185
9,RAIN GARDEN FLOODING (SRGFLD),152


Select all rows where `descriptor` is `Street Flooding (SJ)` and `created_date` between 2010 and 2021.

In [7]:
# Upper bound on the number of rows requested. It must stay above the number
# of matching complaints, otherwise the download is silently cut short.
row_limit = 40000

# Select every column for 'Street Flooding (SJ)' complaints created in
# 2010 through 2020.
# The date filter is a half-open range [2010-01-01, 2021-01-01): BETWEEN is
# inclusive on both ends, so it would also admit a complaint stamped exactly
# at midnight on 2021-01-01. Equality uses SoQL's documented `=` operator.
query = """
SELECT
    *
WHERE
    descriptor = 'Street Flooding (SJ)'
    AND created_date >= '2010-01-01T00:00:00'
    AND created_date < '2021-01-01T00:00:00'
LIMIT
    {}
""".format(row_limit)

results = client.get(socrata_dataset_identifier, query=query)

# A result that fills the limit most likely means rows were left behind.
assert len(results) < row_limit, (
    'query returned {:,} rows, which reaches the row limit; '
    'increase row_limit'.format(len(results))
)

results_df = pd.DataFrame.from_records(results)

# This is the last query in the notebook, so the client is closed here.
client.close()

print('shape of data: {}'.format(results_df.shape))
results_df.head()

shape of data: (27902, 32)


Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,incident_address,street_name,...,open_data_channel_type,park_facility_name,park_borough,latitude,longitude,location,intersection_street_1,intersection_street_2,facility_type,due_date
0,48542220,2020-12-31T15:41:00.000,2021-01-01T00:20:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11420,117-17 135 STREET,135 STREET,...,PHONE,Unspecified,QUEENS,40.67703755925495,-73.80441718054371,"{'latitude': '40.67703755925495', 'longitude':...",,,,
1,48536430,2020-12-31T14:49:00.000,2021-01-04T10:15:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11357,20-24 150 STREET,150 STREET,...,ONLINE,Unspecified,QUEENS,40.78072630540092,-73.81428794578581,"{'latitude': '40.78072630540092', 'longitude':...",,,,
2,48539361,2020-12-31T14:03:00.000,2021-01-02T11:25:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11228,7223 8 AVENUE,8 AVENUE,...,ONLINE,Unspecified,BROOKLYN,40.62849640806448,-74.01680967626773,"{'latitude': '40.62849640806448', 'longitude':...",,,,
3,48543132,2020-12-31T13:48:00.000,2020-12-31T14:50:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),10032,,,...,PHONE,Unspecified,MANHATTAN,40.84105168954552,-73.9446789892306,"{'latitude': '40.841051689545516', 'longitude'...",RIVERSIDE DRIVE,WEST 165 STREET,,
4,48536441,2020-12-31T13:10:00.000,2021-01-03T10:45:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11234,3123 FILLMORE AVENUE,FILLMORE AVENUE,...,PHONE,Unspecified,BROOKLYN,40.60920344739991,-73.93654793950026,"{'latitude': '40.609203447399906', 'longitude'...",,,,


In [8]:
# Sanity checks: row count and the span of creation dates in the download.
print('Number of total records: {:,}.\n'.format(len(results_df)))

# created_date is compared as returned by the API (see the output below).
print('min date:', results_df.loc[:, 'created_date'].min())
print('max date:', results_df.loc[:, 'created_date'].max())

Number of total records: 27,902.

min date: 2010-01-02T08:26:00.000
max date: 2020-12-31T15:41:00.000


In [9]:
# writing output file as a csv
results_df.to_csv('data/street-flooding-complaints.csv', index=False)

# listing items in data folder
%ls data/

README.md                       street-flooding-complaints.csv
[34mdata-dictionaries[m[m/              streets-clipped.json
