# Querying 311 street flooding complaints data from NYC Open Data using the Python library sodapy
Mark Bauer

In [1]:
# importing libraries
import pandas as pd
import numpy as np
from sodapy import Socrata
import os

In [2]:
# Socrata domain of NYC Open Data and the id of the 311 dataset queried in this notebook.
socrata_domain = 'data.cityofnewyork.us'
socrata_dataset_identifier = 'erm2-nwe9'

# Optional app token. To use one, export it in the shell (or in your .bashrc)
# before starting the notebook:
# $ export SODAPY_APPTOKEN=<token>
# When the variable is not set, socrata_token is None.
socrata_token = os.getenv("SODAPY_APPTOKEN")

In [3]:
# Client used for the metadata lookups below; socrata_token may be None (no app token).
client = Socrata(domain=socrata_domain, app_token=socrata_token)

In [4]:
# Fetch the dataset's metadata once and list the display name of every column.
metadata = client.get_metadata(socrata_dataset_identifier)
[column['name'] for column in metadata['columns']]

['Unique Key',
 'Created Date',
 'Closed Date',
 'Agency',
 'Agency Name',
 'Complaint Type',
 'Descriptor',
 'Location Type',
 'Incident Zip',
 'Incident Address',
 'Street Name',
 'Cross Street 1',
 'Cross Street 2',
 'Intersection Street 1',
 'Intersection Street 2',
 'Address Type',
 'City',
 'Landmark',
 'Facility Type',
 'Status',
 'Due Date',
 'Resolution Description',
 'Resolution Action Updated Date',
 'Community Board',
 'BBL',
 'Borough',
 'X Coordinate (State Plane)',
 'Y Coordinate (State Plane)',
 'Open Data Channel Type',
 'Park Facility Name',
 'Park Borough',
 'Vehicle Type',
 'Taxi Company Borough',
 'Taxi Pick Up Location',
 'Bridge Highway Name',
 'Bridge Highway Direction',
 'Road Ramp',
 'Bridge Highway Segment',
 'Latitude',
 'Longitude',
 'Location',
 'Zip Codes',
 'Community Districts',
 'Borough Boundaries',
 'City Council Districts',
 'Police Precincts']

In [5]:
# Metadata entry for the 'Complaint Type' column (first match by display name).
# Indexing with [0] raises IndexError if no column carries that name.
meta_amount = [
    column
    for column in metadata['columns']
    if column['name'] == 'Complaint Type'
][0]
meta_amount

{'id': 354922035,
 'name': 'Complaint Type',
 'dataTypeName': 'text',
 'description': 'This is the first level of a hierarchy identifying the topic of the incident or condition. Complaint Type may have a corresponding Descriptor (below) or may stand alone.',
 'fieldName': 'complaint_type',
 'position': 6,
 'renderTypeName': 'text',
 'tableColumnId': 1567792,
 'width': 268,
 'cachedContents': {'largest': 'ZTESTINT',
  'non_null': '25093886',
  'null': '0',
  'top': [{'item': 'Noise - Residential', 'count': '2272076'},
   {'item': 'HEAT/HOT WATER', 'count': '1469657'},
   {'item': 'Illegal Parking', 'count': '1154532'},
   {'item': 'Blocked Driveway', 'count': '1063932'},
   {'item': 'Street Condition', 'count': '1028708'},
   {'item': 'Street Light Condition', 'count': '990689'},
   {'item': 'HEATING', 'count': '887869'},
   {'item': 'PLUMBING', 'count': '753700'},
   {'item': 'Water System', 'count': '692428'},
   {'item': 'Noise - Street/Sidewalk', 'count': '688245'},
   {'item': 'Gen

In [6]:
# API field names of the columns: these, not the display names, are what SoQL queries use.
[column['fieldName'] for column in metadata['columns']]

['unique_key',
 'created_date',
 'closed_date',
 'agency',
 'agency_name',
 'complaint_type',
 'descriptor',
 'location_type',
 'incident_zip',
 'incident_address',
 'street_name',
 'cross_street_1',
 'cross_street_2',
 'intersection_street_1',
 'intersection_street_2',
 'address_type',
 'city',
 'landmark',
 'facility_type',
 'status',
 'due_date',
 'resolution_description',
 'resolution_action_updated_date',
 'community_board',
 'bbl',
 'borough',
 'x_coordinate_state_plane',
 'y_coordinate_state_plane',
 'open_data_channel_type',
 'park_facility_name',
 'park_borough',
 'vehicle_type',
 'taxi_company_borough',
 'taxi_pick_up_location',
 'bridge_highway_name',
 'bridge_highway_direction',
 'road_ramp',
 'bridge_highway_segment',
 'latitude',
 'longitude',
 'location',
 ':@computed_region_efsh_h5xi',
 ':@computed_region_f5dn_yrer',
 ':@computed_region_yeji_bk3q',
 ':@computed_region_92fq_4b7q',
 ':@computed_region_sbqj_enih']

In [7]:
# Re-create the client with a generous timeout (timeout=1000) for the aggregate query.
# socrata_token comes from the SODAPY_APPTOKEN environment variable; when that is
# unset it is None and the client is unauthenticated, which only works with
# public data sets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Count the records per complaint type, most frequent first. The LIMIT is set far
# above the number of groups so that no group is dropped from the response.
query = """
SELECT 
    complaint_type, count(complaint_type)
GROUP BY 
    complaint_type
ORDER BY 
    count(complaint_type) DESC
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier, query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

print(results_df.shape)
results_df.head(10)

(445, 2)


Unnamed: 0,complaint_type,count_complaint_type
0,Noise - Residential,2272076
1,HEAT/HOT WATER,1469657
2,Illegal Parking,1154532
3,Blocked Driveway,1063932
4,Street Condition,1028708
5,Street Light Condition,990689
6,HEATING,887869
7,PLUMBING,753700
8,Water System,692428
9,Noise - Street/Sidewalk,688245


In [8]:
# (rows, columns) of the complaint-type counts table built in the previous cell
results_df.shape

(445, 2)

In [9]:
# Re-create the client with a generous timeout (timeout=1000) for the aggregate query.
# socrata_token comes from the SODAPY_APPTOKEN environment variable; when that is
# unset it is None and the client is unauthenticated, which only works with
# public data sets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Count the records per descriptor for every descriptor containing 'flood'
# (case-insensitive via LOWER), most frequent first.
query = """
SELECT 
    descriptor, count(descriptor)
WHERE 
    LOWER(descriptor) LIKE '%flood%'
GROUP BY 
    descriptor
ORDER BY 
    count(descriptor) DESC
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier, query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

print(results_df.shape)
results_df

(11, 2)


Unnamed: 0,descriptor,count_descriptor
0,Catch Basin Clogged/Flooding (Use Comments) (SC),90181
1,Street Flooding (SJ),27691
2,Flood Light Lamp Out,5982
3,Highway Flooding (SH),2841
4,Flood Light Lamp Cycling,2515
5,Ready NY - Flooding,271
6,Flood Light Lamp Dayburning,205
7,Flood Light Lamp Missing,192
8,Flood Light Lamp Dim,177
9,RAIN GARDEN FLOODING (SRGFLD),80


In [10]:
# Re-create the client with a generous timeout (timeout=1000) because this cell
# downloads full records rather than an aggregate.
# socrata_token comes from the SODAPY_APPTOKEN environment variable; when that is
# unset it is None and the client is unauthenticated, which only works with
# public data sets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Fetch every column of every record whose descriptor contains 'flood'
# (case-insensitive via LOWER). The match is deliberately broad: it also returns
# e.g. 'Flood Light Lamp Out' records, and the exact descriptor is selected in a
# later cell.
query = """
SELECT 
    *
WHERE 
    LOWER(descriptor) LIKE '%flood%'
LIMIT 
    1000000
"""

# Returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get(socrata_dataset_identifier, query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

print(results_df.shape)
results_df.head()

(130183, 34)


Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,intersection_street_1,intersection_street_2,...,longitude,location,incident_address,street_name,cross_street_1,cross_street_2,bbl,due_date,location_type,landmark
0,19042361,2010-11-03T09:07:00.000,2010-11-03T09:07:00.000,DOT,Department of Transportation,Street Light Condition,Flood Light Lamp Cycling,11420,127 STREET,HAWTREE CREEK ROAD,...,-73.8146352911792,"{'latitude': '40.68319180156645', 'longitude':...",,,,,,,,
1,19050759,2010-11-04T13:41:00.000,2011-01-03T10:20:00.000,DEP,Department of Environmental Protection,Sewer,Catch Basin Clogged/Flooding (Use Comments) (SC),11416,,,...,-73.85870454354932,"{'latitude': '40.68043821412754', 'longitude':...",101-34 80 ST,80 ST,101 AVE,LIBERTY AVE,4090790017.0,,,
2,19050059,2010-11-04T11:38:00.000,2010-11-04T11:38:00.000,DOT,Department of Transportation,Street Light Condition,Flood Light Lamp Dim,11232,41 STREET,6 AVENUE,...,-74.00286853366998,"{'latitude': '40.648819999233694', 'longitude'...",,,,,,,,
3,19050060,2010-11-04T11:39:00.000,2010-11-04T11:39:00.000,DOT,Department of Transportation,Street Light Condition,Flood Light Lamp Cycling,11232,41 STREET,5 AVENUE,...,-74.00507409002483,"{'latitude': '40.650145653291624', 'longitude'...",,,,,,,,
4,19050058,2010-11-04T11:37:00.000,2010-11-04T11:37:00.000,DOT,Department of Transportation,Street Light Condition,Flood Light Lamp Out,11220,44 STREET,5 AVENUE,...,-74.00682174046591,"{'latitude': '40.64846575564213', 'longitude':...",,,,,,,,


In [11]:
# Number of fetched records per flood-related descriptor
results_df['descriptor'].value_counts()

Catch Basin Clogged/Flooding (Use Comments) (SC)    90181
Street Flooding (SJ)                                27691
Flood Light Lamp Out                                 5982
Highway Flooding (SH)                                2841
Flood Light Lamp Cycling                             2515
Ready NY - Flooding                                   271
Flood Light Lamp Dayburning                           205
Flood Light Lamp Missing                              192
Flood Light Lamp Dim                                  177
RAIN GARDEN FLOODING (SRGFLD)                          80
Flooded                                                48
Name: descriptor, dtype: int64

In [12]:
# Keep only the street flooding complaints; results_df is overwritten with the subset.
results_df = results_df[results_df['descriptor'].eq('Street Flooding (SJ)')]

In [13]:
# Sanity check: 'Street Flooding (SJ)' should be the only descriptor left after the filter
results_df['descriptor'].value_counts()

Street Flooding (SJ)    27691
Name: descriptor, dtype: int64

In [14]:
# Complaint types that the remaining street flooding records belong to
results_df['complaint_type'].value_counts()

Sewer    27691
Name: complaint_type, dtype: int64

In [15]:
# (rows, columns) after keeping only the street flooding complaints
results_df.shape

(27691, 34)

In [16]:
# Preview the first rows of the filtered street flooding records
results_df.head()

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,intersection_street_1,intersection_street_2,...,longitude,location,incident_address,street_name,cross_street_1,cross_street_2,bbl,due_date,location_type,landmark
47,19224496,2010-11-29T20:01:00.000,2010-11-30T08:50:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11210.0,,,...,-73.95407184174927,"{'latitude': '40.61622473490424', 'longitude':...",AVENUE N,AVENUE N,OCEAN AVENUE,EAST 21 STREET,,,,
98,38610039,2018-03-04T06:10:00.000,2018-03-07T10:00:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),,ESSEX ST,ATLANTIC AVE,...,,,,,ESSEX ST,ATLANTIC AVE,,,,
116,19379336,2010-12-18T11:01:00.000,2011-01-10T15:00:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),10304.0,,,...,-74.09900706133925,"{'latitude': '40.60226417502854', 'longitude':...",111 MEDFORD RD,MEDFORD RD,KEUNE CT,FOREST RD,5008420073.0,,,
135,19406934,2010-12-22T08:39:00.000,2011-01-03T09:55:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11416.0,,,...,-73.84208638981947,"{'latitude': '40.688417629825395', 'longitude'...",95-20 102 ST,102 ST,95 AVE,97 AVE,4093790013.0,,,
145,19424458,2010-12-26T10:44:00.000,2010-12-26T11:55:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11357.0,6 AVE,CLINTONVILLE ST,...,-73.81470923549934,"{'latitude': '40.796720571511926', 'longitude'...",,,6 AVE,CLINTONVILLE ST,,,,


# Removing records created after 2020

In [17]:
# Record count plus earliest and latest created_date before any date filtering.
# created_date is compared as text here (see the ISO-8601 strings in the output).
print(f'Number of total records: {len(results_df)}')
print(f"Min: {results_df['created_date'].min()}")
print(f"Max: {results_df['created_date'].max()}")

Number of total records: 27691
Min: 2010-01-02T08:26:00.000
Max: 2021-03-08T18:49:00.000


In [18]:
# Preview the most recent records that the pre-2021 filter would keep.
# The comparison is textual: created_date strings that sort before '2021' are kept.
(
    results_df[results_df['created_date'] < '2021']
    .sort_values(by='created_date', ascending=False)
    .head()
)

Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,intersection_street_1,intersection_street_2,...,longitude,location,incident_address,street_name,cross_street_1,cross_street_2,bbl,due_date,location_type,landmark
2797,48542220,2020-12-31T15:41:00.000,2021-01-01T00:20:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11420,,,...,-73.80441718054371,"{'latitude': '40.67703755925495', 'longitude':...",117-17 135 STREET,135 STREET,FOCH BLVD,120 AVE,4116990057.0,,,
6205,48536430,2020-12-31T14:49:00.000,2021-01-04T10:15:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11357,,,...,-73.81428794578581,"{'latitude': '40.78072630540092', 'longitude':...",20-24 150 STREET,150 STREET,20 AVE,20 RD,4046700029.0,,,
6887,48539361,2020-12-31T14:03:00.000,2021-01-02T11:25:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11228,,,...,-74.01680967626773,"{'latitude': '40.62849640806448', 'longitude':...",7223 8 AVENUE,8 AVENUE,72 ST,7 AVE,3059120001.0,,,
8040,48543132,2020-12-31T13:48:00.000,2020-12-31T14:50:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),10032,RIVERSIDE DRIVE,WEST 165 STREET,...,-73.9446789892306,"{'latitude': '40.841051689545516', 'longitude'...",,,,,,,,
6206,48536441,2020-12-31T13:10:00.000,2021-01-03T10:45:00.000,DEP,Department of Environmental Protection,Sewer,Street Flooding (SJ),11234,,,...,-73.93654793950026,"{'latitude': '40.609203447399906', 'longitude'...",3123 FILLMORE AVENUE,FILLMORE AVENUE,E 31 ST,E 32 ST,3084750001.0,,,


In [19]:
# Drop every record whose created_date string does not sort before '2021',
# then re-check the record count and the date range of what is left.
results_df = results_df[results_df['created_date'] < '2021']

print(len(results_df))
print(f"Min: {results_df['created_date'].min()}")
print(f"Max: {results_df['created_date'].max()}")

27428
Min: 2010-01-02T08:26:00.000
Max: 2020-12-31T15:41:00.000


In [20]:
# writing output file as a csv (index=False leaves the DataFrame index out of the file)
results_df.to_csv('../data-raw/raw_street_flooding_complaints.csv', index=False)

# listing items in data folder (IPython %ls magic) to confirm the file is there
%ls ../data-raw/

README.md                           raw_street_flooding_complaints.csv
[34mdata-dictionaries[m[m/                  raw_streets_clipped.json


# Briefly reviewing which descriptors fall under complaint_type='Sewer'

In [21]:
# Re-create the client with a generous timeout (timeout=1000) for the aggregate query.
# socrata_token comes from the SODAPY_APPTOKEN environment variable; when that is
# unset it is None and the client is unauthenticated, which only works with
# public data sets.
client = Socrata(socrata_domain, socrata_token, timeout=1000)

# Example authenticated client (needed for non-public datasets):
# client = Socrata("data.cityofnewyork.us",
#                  MyAppToken,
#                  username="user@example.com",
#                  password="AFakePassword")

# Count the records per descriptor within complaint_type='Sewer', most frequent first.
query = """
SELECT 
    descriptor, count(descriptor)
WHERE 
    complaint_type='Sewer'
GROUP BY 
    descriptor
ORDER BY 
    count(descriptor) DESC
"""

# No LIMIT clause is given, so the API's default row limit applies
# (NOTE(review): 1000 rows per the SODA docs; ample for the few descriptor groups).
# Returned as JSON from API / converted to Python list of dictionaries by sodapy.
results = client.get(socrata_dataset_identifier, query=query)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

print(results_df.shape)
# Show every row: the table is small (one row per Sewer descriptor).
results_df.head(len(results_df))

(27, 2)


Unnamed: 0,descriptor,count_descriptor
0,Sewer Backup (Use Comments) (SA),149981
1,Catch Basin Clogged/Flooding (Use Comments) (SC),90181
2,Catch Basin Sunken/Damaged/Raised (SC1),28632
3,Street Flooding (SJ),27691
4,Manhole Cover Broken/Making Noise (SB),19780
5,Manhole Cover Missing (Emergency) (SA3),17649
6,Sewer Odor (SA2),15453
7,Defective/Missing Curb Piece (SC4),8526
8,Manhole Overflow (Use Comments) (SA1),6928
9,Catch Basin Search (SC2),4154
