In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import json
import re

In [35]:
#Read in the original csv file
Syr311 = pd.read_csv('../data/SyracruseCityline_Calls_for_Service.csv', low_memory=False)

##Data source: https://data.syrgov.net/datasets/0aa5fcd76dbd4f2cabf2aeb1ddd0179e_0/about

In [36]:
Syr311.info() #57110 rows

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57110 entries, 0 to 57109
Data columns (total 18 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   complaint_number               57110 non-null  object 
 1   open_date                      57110 non-null  object 
 2   close_date                     57110 non-null  object 
 3   complaint_type_name            57110 non-null  object 
 4   status_type_name               57110 non-null  object 
 5   department_name                57110 non-null  object 
 6   received_via_type_name         57110 non-null  object 
 7   identifier                     50250 non-null  object 
 8   number                         50248 non-null  object 
 9   address                        50250 non-null  object 
 10  zip                            50248 non-null  float64
 11  location                       6867 non-null   object 
 12  nearest_address                1807 non-null  

In [33]:
#convert times to datetimes
Syr311['open_date'] = pd.to_datetime(Syr311['open_date'])

In [34]:
#what is the date range covered in this current dataframe?
Syr311 = Syr311.sort_values(['open_date'],ascending=True)
#2017/01/09 - 2019-06-26
Syr311.tail()

Unnamed: 0,complaint_number,open_date,close_date,complaint_type_name,status_type_name,department_name,received_via_type_name,identifier,number,address,zip,location,nearest_address,nearest_address1,cross_street_1,cross_street_2,user_defined_action_type_name,ObjectId
4,2019-17440,2019-06-23 14:22:41+00:00,2019/06/23 04:00:00+00,Sewer Back Up,Completed,DPW - Sewers,Cityline,024.-18-25.0,169.0,Forest Hill Dr,13206.0,,,,,,Crew Responded,5
3,2019-17408,2019-06-24 02:41:55.790000+00:00,2019/06/26 04:00:00+00,Pot Holes in Road,Completed,DPW - Street Repair,Online,101.-18-01.4,110.0,Fabius St,13204.0,,,,,,Repaired Hot Patch,4
2,2019-17480,2019-06-24 16:01:21.833000+00:00,2019/06/25 04:00:00+00,Street Paving-Patch/Misc,Completed,DPW - Street Repair,Cityline,,,,,"Teall Ave, Syracuse",,,,,Repaired Hot Patch,3
1,2019-17544,2019-06-24 18:33:32.877000+00:00,2019/06/26 04:00:00+00,Traffic Sign Req -General,Completed,DPW - Transportation,Cityline,,,,,"Fayette St E, Syracuse",1107.0,,,,Crew Responded,2
0,2019-17565,2019-06-24 19:08:39.597000+00:00,2019/06/26 04:00:00+00,Traffic Signl Special Req,Completed,DPW - Transportation,Cityline,,,,,"James St & Marlborough Rd, Syracuse",,,,,Repaired / Restored,1


In [14]:
#Breaking apart the datetime to date and time (don't need to include the time)

Syr311['Date'] = [d.date() for d in Syr311['open_date']]
Syr311['Date'] = pd.to_datetime(Syr311['Date'])
Syr311.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57110 entries, 57032 to 0
Data columns (total 19 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   complaint_number               57110 non-null  object             
 1   open_date                      57110 non-null  datetime64[ns, UTC]
 2   close_date                     57110 non-null  object             
 3   complaint_type_name            57110 non-null  object             
 4   status_type_name               57110 non-null  object             
 5   department_name                57110 non-null  object             
 6   received_via_type_name         57110 non-null  object             
 7   identifier                     50250 non-null  object             
 8   number                         50248 non-null  object             
 9   address                        50250 non-null  object             
 10  zip                   

In [15]:
#Periodical Cicadas are confirmed in Onondaga County for Brood VII (2018), Syracuse is in Onondaga County.
#Trimming to 2017 - 2019 relevant months


Syr311trim = Syr311[Syr311['Date'] > dt.datetime(2017,5,1)]
Syr311trim = Syr311trim[Syr311trim['Date'] < dt.datetime(2019,7,1)]

In [17]:
Syr311trim.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 48612 entries, 48614 to 0
Data columns (total 19 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   complaint_number               48612 non-null  object             
 1   open_date                      48612 non-null  datetime64[ns, UTC]
 2   close_date                     48612 non-null  object             
 3   complaint_type_name            48612 non-null  object             
 4   status_type_name               48612 non-null  object             
 5   department_name                48612 non-null  object             
 6   received_via_type_name         48612 non-null  object             
 7   identifier                     42897 non-null  object             
 8   number                         42895 non-null  object             
 9   address                        42897 non-null  object             
 10  zip                   

# Isolating data to months of May and June for each year

In [18]:
#2017
Syr3112017 = Syr311trim[Syr311trim['Date'] > dt.datetime(2017,5,1)]
Syr3112017 = Syr3112017[Syr3112017['Date'] < dt.datetime(2017,7,1)]

In [20]:
Syr3112017.tail(20)

Unnamed: 0,complaint_number,open_date,close_date,complaint_type_name,status_type_name,department_name,received_via_type_name,identifier,number,address,zip,location,nearest_address,nearest_address1,cross_street_1,cross_street_2,user_defined_action_type_name,ObjectId,Date
43942,2017-18796,2017-06-30 18:15:10.180000+00:00,2017/07/12 04:00:00+00,CleanUp Rqst: Public Prop,Completed,DPW - ISR,Cityline,,,,,"Stedman St, Syracuse",,,,,Cleaned Up,43943,2017-06-30
43941,2017-18800,2017-06-30 18:19:42.950000+00:00,2017/07/10 04:00:00+00,Trash Skip,Completed,DPW - Sanitation,Cityline,105.-01-38.0,438,Wilkinson St,13204.0,,,,,,Picked Up,43942,2017-06-30
43940,2017-18801,2017-06-30 18:20:07.713000+00:00,2017/07/06 04:00:00+00,Blue Bin: request new BB,Completed,DPW - Sanitation,Cityline,113.-13-05.0,111,Morgan Ave,13204.0,,,,,,Delivered,43941,2017-06-30
43939,2017-18802,2017-06-30 18:20:33.767000+00:00,2017/07/05 04:00:00+00,Bulk Household Items,Completed,DPW - Sanitation,Cityline,045.-02-14.0,250-52,Fellows Ave,13210.0,,,,,,Picked Up,43940,2017-06-30
43938,2017-18803,2017-06-30 18:21:04.817000+00:00,2017/07/05 04:00:00+00,Sanitation/Special Reqst,Completed,DPW - Sanitation,Cityline,,,,,"Euclid Terr, Syracuse",,,,,Picked Up,43939,2017-06-30
43937,2017-18805,2017-06-30 18:22:08.257000+00:00,2017/07/10 04:00:00+00,Bulk Household Items,Completed,DPW - Sanitation,Cityline,007.-23-09.0,1013,First North St,13208.0,,,,,,Picked Up,43938,2017-06-30
43936,2017-18808,2017-06-30 18:37:25.520000+00:00,2017/07/03 04:00:00+00,Blue Bin: request new BB,Completed,DPW - Sanitation,Cityline,059.-05-14.0,327,Thurber St,13210.0,,,,,,Delivered,43937,2017-06-30
43933,2017-18810,2017-06-30 18:39:25.490000+00:00,2017/07/06 04:00:00+00,Bulk Household Items,Completed,DPW - Sanitation,Cityline,061.-01-05.2,316-24,Brighton Ave E,13210.0,,,,,,Picked Up,43934,2017-06-30
43930,2017-18811,2017-06-30 18:51:30.260000+00:00,2017/07/05 04:00:00+00,Bulk Household Items,Completed,DPW - Sanitation,Cityline,053.-16-06.0,1120,Westmoreland Ave,13210.0,,,,,,Picked Up,43931,2017-06-30
43907,2017-18812,2017-06-30 18:52:33.503000+00:00,2017/07/12 04:00:00+00,Illegal Trash Set Out,Completed,DPW - ISR,Cityline,020.-08-11.0,240,Stafford Ave,13206.0,,,,,,Compliance,43908,2017-06-30


In [21]:
#2018 - cicada year
Syr3112018 = Syr311trim[Syr311trim['Date'] > dt.datetime(2018,5,1)]
Syr3112018 = Syr3112018[Syr3112018['Date'] < dt.datetime(2018,7,1)]

In [22]:
#2019
Syr3112019 = Syr311trim[Syr311trim['Date'] > dt.datetime(2019,5,1)]
Syr3112019 = Syr3112019[Syr3112019['Date'] < dt.datetime(2019,7,1)]

In [23]:
#Bring them back to a single dataset
years = [Syr3112017, Syr3112018, Syr3112019]
Syr311_final = pd.concat(years)

In [None]:
Syr311_final.info #12440 rows

## Getting relevant requests / types of calls

In [25]:
Syr311_final['complaint_type_name'].unique()

array(['Pot Holes in Road', 'Tire Pickup (4/yr)', 'Sewer Back Up',
       'Blue Bin: request new BB', 'Trash Skip', 'Bulk Household Items',
       'Traffic Sign Req -General', 'Quad 4 ConstrDebrisPickUp',
       'Metal Household Items', 'Tree/Limb/Stump Removal',
       'Blue Bin Skip', 'Sewer Dept/Special Reqst',
       'Tree Trimming Request', 'Illegal Trash Set Out',
       'Illegal TV Set Out', 'Transport Special Request',
       'Catch Basin: Clean', 'Overgrown Veg - Public',
       'Street Repair Special Req', 'Street Paving-Patch/Misc',
       'Sidewalk Condition', 'Sanitation/Special Reqst',
       'Repair ManHoleCover/Grate', 'Driveway Damage', 'Jet Lateral/Main',
       'Repair Sewer Pipes-Caps', 'Tree Inspect/Problem Req',
       'CleanUp Rqst: Public Prop', 'Sewer Cave-in',
       'Emergency UFPO/NAT.GRID', 'Traffic Signl Special Req',
       'Parks Maint/General', 'Barricade Pickup',
       'Trash/Debris-Private, Occ', 'Repair Creek Fencing',
       'Creeks Repair Cleaning

In [26]:
# What types of Phone Calls indicate bad behavior and/or an annoyed populace?
relevant_requests = ['Pot Holes in Road',
                    'Trash Skip', 
                     'Bulk Household Items', ##look into definition of household items complaints
                     'Metal Household Items'
                     'Traffic Sign Req -General',
                     'Blue Bin Skip',
                     'Illegal Trash Set Out',
                    'Illegal TV Set Out',
                     'Catch Basin: Clean', 
                     'Overgrown Veg - Public',
                   'Street Repair Special Req',
                     'Sidewalk Condition',
                     'Repair ManHoleCover/Grate', 
                     'Driveway Damage',
                     'Barricade Pickup',
                     'Trash/Debris-Private, Occ', 'Repair Creek Fencing',
       'Creeks Repair Cleaning', 'Complaint Reqst - General', 'Trash Can Taken / Damaged', 'Street Clean Special Req',
       'DeadAnimal in Right ofWay',
                     'Corners Need Snow Removal' #why would this be happening in summer months??
                     
                    ]

In [27]:
print(relevant_requests)

['Pot Holes in Road', 'Trash Skip', 'Bulk Household Items', 'Metal Household ItemsTraffic Sign Req -General', 'Blue Bin Skip', 'Illegal Trash Set Out', 'Illegal TV Set Out', 'Catch Basin: Clean', 'Overgrown Veg - Public', 'Street Repair Special Req', 'Sidewalk Condition', 'Repair ManHoleCover/Grate', 'Driveway Damage', 'Barricade Pickup', 'Trash/Debris-Private, Occ', 'Repair Creek Fencing', 'Creeks Repair Cleaning', 'Complaint Reqst - General', 'Trash Can Taken / Damaged', 'Street Clean Special Req', 'DeadAnimal in Right ofWay', 'Corners Need Snow Removal']


In [28]:
#trim dataframe to match the list of relevant request types

Syr311_final = Syr311_final[(Syr311_final['complaint_type_name'].isin(relevant_requests))]

In [29]:
Syr311_final.info() #6869 rows

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6869 entries, 48614 to 3
Data columns (total 19 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   complaint_number               6869 non-null   object             
 1   open_date                      6869 non-null   datetime64[ns, UTC]
 2   close_date                     6869 non-null   object             
 3   complaint_type_name            6869 non-null   object             
 4   status_type_name               6869 non-null   object             
 5   department_name                6869 non-null   object             
 6   received_via_type_name         6869 non-null   object             
 7   identifier                     5898 non-null   object             
 8   number                         5898 non-null   object             
 9   address                        5898 non-null   object             
 10  zip                    

## Export

In [37]:
#Save as a csv
Syr311_final.to_csv('Syr311_final.csv', index = False, header=True)