In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import json
import seaborn as sns
import re

In [2]:
#Read in the original csv file
Pitt311 = pd.read_csv('../data/Pittsburg311 data.csv', low_memory=False)

In [None]:
Pitt311.info()

In [3]:
#convert times to datetimes
Pitt311['CREATED_ON'] = pd.to_datetime(Pitt311['CREATED_ON'])

In [4]:
#what is the date range covered in this current dataframe?
Pitt311 = Pitt311.sort_values(['CREATED_ON'],ascending=True)
#2/20/2015 - 5/22/2021

In [None]:
Pitt311.head()

In [5]:
#Periodical Cicadas are confirmed in Pittsburgh for Brood V (2016), but there are nearby confirmations 
#for Brood VIII (2019).  Including data around both years in this dataframe.
Pitt311trim = Pitt311[Pitt311['CREATED_ON'] > dt.datetime(2015,5,1)]
Pitt311trim = Pitt311trim[Pitt311trim['CREATED_ON'] < dt.datetime(2020,7,1)]

In [None]:
Pitt311trim.info()

# Isolating data to months of May and June for each year

In [11]:
#2015
#8462 entries pre-cleaned
Pitt3112015 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2015,5,1)]
Pitt3112015 = Pitt3112015[Pitt3112015['CREATED_ON'] < dt.datetime(2015,7,1)]

In [None]:
Pitt3112015.tail()

In [12]:
#2016 - cicada emergence year
#16795 entries pre-cleaned
Pitt3112016 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2016,5,1)]
Pitt3112016 = Pitt3112016[Pitt3112016['CREATED_ON'] < dt.datetime(2016,7,1)]

In [13]:
#2017
#17100 entries pre-cleaned
Pitt3112017 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2017,5,1)]
Pitt3112017 = Pitt3112017[Pitt3112017['CREATED_ON'] < dt.datetime(2017,7,1)]

In [14]:
#2018
#20982 entries pre-cleaned
Pitt3112018 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2018,5,1)]
Pitt3112018 = Pitt3112018[Pitt3112018['CREATED_ON'] < dt.datetime(2018,7,1)]

In [15]:
#2019
#19664 entries pre-cleaned
Pitt3112019 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2019,5,1)]
Pitt3112019 = Pitt3112019[Pitt3112019['CREATED_ON'] < dt.datetime(2019,7,1)]

In [16]:
#2020
#15991 entries pre-cleaned
Pitt3112020 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2020,5,1)]
Pitt3112020 = Pitt3112020[Pitt3112020['CREATED_ON'] < dt.datetime(2020,7,1)]

In [17]:
#Bring them back to a single dataset
years = [Pitt3112015, Pitt3112016, Pitt3112017, Pitt3112018, Pitt3112019, Pitt3112020]
Pitt311_final = pd.concat(years)

## Getting relevant requests / types of calls

In [None]:
Pitt311_final['REQUEST_TYPE'].unique()

In [6]:
# What types of Phone Calls indicate bad behavior and/or an annoyed populace?
relevant_requests = ['Litter',
'Dumping (Private Property)/DO NOT USE',
'Dumping, Private Property',
'Refuse Violations',
'Graffiti, Removal',
'Graffiti, Documentation',
'Illegal Dumping',
'Dumpster (on Street)',
'Missed Pick Up',
'Abandoned Vehicle (parked on street)',
'Excessive Noise/Disturbances',
'Noise',
'Building Without a Permit',
'Barking Dog',
'Unpermitted Electrical Work',
'Weeds/Debris',
'Illegal Parking',
'Drug Enforcement',
'Junk Vehicles',
'Question',
'Broken Sidewalk',
'Replace/Repair a Sign',
'Street Light - Repair',
'Street Light - Request',
'Request New Sign',
'City Steps, Need Cleared',
'Electrical Violation',
'Loose Dog(s)',
'Potholes',
'Sidewalk Obstruction',
'Paving Request',
'Couch on Porch']

In [None]:
print(relevant_requests)

In [18]:
#trim dataframe to match the list of relevant request types
#trim dataframe to match the list of indivents/event types

Pitt311_final = Pitt311[(Pitt311['REQUEST_TYPE'].isin(relevant_requests))]

In [19]:
Pitt311_final

Unnamed: 0,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,NEIGHBORHOOD,COUNCIL_DISTRICT,WARD,TRACT,PUBLIC_WORKS_DIVISION,PLI_DIVISION,POLICE_ZONE,FIRE_ZONE,X,Y,GEO_ACCURACY
186866,538.0,2015-02-20 09:53:00,Barking Dog,Call Center,1,Animal Care & Control,Chateau,6.0,21.0,4.200398e+10,1.0,21.0,1.0,1-8,-80.030956,40.453466,APPROXIMATE
191007,836.0,2015-04-20 07:37:00,Replace/Repair a Sign,Call Center,1,DOMI - TrafficShop,Central Lawrenceville,7.0,9.0,4.200309e+10,2.0,9.0,2.0,3-3,-79.961318,40.474617,EXACT
150009,838.0,2015-04-20 07:40:00,Litter,Call Center,1,DPW - Street Maintenance,Allentown,3.0,18.0,4.200318e+10,5.0,18.0,3.0,4-5,-79.996103,40.424091,EXACT
189977,839.0,2015-04-20 07:41:00,Potholes,Call Center,1,DPW - Street Maintenance,North Oakland,8.0,4.0,4.200304e+10,3.0,4.0,4.0,2-9,-79.949757,40.450091,EXACT
51091,840.0,2015-04-20 07:46:00,Litter,Call Center,1,DPW - Street Maintenance,East Hills,9.0,13.0,4.200313e+10,2.0,13.0,5.0,3-17,-79.884480,40.455174,EXACT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
508225,526002.0,2021-05-22 12:11:00,Excessive Noise/Disturbances,Website,0,Police - Zones 1-6,East Liberty,9.0,8.0,4.200311e+10,2.0,8.0,5.0,3-23,-79.930262,40.461270,APPROXIMATE
508226,526003.0,2021-05-22 12:27:00,Weeds/Debris,Website,0,"Permits, Licenses and Inspections",Squirrel Hill South,5.0,14.0,4.200314e+10,3.0,14.0,4.0,2-20,-79.914701,40.424822,EXACT
508234,526011.0,2021-05-22 14:49:00,Potholes,Report2Gov iOS,0,DPW - Street Maintenance,Highland Park,7.0,11.0,4.200398e+10,2.0,11.0,5.0,3-15,-79.910690,40.485622,EXACT
508239,526016.0,2021-05-22 15:39:00,Replace/Repair a Sign,Website,0,DOMI - TrafficShop,Carrick,4.0,29.0,4.200329e+10,3.0,29.0,3.0,4-23,-79.989618,40.396760,EXACT


## Export

In [20]:
#Save as a csv
Pitt311_final.to_csv('Pitt311_final.csv', index = False, header=True)