In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import json
import re

In [3]:
#Read in the original csv file
Pitt311 = pd.read_csv('../data/Pittsburg311 data.csv', low_memory=False)

In [None]:
Pitt311.info()

In [5]:
#convert times to datetimes
Pitt311['CREATED_ON'] = pd.to_datetime(Pitt311['CREATED_ON'])

In [6]:
#what is the date range covered in this current dataframe?
Pitt311 = Pitt311.sort_values(['CREATED_ON'],ascending=True)
#2/20/2015 - 5/22/2021

In [8]:
Pitt311.tail()

Unnamed: 0,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,NEIGHBORHOOD,COUNCIL_DISTRICT,WARD,TRACT,PUBLIC_WORKS_DIVISION,PLI_DIVISION,POLICE_ZONE,FIRE_ZONE,X,Y,GEO_ACCURACY
508239,526016.0,2021-05-22 15:39:00,Replace/Repair a Sign,Website,0,DOMI - TrafficShop,Carrick,4.0,29.0,42003290000.0,3.0,29.0,3.0,4-23,-79.989618,40.39676,EXACT
508240,526017.0,2021-05-22 15:55:00,Parks Trails,Website,0,DPW - Park Maintenance,Squirrel Hill South,5.0,14.0,42003980000.0,3.0,14.0,4.0,2-8,-79.942605,40.431075,EXACT
508242,526019.0,2021-05-22 16:28:00,Illegal Parking,Website,0,Police - Zones 1-6,Friendship,9.0,8.0,42003080000.0,2.0,8.0,5.0,3-23,-79.933788,40.462582,APPROXIMATE
508241,526018.0,2021-05-22 16:28:00,CitiParks Programs,Website,0,Parks & Recs-Programs,Squirrel Hill North,8.0,14.0,42003140000.0,3.0,14.0,4.0,2-18,-79.923242,40.442697,APPROXIMATE
508243,526020.0,2021-05-22 17:09:00,Street Cleaning/Sweeping,Report2Gov iOS,0,DPW - Street Maintenance,Perry North,1.0,26.0,42003980000.0,1.0,26.0,1.0,1-15,-80.022789,40.486198,EXACT


In [9]:
#Periodical Cicadas are confirmed in Pittsburgh for Brood V (2016), but there are nearby confirmations 
#for Brood VIII (2019).  Including data around both years in this dataframe.
Pitt311trim = Pitt311[Pitt311['CREATED_ON'] > dt.datetime(2015,5,1)]
Pitt311trim = Pitt311trim[Pitt311trim['CREATED_ON'] < dt.datetime(2020,7,1)]

In [None]:
Pitt311trim.info()

# Isolating data to months of May and June for each year

In [10]:
#2015
#8462 entries pre-cleaned
Pitt3112015 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2015,5,1)]
Pitt3112015 = Pitt3112015[Pitt3112015['CREATED_ON'] < dt.datetime(2015,7,1)]

In [None]:
Pitt3112015.tail()

In [11]:
#2016 - cicada emergence year
#16795 entries pre-cleaned
Pitt3112016 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2016,5,1)]
Pitt3112016 = Pitt3112016[Pitt3112016['CREATED_ON'] < dt.datetime(2016,7,1)]

In [12]:
#2017
#17100 entries pre-cleaned
Pitt3112017 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2017,5,1)]
Pitt3112017 = Pitt3112017[Pitt3112017['CREATED_ON'] < dt.datetime(2017,7,1)]

In [13]:
#2018
#20982 entries pre-cleaned
Pitt3112018 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2018,5,1)]
Pitt3112018 = Pitt3112018[Pitt3112018['CREATED_ON'] < dt.datetime(2018,7,1)]

In [14]:
#2019
#19664 entries pre-cleaned
Pitt3112019 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2019,5,1)]
Pitt3112019 = Pitt3112019[Pitt3112019['CREATED_ON'] < dt.datetime(2019,7,1)]

In [15]:
#2020
#15991 entries pre-cleaned
Pitt3112020 = Pitt311trim[Pitt311trim['CREATED_ON'] > dt.datetime(2020,5,1)]
Pitt3112020 = Pitt3112020[Pitt3112020['CREATED_ON'] < dt.datetime(2020,7,1)]

In [16]:
#Bring them back to a single dataset
years = [Pitt3112015, Pitt3112016, Pitt3112017, Pitt3112018, Pitt3112019, Pitt3112020]
Pitt311_final = pd.concat(years)

## Getting relevant requests / types of calls

In [None]:
Pitt311_final['REQUEST_TYPE'].unique()

In [18]:
# What types of Phone Calls indicate bad behavior and/or an annoyed populace?
relevant_requests = ['Litter',
'Dumping (Private Property)/DO NOT USE',
'Dumping, Private Property',
'Refuse Violations',
'Graffiti, Removal',
'Graffiti, Documentation',
'Illegal Dumping',
'Dumpster (on Street)',
'Missed Pick Up',
'Abandoned Vehicle (parked on street)',
'Excessive Noise/Disturbances',
'Noise',
'Building Without a Permit',
'Barking Dog',
'Unpermitted Electrical Work',
'Weeds/Debris',
'Illegal Parking',
'Drug Enforcement',
'Junk Vehicles',
'Question',
'Broken Sidewalk',
'Replace/Repair a Sign',
'Street Light - Repair',
'Street Light - Request',
'Request New Sign',
'City Steps, Need Cleared',
'Electrical Violation',
'Loose Dog(s)',
'Potholes',
'Sidewalk Obstruction',
'Paving Request',
'Couch on Porch']

In [None]:
print(relevant_requests)

In [19]:
#trim dataframe to match the list of relevant request types

Pitt311_final = Pitt311[(Pitt311['REQUEST_TYPE'].isin(relevant_requests))]

In [None]:
Pitt311_final

## Adding a Date column and removing some extras

In [20]:
Pitt311_final['Date'] = [d.date() for d in Pitt311_final['CREATED_ON']]
Pitt311_final['Date'] = pd.to_datetime(Pitt311_final['Date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Pitt311_final['Date'] = [d.date() for d in Pitt311_final['CREATED_ON']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Pitt311_final['Date'] = pd.to_datetime(Pitt311_final['Date'])


In [21]:
Pitt311_final = Pitt311_final.drop(columns=['NEIGHBORHOOD', 'TRACT', 'PUBLIC_WORKS_DIVISION', 'PLI_DIVISION','POLICE_ZONE','WARD','X','Y','GEO_ACCURACY'])

In [23]:
Pitt311_final.tail()

Unnamed: 0,REQUEST_ID,CREATED_ON,REQUEST_TYPE,REQUEST_ORIGIN,STATUS,DEPARTMENT,COUNCIL_DISTRICT,FIRE_ZONE,Date
508225,526002.0,2021-05-22 12:11:00,Excessive Noise/Disturbances,Website,0,Police - Zones 1-6,9.0,3-23,2021-05-22
508226,526003.0,2021-05-22 12:27:00,Weeds/Debris,Website,0,"Permits, Licenses and Inspections",5.0,2-20,2021-05-22
508234,526011.0,2021-05-22 14:49:00,Potholes,Report2Gov iOS,0,DPW - Street Maintenance,7.0,3-15,2021-05-22
508239,526016.0,2021-05-22 15:39:00,Replace/Repair a Sign,Website,0,DOMI - TrafficShop,4.0,4-23,2021-05-22
508242,526019.0,2021-05-22 16:28:00,Illegal Parking,Website,0,Police - Zones 1-6,9.0,3-23,2021-05-22


## Export

In [24]:
#Save as a csv
Pitt311_final.to_csv('Pitt311_final.csv', index = False, header=True)