#### Dummy encoding Crime 2019 dataset

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
dfsr = pd.read_csv(r'C:\Documents\projects\HackLA\311\data\WorkedonData\SR_2019.csv', index_col=0)

In [3]:
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [4]:
dfsr.head(3)

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,NC_ID,RequestType,RequestSource,Address,ZipCode,PolicePrecinct
0,1-1262692791,01/01/2019 12:02:00 AM,01/04/2019 11:03:00 AM,119,Bulky Items,Self Service,"616 N GRAMERCY PL, 90004",90004,OLYMPIC
2,1-1262693571,01/01/2019 12:10:00 AM,01/03/2019 12:27:00 AM,113,Graffiti Removal,Self Service,"9167 N RESEDA BLVD, 91324",91324,DEVONSHIRE
3,1-1262692831,01/01/2019 12:19:00 AM,01/07/2019 09:43:00 AM,124,Illegal Dumping Pickup,Self Service,"8752 N YOLANDA AVE, 91324",91324,DEVONSHIRE


### Explore which columns to make dummies and clean them up

#### Drop SRNumber, CreatedDate, & Address so can groupby into less rows and dummy without a huge number of variables

In [5]:
dfsr.drop(['SRNumber', 'UpdatedDate', 'Address'], axis=1, inplace=True)

In [6]:
dfsr.head(2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,01/01/2019 12:02:00 AM,119,Bulky Items,Self Service,90004,OLYMPIC
2,01/01/2019 12:10:00 AM,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


In [7]:
dfsr['CreatedDate'] = pd.to_datetime(dfsr['CreatedDate']).dt.normalize()

In [8]:
dfsr.dtypes

CreatedDate       datetime64[ns]
NC_ID                      int64
RequestType               object
RequestSource             object
ZipCode                    int64
PolicePrecinct            object
dtype: object

In [9]:
dfsr['CreatedDate'] = dfsr['CreatedDate'].dt.month

In [10]:
dfsr.head(2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,1,119,Bulky Items,Self Service,90004,OLYMPIC
2,1,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


In [11]:

dfsr.RequestType.value_counts()

Bulky Items                   591007
Graffiti Removal              320761
Illegal Dumping Pickup        120932
Metal/Household Appliances    101799
Homeless Encampment            54922
Electronic Waste               38056
Dead Animal Removal            25059
Other                          17620
Single Streetlight Issue       11908
Multiple Streetlight Issue      7892
Report Water Waste              1044
Feedback                         644
Name: RequestType, dtype: int64

In [12]:
dfsr.RequestSource.value_counts()

Call                             601086
Mobile App                       318003
Driver Self Report               198142
Self Service                     168679
Email                              4295
Council's Office                    841
Voicemail                           316
Twitter                             108
Walk-in                              79
Web Form                             26
Fax                                  25
Mayor's Office                       25
Queue Initiated Customer Call        10
City Attorney                         4
Letter                                2
Radio                                 1
TTY/ NexTalk                          1
Social                                1
Name: RequestSource, dtype: int64

#### drop rows with frequency less than 10

In [13]:
dfsr = dfsr[dfsr.groupby('RequestSource')['RequestSource'].transform('size') > 9]

In [14]:
dfsr.RequestSource.value_counts()

Call                             601086
Mobile App                       318003
Driver Self Report               198142
Self Service                     168679
Email                              4295
Council's Office                    841
Voicemail                           316
Twitter                             108
Walk-in                              79
Web Form                             26
Fax                                  25
Mayor's Office                       25
Queue Initiated Customer Call        10
Name: RequestSource, dtype: int64

In [15]:
dfsr.head(2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,1,119,Bulky Items,Self Service,90004,OLYMPIC
2,1,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


### Although NC_ID & ZipCode aren't columns which will be merged with Service Request dataset, don't want to dummify because they are identifiers, not features.  
#### The 1s & 0s are summed.  

In [37]:
dfsr_d1 = pd.concat([dfsr, pd.get_dummies(dfsr, columns=['RequestType', 'RequestSource'])]).groupby(['CreatedDate','PolicePrecinct','NC_ID','ZipCode']).sum().reset_index()
dfsr_d1.head()

  dfsr_d1 = pd.concat([dfsr, pd.get_dummies(dfsr, columns=['RequestType', 'RequestSource'])]).groupby(['CreatedDate','PolicePrecinct','NC_ID','ZipCode']).sum().reset_index()


Unnamed: 0,CreatedDate,PolicePrecinct,NC_ID,ZipCode,RequestType_Bulky Items,RequestType_Dead Animal Removal,RequestType_Electronic Waste,RequestType_Feedback,RequestType_Graffiti Removal,RequestType_Homeless Encampment,RequestType_Illegal Dumping Pickup,RequestType_Metal/Household Appliances,RequestType_Multiple Streetlight Issue,RequestType_Other,RequestType_Report Water Waste,RequestType_Single Streetlight Issue,RequestSource_Call,RequestSource_Council's Office,RequestSource_Driver Self Report,RequestSource_Email,RequestSource_Fax,RequestSource_Mayor's Office,RequestSource_Mobile App,RequestSource_Queue Initiated Customer Call,RequestSource_Self Service,RequestSource_Twitter,RequestSource_Voicemail,RequestSource_Walk-in,RequestSource_Web Form
0,1,77TH STREET,80,90043,478.0,19.0,36.0,0.0,28.0,3.0,68.0,91.0,0.0,10.0,0.0,10.0,590.0,0.0,5.0,1.0,0.0,0.0,91.0,0.0,56.0,0.0,0.0,0.0,0.0
1,1,77TH STREET,80,90047,12.0,0.0,1.0,0.0,1.0,1.0,3.0,3.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,77TH STREET,80,90056,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,77TH STREET,80,90062,0.0,0.0,0.0,0.0,1.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1,77TH STREET,80,90305,0.0,1.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
dfsr_d1.shape

(5943, 29)

In [43]:
dfsr_d1.to_csv(r'C:\Documents\projects\HackLA\311\data\WorkedonData\sr_2019_grpby_dummies2.csv')