#### Dummy encoding the 311 Service Request (SR) 2019 dataset

In [31]:
import pandas as pd
import numpy as np
import datetime as dt

In [32]:
# Load the SR_2019 extract; the first CSV column is used as the row index.
dfsr = pd.read_csv(
    filepath_or_buffer=r'C:\Documents\projects\HackLA\311\data\WorkedonData\SR_2019.csv',
    index_col=0,
)

In [33]:
# Lift pandas' display truncation so every column and every row is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [34]:
# Preview the first three rows of the freshly loaded frame.
dfsr.head(n=3)

Unnamed: 0,SRNumber,CreatedDate,UpdatedDate,NC_ID,RequestType,RequestSource,Address,ZipCode,PolicePrecinct
0,1-1262692791,01/01/2019 12:02:00 AM,01/04/2019 11:03:00 AM,119,Bulky Items,Self Service,"616 N GRAMERCY PL, 90004",90004,OLYMPIC
2,1-1262693571,01/01/2019 12:10:00 AM,01/03/2019 12:27:00 AM,113,Graffiti Removal,Self Service,"9167 N RESEDA BLVD, 91324",91324,DEVONSHIRE
3,1-1262692831,01/01/2019 12:19:00 AM,01/07/2019 09:43:00 AM,124,Illegal Dumping Pickup,Self Service,"8752 N YOLANDA AVE, 91324",91324,DEVONSHIRE


### Explore which columns to make dummies and clean them up

#### Drop SRNumber, UpdatedDate, & Address so the data can be grouped into fewer rows and dummy-encoded without a huge number of variables

In [35]:
# Remove the per-request identifier, the update timestamp, and the street address.
dfsr = dfsr.drop(columns=['SRNumber', 'UpdatedDate', 'Address'])

In [36]:
# Confirm which columns remain after the drop.
dfsr.head(n=2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,01/01/2019 12:02:00 AM,119,Bulky Items,Self Service,90004,OLYMPIC
2,01/01/2019 12:10:00 AM,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


In [37]:
# Parse CreatedDate with an explicit format rather than letting pandas infer it.
# The raw values look like "01/01/2019 12:02:00 AM" (month/day/year, 12-hour clock);
# spelling the format out removes the month-vs-day ambiguity and avoids slow
# per-value inference on a column with over a million rows.
# normalize() then resets the time-of-day to midnight, leaving only the date.
dfsr['CreatedDate'] = pd.to_datetime(
    dfsr['CreatedDate'], format='%m/%d/%Y %I:%M:%S %p'
).dt.normalize()

In [38]:
# Inspect the column dtypes; CreatedDate should report datetime64 once it has been parsed.
dfsr.dtypes

CreatedDate       datetime64[ns]
NC_ID                      int64
RequestType               object
RequestSource             object
ZipCode                    int64
PolicePrecinct            object
dtype: object

In [39]:
# Replace each CreatedDate timestamp with its month number.
# NOTE: the column keeps the name 'CreatedDate' but holds a month from here on.
dfsr = dfsr.assign(CreatedDate=dfsr['CreatedDate'].dt.month)

In [40]:
# Check that CreatedDate now shows a month number.
dfsr.head(n=2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,1,119,Bulky Items,Self Service,90004,OLYMPIC
2,1,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


In [41]:

# Number of rows per RequestType, most frequent first.
dfsr['RequestType'].value_counts()

Bulky Items                   591007
Graffiti Removal              320761
Illegal Dumping Pickup        120932
Metal/Household Appliances    101799
Homeless Encampment            54922
Electronic Waste               38056
Dead Animal Removal            25059
Other                          17620
Single Streetlight Issue       11908
Multiple Streetlight Issue      7892
Report Water Waste              1044
Feedback                         644
Name: RequestType, dtype: int64

In [42]:
# Number of rows per RequestSource, most frequent first.
dfsr['RequestSource'].value_counts()

Call                             601086
Mobile App                       318003
Driver Self Report               198142
Self Service                     168679
Email                              4295
Council's Office                    841
Voicemail                           316
Twitter                             108
Walk-in                              79
Web Form                             26
Fax                                  25
Mayor's Office                       25
Queue Initiated Customer Call        10
City Attorney                         4
Letter                                2
Radio                                 1
TTY/ NexTalk                          1
Social                                1
Name: RequestSource, dtype: int64

#### Drop rows whose RequestSource occurs fewer than 10 times

In [43]:
# For every row, how many rows in the frame share its RequestSource.
source_freq = dfsr.groupby('RequestSource')['RequestSource'].transform('size')
# Keep only rows whose source appears at least 10 times.
dfsr = dfsr[source_freq >= 10]

In [44]:
# Re-count RequestSource to verify the rare sources were removed.
dfsr['RequestSource'].value_counts()

Call                             601086
Mobile App                       318003
Driver Self Report               198142
Self Service                     168679
Email                              4295
Council's Office                    841
Voicemail                           316
Twitter                             108
Walk-in                              79
Web Form                             26
Fax                                  25
Mayor's Office                       25
Queue Initiated Customer Call        10
Name: RequestSource, dtype: int64

In [45]:
# Last look at the cleaned frame before encoding.
dfsr.head(n=2)

Unnamed: 0,CreatedDate,NC_ID,RequestType,RequestSource,ZipCode,PolicePrecinct
0,1,119,Bulky Items,Self Service,90004,OLYMPIC
2,1,113,Graffiti Removal,Self Service,91324,DEVONSHIRE


In [46]:
# One-hot encode the four categorical columns, then sum the indicator columns
# within each (CreatedDate, PolicePrecinct) group so every cell becomes a count
# of matching rows. as_index=False keeps the two keys as ordinary columns.
dfsr_d = (
    pd.get_dummies(
        dfsr,
        columns=['NC_ID', 'RequestType', 'RequestSource', 'ZipCode'],
    )
    .groupby(['CreatedDate', 'PolicePrecinct'], as_index=False)
    .sum()
)

dfsr_d.head()

Unnamed: 0,CreatedDate,PolicePrecinct,NC_ID_4,NC_ID_5,NC_ID_6,NC_ID_7,NC_ID_8,NC_ID_9,NC_ID_10,NC_ID_11,NC_ID_13,NC_ID_14,NC_ID_15,NC_ID_16,NC_ID_17,NC_ID_18,NC_ID_19,NC_ID_20,NC_ID_21,NC_ID_22,NC_ID_23,NC_ID_24,NC_ID_25,NC_ID_26,NC_ID_27,NC_ID_28,NC_ID_29,NC_ID_30,NC_ID_32,NC_ID_33,NC_ID_34,NC_ID_36,NC_ID_37,NC_ID_38,NC_ID_39,NC_ID_40,NC_ID_41,NC_ID_42,NC_ID_43,NC_ID_44,NC_ID_46,NC_ID_47,NC_ID_48,NC_ID_50,NC_ID_52,NC_ID_53,NC_ID_54,NC_ID_55,NC_ID_58,NC_ID_60,NC_ID_61,NC_ID_62,NC_ID_63,NC_ID_64,NC_ID_66,NC_ID_67,NC_ID_68,NC_ID_70,NC_ID_71,NC_ID_73,NC_ID_74,NC_ID_75,NC_ID_76,NC_ID_77,NC_ID_78,NC_ID_79,NC_ID_80,NC_ID_81,NC_ID_84,NC_ID_86,NC_ID_87,NC_ID_88,NC_ID_90,NC_ID_91,NC_ID_92,NC_ID_93,NC_ID_94,NC_ID_95,NC_ID_96,NC_ID_97,NC_ID_99,NC_ID_100,NC_ID_101,NC_ID_102,NC_ID_104,NC_ID_109,NC_ID_110,NC_ID_111,NC_ID_112,NC_ID_113,NC_ID_114,NC_ID_115,NC_ID_117,NC_ID_118,NC_ID_119,NC_ID_120,NC_ID_121,NC_ID_122,NC_ID_124,NC_ID_125,NC_ID_126,NC_ID_127,NC_ID_128,RequestType_Bulky Items,RequestType_Dead Animal Removal,RequestType_Electronic Waste,RequestType_Feedback,RequestType_Graffiti Removal,RequestType_Homeless Encampment,RequestType_Illegal Dumping Pickup,RequestType_Metal/Household Appliances,RequestType_Multiple Streetlight Issue,RequestType_Other,RequestType_Report Water Waste,RequestType_Single Streetlight Issue,RequestSource_Call,RequestSource_Council's Office,RequestSource_Driver Self Report,RequestSource_Email,RequestSource_Fax,RequestSource_Mayor's Office,RequestSource_Mobile App,RequestSource_Queue Initiated Customer Call,RequestSource_Self Service,RequestSource_Twitter,RequestSource_Voicemail,RequestSource_Walk-in,RequestSource_Web 
Form,ZipCode_0,ZipCode_90001,ZipCode_90002,ZipCode_90003,ZipCode_90004,ZipCode_90005,ZipCode_90006,ZipCode_90007,ZipCode_90008,ZipCode_90010,ZipCode_90011,ZipCode_90012,ZipCode_90013,ZipCode_90014,ZipCode_90015,ZipCode_90016,ZipCode_90017,ZipCode_90018,ZipCode_90019,ZipCode_90020,ZipCode_90021,ZipCode_90023,ZipCode_90024,ZipCode_90025,ZipCode_90026,ZipCode_90027,ZipCode_90028,ZipCode_90029,ZipCode_90031,ZipCode_90032,ZipCode_90033,ZipCode_90034,ZipCode_90035,ZipCode_90036,ZipCode_90037,ZipCode_90038,ZipCode_90039,ZipCode_90041,ZipCode_90042,ZipCode_90043,ZipCode_90044,ZipCode_90045,ZipCode_90046,ZipCode_90047,ZipCode_90048,ZipCode_90049,ZipCode_90056,ZipCode_90057,ZipCode_90058,ZipCode_90059,ZipCode_90061,ZipCode_90062,ZipCode_90063,ZipCode_90064,ZipCode_90065,ZipCode_90066,ZipCode_90067,ZipCode_90068,ZipCode_90069,ZipCode_90071,ZipCode_90077,ZipCode_90089,ZipCode_90094,ZipCode_90095,ZipCode_90210,ZipCode_90211,ZipCode_90212,ZipCode_90230,ZipCode_90232,ZipCode_90245,ZipCode_90247,ZipCode_90248,ZipCode_90272,ZipCode_90275,ZipCode_90290,ZipCode_90291,ZipCode_90292,ZipCode_90293,ZipCode_90301,ZipCode_90302,ZipCode_90304,ZipCode_90305,ZipCode_90402,ZipCode_90404,ZipCode_90405,ZipCode_90501,ZipCode_90502,ZipCode_90504,ZipCode_90710,ZipCode_90717,ZipCode_90731,ZipCode_90732,ZipCode_90744,ZipCode_90745,ZipCode_90810,ZipCode_91030,ZipCode_91040,ZipCode_91042,ZipCode_91105,ZipCode_91201,ZipCode_91202,ZipCode_91204,ZipCode_91214,ZipCode_91236,ZipCode_91302,ZipCode_91303,ZipCode_91304,ZipCode_91306,ZipCode_91307,ZipCode_91311,ZipCode_91316,ZipCode_91324,ZipCode_91325,ZipCode_91326,ZipCode_91330,ZipCode_91331,ZipCode_91335,ZipCode_91340,ZipCode_91342,ZipCode_91343,ZipCode_91344,ZipCode_91345,ZipCode_91352,ZipCode_91356,ZipCode_91364,ZipCode_91367,ZipCode_91401,ZipCode_91402,ZipCode_91403,ZipCode_91405,ZipCode_91406,ZipCode_91411,ZipCode_91423,ZipCode_91436,ZipCode_91504,ZipCode_91505,ZipCode_91506,ZipCode_91601,ZipCode_91602,ZipCode_91604,ZipCode_91605,ZipCode_91606,ZipCode_916
07,ZipCode_91608,ZipCode_91803,ZipCode_92605
0,1,77TH STREET,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,778,1153,876,634,506,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2372,130,175,0,1098,96,805,335,38,63,0,35,3241,0,863,8,0,0,684,0,351,0,0,0,0,0,222,0,652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,902,0,0,0,0,751,997,0,0,1166,0,0,4,0,0,0,0,445,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,CENTRAL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,47,815,0,0,0,1010,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,317,11,21,2,991,161,119,31,47,41,1,130,432,0,434,11,0,0,721,0,273,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,853,251,185,370,0,77,0,0,0,44,0,0,0,57,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,DEVONSHIRE,527,2,0,0,0,0,0,0,72,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,856,0,0,0,0,0,0,520,0,458,388,0,0,577,0,531,0,0,603,0,0,0,0,2625,150,213,2,529,112,400,530,51,55,1,44,2924,6,169,15,1,0,785,0,810,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,76,162,0,866,0,594,659,645,0,0,0,0,2,617,1091,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,FOOTHILL,0,0,48,1557,1185,436,1042,0,0,0,0,0,0,0,0,0,0,49,414,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2486,110,232,2,664,194,470,405,44,86,0,38,2472,29,209,11,0,0,1399,0,610,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,508,626,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,1496,0,113,232,0,0,0,1231,0,0,0,0,0,0,0,0,0,0,0,52,10,0,0,0,0,458,0,0,0,0,0
4,1,HARBOR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,819,474,1624,452,877,631,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2200,114,193,1,1274,107,473,402,18,67,2,26,2521,1,1073,5,0,0,820,0,456,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65,0,0,0,0,0,0,0,0,0,0,0,0,0,747,8,0,450,25,1565,393,1615,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [47]:
# (rows, columns) of the grouped dummy table.
dfsr_d.shape

(252, 274)

In [48]:
# Write the grouped dummy table to disk (the row index is written too, as by default).
dfsr_d.to_csv(
    path_or_buf=r'C:\Documents\projects\HackLA\311\data\WorkedonData\sr_2019_grpby_dummies.csv',
)