# Module 3 Final Project:

-  Student Names: Kenny Oh & Moses Lin
-  Student Pace: Full Time
-  Schedule Project Review Date/Time: 8/21/2020
-  Instructor Name: Sean Abu Wilson

Dataset from: https://catalog.data.gov/dataset/terry-stops

# Importing Data and Looking at Dataset

In [1]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import sqlite3 
import matplotlib.pyplot as plt
import itertools
import warnings; warnings.simplefilter('ignore')

from pandas import set_option
plt.style.use('ggplot')
pd.set_option('display.max_columns', 1000)
%matplotlib inline 

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks
from collections import Counter
from IPython.display import Image  
import pydotplus

from scipy.stats import randint

from sklearn import metrics

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import accuracy_score, f1_score, recall_score
from sklearn.metrics import make_scorer, accuracy_score 
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier 
from sklearn.tree import export_graphviz

from sklearn.externals.six import StringIO  

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV  
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import KFold 

from sklearn.linear_model import LogisticRegression 

from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel

from sklearn.pipeline import Pipeline 

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.utils import resample

In [2]:
df = pd.read_csv('Terry_Stops.csv')
df.head(3)

Unnamed: 0,Subject Age Group,Subject ID,GO / SC Num,Terry Stop ID,Stop Resolution,Weapon Type,Officer ID,Officer YOB,Officer Gender,Officer Race,Subject Perceived Race,Subject Perceived Gender,Reported Date,Reported Time,Initial Call Type,Final Call Type,Call Type,Officer Squad,Arrest Flag,Frisk Flag,Precinct,Sector,Beat
0,-,-1,20140000120677,92317,Arrest,,7500,1984,M,Black or African American,Asian,Male,2015-10-16T00:00:00,11:32:00,-,-,-,SOUTH PCT 1ST W - ROBERT,N,N,South,O,O2
1,-,-1,20150000001463,28806,Field Contact,,5670,1965,M,White,-,-,2015-03-19T00:00:00,07:59:00,-,-,-,,N,N,-,-,-
2,-,-1,20150000001516,29599,Field Contact,,4844,1961,M,White,White,Male,2015-03-21T00:00:00,19:12:00,-,-,-,,N,-,-,-,-


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43496 entries, 0 to 43495
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Subject Age Group         43496 non-null  object
 1   Subject ID                43496 non-null  int64 
 2   GO / SC Num               43496 non-null  int64 
 3   Terry Stop ID             43496 non-null  int64 
 4   Stop Resolution           43496 non-null  object
 5   Weapon Type               43496 non-null  object
 6   Officer ID                43496 non-null  object
 7   Officer YOB               43496 non-null  int64 
 8   Officer Gender            43496 non-null  object
 9   Officer Race              43496 non-null  object
 10  Subject Perceived Race    43496 non-null  object
 11  Subject Perceived Gender  43496 non-null  object
 12  Reported Date             43496 non-null  object
 13  Reported Time             43496 non-null  object
 14  Initial Call Type     

# Cleaning Dataset

In [4]:
# Dropping columns that provide no real information to us, or is too difficult to work with.

# Subject ID, GO/SC Num, Terry Stop ID, Officer ID are not relevant at all
# Call Types has too many null values
# Initial Call Type and Final Call Type have too many different values.
# Officer Squad, Precinct, Sector, Beat are too specific and is similar to ID in not being too relevant

df.drop(columns=['Subject ID', 'GO / SC Num', 'Terry Stop ID', 'Officer ID', 'Call Type', 'Initial Call Type', 'Final Call Type', 'Officer Squad', 'Precinct', 'Sector', 'Beat'], inplace=True)
df

Unnamed: 0,Subject Age Group,Stop Resolution,Weapon Type,Officer YOB,Officer Gender,Officer Race,Subject Perceived Race,Subject Perceived Gender,Reported Date,Reported Time,Arrest Flag,Frisk Flag
0,-,Arrest,,1984,M,Black or African American,Asian,Male,2015-10-16T00:00:00,11:32:00,N,N
1,-,Field Contact,,1965,M,White,-,-,2015-03-19T00:00:00,07:59:00,N,N
2,-,Field Contact,,1961,M,White,White,Male,2015-03-21T00:00:00,19:12:00,N,-
3,-,Field Contact,,1963,M,White,-,-,2015-04-01T00:00:00,04:55:00,N,N
4,-,Field Contact,,1977,M,White,Black or African American,Male,2015-04-03T00:00:00,00:41:00,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...
43491,56 and Above,Arrest,-,1992,M,White,White,Male,2020-07-12T00:00:00,08:20:33,Y,N
43492,56 and Above,Field Contact,-,1995,F,Hispanic or Latino,White,Male,2020-07-21T00:00:00,00:41:13,N,N
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y
43494,56 and Above,Field Contact,Knife/Cutting/Stabbing Instrument,1983,M,Two or More Races,Black or African American,Male,2020-08-04T00:00:00,23:07:39,N,Y


In [5]:
# Renaming columns so that they will be easier to work with.

df.columns = ['subject_age_group', 'stop_resolution', 'weapon_type', 'officer_yob', 'officer_gender', 'officer_race', 'subject_perceived_race', 'subject_perceived_gender','reported_date', 'reported_time', 'arrest_flag', 'frisk_flag']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
0,-,Arrest,,1984,M,Black or African American,Asian,Male,2015-10-16T00:00:00,11:32:00,N,N
1,-,Field Contact,,1965,M,White,-,-,2015-03-19T00:00:00,07:59:00,N,N
2,-,Field Contact,,1961,M,White,White,Male,2015-03-21T00:00:00,19:12:00,N,-
3,-,Field Contact,,1963,M,White,-,-,2015-04-01T00:00:00,04:55:00,N,N
4,-,Field Contact,,1977,M,White,Black or African American,Male,2015-04-03T00:00:00,00:41:00,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...
43491,56 and Above,Arrest,-,1992,M,White,White,Male,2020-07-12T00:00:00,08:20:33,Y,N
43492,56 and Above,Field Contact,-,1995,F,Hispanic or Latino,White,Male,2020-07-21T00:00:00,00:41:13,N,N
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y
43494,56 and Above,Field Contact,Knife/Cutting/Stabbing Instrument,1983,M,Two or More Races,Black or African American,Male,2020-08-04T00:00:00,23:07:39,N,Y


In [20]:
# Seeing what values we have for each column to work with.

#df['subject_age_group'].value_counts()
#df['stop_resolution'].value_counts()
#df['weapon_type'].value_counts()
#df['officer_yob'].value_counts()
#df['officer_race'].value_counts()
#df['subject_perceived_race'].value_counts()
#df['subject_perceived_gender'].value_counts()
#df['arrest_flag'].value_counts()
#df['frisk_flag'].value_counts()

White                                        17051
Black or African American                    10309
Hispanic                                      1639
Unknown                                       1418
American Indian or Alaska Native              1068
Asian                                         1022
Multi-Racial                                   787
Other                                          146
Native Hawaiian or Other Pacific Islander        1
Name: subject_perceived_race, dtype: int64

In [7]:
# 1371 Entries without an age group. Will remove.

df = df[df.subject_age_group != '-']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
130,56 and Above,Field Contact,-,1995,M,White,White,Male,2019-12-11T00:00:00,01:25:31,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
200,56 and Above,Arrest,-,1989,M,White,Asian,Male,2019-12-21T00:00:00,21:02:58,Y,Y
703,56 and Above,Field Contact,-,1972,M,White,White,Male,2019-12-25T00:00:00,11:42:08,N,Y
...,...,...,...,...,...,...,...,...,...,...,...,...
43491,56 and Above,Arrest,-,1992,M,White,White,Male,2020-07-12T00:00:00,08:20:33,Y,N
43492,56 and Above,Field Contact,-,1995,F,Hispanic or Latino,White,Male,2020-07-21T00:00:00,00:41:13,N,N
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y
43494,56 and Above,Field Contact,Knife/Cutting/Stabbing Instrument,1983,M,Two or More Races,Black or African American,Male,2020-08-04T00:00:00,23:07:39,N,Y


In [8]:
# Will lose ~8091 entries here, but there was nothing recorded. Have to remove.

df = df[df.weapon_type != '-']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,00:31:00,N,Y
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015-04-19T00:00:00,03:02:00,N,Y
1378,1 - 17,Field Contact,,1977,M,White,Black or African American,Male,2015-04-19T00:00:00,05:51:00,N,-
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Knife/Cutting/Stabbing Instrument,1989,M,Two or More Races,White,Male,2020-04-09T00:00:00,10:55:21,Y,Y
43478,56 and Above,Arrest,Knife/Cutting/Stabbing Instrument,1976,M,White,White,Male,2020-04-23T00:00:00,00:42:59,Y,Y
43489,56 and Above,Field Contact,Knife/Cutting/Stabbing Instrument,1988,M,Black or African American,Black or African American,Male,2020-07-09T00:00:00,19:11:09,N,Y
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y


In [9]:
# Will classify weapons into more manageable categories, as some are redundant.

df['weapon_type'] = df['weapon_type'].replace(['Handgun', 'Firearm Other', 'Firearm (unk type)', 'Other Firearm', 'Rifle', 'Shotgun', 'Automatic Handgun'], 'Firearm')
df['weapon_type'] = df['weapon_type'].replace(['Lethal Cutting Instrument', 'Knife/Cutting/Stabbing Instrument', 'Blunt Object/Striking Implement', 'Club, Blackjack, Brass Knuckles', 'Club', 'Blackjack', 'Brass Knuckles'], 'Melee Weapon')
df['weapon_type'] = df['weapon_type'].replace(['None/Not Applicable'], 'None')
df['weapon_type'] = df['weapon_type'].replace(['Mace/Pepper Spray', 'Taser/Stun Gun', 'Fire/Incendiary Device'], 'Other')
df['weapon_type'].value_counts()

None            31607
Melee Weapon     1990
Firearm           415
Other              22
Name: weapon_type, dtype: int64

In [10]:
# Strange to not know own officer's race. Will remove 7 entries.

df = df[df.officer_race != 'Unknown']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,00:31:00,N,Y
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015-04-19T00:00:00,03:02:00,N,Y
1378,1 - 17,Field Contact,,1977,M,White,Black or African American,Male,2015-04-19T00:00:00,05:51:00,N,-
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Melee Weapon,1989,M,Two or More Races,White,Male,2020-04-09T00:00:00,10:55:21,Y,Y
43478,56 and Above,Arrest,Melee Weapon,1976,M,White,White,Male,2020-04-23T00:00:00,00:42:59,Y,Y
43489,56 and Above,Field Contact,Melee Weapon,1988,M,Black or African American,Black or African American,Male,2020-07-09T00:00:00,19:11:09,N,Y
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y


In [11]:
# Removing unrecorded race entries. Losing 196 entries.

df = df[df.subject_perceived_race != '-']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,00:31:00,N,Y
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015-04-19T00:00:00,03:02:00,N,Y
1378,1 - 17,Field Contact,,1977,M,White,Black or African American,Male,2015-04-19T00:00:00,05:51:00,N,-
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Melee Weapon,1989,M,Two or More Races,White,Male,2020-04-09T00:00:00,10:55:21,Y,Y
43478,56 and Above,Arrest,Melee Weapon,1976,M,White,White,Male,2020-04-23T00:00:00,00:42:59,Y,Y
43489,56 and Above,Field Contact,Melee Weapon,1988,M,Black or African American,Black or African American,Male,2020-07-09T00:00:00,19:11:09,N,Y
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y


In [12]:
# Combining three categories since they are similar in terms of what they mean in this scenario

df['subject_perceived_gender'] = df['subject_perceived_gender'].replace(['-', 'Unknown', 'Gender Diverse'], 'Unable to Determine') 
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,00:31:00,N,Y
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015-04-19T00:00:00,03:02:00,N,Y
1378,1 - 17,Field Contact,,1977,M,White,Black or African American,Male,2015-04-19T00:00:00,05:51:00,N,-
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Melee Weapon,1989,M,Two or More Races,White,Male,2020-04-09T00:00:00,10:55:21,Y,Y
43478,56 and Above,Arrest,Melee Weapon,1976,M,White,White,Male,2020-04-23T00:00:00,00:42:59,Y,Y
43489,56 and Above,Field Contact,Melee Weapon,1988,M,Black or African American,Black or African American,Male,2020-07-09T00:00:00,19:11:09,N,Y
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y


In [13]:
# Removing 390 entries that are not labeled.

df = df[df.frisk_flag != '-']
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,reported_time,arrest_flag,frisk_flag
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,18:35:00,N,N
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,03:21:00,N,N
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,00:31:00,N,Y
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015-04-19T00:00:00,03:02:00,N,Y
1380,1 - 17,Field Contact,,1988,M,White,White,Male,2015-04-20T00:00:00,22:40:00,N,N
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Melee Weapon,1989,M,Two or More Races,White,Male,2020-04-09T00:00:00,10:55:21,Y,Y
43478,56 and Above,Arrest,Melee Weapon,1976,M,White,White,Male,2020-04-23T00:00:00,00:42:59,Y,Y
43489,56 and Above,Field Contact,Melee Weapon,1988,M,Black or African American,Black or African American,Male,2020-07-09T00:00:00,19:11:09,N,Y
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020-07-26T00:00:00,20:37:41,Y,Y


# Feature Engineering

In [14]:
# Changing reported_time into useable values.

# Morning = 5am - 12pm
# Afternoon = 1pm to 5pm
# Evening = 6pm to 9pm
# Night = 10pm to 4am

from datetime import datetime
df['reported_hour'] = df['reported_time'].apply(pd.to_datetime).dt.hour
df.drop(['reported_time'], axis=1, inplace=True)
df.head(3)

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,arrest_flag,frisk_flag,reported_hour
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018-09-23T00:00:00,N,N,18
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018-09-24T00:00:00,N,N,3
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015-09-13T00:00:00,N,Y,0


In [15]:
df['reported_hour'] = df['reported_hour'].replace([5, 6, 7, 8, 9, 10, 11, 12], 'morning')
df['reported_hour'] = df['reported_hour'].replace([13, 14, 15, 16, 17], 'afternoon')
df['reported_hour'] = df['reported_hour'].replace([18, 19, 20, 21], 'evening')
df['reported_hour'] = df['reported_hour'].replace([22, 23, 0, 1, 2, 3, 4], 'night')
df['reported_hour'].value_counts()

night        10743
morning       8571
afternoon     7908
evening       6219
Name: reported_hour, dtype: int64

In [16]:
df['reported_date'] = df['reported_date'].str.slice(0, 4, 1)
df

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_yob,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,reported_date,arrest_flag,frisk_flag,reported_hour
128,36 - 45,Offense Report,,1985,M,White,Unknown,Male,2018,N,N,evening
175,36 - 45,Offense Report,,1992,M,White,White,Female,2018,N,N,night
1376,1 - 17,Arrest,,1992,M,White,American Indian or Alaska Native,Male,2015,N,Y,night
1377,1 - 17,Field Contact,,1988,F,Not Specified,Unknown,Male,2015,N,Y,night
1380,1 - 17,Field Contact,,1988,M,White,White,Male,2015,N,N,night
...,...,...,...,...,...,...,...,...,...,...,...,...
43475,56 and Above,Arrest,Melee Weapon,1989,M,Two or More Races,White,Male,2020,Y,Y,morning
43478,56 and Above,Arrest,Melee Weapon,1976,M,White,White,Male,2020,Y,Y,night
43489,56 and Above,Field Contact,Melee Weapon,1988,M,Black or African American,Black or African American,Male,2020,N,Y,evening
43493,56 and Above,Arrest,Firearm,1994,M,Asian,Black or African American,Male,2020,Y,Y,evening


In [17]:
# reported_date is a str as of now

df['reported_date'] = df['reported_date'].astype(int)

In [18]:
df['officer_age'] = df['reported_date'] - df['officer_yob']
df.drop(['officer_yob', 'reported_date'], axis=1, inplace=True)
df.head()

Unnamed: 0,subject_age_group,stop_resolution,weapon_type,officer_gender,officer_race,subject_perceived_race,subject_perceived_gender,arrest_flag,frisk_flag,reported_hour,officer_age
128,36 - 45,Offense Report,,M,White,Unknown,Male,N,N,evening,33
175,36 - 45,Offense Report,,M,White,White,Female,N,N,night,26
1376,1 - 17,Arrest,,M,White,American Indian or Alaska Native,Male,N,Y,night,23
1377,1 - 17,Field Contact,,F,Not Specified,Unknown,Male,N,Y,night,27
1380,1 - 17,Field Contact,,M,White,White,Male,N,N,night,27


In [19]:
# Generate Dummy Variable for literally everything

dummyf =  pd.get_dummies(df, columns=['subject_age_group', 'stop_resolution', 'weapon_type', 
'officer_gender', 'officer_race', 'subject_perceived_race', 'subject_perceived_gender', 'frisk_flag', 
'reported_hour'])

dummyf

Unnamed: 0,arrest_flag,officer_age,subject_age_group_1 - 17,subject_age_group_18 - 25,subject_age_group_26 - 35,subject_age_group_36 - 45,subject_age_group_46 - 55,subject_age_group_56 and Above,stop_resolution_Arrest,stop_resolution_Citation / Infraction,stop_resolution_Field Contact,stop_resolution_Offense Report,stop_resolution_Referred for Prosecution,weapon_type_Firearm,weapon_type_Melee Weapon,weapon_type_None,weapon_type_Other,officer_gender_F,officer_gender_M,officer_race_American Indian/Alaska Native,officer_race_Asian,officer_race_Black or African American,officer_race_Hispanic or Latino,officer_race_Nat Hawaiian/Oth Pac Islander,officer_race_Not Specified,officer_race_Two or More Races,officer_race_White,subject_perceived_race_American Indian or Alaska Native,subject_perceived_race_Asian,subject_perceived_race_Black or African American,subject_perceived_race_Hispanic,subject_perceived_race_Multi-Racial,subject_perceived_race_Native Hawaiian or Other Pacific Islander,subject_perceived_race_Other,subject_perceived_race_Unknown,subject_perceived_race_White,subject_perceived_gender_Female,subject_perceived_gender_Male,subject_perceived_gender_Unable to Determine,frisk_flag_N,frisk_flag_Y,reported_hour_afternoon,reported_hour_evening,reported_hour_morning,reported_hour_night
128,N,33,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0
175,N,26,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,1
1376,N,23,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1
1377,N,27,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1
1380,N,27,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43475,Y,31,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,1,0
43478,Y,44,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1
43489,N,32,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0
43493,Y,26,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0


# Feature Selection