---

## Read in data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw Terry stops export; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='CSV/Terry_Stops.csv')

---

## Rename columns

In [3]:
# Map each original CSV header to the short snake_case name used throughout the notebook.
# Prefixes: subj_ = subject of the stop, off_ = officer.
COLUMN_RENAMES = {
    # identifiers
    'Subject ID': 'subj_id',
    'GO / SC Num': 'go_sc_num',
    'Terry Stop ID': 'terry_stop_id',
    # subject
    'Subject Age Group': 'subj_age_group',
    'Subject Perceived Race': 'subj_perceived_race',
    'Subject Perceived Gender': 'subj_perceived_gender',
    # officer
    'Officer ID': 'off_id',
    'Officer YOB': 'off_yob',
    'Officer Gender': 'off_gender',
    'Officer Race': 'off_race',
    'Officer Squad': 'off_squad',
    # stop details
    'Stop Resolution': 'stop_resolution',
    'Weapon Type': 'weapon',
    'Reported Date': 'reported_date',
    'Reported Time': 'reported_time',
    'Initial Call Type': 'init_call_type',
    'Final Call Type': 'final_call_type',
    'Call Type': 'call_type',
    'Arrest Flag': 'arrest_flag',
    'Frisk Flag': 'frisk_flag',
    # location
    'Precinct': 'precinct',
    'Sector': 'sector',
    'Beat': 'beat',
}
df = df.rename(columns=COLUMN_RENAMES)

---

### Change "-" values to N/A

In [4]:
# Cells whose entire value is "-" become the string 'N/A' in every column (exact match, not substring).
df = df.replace(to_replace='-', value='N/A')

### Change -1 Subject ID to N/A

In [5]:
# Swap the -1 subject ID for the same 'N/A' marker used elsewhere.
# Note: this leaves subj_id holding a mix of integers and the string 'N/A'.
df['subj_id'] = df['subj_id'].replace(to_replace=-1, value='N/A')

### Change Officer Squad values to general precinct/squad

In [6]:
# Drop everything from the first " -" onward so only the leading part of the squad name remains.
squad_suffix_pattern = ' -.*'
df['off_squad'] = df['off_squad'].replace(to_replace=squad_suffix_pattern, value='', regex=True)

### Change Gender value of "N" to N/A

In [7]:
# Officer gender "N" is recoded to the shared 'N/A' marker; 'M' and 'F' are left untouched.
df['off_gender'] = df['off_gender'].replace(to_replace='N', value='N/A')

## Remove values

In [None]:
# df = df[df.off_gender != 'N']
# df = df[df.off_yob != 1900]
# df = df[df.subj_age_group != '-']
# df = df[df.off_gender != 'N']
# df = df[df.off_race != 'Not Specified']
# df = df[(df.subj_perceived_race != '-')&(df.subj_perceived_race != 'Unknown')&(df.subj_perceived_race != 'Other')] # 3287 values dropped
# df = df[(df.subj_perceived_gender != 'Unable to Determine')&
#     (df.subj_perceived_gender != '-')&
#     (df.subj_perceived_gender != 'Unknown')&
#     (df.subj_perceived_gender != 'Gender Diverse (gender non-conforming and/or transgender)')]
# df = df[df.beat != '-']
# df = df[df.call_type != 'None']

---

## Remove Columns

In [None]:
# df = df.drop(columns=['subj_id', 
#                      'go_sc_num', # General offense or Street check number. Relates stop to parent report 
#                      'terry_stop_id',
#                      'reported_time',
#                      'final_call_type',
#                      'off_squad',
#                      'precinct',
#                      'sector',
#                      'arrest_flag'])

---

## Duplicate Stop IDs

When multiple weapons are found there are duplicate records of a stop in the terry_stop_id column.

In [None]:
# Show the first record (as a Series) for one example subject ID.
df.loc[df['subj_id'] == 7726859935].iloc[0]

In [None]:
# Show every record for the same example subject ID.
df.loc[df['subj_id'].eq(7726859935)]

In [None]:
# Ten most frequent subject IDs (value_counts sorts by descending frequency).
df['subj_id'].value_counts().head(10)

In [None]:
# Ten most frequent stop IDs; any count above 1 means the stop appears in several rows.
df['terry_stop_id'].value_counts().head(10)

In [None]:
# Pull every row for a hand-picked set of stop IDs so repeated records can be compared side by side.
inspected_stop_ids = [
    13080077761,
    12601385662,
    12851512661,
    12105013403,
    9585545373,
    12781960580,
    8677596250,
    12034618758,
    12689034912,
]
df[df['terry_stop_id'].isin(inspected_stop_ids)]

---

## Get officer age by subtracting officer year of birth from the reported year

In [None]:
# df['date'] = df['reported_date'].astype(str).str[0:4]

In [None]:
# df['date'] = df['date'].astype('int64')

In [None]:
# df['off_age'] = df['reported_year'] - df['off_yob']

In [None]:
# df = df.drop(columns=['reported_date', 'off_yob'])

## Format Date of Report and Create Officer's Age Column

In [8]:
# Replace the "T" separator in the timestamp with a space, then parse it as a datetime
reported_at = pd.to_datetime(df['reported_date'].str.replace('T', ' '))
df['reported_date'] = reported_at

# Split the report date into numeric year / month / day columns
report_date_parts = pd.DatetimeIndex(reported_at)
df['reported_year'] = report_date_parts.year
df['reported_month'] = report_date_parts.month
df['reported_day'] = report_date_parts.day

# The hour is the first two characters of reported_time and is kept as a string
# (assumes reported_time starts with a two-digit hour — TODO confirm against the raw data)
df['reported_hour'] = [clock[:2] for clock in df['reported_time']]

# Officer age at the time of the report: reported year minus officer year of birth
df['off_age'] = df['reported_year'] - df['off_yob']

# The source columns are no longer needed once the derived columns exist
df = df.drop(columns=['reported_date', 'reported_time', 'off_yob'])

---

## Strip Whitespace from all text columns (including beat)

In [None]:
def _strip_text(value):
    """Return ``value`` without surrounding whitespace if it is a string; otherwise return it unchanged."""
    return value.strip() if isinstance(value, str) else value


# Strip whitespace element by element in every object-dtype column.
# Why not `.str.strip()`: on an object column it returns NaN for every non-string
# element, which would blank out columns that mix types (e.g. subj_id holds
# integers alongside the 'N/A' marker). Mapping element-wise leaves non-strings intact.
df = df.apply(lambda column: column.map(_strip_text) if column.dtype == "object" else column)

---

## Group weapon types into broader categories

In [None]:
# Raw weapon descriptions grouped under the broader category they collapse into.
WEAPON_GROUPS = {
    'Firearm': [
        'Firearm Other',
        'Handgun',
        'Rifle',
        'Firearm (unk type)',
        'Other Firearm',
        'Automatic Handgun',
        'Shotgun',
    ],
    'Blade': [
        'Lethal Cutting Instrument',
        'Knife/Cutting/Stabbing Instrument',
    ],
    'Blunt Object': [
        'Club, Blackjack, Brass Knuckles',
        'Club',
        'Blunt Object/Striking Implement',
        'Brass Knuckles',
        'Blackjack',
    ],
    'Non-Lethal': [
        'Fire/Incendiary Device',
        'Mace/Pepper Spray',
        'Taser/Stun Gun',
    ],
    # NOTE(review): the earlier global replace already turns "-" into 'N/A', so the
    # "-" entry probably never matches at this point — confirm whether weapon 'N/A'
    # should also be folded into 'None'.
    'None': [
        '-',
        'None/Not Applicable',
    ],
}

# Invert to a flat {raw value: category} lookup. Values already equal to a category
# name (e.g. 'Firearm') are not listed and are therefore left as they are.
weapon_lookup = {
    raw_value: category
    for category, raw_values in WEAPON_GROUPS.items()
    for raw_value in raw_values
}

# Apply the grouping once, and only to the weapon column, so a matching string in
# any other column cannot be rewritten by accident.
df['weapon'] = df['weapon'].replace(weapon_lookup)

---

---

## Rename call_type values to short snake_case labels

In [None]:
# Short labels for the verbose call_type values; values not listed here are left as they are.
call_type_labels = {
    'ONVIEW': 'onview',
    'PROACTIVE (OFFICER INITIATED)': 'officer_initiated',
    'TELEPHONE OTHER, NOT 911': 'non_911_call',
    'TEXT MESSAGE': 'text',
    'ALARM CALL (NOT POLICE ALARM)': 'alarm',
    'SCHEDULED EVENT (RECURRING)': 'scheduled_event',
}
# The nested-dict form restricts the replacement to the call_type column.
df = df.replace({'call_type': call_type_labels})

---

## Change stop resolution to arrest or no arrest

In [None]:
# Encode stop_resolution as a binary target: 1 for an arrest, 0 for every other listed outcome.
non_arrest_outcomes = [
    'Offense Report',
    'Field Contact',
    'Referred for Prosecution',
    'Citation / Infraction',
]
resolution_codes = {outcome: 0 for outcome in non_arrest_outcomes}
resolution_codes['Arrest'] = 1
# The nested-dict form restricts the replacement to the stop_resolution column.
df = df.replace({'stop_resolution': resolution_codes})

---

## Combine beats except for H1 and H2/H3 as they are on different sides of the map

In [None]:
# Collapse beats such as 'B1', 'B2', 'B3' to their leading letter.
# The letter H is deliberately absent, so H1 / H2 / H3 keep their own values.
merged_beat_letters = 'BCDEFGJKLMNOQRSUW'
beat_rollup = {
    letter + digit: letter
    for letter in merged_beat_letters
    for digit in '123'
}
# A flat {old: new} mapping is matched against whole cell values in every column,
# exactly as the individual replacements were.
df = df.replace(beat_rollup)

---

## Change frisk_flag "None" value to "N"

In [None]:
# Fold the literal string 'None' into 'N' for frisk_flag only
# (the nested-dict form limits the replacement to that column).
frisk_flag_fix = {'None': 'N'}
df = df.replace({'frisk_flag': frisk_flag_fix})
# df.frisk_flag = df.frisk_flag.replace('N', 0)
# df.frisk_flag = df.frisk_flag.replace('Y', 1)

---

## Change frisk_flag, subj_perceived_gender, and off_gender to binary

In [None]:
# Label -> code tables for the columns that are encoded as 0/1.
# frisk_flag lists both 'None' and 'N' as 0: the preceding cell rewrites 'None' to
# 'N', so testing for 'None' alone would leave every "not frisked" row unencoded.
BINARY_CODES = {
    'frisk_flag': {'None': 0, 'N': 0, 'Y': 1},
    'subj_perceived_gender': {'Male': 0, 'Female': 1},
    'off_gender': {'M': 0, 'F': 1},
}

# Replace each listed label with its code; any other value in the column
# (for example 'N/A') is kept unchanged by the else-branch of np.where.
for column, codes in BINARY_CODES.items():
    for label, code in codes.items():
        df[column] = np.where(df[column] == label, code, df[column])
# df.call_type = np.where(df.call_type == 'call', 0, df.call_type)
# df.call_type = np.where(df.call_type == 'onview', 1, df.call_type)

---

## Make categorical features with dummies

In [None]:
# Convert the categorical feature columns to pandas' category dtype in one call.
categorical_columns = [
    'beat',
    'subj_age_group',
    'weapon',
    'subj_perceived_gender',
    'off_race',
    'subj_perceived_race',
    'call_type',
]
df = df.astype({name: 'category' for name in categorical_columns})

In [None]:
# Display each column's dtype to check the result of the category conversion.
df.dtypes

In [None]:
# Column -> dummy-column prefix for every feature that should be one-hot encoded.
DUMMY_PREFIXES = {
    'subj_age_group': 'subj_age',
    'weapon': 'weapon',
    'subj_perceived_gender': 'subj_gender',
    'off_race': 'off_race',
    'subj_perceived_race': 'subj_race',
    # 'call_type': 'call_type',  # left disabled, as before
    'beat': 'beat',
}

# Encode all listed columns in ONE call. Calling get_dummies(df, ...) once per column
# and assigning each result to dummies_df restarts from df every time, so only the
# last column's encoding would survive.
dummies_df = pd.get_dummies(df, columns=list(DUMMY_PREFIXES), prefix=DUMMY_PREFIXES)

In [None]:
# Rebuild dummies_df from df with only the beat column one-hot encoded
# (this rebinds dummies_df, replacing whatever it held before).
dummies_df = pd.get_dummies(data=df, columns=['beat'], prefix=['beat'])

In [None]:
# Rebuild dummies_df from df with only subj_age_group one-hot encoded
# (this rebinds dummies_df, replacing whatever it held before).
dummies_df = pd.get_dummies(data=df, columns=['subj_age_group'], prefix=['subj_age'])

In [None]:
# Keep only the rows whose call_type is not the literal string 'None'.
has_call_type = dummies_df['call_type'] != 'None'
dummies_df = dummies_df.loc[has_call_type]

In [None]:
# One-hot encode df using get_dummies' default column selection and default prefixes.
# NOTE(review): this starts again from df, so it discards the previous dummies_df,
# including the call_type != 'None' row filter — confirm that is intended.
dummies_df = pd.get_dummies(data=df)

In [None]:
# List every column name of the encoded frame for a quick visual check.
dummies_df.columns.tolist()

In [None]:
# Write the one-hot encoded frame to disk without the row index.
dummies_df.to_csv(path_or_buf='cleaned_terry_stops_v2.csv', index=False)

In [None]:
# Write the cleaned frame (without dummy columns) to disk without the row index.
df.to_csv(path_or_buf='nc_terry_stops_v2.csv', index=False)