# Import and Setup

In [1]:
# Stretch the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Columns to load from the CSV (passed as `usecols` to `pd.read_csv`).
cols_to_use = [
    'SeqID', 'Date Of Stop', 'Time Of Stop',
    'Agency', 'SubAgency', 'Description',
    'Location', 'Latitude', 'Longitude',
    'Accident', 'Belts', 'Personal Injury', 'Property Damage', 'Fatal',
    'Commercial License', 'HAZMAT', 'Commercial Vehicle',
    'Alcohol', 'Work Zone',
    # Search-related columns
    'Search Conducted', 'Search Disposition', 'Search Outcome',
    'Search Reason', 'Search Reason For Stop', 'Search Type',
    'Search Arrest Reason',
    'State', 'VehicleType', 'Year', 'Make', 'Model', 'Color',
    'Violation Type', 'Charge', 'Article', 'Contributed To Accident',
    'Race', 'Gender', 'Arrest Type',
]

In [4]:
# Read only the first 15,000 rows of the CSV, restricted to the columns
# listed in cols_to_use.
df = pd.read_csv(
    "C:/Users/mikha/Dropbox/mikhael_misc/Projects/Policing Thesis/Traffic_Violations - Oct 6 2021.csv",
    usecols=cols_to_use,
    nrows=15000,
)

# Cleaning

## Assign Unique Stop ID 

In [5]:
def assign_stop_IDs(dataframe:pd.DataFrame) -> None:
    """
    Assign an integer ID to each stop and make it the frame's index.

    A stop is identified by the combination of 'SeqID', 'Date Of Stop' and
    'Time Of Stop'. IDs start at 0 and are handed out in order of first
    appearance, so rows sharing all three values share one 'Stop ID'
    (the resulting index is therefore not necessarily unique per row).

    The frame is modified in place and nothing is returned:
      * 'Stop ID' is added and set as the index;
      * the 'SeqID' column is deleted.

    Because 'SeqID' is removed, calling this a second time on the same
    frame raises a KeyError.
    """
    # Build the composite key as a standalone Series so that no scratch column
    # is written to (or deleted from) the caller's frame.
    stop_keys = dataframe['SeqID'] + ' _ ' + dataframe['Date Of Stop'] + ' _ ' + dataframe['Time Of Stop']

    # unique() keeps first-appearance order, so enumerate() numbers the stops
    # in the order they first occur in the frame.
    stop_ID_dict = {stop_info: ID for ID, stop_info in enumerate(stop_keys.unique())}

    dataframe.insert(loc=0,
                     column='Stop ID',
                     value=stop_keys.map(stop_ID_dict))

    del dataframe['SeqID']

    dataframe.set_index('Stop ID', inplace=True)

# Works on df in place: 'Stop ID' becomes the index and 'SeqID' is removed.
assign_stop_IDs(df)

## Convert 'Yes'/'No' strings to integer flags (0, 1)

In [6]:
def find_str_bool_cols(dataframe:pd.DataFrame) -> list:
    """
    Return the names of the columns that hold only 'Yes' / 'No' values.

    Missing values are ignored when deciding, so a column made of 'Yes',
    'No' and NaN qualifies. A column with no non-missing values at all is
    not reported.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Frame whose columns are inspected. It is not modified.

    Returns
    -------
    list
        Matching column labels, in the frame's column order.
    """
    yes_no = {'Yes', 'No'}

    bool_cols = []

    # Inspect the frame that was passed in rather than a notebook-level global.
    for col in dataframe:
        # Drop missing values explicitly instead of relying on NaN membership
        # in a set, which only matches when the value is the very same object.
        observed = dataframe[col].dropna().unique()
        if len(observed) > 0 and all(value in yes_no for value in observed):
            bool_cols.append(col)

    return bool_cols

def convert_str_bool_cols(dataframe:pd.DataFrame) -> None:
    """
    Recode the 'Yes'/'No' columns of `dataframe` as 1/0, in place.

    The columns to convert are those reported by `find_str_bool_cols`.
    'No' becomes 0 and 'Yes' becomes 1; any other value (including a
    missing one) has no entry in the mapping and comes out as NaN.

    The frame is modified in place and nothing is returned.
    """
    yes_no_to_int = {'No': 0, 'Yes': 1}

    for col in find_str_bool_cols(dataframe=dataframe):
        dataframe[col] = dataframe[col].map(yes_no_to_int)
    
# Recodes the 'Yes'/'No' columns of df as 1/0 in place.
convert_str_bool_cols(df)

## Create columns

In [7]:
def create_cols(dataframe:pd.DataFrame) -> None:
    """
    Add 0/1 indicator columns derived from existing columns.

    Each new column is 1 where the source column equals the given value and
    0 everywhere else (a missing source value compares unequal, so it
    yields 0):

        'Citation'        <- 'Violation Type' == 'Citation'
        'Warning'         <- 'Violation Type' == 'Warning'
        'Male'            <- 'Gender' == 'M'
        'Female'          <- 'Gender' == 'F'
        'Probable Cause'  <- 'Search Reason' == 'Probable Cause'

    The frame is modified in place and nothing is returned.
    """
    # (new column, source column, value that maps to 1)
    indicator_specs = [
        ('Citation', 'Violation Type', 'Citation'),
        ('Warning', 'Violation Type', 'Warning'),
        ('Male', 'Gender', 'M'),
        ('Female', 'Gender', 'F'),
        ('Probable Cause', 'Search Reason', 'Probable Cause'),
    ]

    for new_col, source_col, match_value in indicator_specs:
        dataframe[new_col] = (dataframe[source_col] == match_value).astype(int)

# Adds the 'Citation', 'Warning', 'Male', 'Female' and 'Probable Cause' columns to df in place.
create_cols(df)

# Save

In [8]:
# Write the cleaned frame to disk; the index ('Stop ID') is written as the first column.
df.to_csv(
    "C:/Users/mikha/Dropbox/mikhael_misc/Projects/Policing Thesis/Modified Dataset - 2021.csv",
    index=True,
)


In [9]:
# Crosstab (recreating "Table 1"): one row per Race, one column per value of
# the 0/1 'Probable Cause' indicator.
pd.crosstab(index=df['Race'], columns=df['Probable Cause'])

Probable Cause,0,1
Race,Unnamed: 1_level_1,Unnamed: 2_level_1
ASIAN,737,6
BLACK,5026,129
HISPANIC,3744,45
NATIVE AMERICAN,3,0
OTHER,897,9
WHITE,4362,42


In [10]:
# NOTE(review): this repeats the Race x 'Probable Cause' crosstab from the
# previous cell — likely a leftover duplicate; confirm and remove one.
pd.crosstab(df['Race'], df['Probable Cause'])

Probable Cause,0,1
Race,Unnamed: 1_level_1,Unnamed: 2_level_1
ASIAN,737,6
BLACK,5026,129
HISPANIC,3744,45
NATIVE AMERICAN,3,0
OTHER,897,9
WHITE,4362,42


In [11]:
# TODO: create df['Outcome of Probable Cause Search'] (assignment left unfinished)



In [12]:
# Show the search-related columns side by side.
df[[
    'Search Conducted',
    'Search Disposition',
    'Search Outcome',
    'Search Reason',
    'Search Reason For Stop',
    'Search Type',
    'Search Arrest Reason',
]]

Unnamed: 0_level_0,Search Conducted,Search Disposition,Search Outcome,Search Reason,Search Reason For Stop,Search Type,Search Arrest Reason
Stop ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.0,,Citation,,21-1101(a),,
1,0.0,,Citation,,21-801.1,,
2,0.0,,Citation,,21-1129,,
3,0.0,,Citation,,13-401(h),,
4,0.0,,Citation,,21-201(a1),,
...,...,...,...,...,...,...,...
8124,0.0,,Citation,,16-112(c),,
8125,0.0,,Citation,,13-401(b1),,
8125,0.0,,Citation,,13-401(b1),,
8125,0.0,,Citation,,13-401(b1),,
