# Import & Setup

In [38]:
# Stretch the notebook's cell container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [39]:
import sys

# Local package folder added to the import search path.
# NOTE(review): presumably this is where the `myfunctions` module lives — confirm.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere as-is.
MY_PACKAGE_DIR = r"C:/Users/mikha/Dropbox/mikhael_misc/Projects/My-Package"

# Guard the append so re-running this cell does not pile up duplicate entries.
if MY_PACKAGE_DIR not in sys.path:
    sys.path.append(MY_PACKAGE_DIR)

In [40]:
import pandas as pd
import numpy as np
from myfunctions import clean_path

In [41]:
# Read the modified 2021 dataset, using the 'Stop ID' column as the row index.
# NOTE(review): `clean_path` is a project helper; what it does to the path is not visible here.
dataset_path = clean_path(r"C:/Users/mikha/Dropbox/mikhael_misc/Projects/Policing Thesis/Modified Dataset - 2021.csv")
df = pd.read_csv(dataset_path, index_col='Stop ID')

# CrossTabs

## Accidents

### Can one stop get multiple accident rows?

Yes

In [42]:
# Compare the sum of the 'Accident' column with the number of distinct Stop IDs
# among rows where Accident == 1; True means the two numbers differ.
accident_rows = df.loc[df['Accident'] == 1]
distinct_accident_stops = accident_rows.index.unique()
df['Accident'].sum() != len(distinct_accident_stops)

True

In [43]:
# Collects one race-share Series per measure, keyed by the measure's name.
crosstabs_dict = {}

## Stops weighted by # tickets

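**TODO:** find a better name for this measure. It counts every row of the dataset, so a stop that appears in several rows is counted several times.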

In [44]:
# Share of dataset rows per race (every row counts, so repeated Stop IDs count repeatedly),
# ordered alphabetically by race label.
row_level_race_shares = df['Race'].value_counts(normalize=True)
crosstabs_dict['Stops weighted by # tickets'] = row_level_race_shares.sort_index()

## Function for similar columns

In [45]:
def get_race_crosstab_from_duplicate_IDs(dataframe:pd.DataFrame, col:str, col_val_to_filter, get_total_stops=False) -> pd.Series:
    """Return the share of stops per race, counting each Stop ID exactly once.

    The same Stop ID can appear on several rows of ``dataframe``. Rows are
    collapsed to one race value per Stop ID before the shares are computed.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Table with a 'Race' column and an index named 'Stop ID'.
    col : str
        Column used to select rows. Ignored when ``get_total_stops`` is True.
    col_val_to_filter
        Only rows where ``dataframe[col] == col_val_to_filter`` are kept.
        Ignored when ``get_total_stops`` is True.
    get_total_stops : bool, default False
        When True, skip the filter and use every row of ``dataframe``.

    Returns
    -------
    pd.Series
        Fraction of distinct stops per race (sums to 1), indexed by race label
        and sorted by that label.
    """
    if get_total_stops:
        race_rows = dataframe['Race']
    else:
        race_rows = dataframe.loc[dataframe[col] == col_val_to_filter, 'Race']

    # One race per stop: take the first non-null value in row order. This is
    # deterministic, unlike taking an arbitrary element from a set of the
    # stop's race values, whose order can change between interpreter sessions.
    race_per_stop = race_rows.reset_index().groupby(by='Stop ID').first()

    race_shares = race_per_stop.value_counts(normalize=True).sort_index()

    # DataFrame.value_counts indexes by row tuples; flatten to the plain race labels.
    race_shares.index = race_shares.index.get_level_values(0)

    return race_shares



In [46]:
# Columns filtered on value 1; each gets its own per-stop race breakdown.
outcome_columns = ('Accident',
                   'Citation',
                   'Warning',
                   'Probable Cause',
                   'Arrest',
                   'Search Conducted')

crosstabs_dict.update({
    outcome: get_race_crosstab_from_duplicate_IDs(dataframe=df,
                                                  col=outcome,
                                                  col_val_to_filter=1)
    for outcome in outcome_columns
})

# Baseline: race breakdown over all stops, with no column filter applied.
crosstabs_dict['Stops'] = get_race_crosstab_from_duplicate_IDs(dataframe=df,
                                                               col=None,
                                                               col_val_to_filter=None,
                                                               get_total_stops=True)

# Convert to DF

In [47]:
# Combine the per-measure Series into one table (one column per measure);
# a race missing from a measure gets 0 instead of NaN.
crosstabs_df = pd.DataFrame(data=crosstabs_dict)
crosstabs_df = crosstabs_df.fillna(value=0)

In [48]:
# Display the combined table: one row per race, one column per measure.
crosstabs_df

Unnamed: 0,Stops weighted by # tickets,Accident,Citation,Warning,Probable Cause,Arrest,Search Conducted,Stops
ASIAN,0.04612,0.045455,0.048206,0.055763,0.026667,0.026432,0.030641,0.054001
BLACK,0.35148,0.285124,0.310419,0.329566,0.62,0.38326,0.473538,0.319746
HISPANIC,0.2578,0.322314,0.266969,0.204286,0.153333,0.303965,0.253482,0.226028
NATIVE AMERICAN,0.00048,0.0,0.000865,0.00064,0.006667,0.004405,0.002786,0.000673
OTHER,0.05804,0.039256,0.057933,0.075168,0.026667,0.022026,0.016713,0.070307
WHITE,0.28608,0.307851,0.315607,0.334577,0.166667,0.259912,0.222841,0.329245


# Montgomery County (MC) Population

https://www.census.gov/quickfacts/montgomerycountymaryland

Census categories folded into "OTHER":

- "Native Hawaiian and Other Pacific Islander alone" --> "OTHER"
- "Two or More Races" --> "OTHER"


In [49]:
# Population shares by race, entered by hand as fractions of 1.
asian = 0.156
black = 0.201
native_american = 0.000
hispanic = 0.201
white = 0.429

# "OTHER" is whatever remains after the explicitly listed groups.
listed_groups_share = asian + black + native_american + hispanic + white
other = 1 - listed_groups_share

In [50]:
# Population shares as a Series keyed by the same race labels used in the crosstab rows.
mc_pop = pd.Series({
    'ASIAN': asian,
    'BLACK': black,
    'HISPANIC': hispanic,
    'NATIVE AMERICAN': native_american,
    'OTHER': other,
    'WHITE': white,
})

In [51]:
# Put the population shares in as the first column of the crosstab table.
# This modifies crosstabs_df in place.
crosstabs_df.insert(0, 'MC Population', mc_pop)

# Reorder Columns / Rename

In [55]:
# Short display labels, keyed by the current column names.
short_column_labels = {
    'Stops weighted by # tickets': 'Stops X # Tickets',
    'Search Conducted': 'Searches',
    'Probable Cause': 'Prob. Cause',
    'Arrest': 'Arrests',
    'Warning': 'Warnings',
    'Accident': 'Accidents',
    'Citation': 'Citations',
    'MC Population': 'MC Pop.',
}

# Final left-to-right column order for the table.
new_col_order = [
    'MC Pop.',
    'Accidents',
    'Stops',
    'Stops X # Tickets',
    'Citations',
    'Warnings',
    'Arrests',
    'Searches',
    'Prob. Cause',
]

# Rename and reorder in a single chained expression.
crosstabs_df = crosstabs_df.rename(columns=short_column_labels)[new_col_order]

In [56]:
# Display the table after the columns have been renamed and reordered.
crosstabs_df

Unnamed: 0,MC Pop.,Accidents,Stops,Stops X # Tickets,Citations,Warnings,Arrests,Searches,Prob. Cause
ASIAN,0.156,0.045455,0.054001,0.04612,0.048206,0.055763,0.026432,0.030641,0.026667
BLACK,0.201,0.285124,0.319746,0.35148,0.310419,0.329566,0.38326,0.473538,0.62
HISPANIC,0.201,0.322314,0.226028,0.2578,0.266969,0.204286,0.303965,0.253482,0.153333
NATIVE AMERICAN,0.0,0.0,0.000673,0.00048,0.000865,0.00064,0.004405,0.002786,0.006667
OTHER,0.013,0.039256,0.070307,0.05804,0.057933,0.075168,0.022026,0.016713,0.026667
WHITE,0.429,0.307851,0.329245,0.28608,0.315607,0.334577,0.259912,0.222841,0.166667


# Total Row

# Export Crosstab

In [57]:
# Write the final crosstab to a CSV file; the path is relative, so it is resolved
# against the current working directory.
crosstabs_df.to_csv(path_or_buf=r"Crosstabs by Race.csv")