In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("test_df_2.csv")
# index value is in this temp csv
df.drop(df.columns[0], axis=1, inplace=True)

In [4]:
df_uniq = df.drop_duplicates("stop_id")

In [5]:
white = df_uniq['result'][df_uniq['race'] == "White"].value_counts()

In [6]:
black = df_uniq['result'][df_uniq['race'] == "Black/African American"].value_counts()

In [7]:
white

Citation for infraction                                                      252
No Action                                                                     34
In-field cite and release                                                      8
Custodial Arrest without warrant                                               5
Psychiatric hold                                                               5
Custodial Arrest pursuant to outstanding warrant                               4
Field interview card completed                                                 4
Contacted parent/legal guardian or other person responsible for the minor      2
Noncriminal transport or caretaking transport                                  1
Name: result, dtype: int64

In [8]:
black

Citation for infraction                             10
No Action                                            6
Custodial Arrest without warrant                     2
Field interview card completed                       1
Custodial Arrest pursuant to outstanding warrant     1
Noncriminal transport or caretaking transport        1
In-field cite and release                            1
Name: result, dtype: int64

In [9]:
races = df_uniq['race'].value_counts().keys()

In [10]:
races

Index(['White', 'Hispanic/Latino/a', 'Asian', 'Middle Eastern or South Asian',
       'Black/African American', 'Pacific Islander', 'Native American'],
      dtype='object')

In [11]:
white/black

Citation for infraction                                                      25.200000
Contacted parent/legal guardian or other person responsible for the minor          NaN
Custodial Arrest pursuant to outstanding warrant                              4.000000
Custodial Arrest without warrant                                              2.500000
Field interview card completed                                                4.000000
In-field cite and release                                                     8.000000
No Action                                                                     5.666667
Noncriminal transport or caretaking transport                                 1.000000
Psychiatric hold                                                                   NaN
Name: result, dtype: float64

In [12]:
sum(white)/sum(black)

10.589285714285714

In [13]:
result_by_race = {}

In [14]:
for race in races:
    result_by_race[race] = df_uniq['result'][df_uniq['race'] == race].value_counts()

In [15]:
def add_races(data, races_to_add, field='result'):
    res = pd.Series(0, data[field].value_counts().keys())
    for race in races_to_add:
        res = res.add(data[field][data['race'] == race].value_counts(), fill_value=0)
    return res

In [16]:
def generate_relative_df(baseline, comparison, base_race, comparison_race):
        temp_dict={base_race: baseline, comparison_race: comparison}
        scaled_baseline = sum(comparison)*(baseline/sum(baseline))
        relative = (comparison/baseline)*(sum(baseline)/sum(comparison))
        temp_dict["Expected {}".format(comparison_race)] = scaled_baseline
        temp_dict["Relative"] = relative
        return pd.DataFrame(temp_dict)
    

In [17]:
black_hispanic = add_races(df_uniq, ['Black/African American', 'Hispanic/Latino/a'])

In [18]:
sum(black_hispanic)

386.0

In [19]:
sum(white)

593

In [20]:
black_hispanic

Citation for infraction                                                      142.0
Contacted parent/legal guardian or other person responsible for the minor      1.0
Custodial Arrest pursuant to outstanding warrant                               1.0
Custodial Arrest without warrant                                               9.0
Field interview card completed                                                 3.0
In-field cite and release                                                     13.0
No Action                                                                     28.0
Noncriminal transport or caretaking transport                                  2.0
Psychiatric hold                                                               0.0
dtype: float64

In [21]:
generate_relative_df(white, black_hispanic, "White", "Black and Hispanic")

Unnamed: 0,White,Black and Hispanic,Expected Black and Hispanic,Relative
Citation for infraction,252,142.0,164.033727,0.865676
Contacted parent/legal guardian or other person responsible for the minor,2,1.0,1.301855,0.768135
Custodial Arrest pursuant to outstanding warrant,4,1.0,2.60371,0.384067
Custodial Arrest without warrant,5,9.0,3.254637,2.765285
Field interview card completed,4,3.0,2.60371,1.152202
In-field cite and release,8,13.0,5.20742,2.496438
No Action,34,28.0,22.131535,1.265163
Noncriminal transport or caretaking transport,1,2.0,0.650927,3.072539
Psychiatric hold,5,0.0,3.254637,0.0
Warning (verbal or written),278,187.0,180.957841,1.03339


In [22]:
poc = add_races(df_uniq, [x for x in races if x != "White"])

In [23]:
generate_relative_df(white, poc, "White", "POC")

Unnamed: 0,White,POC,Expected POC,Relative
Citation for infraction,252,359.0,353.564924,1.015372
Contacted parent/legal guardian or other person responsible for the minor,2,2.0,2.806071,0.71274
Custodial Arrest pursuant to outstanding warrant,4,2.0,5.612142,0.35637
Custodial Arrest without warrant,5,14.0,7.015177,1.995673
Field interview card completed,4,3.0,5.612142,0.534555
In-field cite and release,8,20.0,11.224283,1.781851
No Action,34,46.0,47.703204,0.964296
Noncriminal transport or caretaking transport,1,2.0,1.403035,1.425481
Psychiatric hold,5,0.0,7.015177,0.0
Warning (verbal or written),278,384.0,390.043845,0.984505


In [24]:
non_priv = add_races(df_uniq, [x for x in races if (x != "White" and x != "Asian")])

In [25]:
generate_relative_df(white, non_priv, "White", "Non-Privileged")

Unnamed: 0,White,Non-Privileged,Expected Non-Privileged,Relative
Citation for infraction,252,238.0,251.150084,0.947641
Contacted parent/legal guardian or other person responsible for the minor,2,2.0,1.993255,1.003384
Custodial Arrest pursuant to outstanding warrant,4,2.0,3.986509,0.501692
Custodial Arrest without warrant,5,14.0,4.983137,2.809475
Field interview card completed,4,3.0,3.986509,0.752538
In-field cite and release,8,17.0,7.973019,2.132191
No Action,34,35.0,33.885329,1.032895
Noncriminal transport or caretaking transport,1,2.0,0.996627,2.006768
Psychiatric hold,5,0.0,4.983137,0.0
Warning (verbal or written),278,278.0,277.062395,1.003384


In [26]:
df_uniq.keys()

Index(['stop_id', 'date_time', 'duration', 'call_for_service', 'location',
       'actions_taken', 'Person Search Consent Given',
       'Property Search Consent Given', 'basis_for_search', 'reason_for_stop',
       'result', 'limited_english', 'age', 'gender', 'Gender Nonconforming',
       'lgbt', 'race', 'disabilities', 'evidence_found'],
      dtype='object')

In [27]:
df_uniq['reason_for_stop'].value_counts()

Traffic Violation                                                   1354
Reasonable Suspicion                                                  50
Consensual Encounter resulting in a search                             8
Investigation to determine whether the person was truant               8
Known to be on Parole / Probation / PRCS / Mandatory Supervision       3
Knowledge of outstanding arrest warrant/wanted person                  2
Name: reason_for_stop, dtype: int64

In [29]:
generate_relative_df(white, black_hispanic, "White", "Black and Hispanic")

Unnamed: 0,White,Black and Hispanic,Expected Black and Hispanic,Relative
Citation for infraction,252,142.0,164.033727,0.865676
Contacted parent/legal guardian or other person responsible for the minor,2,1.0,1.301855,0.768135
Custodial Arrest pursuant to outstanding warrant,4,1.0,2.60371,0.384067
Custodial Arrest without warrant,5,9.0,3.254637,2.765285
Field interview card completed,4,3.0,2.60371,1.152202
In-field cite and release,8,13.0,5.20742,2.496438
No Action,34,28.0,22.131535,1.265163
Noncriminal transport or caretaking transport,1,2.0,0.650927,3.072539
Psychiatric hold,5,0.0,3.254637,0.0
Warning (verbal or written),278,187.0,180.957841,1.03339


In [30]:
black_hispanic_rfs = add_races(df_uniq, ['Black/African American', 'Hispanic/Latino/a'], field='reason_for_stop')

In [31]:
white_rfs = df_uniq['reason_for_stop'][df_uniq['race'] == "White"].value_counts()

In [32]:
generate_relative_df(white_rfs, black_hispanic_rfs, "White", "Black and Hispanic")

Unnamed: 0,White,Black and Hispanic,Expected Black and Hispanic,Relative
Consensual Encounter resulting in a search,3,4.0,1.952782,2.048359
Investigation to determine whether the person was truant,2,4.0,1.301855,3.072539
Knowledge of outstanding arrest warrant/wanted person,1,1.0,0.650927,1.536269
Known to be on Parole / Probation / PRCS / Mandatory Supervision,1,2.0,0.650927,3.072539
Reasonable Suspicion,24,13.0,15.62226,0.832146
Traffic Violation,562,362.0,365.821248,0.989554


In [33]:
df_uniq["basis_for_search"].value_counts()

No Search                                                        1353
Condition of parole / probation/ PRCS / mandatory supervision      26
Incident to arrest                                                 19
Consent given                                                      13
Officer Safety/safety of others                                    11
Evidence of crime                                                   2
Odor of contraband                                                  1
Name: basis_for_search, dtype: int64

In [34]:
white_bfs = df_uniq['basis_for_search'][df_uniq['race'] == "White"].value_counts()

In [35]:
black_hispanic_bfs = add_races(df_uniq, ['Black/African American', 'Hispanic/Latino/a'], field='basis_for_search')

In [36]:
generate_relative_df(white_bfs, black_hispanic_bfs, "White", "Black and Hispanic")

Unnamed: 0,White,Black and Hispanic,Expected Black and Hispanic,Relative
Condition of parole / probation/ PRCS / mandatory supervision,8.0,14.0,5.20742,2.688472
Consent given,4.0,5.0,2.60371,1.920337
Evidence of crime,1.0,1.0,0.650927,1.536269
Incident to arrest,7.0,6.0,4.556492,1.316802
No Search,569.0,355.0,370.37774,0.958481
Odor of contraband,,0.0,,
Officer Safety/safety of others,4.0,5.0,2.60371,1.920337


In [37]:
5*["pie"] + 3*['cake']

['pie', 'pie', 'pie', 'pie', 'pie', 'cake', 'cake', 'cake']

In [38]:
green_dict = {"stop_id": [x for x in range(20)],
             "race": ['green']*20,
             "dessert": 5*["pie"]+5*["ice cream"]+5*["cake"]+5*["beer"]}
blue_dict = {"stop_id": [20+x for x in range(20)],
             "race": ['blue']*20,
             "dessert": 10*["pie"]+2*["ice cream"]+2*["cake"]+6*["beer"]}


In [39]:
green = add_races(pd.DataFrame(green_dict), ["green"], field="dessert")

In [40]:
blue = add_races(pd.DataFrame(blue_dict), ["blue"], field="dessert")

In [41]:
generate_relative_df(green, blue, "green", "blue")

Unnamed: 0,green,blue,Expected blue,Relative
beer,5,6,5.0,1.2
cake,5,2,5.0,0.4
ice cream,5,2,5.0,0.4
pie,5,10,5.0,2.0


In [42]:
green

pie          5
ice cream    5
cake         5
beer         5
dtype: int64

In [43]:
desdata = pd.concat([pd.DataFrame(green_dict), pd.DataFrame(blue_dict)], ignore_index=True)

In [44]:
green= add_races(desdata, ["green"], field='dessert')

In [45]:
green

beer         5
cake         5
ice cream    5
pie          5
dtype: int64

In [46]:
blue = add_races(desdata, ['blue'], field = 'dessert')

In [47]:
blue

pie          10
beer          6
ice cream     2
cake          2
dtype: int64

In [48]:
add_races(desdata, ['green', 'blue'], field='dessert')

beer         11
cake          7
ice cream     7
pie          15
dtype: int64

In [51]:
import analysis.populations as pop

In [52]:
la_pop = pop.get_population("Los Altos")

In [53]:
la_pop

White                            17735.000000
Hispanic/Latino/a                 1357.000000
Asian                             6963.059055
Middle Eastern or South Asian     2622.940945
Black/African American             145.000000
Pacific Islander                     0.000000
Native American                     24.000000
Multiple                          1567.000000
Other                              100.000000
dtype: float64

In [54]:
df_uniq["race"].value_counts()

White                            593
Hispanic/Latino/a                330
Asian                            241
Middle Eastern or South Asian    177
Black/African American            56
Pacific Islander                  23
Native American                    5
Name: race, dtype: int64

In [55]:
(df_uniq["race"].value_counts()/la_pop)/(df_uniq["race"].value_counts()["White"]/la_pop["White"])

Asian                             1.035127
Black/African American           11.550387
Hispanic/Latino/a                 7.272950
Middle Eastern or South Asian     2.018186
Multiple                               NaN
Native American                   6.230677
Other                                  NaN
Pacific Islander                       inf
White                             1.000000
dtype: float64