In [37]:
import pandas as pd
import numpy as np
from datetime import datetime

from astral import Observer
from astral.sun import sun
from pytz import timezone
from timezonefinder import TimezoneFinder
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency

In [2]:
# Load the raw San Francisco stop records. low_memory=False makes pandas parse
# the file in one pass rather than in chunks, so each column's dtype is
# inferred from all of its values at once.
sf = pd.read_csv(
    '../data/raw_data/san_francisco.csv',
    low_memory=False,
)

In [4]:
# Show the column labels of the loaded stops table.
sf.columns

Index(['raw_row_number', 'date', 'time', 'location', 'lat', 'lng', 'district',
       'subject_age', 'subject_race', 'subject_sex', 'type', 'arrest_made',
       'search_conducted', 'search_vehicle', 'search_basis', 'reason_for_stop',
       'raw_search_vehicle_description', 'raw_result_of_contact_description'],
      dtype='object')

In [6]:
# Count the stops where a search was conducted. The explicit `== True`
# comparison is kept so that any non-boolean/missing entries compare as False;
# Series.sum() then counts the True values in vectorized code instead of
# looping over every row with the builtin sum().
(sf['search_conducted'] == True).sum()

53381

In [7]:
# Count the stops where the vehicle was searched. The explicit `== True`
# comparison is kept so that any non-boolean/missing entries compare as False;
# Series.sum() then counts the True values in vectorized code instead of
# looping over every row with the builtin sum().
(sf['search_vehicle'] == True).sum()

53381

In [9]:
# Tally how often contraband was / was not found (missing values are excluded
# by value_counts' default).
# NOTE(review): 'contraband_found' does not appear in the sf.columns listing
# displayed earlier in this notebook -- confirm both outputs come from the same
# load of the CSV (Restart Kernel -> Run All).
sf['contraband_found'].value_counts()

False    45405
True      7976
Name: contraband_found, dtype: int64

In [11]:
# Display the arrest_made column to inspect its values and dtype.
sf['arrest_made']

0         False
1         False
2         False
3         False
4         False
          ...  
905065    False
905066    False
905067    False
905068    False
905069    False
Name: arrest_made, Length: 905070, dtype: bool

In [16]:
# Keep only the stops where a vehicle search took place; these rows are the
# denominator for the contraband hit rate computed below.
# .copy() makes this an independent frame: the result of query() is a
# selection from `sf`, and assigning columns on it later would otherwise
# raise SettingWithCopyWarning.
denominator = sf.query('search_vehicle == True').copy()

In [33]:
# Convert contraband_found to a numeric column so it can be summed per group.
# assign() builds a new frame with the column replaced outright, which
#   * avoids SettingWithCopyWarning from writing into a filtered selection, and
#   * guarantees the converted dtype is kept (a `.loc[:, col] = ...` write may
#     be performed in place and retain the column's previous dtype).
# Re-running this cell is idempotent.
denominator = denominator.assign(
    contraband_found=pd.to_numeric(denominator['contraband_found'])
)

In [34]:
# Inspect the searched-stops subset after the numeric conversion of
# contraband_found.
denominator

Unnamed: 0,raw_row_number,date,time,location,lat,lng,district,subject_age,subject_race,subject_sex,...,citation_issued,warning_issued,outcome,contraband_found,search_conducted,search_vehicle,search_basis,reason_for_stop,raw_search_vehicle_description,raw_result_of_contact_description
23,869944,2014-08-01,02:32:00,HOWARD & 9TH,37.775007,-122.413181,,,white,male,...,1.0,0.0,citation,0.0,1.0,1.0,other,Mechanical or Non-Moving Violation (V.C.),"Search Incident to Arrest, Negative Result",Citation
29,869950,2014-08-01,05:20:00,22ND AV. & GEARY BL.,37.780417,-122.481385,,,asian/pacific islander,male,...,0.0,0.0,arrest,0.0,1.0,1.0,other,DUI Check,"Vehicle Inventory, Negative Result",In Custody Arrest
125,870046,2014-08-01,14:28:00,WEST POINT @ MIDDLE POINT,37.735271,-122.379442,,,black,male,...,0.0,1.0,warning,0.0,1.0,1.0,other,,"Search without Consent, Negative Result",Warning
127,870048,2014-08-01,14:34:00,HAIGHT ST & PIERCE,37.771696,-122.433752,,,white,male,...,1.0,0.0,citation,1.0,1.0,1.0,other,Moving Violation,"Search without Consent, Positive Result",Citation
134,870055,2014-08-01,15:21:00,3RD @ WILLIAMS,37.729294,-122.392758,,,black,male,...,0.0,0.0,arrest,0.0,1.0,1.0,other,,Searched as a result of Probation or Parole Co...,In Custody Arrest
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904975,892915,2014-10-31,17:59:00,21ST & FOLSOM ST,37.757510,-122.415048,,,hispanic,male,...,1.0,0.0,citation,0.0,1.0,1.0,other,Mechanical or Non-Moving Violation (V.C.),"Search without Consent, Negative Result",Citation
905024,892964,2014-10-31,21:37:00,3RD ST/QUESADA,37.733171,-122.391221,,,black,male,...,1.0,0.0,citation,0.0,1.0,1.0,consent,Mechanical or Non-Moving Violation (V.C.),"Search with Consent, Negative Result",Citation
905028,892968,2014-10-31,21:48:00,106 FERN,37.788274,-122.420637,,,black,male,...,0.0,1.0,warning,0.0,1.0,1.0,other,Moving Violation,"Search without Consent, Negative Result",Warning
905048,892988,2014-10-31,22:50:00,3RD/ INNES,37.739494,-122.388972,,,black,male,...,1.0,0.0,citation,0.0,1.0,1.0,other,Moving Violation,Searched as a result of Probation or Parole Co...,Citation


In [63]:
# Per-race search outcomes among searched stops.
# Select the single column *before* aggregating: calling .sum()/.count() on the
# whole grouped frame aggregates every column (slow, and summing text columns
# can fail) only to throw all but one away.
searches_by_race = denominator.groupby('subject_race')['contraband_found']

# Number of searches that turned up contraband, per race.
disag = searches_by_race.sum()
# Number of searches per race. size() counts rows directly instead of relying
# on the first column of the frame being free of missing values.
count = searches_by_race.size()

# Both Series share the subject_race index, so the arithmetic aligns by label.
summary = pd.DataFrame({'total_searches': count,
                        'contraband': disag,
                        'no_contraband': count - disag,
                        'p(contraband|search)': disag / count})

In [65]:
# Show the per-race summary rounded to two decimal places.
summary.round(2)

Unnamed: 0_level_0,total_searches,contraband,no_contraband,p(contraband|search)
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
asian/pacific islander,2861,1033.0,1828.0,0.36
black,23622,2183.0,21439.0,0.09
hispanic,11445,1165.0,10280.0,0.1
other,3746,761.0,2985.0,0.2
white,11707,2834.0,8873.0,0.24


In [69]:
# Chi-square test of independence between subject race and search outcome,
# using the race x (contraband, no_contraband) contingency table.
# All four results are named rather than unpacked into `_`: in IPython `_`
# holds the previous cell's output, and rebinding it disables that shortcut.
chi2_stat, p_value, dof, expected = chi2_contingency(
    summary[['contraband', 'no_contraband']]
)

In [70]:
# Display the p-value of the chi-square test.
# NOTE(review): a displayed 0.0 presumably means the value is smaller than a
# float can represent rather than exactly zero -- report it as "p < 0.001".
p_value

0.0