Importing the basic libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the cleaned Illinois stop records. low_memory=False makes pandas parse
# the file in a single pass instead of in chunks, so each column gets one
# consistently inferred dtype.
full = pd.read_csv(
    'IL-clean.csv',
    low_memory=False,
)

In [3]:
df_columns = ['n_stops', 'n_searches', 'n_hits', 'search_rate', 'hit_rate']  # the columns of our dataframes

def SummaryStats(df,label):
    """Summarize a frame of stop records as a one-row DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Stop records; must contain the columns 'search_conducted' and
        'contraband_found', both of which are summed.
    label : hashable
        Index label given to the single summary row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``label`` with the columns n_stops, n_searches,
        n_hits, search_rate (n_searches / n_stops) and hit_rate
        (n_hits / n_searches).
    """
    stat_names = ['n_stops', 'n_searches', 'n_hits', 'search_rate', 'hit_rate']

    stop_count = len(df)
    search_count = df['search_conducted'].sum()
    hit_count = df['contraband_found'].sum()

    # Denominators are floored at 1 so that an input with no stops (or no
    # searches) yields a rate of 0 instead of dividing by zero.
    summary = {
        'n_stops': stop_count,
        'n_searches': search_count,
        'n_hits': hit_count,
        'search_rate': search_count / max(stop_count, 1),
        'hit_rate': hit_count / max(search_count, 1),
    }
    return pd.DataFrame(data=summary, index=[label], columns=stat_names)

In [4]:
# Display the column names of the loaded dataset to see which fields are available.
full.columns

Index(['id', 'state', 'stop_date', 'stop_time', 'location_raw', 'county_name',
       'county_fips', 'fine_grained_location', 'police_department',
       'driver_gender', 'driver_age_raw', 'driver_age', 'driver_race_raw',
       'driver_race', 'violation_raw', 'violation', 'search_conducted',
       'search_type_raw', 'search_type', 'contraband_found', 'stop_outcome',
       'is_arrested', 'stop_duration', 'vehicle_type', 'drugs_related_stop',
       'district'],
      dtype='object')

In [5]:
# Column order of the per-age, per-gender summary table.
# NOTE: this rebinds the df_columns name assigned in the SummaryStats cell.
df_columns=['gender','age','n_stops','n_searches','n_hits','search_rate','hit_rate'] # the columns of our dataframes

# Numeric view of the ages. Not used in this cell; retained in case later
# cells reference it.
ages = pd.to_numeric(full['driver_age']) # extract the ages from the original dataframe

# Build one summary row per (age, gender) pair and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row-by-row
# copies it on every iteration; collecting rows in a list avoids both problems
# and keeps the numeric dtypes of the individual rows.
gender_rows = []
for i in range(16, 81):
    temp_data = full[full['driver_age']==i]
    # Men first, then women, so the row order matches the index labels
    # "<age> Year Old Men" / "<age> Year Old Women".
    for gender, noun in (('M', 'Men'), ('F', 'Women')):
        row = SummaryStats(temp_data[temp_data['driver_gender']==gender], str(i) + ' Year Old ' + noun)
        row['gender'] = gender
        row['age'] = i
        gender_rows.append(row[df_columns])  # enforce the column order

gender_data = pd.concat(gender_rows)

In [6]:
# Per-district, per-race summary of stops, searches and hits, alongside the
# district-wide search and hit rates for comparison.
location_columns = ['race','district_number','n_stops','n_searches','n_hits','dis_search','search_rate','dis_hit','hit_rate']
tot_stops = []     # total number of stops in each district, in `districts` order
tot_searches = []  # total number of searches in each district, in `districts` order

# set the districts and races we will look at
districts = ['01','02','03','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22']
races = ['White','Black','Hispanic','Asian']

# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and repeated appends copy the whole frame each time.
location_rows = []
for i in districts:
    district = 'ILLINOIS STATE POLICE ' + i # get the district string

    # get the district data
    district_data = full[(full['district']==district)]

    # get general stats about that district
    # (SummaryStats returns a single row, so .iloc[0] reads its scalar values;
    # DataFrame.get_value was removed in pandas 1.0)
    overall = SummaryStats(district_data, i)
    overallhit = overall['hit_rate'].iloc[0]
    overallsearch = overall['search_rate'].iloc[0]
    tot_stops.append(overall['n_stops'].iloc[0])
    tot_searches.append(overall['n_searches'].iloc[0])

    # iterate through the races
    for race in races:
        # get the stats
        temp = SummaryStats(district_data[district_data['driver_race']==race], race + ' in District ' + i)

        # add information about the data used
        temp['race'] = race
        temp['district_number'] = i
        temp['dis_search'] = overallsearch
        temp['dis_hit'] = overallhit

        # keep the column order the same for every row
        location_rows.append(temp[location_columns])

LocationStops = pd.concat(location_rows)

In [7]:
# Summary by district, ten-year age bin, race and gender, alongside the
# district-wide search and hit rates for comparison.
bigdata_columns = ['age','race','gender','district_number','n_stops','n_searches','n_hits','search_rate','dis_search','hit_rate','dis_hit']

# set the districts and races we will look at
districts = ['01','02','03','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22']
races = ['White','Black','Hispanic']

# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and repeated appends copy the whole frame each time.
bigdata_rows = []
for i in districts:
    district = 'ILLINOIS STATE POLICE ' + i # get the district string
    # get the district data
    district_data = full[full['district']==district]
    # District-wide rates. SummaryStats returns a single row, so .iloc[0]
    # reads its scalar values (DataFrame.get_value was removed in pandas 1.0).
    district_info = SummaryStats(district_data,'tempdis')
    districthit = district_info['hit_rate'].iloc[0]
    districtsearch = district_info['search_rate'].iloc[0]
    # iterate through the six ten-year age bins: 16-25, 26-35, ..., 66-75
    for age in range(6):
        age_low = age*10 + 16     # youngest age in the bin (inclusive)
        age_high = age_low + 9    # oldest age in the bin (inclusive), used in the label
        age_mid = age*10 + 20.5   # midpoint of the bin, stored in the 'age' column
        district_age_data = district_data[(district_data['driver_age'] >= age_low) & (district_data['driver_age'] < age_low + 10)]
        for race in races:
            district_age_race_data = district_age_data[district_age_data['driver_race']==race]
            # Men first, then women, matching the original row order.
            for gender, noun in (('M', 'Men'), ('F', 'Women')):
                label = str(age_low) + ' to ' + str(age_high) + ' Year Old ' + race + ' ' + noun + ' in District ' + i
                temp = SummaryStats(district_age_race_data[district_age_race_data['driver_gender']==gender], label)
                temp['race'] = race
                temp['district_number'] = i
                temp['age'] = age_mid
                temp['gender'] = gender
                temp['dis_hit'] = districthit
                temp['dis_search'] = districtsearch
                # keep the column order the same for every row
                bigdata_rows.append(temp[bigdata_columns])

bigdata = pd.concat(bigdata_rows)

In [8]:
# Preview the first five rows of the per-age, per-gender summary built above.
gender_data.head()

Unnamed: 0,gender,age,n_stops,n_searches,n_hits,search_rate,hit_rate
16 Year Old Men,M,16,10389,367,96,0.035326,0.26158
16 Year Old Women,F,16,6129,125,49,0.020395,0.392
17 Year Old Men,M,17,35375,1304,378,0.036862,0.289877
17 Year Old Women,F,17,20699,398,128,0.019228,0.321608
18 Year Old Men,M,18,66488,3237,941,0.048685,0.290701
