# Effect of gender on policing in the state of Rhode Island

## Data preparation

In [None]:
import pandas as pd
import numpy as np

# Load the traffic-stop records from the CSV file in the working directory
csv_path = "police.csv"
ri = pd.read_csv(csv_path)

# Preview the first rows of the freshly loaded DataFrame
display(ri.head())



In [None]:
# Count the missing values in each column
print(ri.isnull().sum())

# Report the dataset dimensions (rows, columns) before dropping anything
print(ri.shape)

# Drop the 'county_name' and 'state' columns, which hold redundant values.
# Rebinding `ri` (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because the columns are already gone.
ri = ri.drop(columns=['county_name', 'state'], errors='ignore')

# Report the dimensions again to confirm the columns were removed
print(ri.shape)

In [None]:
# Missing values per column before filtering
missing_before = ri.isna().sum()
print(missing_before)

# Remove every stop for which 'driver_gender' was not recorded
ri.dropna(subset=['driver_gender'], inplace=True)

# Missing values per column after filtering
missing_after = ri.isna().sum()
print(missing_after)

# Dimensions (rows, columns) of the filtered DataFrame
print(ri.shape)

In [None]:
# Inspect the data type of every column
print(ri.dtypes)

# Preview the first rows (rich display, as in the loading cell)
display(ri.head())

# Convert 'is_arrested' to a true boolean column.
# astype('bool') silently turns missing values into True, so fail fast if
# any are left instead of inventing arrests.
assert ri.is_arrested.notna().all(), "'is_arrested' still contains missing values"
ri['is_arrested'] = ri.is_arrested.astype('bool')

# Build one "date time" string per stop from 'stop_date' and 'stop_time'
combined = ri.stop_date.str.cat(ri.stop_time, sep=' ')

# Parse the combined strings into datetime values
ri['stop_datetime'] = pd.to_datetime(combined)

# Confirm the dtypes after the conversions
print(ri.dtypes)

# Index the DataFrame by the stop timestamp
ri.set_index('stop_datetime', inplace=True)


## Effect of gender on policing

In [None]:
# Split the stops into one DataFrame per driver gender
is_female = ri.driver_gender == 'F'
is_male = ri.driver_gender == 'M'
female = ri[is_female]
male = ri[is_male]

# How often each violation occurs across all drivers
violation_counts = ri.violation.value_counts()
print(violation_counts)
print("\n")

# The same tally expressed as proportions
violation_shares = ri.violation.value_counts(normalize=True)
print(violation_shares)
print("\n")

# Violation proportions among female drivers
female_violation_shares = female.violation.value_counts(normalize=True)
print(female_violation_shares)
print("\n")

# Violation proportions among male drivers
male_violation_shares = male.violation.value_counts(normalize=True)
print(male_violation_shares)

In [None]:
# Restrict each gender's stops to speeding violations
female_and_speeding = female.loc[female.violation == 'Speeding']
male_and_speeding = male.loc[male.violation == 'Speeding']

# Share of each stop outcome among female drivers stopped for speeding
female_outcome_shares = female_and_speeding.stop_outcome.value_counts(normalize=True)
print(female_outcome_shares)
print("\n")

# Share of each stop outcome among male drivers stopped for speeding
male_outcome_shares = male_and_speeding.stop_outcome.value_counts(normalize=True)
print(male_outcome_shares)



There is no apparent relationship between a driver's gender and whether they receive a ticket when stopped for speeding.

In [None]:
# Raw counts of stops with and without a search
print(ri.search_conducted.value_counts())

# Overall search rate: the share of stops in which a search was conducted
# (value_counts() alone only yields counts, not a rate)
print(ri.search_conducted.value_counts(normalize=True))

# Search rate for male and female drivers individually
print(ri.groupby('driver_gender').search_conducted.value_counts(normalize=True))

Male drivers tend to be searched much more often than female drivers.

In [None]:
# Mean of 'search_conducted' within every (violation, gender) group,
# i.e. the search rate per combination (assumes the column is boolean)
search_rate_by_group = ri.groupby(['violation', 'driver_gender'])['search_conducted'].mean()
print(search_rate_by_group)

Male drivers tend to be searched much more often than female drivers, for all types of violations.

In [None]:
# Frequency of each recorded 'search_type' value
search_type_counts = ri['search_type'].value_counts()
print(search_type_counts)

# Flag stops whose 'search_type' mentions 'Protective Frisk';
# na=False marks stops with a missing search type as not frisked
mentions_frisk = ri['search_type'].str.contains('Protective Frisk', na=False)
ri['frisk'] = mentions_frisk

# Total number of stops flagged as frisks
print(ri['frisk'].sum())

In [None]:
# Keep only the stops in which a search was conducted
was_searched = ri.search_conducted == True
searched = ri.loc[was_searched]

# Overall frisk rate among searched stops (mean of the 'frisk' flag)
overall_frisk_rate = searched['frisk'].mean()
print(overall_frisk_rate)

# Frisk rate among searched stops, broken down by driver gender
frisk_rate_by_gender = searched.groupby('driver_gender')['frisk'].mean()
print(frisk_rate_by_gender)

## Visual exploratory data analysis to check other relationships