# Police Force Responses by Crime

In [None]:
#Dependencies
import matplotlib
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from scipy.stats import linregress

In [None]:
# Read the raw use-of-force records from disk and preview the first rows
csv_path = "Resources/Police_Use_of_Force.csv"
police_data = pd.read_csv(csv_path)
police_data.head()

In [None]:
# Keep only the columns that the analysis below works with
selected_columns = [
    "PoliceUseOfForceID",
    "CaseNumber",
    "ResponseDate",
    "Problem",
    "ForceType",
    "ForceTypeAction",
    "EventAge",
    "TypeOfResistance",
    "Precinct",
    "Neighborhood",
]
police_data_df = police_data.loc[:, selected_columns]
police_data_df

In [None]:
# Discard every row that has at least one missing value
# (dropna's default is how='any' along the row axis)
police_data_df = police_data_df.dropna()
police_data_df

In [None]:
# Normalise inconsistent spellings in the ForceTypeAction column so that
# variants of the same action are counted together.
force_action_aliases = {
    "Knee": "Knees",
    "Body Weight Pin": "Body Weight to Pin",
    "Conscious Neck Restraint": "Neck Restraint: subject did not lose conciousness",
    "Punch": "Punches",
    "Slap": "Slaps",
    # An unconscious neck restraint is the opposite outcome of a conscious one,
    # so it must not be folded into the "did not lose" category.
    # NOTE(review): the label text mirrors the existing one (including its
    # spelling) -- confirm it against the category names present in the CSV.
    "Unconscious Neck Restraint": "Neck Restraint: subject lost conciousness",
    "Firearm as striking tool": "Firearms As Striking Tool",
}
police_data_df['ForceTypeAction'] = police_data_df['ForceTypeAction'].replace(force_action_aliases)



In [None]:
# Fold the projectile variant into the broader "Less Lethal" force type
police_data_df['ForceType'] = police_data_df['ForceType'].replace(
    to_replace="Less Lethal Projectile", value="Less Lethal"
)

In [None]:
# Normalise the TypeOfResistance labels.
# Some values carry a long run of trailing spaces; strip whitespace first so
# the clean-up does not depend on the exact padding width, then merge the
# remaining case/wording variants into one canonical label each.
resistance_aliases = {
    "TENSED": "Tensed",
    "tensed": "Tensed",
    "verbal non-compliance": "Verbal Non-Compliance",
    "commission of crime": "Commission of Crime",
    "Commission of a Crime": "Commission of Crime",
    "COMMISSION OF CRIME": "Commission of Crime",
    "Fled in a Vehicle": "Fled in Vehicle",
    "Assaulted Police K9": "Assaulting Police K9",
}
police_data_df['TypeOfResistance'] = (
    police_data_df['TypeOfResistance']
    .str.strip()
    .replace(resistance_aliases)
)

In [None]:
# Give the columns reader-friendly names and renumber the rows from 0
column_labels = {
    'PoliceUseOfForceID': "Police ID",
    'CaseNumber': "Case Number",
    'ResponseDate': "Response Date",
    'Problem': "Type of Crime",
    'ForceType': 'Type of Force',
    'ForceTypeAction': 'Type of Force Action',
    'EventAge': 'Event Age',
    'TypeOfResistance': 'Type of Resistance',
}
clean_df = police_data_df.rename(columns=column_labels).reset_index(drop=True)
clean_df

# 1. Which incidents most often result in police use of force? (Sam)

In [None]:
# Summary statistics (count, unique, top, freq) for the crime-type column
crime_column = clean_df["Type of Crime"]
test_data = crime_column.describe(include="all")
test_data

In [None]:
# Count the distinct case numbers behind each crime type, largest first
incident_data = clean_df.groupby("Type of Crime")
total_count = incident_data["Case Number"].nunique()

incidents = total_count.to_frame(name="Total Count")
top_incidents = (
    incidents
    .sort_values(by="Total Count", ascending=False)
    .rename_axis("Incidents")
)
top_incidents.head()

In [None]:
# Bar chart of the five crime types with the most distinct cases
top5_incidents_ax = top_incidents.head(5).plot(
    kind="bar", edgecolor="black", facecolor="purple", fontsize=10
)
top5_incidents_ax.set_title("Top 5 Incidents Resulting in Police Force in Minneapolis, MN")
top5_incidents_ax.set_ylabel("Number of Cases")

In [None]:
# Sam's code end here

# 2. What type of force is used the most? (Ainash)

In [None]:
# List every distinct force type present in the cleaned data
force_type_ainash = clean_df.loc[:, 'Type of Force']
force_type_ainash.unique()

In [None]:
# Rank the force types by the number of records that use each one
force_data_ainash = clean_df.loc[:, ['Type of Force', 'Case Number']]
force_data_groupby_ainash = force_data_ainash.groupby(by='Type of Force')
force_data_count_ainash = force_data_groupby_ainash['Type of Force'].count()
most_force_type_ainash = force_data_count_ainash.to_frame(name='Total Count')
sorted_ainash = most_force_type_ainash.sort_values(by='Total Count', ascending=False)

In [None]:
# create a bar chart to show the Top 5 most used types of force
top5_force_bar_ainash = sorted_ainash.head().plot(kind = 'bar', title = 'The Top 5 most used Type of Force by Police in Minneapolis, MN area')
top5_force_bar_ainash.set_xlabel ('Type of Force')
top5_force_bar_ainash.set_ylabel ('Number of Cases')
# tight_layout() has to run before show(): once the figure has been shown,
# a later tight_layout() no longer affects the rendered chart.
plt.tight_layout()
plt.show()

In [None]:
# Count how many distinct force actions appear in the cleaned data
force_type_action_ainash = clean_df.loc[:, 'Type of Force Action']
force_type_action_ainash.nunique()

In [None]:
# Rank every (force type, force action) pair by its number of records
force_pair_columns = ['Type of Force', 'Type of Force Action', 'Case Number']
merged_force_type_data_ainash = (
    clean_df.loc[:, force_pair_columns]
    .groupby(by=['Type of Force', 'Type of Force Action'])
)
merged_force_type = merged_force_type_data_ainash['Type of Force Action'].count()
merged_ainash = merged_force_type.to_frame(name='Total Count')
sorted_merged_ainash = merged_ainash.sort_values(by='Total Count', ascending=False)

In [None]:
# create a bar chart to show the Top 5 most used types of force and its action
top5_merged_bar = sorted_merged_ainash.head().plot(kind = 'bar', title = 'The Top 5 most used Type of Force and it Action by Police in Minneapolis, MN area')
top5_merged_bar.set_xlabel ('Type of Force and it Action')
top5_merged_bar.set_ylabel ('Number of Cases')
# tight_layout() has to run before show(): once the figure has been shown,
# a later tight_layout() no longer affects the rendered chart.
plt.tight_layout()
plt.show()

In [None]:
# drop ages =0
# Take an explicit copy: a column is added to age_df further down, and
# assigning into a filtered slice of clean_df raises SettingWithCopyWarning.
age_df = clean_df.loc[clean_df['Event Age'] != 0, :].copy()

In [None]:
#create bins for ages
# pd.cut intervals are closed on the right, so each edge is the LAST age of
# its group: (6-10], (10-15], (15-20], (20-30], ... The lowest edge (6) is
# only included when pd.cut is called with include_lowest=True.
# With the previous x.9 edges an age of exactly 10, 15, 20, ... landed in the
# next group, contradicting the labels below.
bins = [6, 10, 15, 20, 30, 40, 50, 60, 70, 80, 90]
group_names = ['6-10', '11-15', '16-20','21-30','31-40','41-50','51-60', '61-70', '71-80', '81 <']

In [None]:
# Label every record with the age bracket its 'Event Age' falls into
age_bracket = pd.cut(
    age_df['Event Age'],
    bins=bins,
    labels=group_names,
    include_lowest=True,
)
age_df['Age Groups'] = age_bracket

In [None]:
# Number of records in each age bracket
age_data_ainash = age_df.loc[:, ['Age Groups', 'Case Number']]
age_data_groupby_ainash = age_data_ainash.groupby('Age Groups')
age_count_ainash = age_data_groupby_ainash['Age Groups'].count()
age_df_ainash = age_count_ainash.to_frame(name='Total Count')

In [None]:
# Bar chart of the number of use-of-force records per age bracket
age_bar_ainash = age_df_ainash.plot(kind = 'bar', title = 'Force used by Police in Minneapolis, MN area by Age')
# Label THIS chart's axes (the labels used to be applied to the earlier
# top-5 force-type chart, leaving this one unlabeled).
age_bar_ainash.set_xlabel ('Age Groups')
age_bar_ainash.set_ylabel ('Number of Cases')
# tight_layout() has to run before show() to affect the rendered chart.
plt.tight_layout()
plt.show()

# Types of Force used by Police in Minneapolis,MN Analysis
## When analyzing types of Force used by Police in Minneapolis, Minnesota, we observed that:
* There are 10 types of Force that Police in Minneapolis, MN use in incidents: Bodily Force, Chemical Irritant, Taser, Improvised Weapon, Baton, Police K9 Bite, Firearm, Less Lethal Projectile, Gun Point Display, Maximal Restraint Technique. From these force types, the five most commonly used by law enforcement officers in Minneapolis are: Bodily Force (21212 cases among analyzed data), Chemical Irritant (3780 cases), Taser(2757 cases), Gun Point Display (436 cases) and Improvised weapon (343 cases).
* Types of Force also have sub-categories that are called Types of Force Action. Our data includes 41 types of Force Action. The Top 5 most used Types of Force Action by Police in Minneapolis, MN area are Body Weight to Pin (Bodily Force type), Punches (Bodily Force type), Joint Lock (Bodily Force type), Personal Mace (Chemical Irritant type) and Push Away (Bodily Force type).
* We also analyzed the use-of-force cases by age group and observed that police most often use force on people aged 21-30.

In [None]:
# Ainash code ends here

# 3. Which type of resistance is the most common? (Humera)

# Most popular type of Resistance

In [None]:
# Rank all force types by the number of records that use each one
force_type_data_ainash = (
    clean_df.loc[:, ['Type of Force', 'Case Number']]
    .groupby(by='Type of Force')
)
force_type = force_type_data_ainash['Type of Force'].count()
force_type_data = force_type.to_frame(name='Total Count')
sorted_force = force_type_data.sort_values(by='Total Count', ascending=False)



In [None]:
# create a bar chart to show the most used force type
force_bar = sorted_force.plot(kind = 'bar', title = 'Used Type of Force by Police in Minneapolis, MN area')
force_bar.set_xlabel ('Type of Force')
force_bar.set_ylabel ('Number of Cases')
# tight_layout() has to run before show() to affect the rendered chart.
plt.tight_layout()
plt.show()

# Count the distinct case numbers behind each type of resistance, largest first.
# clean_df carries the renamed column labels ("Type of Resistance",
# "Case Number"), so those are the names that must be used here.
resistance_data = clean_df.groupby("Type of Resistance")
total_count = resistance_data["Case Number"].nunique()

resistance = pd.DataFrame({"Total Count": total_count})
resistance_case_no = resistance.sort_values("Total Count", ascending=False)

resistance_case_no.index.name = "Type of Resistance"
resistance_case_no.head()

In [None]:
# Bar chart of the six most frequent types of resistance
resistance_ax = resistance_case_no.head(6).plot(
    kind="bar",
    edgecolor="black",
    figsize=(8, 8),
    facecolor="blue",
    fontsize=10,
)
resistance_ax.set_title("Type of Resistance Resulting in Use of Police Force in Minneapolis")
resistance_ax.set_ylabel("Number of Cases", fontsize=12)
resistance_ax.set_xlabel("Type of Resistance", fontsize=12)
plt.tight_layout()
plt.show()

In [None]:
#Pie chart
# Share of cases for the five most frequent types of resistance; the first
# (largest) slice is pulled out of the pie for emphasis.
colors = ['gold', 'red', 'pink', 'blue', 'lightskyblue']
explode = (0.1, 0, 0, 0, 0)
# A pie plot of a DataFrame needs subplots=True (one pie per column).
# It must be the boolean True: a string such as "False" is not a valid
# boolean flag and is rejected or misinterpreted by pandas.
resistance_case_no.head(5).plot(kind="pie", explode=explode, colors=colors, autopct="%1.1f%%", subplots=True, shadow=True, startangle=150)
plt.title('Top 5 - Type of Resistance Resulting in Use of Police Force in Minneapolis', fontsize=10)
plt.axis('equal')
plt.tight_layout()
plt.show()

# Force Type of Action used by Police Force in Minneapolis

In [None]:
# Summary statistics for the force-action column.
# clean_df uses the renamed label "Type of Force Action"; the raw CSV name
# "ForceTypeAction" no longer exists in it.
popular_data = clean_df["Type of Force Action"].describe(include= "all")
popular_data

In [None]:
# Count the distinct case numbers behind each force action, largest first.
# clean_df uses the renamed labels "Type of Force Action" / "Case Number".
forcetype_data = clean_df.groupby("Type of Force Action")
total_count = forcetype_data["Case Number"].nunique()

forcetype = pd.DataFrame({"Total Count": total_count})
forcetype_case_no = forcetype.sort_values("Total Count", ascending=False)

forcetype_case_no.index.name = "Type of Force Action"
forcetype_case_no.head()

In [None]:
# Horizontal bar chart of the ten most frequent force actions
forcetype_case_no.head(10).plot(kind="barh",edgecolor="black", facecolor="red", fontsize=12, align="center", alpha=1.0)
plt.title("ForceTypeAction Resulting in Use of Police Force in Minneapolis")
# In a horizontal bar chart the categories run along the y-axis and the
# counts along the x-axis, so the labels go the other way round than for
# a vertical bar chart.
plt.ylabel("Force Type Action", fontsize=12)
plt.xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
# Display the cleaned DataFrame (bare last expression -> rich notebook output)
clean_df

# Groupby Forcetype and Type of Resistance

In [None]:
#Top 5 - Force Type and Force Type Action Vs Number of Cases
# Preview the per-column record counts of the first five (force type, force
# action) groups. clean_df uses the renamed column labels.
ft_fa = clean_df.groupby(['Type of Force','Type of Force Action'])
ft_fa.count().head(5)

In [None]:
#Top 5 - Force Type and Force Type Action Vs Number of Cases
# Count the distinct case numbers for every (force type, force action) pair,
# largest first. clean_df uses the renamed column labels.
ft_fa = clean_df.groupby(['Type of Force','Type of Force Action'])
total_count = ft_fa["Case Number"].nunique()
forcetype_forceaction = pd.DataFrame({"Total Count": total_count})
forcetype_forceaction = forcetype_forceaction.sort_values("Total Count", ascending=False)
forcetype_forceaction.head()

In [None]:
# Horizontal bar chart of the eight most frequent (force type, force action) pairs
force_action_ax = forcetype_forceaction.head(8).plot(
    kind="barh",
    edgecolor="black",
    facecolor="purple",
    fontsize=12,
    align="center",
    alpha=1.0,
)
force_action_ax.set_title("Force Type and Force Type Action Vs Number of Cases")
force_action_ax.set_ylabel("Force Type Action_Force Type", fontsize=12)
force_action_ax.set_xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
#Top 5 - Force Type and Type of Resistance Vs Number of Cases
# Preview the per-column record counts of the first five (force type,
# resistance) groups. clean_df uses the renamed column labels.
ft_tr = clean_df.groupby(['Type of Force','Type of Resistance'])
ft_tr.count().head(5)

In [None]:
# Top 5 - Force Type and Type of Resistance Vs Number of Cases
# Count the distinct case numbers for every (force type, resistance) pair,
# largest first. clean_df uses the renamed column labels.
ft_tr = clean_df.groupby(['Type of Force','Type of Resistance'])
total_count = ft_tr["Case Number"].nunique()
forcetype_resistance = pd.DataFrame({"Total Count": total_count})
forcetype_resistance = forcetype_resistance.sort_values("Total Count", ascending=False)
forcetype_resistance.head()

In [None]:
# Horizontal bar chart of the eight most frequent (force type, resistance) pairs
force_resistance_ax = forcetype_resistance.head(8).plot(
    kind="barh",
    edgecolor="black",
    facecolor="orange",
    fontsize=12,
    align="center",
    alpha=1.0,
)
force_resistance_ax.set_title("Force Type and Type of Resistance Vs Number of Cases")
force_resistance_ax.set_ylabel("Force Type_ Resistance", fontsize=12)
force_resistance_ax.set_xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
#problem and force type

In [None]:
#Problem and Type of Resistance

In [None]:
# Problem and force type

In [None]:
# Humera code end here

# 4. Which neighborhood in Minneapolis sees the highest frequency of police force? (Mike Hills)

In [None]:
# Count the use-of-force records per neighborhood and rank the
# neighborhoods from the highest count to the lowest
neighborhood_force = clean_df[['Neighborhood', 'Type of Force']]
force_group = neighborhood_force.groupby('Neighborhood')
sum_neigh = force_group['Type of Force'].count()

# one-column frame holding the count per neighborhood, then its sorted view
force_per_neighborhood_df = sum_neigh.to_frame(name='Force Count')
sorted_force_df = force_per_neighborhood_df.sort_values(by='Force Count', ascending=False)
sorted_force_df

In [None]:
# The five neighborhoods with the highest force counts (first five sorted rows)
top5force_df = sorted_force_df.iloc[:5]
top5force_df

In [None]:
# Bar chart of the five neighborhoods with the highest force counts
Top5bar = top5force_df.plot(kind="bar", figsize=(30,15), width=.4, color="red", alpha=.8, rot=0, fontsize=20, legend=False)
Top5bar.set_ylabel('Force Count',fontdict={'fontsize':24})
Top5bar.set_xlabel('Neighborhood',fontdict={'fontsize':24})
# The chart plots top5force_df (five rows), so the title says "5 Highest".
Top5bar.set_title('Minneapolis Police Use of Force Distribution per Neighborhood: 5 Highest' + '\n' + '(2008-2021)', pad=15,fontdict={'fontsize':25})

In [None]:
#Find highest force count neighborhood and store
worst_neigh = sorted_force_df.index[0]

#find and store highest force count
# .iloc[0] selects by position; plain [0] on a Series indexed by neighborhood
# names relies on deprecated positional fallback.
top1force = sorted_force_df['Force Count'].iloc[0]

#remove highest force count row and store into new df
# Drop by label so that only the top neighborhood is removed; filtering on
# "count < top1force" would also discard any neighborhood tied with it.
top1_clean_df = force_per_neighborhood_df.drop(index=worst_neigh)

#calculate total force count for all neighborhoods combined except for the highest force neighborhood
sumforce = top1_clean_df['Force Count'].sum()

In [None]:

#create dictionary to store values for summarized df
# (named topvsrest_data rather than `dict`, which would shadow the builtin
# for the rest of the notebook session)
topvsrest_data = {'Minneapolis Neighborhoods (Total:86)':[worst_neigh, "All Others"],
                  'Force Count': [top1force, sumforce]}

#pass the dictionary into pd.DataFrame to generate df
Topvsrest_df = pd.DataFrame(topvsrest_data)

#set index to "Minneapolis Neighborhoods" and display
Topvsrest_df = Topvsrest_df.set_index('Minneapolis Neighborhoods (Total:86)')


In [None]:
# Bar chart comparing the top neighborhood with all others combined
Topvsrest_bar = Topvsrest_df.plot(
    kind="bar",
    figsize=(4, 7),
    width=.5,
    color="red",
    alpha=.7,
    rot=0,
    fontsize=12,
    legend=False,
)
axis_label_font = {'fontsize': 12}
Topvsrest_bar.set_ylabel('Force Count', fontdict=axis_label_font)
Topvsrest_bar.set_xlabel('Neighborhoods (Total:86)', fontdict=axis_label_font)
Topvsrest_bar.set_title(
    'Minneapolis Police Use of Force\n(2008-2021)',
    pad=15,
    fontdict={'fontsize': 14},
)

In [None]:
# Pie chart: share of force counts in the top neighborhood vs. all others
x = [top1force,sumforce]
explode = (.06,0)
textprops = {"fontsize":13}
colors = ["red","green"]
# Label the highlighted slice with the neighborhood that was actually found
# to have the highest count instead of a hard-coded name, so the chart stays
# correct if the data changes.
labels = [worst_neigh,'All Others']

plt.pie(x, labels=labels, textprops=textprops, explode=explode, colors=colors, 
        autopct="%1.1f%%", shadow=True, startangle=90)
plt.axis("equal")
plt.title("Minneapolis Police Use of Force: Worst Neighborhood vs All Others (Total: 86)" + "\n" + "(2008-2021)")

plt.show()


In [None]:

# Preview the first five rows of the cleaned data
clean_df.iloc[:5]

In [None]:
# Mike H code ends here

# 5. Is the use of police force in Minneapolis going up or down? (Mike F)

In [None]:
# Mike F code ends here

# 6. Perform a linear regression on use of force between 2015 and 2019 and make a prediction about use of force in 2020 for the city of Minneapolis.  Compare this prediction to actual use of force outcomes in 2020.

In [None]:
#pull out Response Date and Type of Force from clean df and store
date_force_df = clean_df.loc[:,['Response Date','Type of Force']]
response_dates = date_force_df['Response Date']

#find all response dates containing 2015 - 2019 and store
# str.contains treats the pattern as a regular expression, so joining the
# years with "|" matches any one of them.
searchfor = ['2015', '2016', '2017', '2018', '2019']
df_2015_2019 = date_force_df[response_dates.str.contains('|'.join(searchfor))]

#divide 2015 - 2019 (plus 2020, the year to be predicted) in separate dfs
df_2015 = date_force_df[response_dates.str.contains("2015")]
df_2016 = date_force_df[response_dates.str.contains("2016")]
df_2017 = date_force_df[response_dates.str.contains("2017")]
df_2018 = date_force_df[response_dates.str.contains("2018")]
df_2019 = date_force_df[response_dates.str.contains("2019")]
df_2020 = date_force_df[response_dates.str.contains("2020")]

#checking to make sure the length of 2015 - 2019 dfs is equal to 2015-2019 df to ensure no missing values
# Written as an assert so a mismatch is actually reported: a bare comparison
# that is not the last expression of the cell is evaluated and then discarded.
yearly_rows = len(df_2015) + len(df_2016) + len(df_2017) + len(df_2018) + len(df_2019)
assert len(df_2015_2019) == yearly_rows, "per-year frames do not add up to the 2015-2019 frame"
df_2015

In [None]:
# Breakdown of the 2018 records by force type
values2018 = df_2018['Type of Force'].value_counts()

# Number of use-of-force records (all force types) in each year
yearly_frames = (df_2015, df_2016, df_2017, df_2018, df_2019, df_2020)
sum2015, sum2016, sum2017, sum2018, sum2019, sum2020 = [
    frame['Type of Force'].count() for frame in yearly_frames
]

# Show the 2015 breakdown by force type
df_2015['Type of Force'].value_counts()

In [None]:
# Yearly totals of use-of-force records. The sums count every force type,
# not only bodily force; the column label is kept because a later plotting
# cell looks the column up by this exact name.
# (named yearly_totals rather than `dict`, which would shadow the builtin)
yearly_totals = {'year':[2015,2016,2017,2018,2019,2020],
                 'Total Bodily Force Incidents': [sum2015,sum2016,sum2017,sum2018,sum2019,sum2020]}

#pass the dictionary into pd.DataFrame to generate df
tot_bodyforce_df = pd.DataFrame(yearly_totals)
tot_bodyforce_df

In [None]:
# Linear regression of the yearly total number of use-of-force records
year = tot_bodyforce_df.iloc[:,0]
tot_bodyforce = tot_bodyforce_df.iloc[:,1]

# Fit on 2015-2019 only. 2020 is the year being predicted, so including its
# actual value in the fit would leak the answer into the "prediction".
fit_years = year < 2020
bf_slope, bf_int, bf_r, bf_p, bf_std_err = stats.linregress(year[fit_years], tot_bodyforce[fit_years])

# Evaluate the fitted line for every year, including 2020, so the plot shows
# the extrapolation next to the actual 2020 value.
bf_fit = bf_slope * year + bf_int

bf_fig, bf_ax = plt.subplots()
bf_ax.scatter(year, tot_bodyforce)
bf_ax.plot(year,bf_fit,"--")
bf_ax.set_xlabel('Year')
# The series counts all force types, not only bodily force.
bf_ax.set_ylabel('Total Force Incidents')
plt.show()

In [None]:
# Compare the regression's 2020 value with the recorded 2020 total.
# tot_bodyforce_df holds the count of ALL force types per year, so the
# messages say "force incidents" rather than "bodily force incidents".
totbodyforce2020 = tot_bodyforce_df.iloc[5,1]
year2020 = 2020
print(f"The total force incidents in 2020 will be {round(bf_slope * year2020 + bf_int,0)}")
print(f"The actual force incidents in 2020 were {totbodyforce2020}")



In [None]:
# Per-year breakdown of records by force type
force_counts_2015 = df_2015['Type of Force'].value_counts()
force_counts_2016 = df_2016['Type of Force'].value_counts()
force_counts_2017 = df_2017['Type of Force'].value_counts()
force_counts_2018 = df_2018['Type of Force'].value_counts()
force_counts_2019 = df_2019['Type of Force'].value_counts()
force_counts_2020 = df_2020['Type of Force'].value_counts()

yearly_force_counts = [force_counts_2015, force_counts_2016, force_counts_2017,
                       force_counts_2018, force_counts_2019, force_counts_2020]

# Bodily-force count for each year; .get(..., 0) yields 0 instead of raising
# KeyError when a year has no bodily-force record at all.
(bodilyforce2015, bodilyforce2016, bodilyforce2017,
 bodilyforce2018, bodilyforce2019, bodilyforce2020) = [
    counts.get('Bodily Force', 0) for counts in yearly_force_counts
]

# (named bodyforce_data rather than `dict`, which would shadow the builtin)
bodyforce_data = {'year':[2015,2016,2017,2018,2019,2020],
                  'Bodily Force Count': [bodilyforce2015,bodilyforce2016,bodilyforce2017,bodilyforce2018,bodilyforce2019,bodilyforce2020]}

#pass the dictionary into pd.DataFrame to generate df
bodyforce_df = pd.DataFrame(bodyforce_data)

# first and last year covered, used as x-axis limits by the summary figure
startyear = bodyforce_df['year'].min()
endyear = bodyforce_df['year'].max()

tot_body = bodyforce_df.iloc[:,1]

# `year` is the Series of years taken from tot_bodyforce_df in an earlier cell.
# Fit on 2015-2019 only: 2020 is the year being predicted, so its actual
# value must not take part in the fit.
fit_years = year < 2020
b_slope, b_int, b_r, b_p, b_std_err = stats.linregress(year[fit_years], tot_body[fit_years])

# fitted line evaluated for every year, including the 2020 extrapolation
b_fit = b_slope * year + b_int

# actual 2020 bodily-force count (row 5 = 2020)
bodyforce2020 = bodyforce_df.iloc[5,1]
bodyforce2020

In [None]:
# Taser count for each year; .get(..., 0) yields 0 instead of raising
# KeyError when a year has no taser record at all.
(taser2015, taser2016, taser2017, taser2018, taser2019, taser2020) = [
    counts.get('Taser', 0)
    for counts in (force_counts_2015, force_counts_2016, force_counts_2017,
                   force_counts_2018, force_counts_2019, force_counts_2020)
]

# (named taser_data rather than `dict`, which would shadow the builtin)
taser_data = {'year':[2015,2016,2017,2018,2019,2020],
              'Taser Count': [taser2015,taser2016,taser2017,taser2018,taser2019,taser2020]}

#pass the dictionary into pd.DataFrame to generate df
taser_df = pd.DataFrame(taser_data)
taser_count = taser_df['Taser Count']

tot_taser = taser_df.iloc[:,1]

# `year` is the Series of years taken from tot_bodyforce_df in an earlier cell.
# Fit on 2015-2019 only: 2020 is the year being predicted, so its actual
# value must not take part in the fit.
fit_years = year < 2020
t_slope, t_int, t_r, t_p, t_std_err = stats.linregress(year[fit_years], tot_taser[fit_years])

# fitted line evaluated for every year, including the 2020 extrapolation
t_fit = t_slope * year + t_int

# actual 2020 taser count (row 5 = 2020)
taser2020 = taser_df.iloc[5,1]

In [None]:
# Three stacked panels sharing the year axis: taser, bodily force and total
# counts, each with its fitted regression line (dashed).
fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
# The figure covers taser, bodily force and the overall total, so the title
# names use of force in general rather than bodily force alone.
fig.suptitle("Police Use of Force Incidents by Year", fontsize=13, fontweight="bold")

ax1.set_xlim(startyear, endyear)
ax1.plot(year, taser_count, linewidth=1, marker="o")
ax1.plot(year, t_fit, "b--", linewidth=1)
ax1.set_ylabel("Taser")

ax2.plot(year, bodyforce_df['Bodily Force Count'], linewidth=1, marker="o", color="r")
ax2.plot(year, b_fit, "r--", linewidth=1)
ax2.set_ylabel("Bodily Force")

ax3.plot(year, tot_bodyforce_df['Total Bodily Force Incidents'] , linewidth=1, marker="o", color="g")
ax3.plot(year, bf_fit, "g--", linewidth=1)
ax3.set_ylabel("Total Incidents")
ax3.set_xlabel("Year")
# keep the shared x-axis ticks on whole years
ax3.set_xticks(list(year))

plt.show()

In [None]:

# Evaluate each fitted line at 2020 and report it next to the recorded value
taser_forecast = round(t_slope * year2020 + t_int, 0)
bodily_forecast = round(b_slope * year2020 + b_int, 0)
total_forecast = round(bf_slope * year2020 + bf_int, 0)

print(f"The total taser incidents in 2020 will be {taser_forecast}")
print(f"The actual total taser incidents in 2020 were {taser2020}\n")

print(f"The total bodily force incidents in 2020 will be {bodily_forecast}")
print(f"The actual total bodily force incidents in 2020 were {bodyforce2020}\n")

print(f"The total force incidents in 2020 will be {total_forecast}")
print(f"The actual force incidents in 2020 were {totbodyforce2020}")