# Police Force Responses by Crime

In [None]:
#Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd

In [None]:
#Read the Police_Use_of_Force csv into a df and preview the first rows
csv_path = "Resources/Police_Use_of_Force.csv"
police_data = pd.read_csv(csv_path)
police_data.head()

In [None]:
#List the column labels of the raw df
police_data.columns

In [None]:
#Narrow the raw df to the ten columns of interest and display the result
columns_to_keep = [
    "PoliceUseOfForceID", "CaseNumber", "ResponseDate", "Problem", "ForceType",
    "ForceTypeAction", "EventAge", "TypeOfResistance", "Precinct", "Neighborhood",
]
police_data_df = police_data.loc[:, columns_to_keep]
police_data_df

In [None]:
#Inspect the dtype of each selected column
police_data_df.dtypes

In [None]:
#Discard every row that is missing a value in any of the selected columns
police_data_df = police_data_df.dropna(axis=0, how="any")
police_data_df

In [None]:
#Keep only the first row seen for each CaseNumber
clean_df = police_data_df.drop_duplicates(subset="CaseNumber", keep="first")
clean_df

# 1. Which incidents most often result in police use of force? (Sam)

In [None]:
#Summarize the Problem column of the de-duplicated df
problem_series = clean_df["Problem"]
test_data = problem_series.describe(include="all")
test_data

In [None]:
#Count the distinct case numbers recorded for each problem type.
#CaseNumber is selected before aggregating so nunique is not computed for every other column.
incident_data = clean_df.groupby("Problem")
total_count = incident_data["CaseNumber"].nunique()

#Put the counts into a df and sort from the most to the fewest cases
incidents = pd.DataFrame({"Total Count": total_count})
top_incidents = incidents.sort_values("Total Count", ascending=False)

#Label the index for display and show the first rows of the sorted df
top_incidents.index.name = "Incidents"
top_incidents.head()

In [None]:
#Bar chart of the five incidents with the highest case counts
incident_ax = top_incidents.head(5).plot(kind="bar", edgecolor="black", facecolor="purple", fontsize=10)
incident_ax.set_title("Top 5 Incidents Resulting in Use of Police Force in Minneapolis, MN")
incident_ax.set_ylabel("Number of Cases")
incident_ax.set_xlabel("Incidents")

# 2. What type of force is used the most? (Ainash)

# 3. Which is the most common type of resistance? (Humera)

# 4. Which neighborhood in Minneapolis sees the highest frequency of police use of force? (Mike Hills)

In [None]:
#narrow the original df to the neighborhood and force type columns
force_per_neighborhood_df = police_data_df[['Neighborhood', 'ForceType']]

#count the ForceType entries logged for each neighborhood
force_group = force_per_neighborhood_df.groupby('Neighborhood')
sum_neigh = force_group['ForceType'].count()

#wrap the counts in a one-column df, order from highest to lowest count; store and display
force_per_neighborhood_df = sum_neigh.to_frame(name='Force Count')
sorted_force_df = force_per_neighborhood_df.sort_values(by='Force Count', ascending=False)
sorted_force_df

In [None]:
#store top 10 highest total force per neighborhood into its own df and display
#sorted_force_df is the df already ordered from highest to lowest force count
top10force_df = sorted_force_df.head(10)
top10force_df

In [None]:
#bar chart of the ten neighborhoods with the highest force counts
axis_label_font = {'fontsize':24}
top10_title = 'Minneapolis Police Use of Force Distribution per Neighborhood: 10 Highest' + '\n' + '(2008-2021)'

Top10bar = top10force_df.plot(
    kind="bar",
    figsize=(30,15),
    width=.4,
    color="red",
    alpha=.8,
    rot=0,
    fontsize=20,
    legend=False,
)
Top10bar.set_ylabel('Force Count', fontdict=axis_label_font)
Top10bar.set_xlabel('Neighborhood', fontdict=axis_label_font)
Top10bar.set_title(top10_title, pad=15, fontdict={'fontsize':25})

In [None]:
#Find highest force count neighborhood (first index label of the sorted df) and store
worst_neigh = sorted_force_df.index[0]

#count the neighborhoods present in the sorted df and store
total_neigh = len(sorted_force_df)

#find and store highest force count
#.iloc selects by position; the Series is indexed by neighborhood name, so 0 is not a label
top1force = top10force_df['Force Count'].iloc[0]

In [None]:
#keep every neighborhood whose force count is below the highest count and store into new df
top1_clean_df = force_per_neighborhood_df.loc[force_per_neighborhood_df['Force Count'] < top1force]

#find and store 10th highest force count (position 9 of the top-10 df, selected by position)
TenthHighestNeigh = top10force_df['Force Count'].iloc[9]

In [None]:
#keep only neighborhoods whose force count is below the 10th highest count and store in new df
f_per_neigh_clean = force_per_neighborhood_df.loc[force_per_neighborhood_df['Force Count'] < TenthHighestNeigh]

#calculate total force count for all neighborhoods combined except for the highest force neighborhood
sumforce = top1_clean_df['Force Count'].sum()

#store highest force neighborhood (first row of the sorted df) in its own df
top1force_df = sorted_force_df.head(1)

top1force_df

In [None]:

#create dictionary to store values for summarized df
#(named topvsrest_data so the builtin dict is not shadowed for the rest of the notebook)
topvsrest_data = {'Minneapolis Neighborhoods (Total:86)':[worst_neigh, "All Others"], 
                  'Force Count': [top1force, sumforce]}

#pass the dictionary into pd.DataFrame to generate df
Topvsrest_df = pd.DataFrame(topvsrest_data)

#set index to "Minneapolis Neighborhoods" and display
Topvsrest_df = Topvsrest_df.set_index('Minneapolis Neighborhoods (Total:86)')

Topvsrest_df



In [None]:
#bar chart comparing the highest force count neighborhood with all others combined
label_font = {'fontsize':12}
topvsrest_title = 'Minneapolis Police Use of Force Distribution:' + '\n' + '(2008-2021)'

Topvsrest_bar = Topvsrest_df.plot(
    kind="bar",
    figsize=(4,7),
    width=.5,
    color="red",
    alpha=.7,
    rot=0,
    fontsize=12,
    legend=False,
)
Topvsrest_bar.set_ylabel('Force Count', fontdict=label_font)
Topvsrest_bar.set_xlabel('Minneapolis Neighborhoods (Total:86)', fontdict=label_font)
Topvsrest_bar.set_title(topvsrest_title, pad=15, fontdict={'fontsize':14})

In [None]:
#pie chart: share of force count for the highest-count neighborhood vs all others combined
x = [top1force,sumforce]
explode = (.06,0)  #pull the first wedge slightly out of the pie
textprops = {"fontsize":13}
colors = ["red","green"]
#label the first wedge with the neighborhood computed from the data rather than a fixed name
labels = [worst_neigh,'All Others']

plt.pie(x, labels=labels, textprops=textprops, explode=explode, colors=colors, 
        autopct="%1.1f%%", shadow=True, startangle=90)
plt.axis("equal")
plt.title("Minneapolis Police Use of Force Distribution: Worst Neighborhood vs All Others (Total: 86)" + "\n" + "(2008-2021)")

plt.show()


# 5. Is the use of police force in Minneapolis going up or down? (Mike F)

# 6. Perform a linear regression on use of force between 2015 and 2019 and make a prediction about use of force in 2020 for the city of Minneapolis.  Compare this prediction to actual use of force outcomes in 2020.