# Police Use of Force in Minneapolis, MN

In [None]:
#Dependencies
import matplotlib
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd
from scipy.stats import linregress

In [None]:
#Load the raw use-of-force records from the Resources folder
csv_path = "Resources/Police_Use_of_Force.csv"
police_data = pd.read_csv(csv_path)
police_data

In [None]:
#keep only the columns that the analyses below rely on
columns_of_interest = [
    "PoliceUseOfForceID",
    "CaseNumber",
    "ResponseDate",
    "Problem",
    "ForceType",
    "ForceTypeAction",
    "EventAge",
    "TypeOfResistance",
    "Precinct",
    "Neighborhood",
]
police_data_df = police_data[columns_of_interest]
police_data_df

In [None]:
#discard every row that has a missing value in any of the kept columns
police_data_df = police_data_df.dropna(axis="index", how="any")
police_data_df

In [None]:
#rename rows in ForceTypeAction column so variant spellings share one label
force_action_labels = {
    "Knee": "Knees",
    "Body Weight Pin": "Body Weight to Pin",
    "Conscious Neck Restraint": "Neck Restraint: subject did not lose conciousness",
    "Punch": "Punches",
    "Slap": "Slaps",
    #an unconscious neck restraint is the case where the subject DID lose
    #consciousness, so it must not be merged into the "did not lose" label
    'Unconscious Neck Restraint': 'Neck Restraint: subject lost conciousness',
    'Firearm as striking tool': 'Firearms As Striking Tool'
}
police_data_df['ForceTypeAction'] = police_data_df['ForceTypeAction'].replace(force_action_labels)


In [None]:
#fold the "Less Lethal Projectile" label into "Less Lethal" in the ForceType column
force_type_labels = {"Less Lethal Projectile": "Less Lethal"}
police_data_df['ForceType'] = police_data_df['ForceType'].replace(force_type_labels)

In [None]:
#rename rows in TypeOfResistance column
#Some raw values are right-padded with spaces. Stripping first cleans every padded
#value, not only the ones that were previously listed by hand with exact padding.
resistance_labels = {
    "TENSED": "Tensed",
    "tensed": "Tensed",
    "verbal non-compliance": "Verbal Non-Compliance",
    "commission of crime": "Commission of Crime",
    "Commission of a Crime": "Commission of Crime",
    "COMMISSION OF CRIME": "Commission of Crime",
    "Fled in a Vehicle": "Fled in Vehicle",
    "Assaulted Police K9": "Assaulting Police K9",
}
police_data_df['TypeOfResistance'] = (
    police_data_df['TypeOfResistance']
    .str.strip()
    .replace(resistance_labels)
)

In [None]:
#give the columns reader-friendly names, then renumber the rows from 0
readable_names = {
    'PoliceUseOfForceID': "Police ID",
    'CaseNumber': "Case Number",
    'ResponseDate': "Response Date",
    'Problem': "Type of Crime",
    'ForceType': 'Type of Force',
    'ForceTypeAction': 'Type of Force Action',
    'EventAge': 'Event Age',
    'TypeOfResistance': 'Type of Resistance',
}
clean_df = police_data_df.rename(columns=readable_names).reset_index(drop=True)
clean_df

# 1. What incidents result the most in police use of force? (Sam)

In [None]:
#summary statistics for the Type of Crime column
crime_column = clean_df["Type of Crime"]
test_data = crime_column.describe(include="all")
test_data

In [None]:
#count the distinct case numbers recorded for each type of crime
incident_data = clean_df.groupby("Type of Crime")
total_count = incident_data["Case Number"].nunique()

#rank the crime types from most to fewest cases
incidents = total_count.to_frame("Total Count")
top_incidents = incidents.sort_values(by="Total Count", ascending=False)
top_incidents = top_incidents.rename_axis("Incidents")

top_incidents.head(5)

In [None]:
#bar chart of the five crime types with the most cases
top_five_incidents = top_incidents.head(5)
top_five_incidents.plot(
    kind="bar",
    edgecolor="black",
    facecolor="purple",
    fontsize=10,
)
plt.title("Top 5 Incidents Resulting in Police Force in Minneapolis, MN")
plt.ylabel("Number of Cases")

# Incident Analysis:
-When looking at the data, it is evident that the number one incident that results in the use of police force in Minneapolis is when there is a suspicious person (2341).

-There could be many reasons why suspicious person is the number one incident resulting in police force. For instance, the suspect could have been caught in the middle of committing a crime and may do whatever it takes to get out of the situation. Another reason is that the suspect could be exhibiting unusual behavior because they are either mentally unstable or on drugs. 

-Since every case is different, it is hard to pinpoint an exact reason why suspicious person is the number one incident resulting in police force. 

In [None]:
# Sam's code ends here

# 2. What type of force is used the most? (Ainash)

In [None]:
#list the distinct values found in the Type of Force column
force_type_ainash = clean_df.loc[:, 'Type of Force']
force_type_ainash.unique()

In [None]:
# count the rows recorded for each type of force and rank them, most used first
force_data_ainash = clean_df.loc[:, ['Type of Force', 'Case Number']]
force_data_groupby_ainash = force_data_ainash.groupby('Type of Force')
force_data_count_ainash = force_data_groupby_ainash['Type of Force'].count()
most_force_type_ainash = force_data_count_ainash.to_frame('Total Count')
sorted_ainash = most_force_type_ainash.sort_values(by=['Total Count'], ascending=False)
sorted_ainash

In [None]:
# create a bar chart to show the Top 5 most used types of force
top5_force_bar_ainash = sorted_ainash.head().plot(kind="bar",edgecolor="black", facecolor="purple", fontsize=12, align="center", alpha=1.0)
top5_force_bar_ainash.set_title('The Top 5 most used Type of Force by Police in Minneapolis, MN area')
top5_force_bar_ainash.set_xlabel('Type of Force')
top5_force_bar_ainash.set_ylabel('Number of Cases')
# tight_layout must run before show(); after show() it no longer affects the displayed figure
plt.tight_layout()
plt.show()

In [None]:
#count how many distinct force actions appear in the data
force_type_action_ainash = clean_df.loc[:, 'Type of Force Action']
force_type_action_ainash.nunique()

In [None]:
# count the rows for every (type of force, force action) pair and rank them, most used first
force_action_columns = ['Type of Force', 'Type of Force Action', 'Case Number']
merged_force_type_data_ainash = clean_df[force_action_columns]
merged_force_type_data_ainash = merged_force_type_data_ainash.groupby(
    ['Type of Force', 'Type of Force Action']
)
merged_force_type = merged_force_type_data_ainash['Type of Force Action'].count()
merged_ainash = merged_force_type.to_frame('Total Count')
sorted_merged_ainash = merged_ainash.sort_values(by=['Total Count'], ascending=False)

In [None]:
# create a bar chart to show the Top 5 most used types of force and their actions
top5_merged_bar = sorted_merged_ainash.head().plot(kind="bar",edgecolor="black", facecolor="purple", fontsize=12, align="center", alpha=1.0)
top5_merged_bar.set_title('The Top 5 most used Type of Force and its Action by Police in Minneapolis, MN area')
top5_merged_bar.set_xlabel('Type of Force and its Action')
top5_merged_bar.set_ylabel('Number of Cases')
# tight_layout must run before show(); after show() it no longer affects the displayed figure
plt.tight_layout()
plt.show()

In [None]:
# keep only the rows whose recorded age is not 0
has_nonzero_age = clean_df['Event Age'] != 0
age_df = clean_df.loc[has_nonzero_age, :]

In [None]:
#create bins for ages
#pd.cut uses right-closed intervals, so each edge is the LAST age of its group:
#[6, 10], (10, 15], (15, 20], (20, 30], ... which matches the labels below.
#(The previous 9.9/14.9/... edges pushed ages 10, 15, 20, 30, ... into the next group.)
#The last group is open-ended, as its label '81 <' says.
bins = [6, 10, 15, 20, 30, 40, 50, 60, 70, 80, float("inf")]
group_names = ['6-10', '11-15', '16-20','21-30','31-40','41-50','51-60', '61-70', '71-80', '81 <']

In [None]:
#label every row with its age group; assign() builds a new frame, so the column is
#not written into a filtered slice of clean_df (which triggers SettingWithCopyWarning)
age_df = age_df.assign(**{
    'Age Groups': pd.cut(age_df['Event Age'], bins, labels = group_names, include_lowest = True)
})

In [None]:
#count the rows that fall into each age group
age_data_ainash = age_df.loc[:, ['Age Groups', 'Case Number']]
age_data_groupby_ainash = age_data_ainash.groupby('Age Groups')
age_count_ainash = age_data_groupby_ainash['Age Groups'].count()
age_df_ainash = age_count_ainash.to_frame('Total Count')

In [None]:
#bar chart of the number of rows per age group
age_bar_ainash = age_df_ainash.plot(kind="bar",edgecolor="black", facecolor="purple", fontsize=12, align="center", alpha=1.0)
age_bar_ainash.set_title('Force used by Police in Minneapolis, MN area by Age')
#label THIS chart's axes (the labels were previously set on the earlier top-5 force chart)
age_bar_ainash.set_xlabel('Age Groups')
age_bar_ainash.set_ylabel('Number of Cases')
# tight_layout must run before show(); after show() it no longer affects the displayed figure
plt.tight_layout()
plt.show()

# Types of Force used by Police in Minneapolis,MN Analysis
## When analyzing types of Force used by Police in Minneapolis, Minnesota, we observed that:
* There are 10 types of Force that Police in Minneapolis, MN use in incidents: Bodily Force, Chemical Irritant, Taser, Improvised Weapon, Baton, Police K9 Bite, Firearm, Less Lethal Projectile, Gun Point Display, Maximal Restraint Technique. From these force types, the five most commonly used by law enforcement officers in Minneapolis are: Bodily Force (21212 cases among analyzed data), Chemical Irritant (3780 cases), Taser(2757 cases), Gun Point Display (436 cases) and Improvised weapon (343 cases).
* Types of Force also have sub-categories that are called Types of Force Action. Our data includes 41 types of Force Action. The Top 5 most used Types of Force Action by Police in Minneapolis, MN area are Body Weight to Pin (Bodily Force type), Punches (Bodily Force type), Joint Lock (Bodily Force type), Personal Mace (Chemical Irritant type) and Push Away (Bodily Force type).
* We also analyzed the use-of-force cases by Age Group and observed that force is most commonly used on people between the ages of 21 and 30. 

In [None]:
# Ainash's code ends here

# 3. Which is the most popular type of resistance? (Humera)

In [None]:
#summary statistics for the Type of Resistance column
resistance_column = clean_df["Type of Resistance"]
popular_data = resistance_column.describe(include="all")
popular_data

In [None]:
#count the distinct case numbers recorded for each type of resistance
resistance_data = clean_df.groupby("Type of Resistance")
total_count = resistance_data["Case Number"].nunique()
total_count

In [None]:
#rank the resistance types from most to fewest cases
resistance = total_count.to_frame("Total Count")
top_resistance = resistance.sort_values(by="Total Count", ascending=False)
top_resistance

In [None]:
#name the index so tables and charts show "Resistance" as the row heading
top_resistance = top_resistance.rename_axis("Resistance")
top_resistance.head(5)

In [None]:
#horizontal bar chart of the five most common resistance types
top_resistance.head(5).plot(kind="barh",edgecolor="black", facecolor="purple", fontsize=10)
#the chart shows head(5), so the title says Top 5
plt.title("Top 5 Resistances Resulting in Use of Police Force in Minneapolis, MN")
#in a horizontal bar chart the categories are on the y axis and the counts on the x axis
plt.ylabel("Resistance", fontsize=12)
plt.xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
#Pie chart of the five most common resistance types
colors = ['gold', 'red', 'pink', 'blue', 'lightskyblue']
#pull the first (largest) slice out slightly
explode = (0.1, 0, 0, 0, 0)
#A DataFrame pie plot needs either y= or subplots=True. The previous subplots="False"
#was a non-empty string: truthy where pandas only tests it, rejected where pandas validates it.
top_resistance.head(5).plot(kind="pie", y="Total Count", explode=explode, colors=colors, autopct= "%1.1f%%", shadow=True, startangle=150 )
plt.title('Top 5 - Type of Resistance Resulting in Use of Police Force in Minneapolis', fontsize=10)
plt.axis('equal')
plt.tight_layout()
plt.show()

In [None]:
# Preview of row counts for each (type of resistance, type of force) pair
resistance_force_keys = ['Type of Resistance','Type of Force']
ft_tr = clean_df.groupby(by=resistance_force_keys)
ft_tr.count().head(5)

In [None]:
# Distinct case numbers per (type of resistance, type of force) pair, largest first
ft_tr = clean_df.groupby(by=['Type of Resistance','Type of Force'])
total_count = ft_tr["Case Number"].nunique()
forcetype_resistance = total_count.to_frame("Total Count")
forcetype_resistance = forcetype_resistance.sort_values(by="Total Count", ascending=False)
forcetype_resistance.head(5)

In [None]:
# Horizontal bar chart of the five largest (type of resistance, type of force) pairs
forcetype_resistance_ax = forcetype_resistance.head(5).plot(
    kind="barh",
    edgecolor="black",
    facecolor="purple",
    fontsize=12,
    align="center",
    alpha=1.0,
)
forcetype_resistance_ax.set_title("Type of Resistance and Force Type Vs Number of Cases")
forcetype_resistance_ax.set_ylabel("Type of Resistance_Force Type", fontsize=12)
forcetype_resistance_ax.set_xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
# Preview of row counts for each (type of resistance, type of crime) pair
resistance_crime_keys = ['Type of Resistance','Type of Crime']
tr_cr = clean_df.groupby(by=resistance_crime_keys)
tr_cr.count().head()

In [None]:
# Distinct case numbers per (type of resistance, type of crime) pair, largest first
tr_cr = clean_df.groupby(by=['Type of Resistance','Type of Crime'])
total_count = tr_cr["Case Number"].nunique()
resistance_crime = total_count.to_frame("Total Count")
resistance_crime = resistance_crime.sort_values(by="Total Count", ascending=False)
resistance_crime.head(5)

In [None]:
# Horizontal bar chart of the five largest (type of resistance, type of crime) pairs
resistance_crime_ax = resistance_crime.head(5).plot(
    kind="barh",
    edgecolor="black",
    facecolor="purple",
    fontsize=12,
    align="center",
    alpha=1.0,
)
resistance_crime_ax.set_title("Type of Resistance and Type of Crime Vs Number of Cases")
resistance_crime_ax.set_ylabel("Resistance_crime", fontsize=12)
resistance_crime_ax.set_xlabel("Number of Cases", fontsize=12)
plt.show()

In [None]:
# Preview of row counts for each (type of resistance, event age) pair
resistance_age_keys = ['Type of Resistance','Event Age']
tr_rd = clean_df.groupby(by=resistance_age_keys)
tr_rd.count().head()

In [None]:
# Distinct case numbers per (type of resistance, event age) pair, largest first
tr_rd = clean_df.groupby(by=['Type of Resistance','Event Age'])
total_count = tr_rd["Case Number"].nunique()
resistance_age = total_count.to_frame("Total Count")
resistance_age = resistance_age.sort_values(by="Total Count", ascending=False)
resistance_age.head(5)

In [None]:
# Horizontal bar chart of the five largest (type of resistance, event age) pairs
resistance_age_ax = resistance_age.head(5).plot(
    kind="barh",
    edgecolor="black",
    facecolor="purple",
    fontsize=12,
    align="center",
    alpha=1.0,
)
resistance_age_ax.set_title("Type of Resistance and Event Age Vs Number of Cases")
resistance_age_ax.set_ylabel("Resistance_age", fontsize=12)
resistance_age_ax.set_xlabel("Number of Cases", fontsize=12)
plt.show()

# Analysis - Types of Resistance used towards Police Force in Minneapolis 


We analysed the most popular type of resistance by:-

. Describing the Type of Resistance column of the DataFrame: the total count was 29075, the most common resistance was being Tensed, and its frequency was 8120.

. Grouping by Type of Resistance and counting case numbers: the top resistances were being Tensed (3708), Verbal Non-Compliance (1762), Assaulted an Officer (983), and Fled in a Vehicle (301).


Type of Resistance and Force Type Vs Number of Cases - The types of resistance most often met with
bodily force were being Tensed, Commission of Crime, and Fled on Foot.

Type of Resistance and Type of Crime Vs Number of Cases - If the type of crime was suspicious person, the resistance was most often being Tensed or Fled on Foot,
whereas if the type of crime was fight, the resistance was most often Commission of Crime.

Type of Resistance and Event Age Vs Number of Cases - The most common types of resistance were being Tensed and Commission of Crime, among people between the ages of 21 and 25.


In [None]:
# Humera's code ends here

# 4. Which neighborhood in Minneapolis contains the highest frequency of police force? (Mike Hills)

In [None]:
#take the neighborhood and type-of-force columns from clean_df,
#then count the force rows recorded in each neighborhood
force_per_neighborhood_df = clean_df[['Neighborhood','Type of Force']]
force_group = force_per_neighborhood_df.groupby(by='Neighborhood')
sum_neigh = force_group['Type of Force'].count()

#wrap the counts in a one-column frame and order it from highest to lowest count
force_per_neighborhood_df = sum_neigh.to_frame('Force Count')
sorted_force_df = force_per_neighborhood_df.sort_values(by=['Force Count'], ascending=False)
sorted_force_df

In [None]:
#the first five rows of the sorted frame are the five highest-count neighborhoods
top5force_df = sorted_force_df.iloc[:5]
top5force_df

In [None]:
#bar chart comparing the force counts of the five highest-count neighborhoods
axis_label_font = {'fontsize':24}
title_font = {'fontsize':25}
Top5bar = top5force_df.plot(
    kind="bar",
    figsize=(20,13),
    width=.25,
    color="red",
    alpha=.8,
    rot=0,
    fontsize=20,
    legend=False,
)
Top5bar.set_ylabel('Force Count', fontdict=axis_label_font)
Top5bar.set_xlabel('Neighborhood', fontdict=axis_label_font)
Top5bar.set_title(
    'Minneapolis Police Use of Force Distribution per Neighborhood: 5 Highest' + '\n' + '(2008-2021)',
    pad=15,
    fontdict=title_font,
)

In [None]:
#find and store neighborhood with highest force count
worst_neigh = sorted_force_df.index[0]

#find and store highest force count
#(.iloc[0] is an explicit positional lookup; the index holds neighborhood names, so
#plain [0] relied on a deprecated positional fallback)
top1force = top5force_df['Force Count'].iloc[0]

#remove the highest force count neighborhood by its label and store into new df
#(a "< top1force" comparison would also remove any neighborhood tied with the top count)
top1_clean_df = force_per_neighborhood_df.drop(index=worst_neigh)

#calculate total force count for all neighborhoods combined except for the highest force neighborhood
sumforce = top1_clean_df['Force Count'].sum()

In [None]:
#create dictionary to store values for summarized df
#(named topvsrest_data so the built-in dict type is not shadowed for the rest of the session)
topvsrest_data = {'Minneapolis Neighborhoods (Total:86)':[worst_neigh, "All Others"], 
                  'Force Count': [top1force, sumforce]}

#pass the dictionary into pd.DataFrame to generate df
Topvsrest_df = pd.DataFrame(topvsrest_data)

#set index to "Minneapolis Neighborhoods (Total:86)" and display
Topvsrest_df = Topvsrest_df.set_index('Minneapolis Neighborhoods (Total:86)')

#df showing force count for highest frequency neighborhood vs. all others
Topvsrest_df

In [None]:
#create pie chart to show percentage distribution of highest frequency neighborhood vs. all others 
x = [top1force,sumforce]
explode = (.06,0)
textprops = {"fontsize":13}
colors = ["red","green"]
#label the first slice with the neighborhood that was actually found to have the highest
#count, so the chart stays correct if the data changes
labels = [worst_neigh,'All Others']

plt.pie(x, labels=labels, textprops=textprops, explode=explode, colors=colors, 
        autopct="%1.1f%%", shadow=True, startangle=90)
plt.axis("equal")
plt.title("Minneapolis Police Use of Force Events:"+ "\n" +" Highest Frequency Neighborhood vs. All Others (Total: 86)" + "\n" + "(2008-2021)")

plt.show()


# Findings and Analysis: Which neighborhood in Minneapolis contains the highest frequency of police force?

•	Of 86 total neighborhoods in Minneapolis, “Downtown West” contained 25.9% of all police use of force events from 2008 – 2021, with a force count of 7,535 out of 29,075 total use of force events.

•	Of the top 5 neighborhoods with the highest use of force, Downtown West contained the majority, or 60% of the total use of force.

•	Some potential avenues of analysis to help explain the skewed distribution of force events per neighborhood could be:
-	Variability of poverty and socioeconomic distribution of each neighborhood compared to the entire city of Minneapolis.
-	Geographic distribution of race and gender by neighborhood, which could indicate a certain probability of racial and/or gender profiling bias by police. 

(Note: Our data analysis omitted gender and race data from the original CSV to avoid questionable-cause logical fallacies.  However, these data could be useful in illustrating trends or biases within the psychology of individuals and/or the psychosociology of society, which is beyond the scope of our analysis.)


In [None]:
# Mike H's code ends here

# 5. Is the use of police force in Minneapolis going up or down? (Mike F)

In [None]:
#Build a per-month view of the raw records and draw a histogram of incidents over time.
import datetime

time_columns = ["PoliceUseOfForceID", "CaseNumber", "ResponseDate", "Problem", "ForceType",
                "ForceTypeAction", "EventAge"]
police_data_time_df = police_data.loc[:, time_columns]
police_data_time_df["Month"] = pd.to_datetime(police_data_time_df["ResponseDate"]).dt.to_period('m')


def _month_to_decimal_year(month_period):
    """Turn a monthly period (printed as 'YYYY-MM') into year + month/12."""
    text = str(month_period)
    return int(text[0:4]) + (int(text[5:7])) / 12


police_data_time_df["MonthNumeric"] = police_data_time_df["Month"].apply(_month_to_decimal_year)

#keep only the records from 2008 onward
police_data_time_df = police_data_time_df[police_data_time_df["MonthNumeric"]>=2008]
unique_months = police_data_time_df["MonthNumeric"].unique()
print(f'The number of unique months with recorded data is {len(unique_months)}')
timehist = plt.hist(police_data_time_df["MonthNumeric"], bins=158)


In [None]:
#Make a dictionary of incidents, indexed by months since 2008.
#Months are matched on an integer month number built from the "Month" period column,
#not by comparing the floating-point year + month/12 values for exact equality.
month_periods = police_data_time_df["Month"]
months_since_2008 = (month_periods.dt.year - 2008) * 12 + month_periods.dt.month
#one pass over the data: number of incident rows per month number
incidents_per_month = months_since_2008.value_counts()

monthIndex = {}
#Month 1 represents January, 2008
for x in range(1,158):
    #monthEquiv is the decimal-year position of that month, used for the x axis
    monthEquiv = 2008+(x/12)
    monthIndex[x] = [round(monthEquiv,2), int(incidents_per_month.get(x, 0))]

incidents_by_month_df = pd.DataFrame.from_dict(monthIndex, orient='index', columns=["Month","Incidents"])
#Overall Data
timeplot = plt.scatter(incidents_by_month_df["Month"],incidents_by_month_df["Incidents"])
#Linear Regression Code
(slope, intercept, rvalue, pvalue, stderr) = linregress(incidents_by_month_df["Month"], incidents_by_month_df["Incidents"])
regress_values = incidents_by_month_df["Month"] * slope + intercept
plt.plot(incidents_by_month_df["Month"],regress_values,"r-")
plt.xlabel("Year")
plt.ylabel("Incidents per month")
plt.title("Monthly Police Use of Force Incidents with Linear Trend")

plt.show()

In [None]:
#The overall plot looks cyclical, possibly following the time of year.
#To check, look at one calendar month at a time across the years and see whether
#the pattern becomes smoother.

#index 1 is January 2008, so index % 12 == 1 selects every January
january_rows = (incidents_by_month_df.index % 12) == 1
incidents_jan = incidents_by_month_df.loc[january_rows]
plt.scatter(incidents_jan["Month"], incidents_jan["Incidents"])

In [None]:
#index % 12 == 4 selects every April
april_rows = (incidents_by_month_df.index % 12) == 4
incidents_april = incidents_by_month_df.loc[april_rows]
plt.scatter(incidents_april["Month"], incidents_april["Incidents"])

In [None]:
#index % 12 == 7 selects every July
july_rows = (incidents_by_month_df.index % 12) == 7
incidents_july = incidents_by_month_df.loc[july_rows]
plt.scatter(incidents_july["Month"], incidents_july["Incidents"])

In [None]:
#index % 12 == 10 selects every October
october_rows = (incidents_by_month_df.index % 12) == 10
incidents_october = incidents_by_month_df.loc[october_rows]
plt.scatter(incidents_october["Month"], incidents_october["Incidents"])

For the selected tested months, there does not actually seem to be a consistent trend, which is strong evidence against the hypothesis that there is a cyclical or seasonal influence on police uses of force. If this trend were to explain the intra-year variances between months, then the scatter plots of uses of force by month would be much closer to a linear progression instead of the unpredictable rises and falls that are actually seen upon filtering by month.

## Overall data indicates a downward trend in overall police use of force.
There are several things to consider in this - that the data is highly variable from month to month, and that the overall trend is downward, but that there is a significant rise at the end of the data. This suggests that police use of force is correlated with ongoing political changes that themselves have an overall trend, but that exhibit significant variation. The lowest months on record are mid to late 2020, which correspond to an increase in discourse of police use of force after the high-profile case of George Floyd, who was killed by excessive and unwarranted use of police force, which resulted in intense public scrutiny of police force used. That police use of force rebounded abruptly in 2021 is troubling and warrants further scrutiny.

In [None]:
# Mike F's code ends here

# 6. Perform a linear regression on use of force between 2015 and 2019 and make a prediction about use of force in 2020 for the city of Minneapolis.  Compare this prediction to actual use of force outcomes in 2020. (Mike H)

In [None]:
#keep only the response date and type of force columns from clean_df
date_force_df = clean_df[['Response Date','Type of Force']]
response_dates = date_force_df['Response Date']

#rows whose response date text contains any of the years 2015 - 2019
searchfor = ['2015', '2016', '2017', '2018', '2019']
any_year_pattern = '|'.join(searchfor)
df_2015_2019 = date_force_df[response_dates.str.contains(any_year_pattern)]

#one frame per year, 2015 - 2019
df_2015 = date_force_df[response_dates.str.contains("2015")]
df_2016 = date_force_df[response_dates.str.contains("2016")]
df_2017 = date_force_df[response_dates.str.contains("2017")]
df_2018 = date_force_df[response_dates.str.contains("2018")]
df_2019 = date_force_df[response_dates.str.contains("2019")]

#rows for 2020, kept aside for comparison with the prediction
df_2020 = date_force_df[response_dates.str.contains("2020")]

#display condensed df with only response date and type of force
date_force_df

In [None]:
#sanity check: the combined 2015 - 2019 frame should hold exactly as many rows as the five yearly frames together
yearly_frames = (df_2015, df_2016, df_2017, df_2018, df_2019)
len(df_2015_2019) == sum(len(frame) for frame in yearly_frames)

In [None]:
#summary stats
#number of force rows recorded from 2015 through 2020
TotalIncidents = df_2015_2019.shape[0] + df_2020.shape[0]
TotalIncidents

In [None]:
#mean force incidents per year over the six years 2015 - 2020
years_covered = 6
MeanIncidents = TotalIncidents / years_covered
MeanIncidents

In [None]:
#number of rows for each type of force across all of date_force_df (not split by year)
force_type_column = date_force_df.loc[:, 'Type of Force']
breakdown = force_type_column.value_counts()
breakdown

In [None]:
#find and store total force incidents (every type of force) for each year
sum2015 = df_2015['Type of Force'].count()
sum2016 = df_2016['Type of Force'].count()
sum2017 = df_2017['Type of Force'].count()
sum2018 = df_2018['Type of Force'].count()
sum2019 = df_2019['Type of Force'].count()

#create dictionary of total force incidents for each year
#(the counts cover all force types, so the column is labelled "Total Force Incidents";
#the variable is not called dict so the built-in type is not shadowed)
yearly_totals = {'Year':[2015,2016,2017,2018,2019], 
                 'Total Force Incidents': [sum2015,sum2016,sum2017,sum2018,sum2019]}

#pass the dictionary into pd.DataFrame to generate df
tot_bodyforce_df = pd.DataFrame(yearly_totals)
tot_bodyforce_df

In [None]:
#find and store years for x-values (first column of tot_bodyforce_df)
year = tot_bodyforce_df.iloc[:,0]

#store year 2020
year2020 = 2020

#find and store total force incidents (every type of force) for 2020
totbodyforce2020 = len(df_2020)

#find and store total force incidents per year for y-values (second column of tot_bodyforce_df)
tot_bodyforce = tot_bodyforce_df.iloc[:,1]

#perform a linear regression on total force incidents versus years 2015 - 2019
bf_slope, bf_int, bf_r, bf_p, bf_std_err = stats.linregress(year, tot_bodyforce)

#create equation of line to calculate predicted total force incidents
bf_fit = bf_slope * year + bf_int

#plot the linear model on top of scatter plot
plt.scatter(year, tot_bodyforce)
plt.plot(year,bf_fit,"--")

#the y values count every type of force, so the axis is labelled accordingly
plt.xlabel('Year')
plt.ylabel('Total Force Incidents')
plt.xticks(year)

print(f"The r-squared is: {bf_r**2}")
plt.show()

In [None]:
#store total incidents per type of force for each year
force_counts_2015 = df_2015['Type of Force'].value_counts()
force_counts_2016 = df_2016['Type of Force'].value_counts()
force_counts_2017 = df_2017['Type of Force'].value_counts()
force_counts_2018 = df_2018['Type of Force'].value_counts()
force_counts_2019 = df_2019['Type of Force'].value_counts()
force_counts_2020 = df_2020['Type of Force'].value_counts()

#store bodily force incidents (*NOT TOTAL FORCE INCIDENTS*) for each year
#(.get with a default of 0 avoids a KeyError for a year with no such incidents)
bodilyforce2020 = force_counts_2020.get('Bodily Force', 0)
bodilyforce2019 = force_counts_2019.get('Bodily Force', 0)
bodilyforce2018 = force_counts_2018.get('Bodily Force', 0)
bodilyforce2017 = force_counts_2017.get('Bodily Force', 0)
bodilyforce2016 = force_counts_2016.get('Bodily Force', 0)
bodilyforce2015 = force_counts_2015.get('Bodily Force', 0)

#create dictionary of bodily force incidents for each year
#(not called dict, so the built-in type is not shadowed)
bodyforce_data = {'year':[2015,2016,2017,2018,2019], 
                  'Bodily Force Count': [bodilyforce2015,bodilyforce2016,bodilyforce2017,bodilyforce2018,bodilyforce2019]}

#pass the dictionary into pd.DataFrame to generate df
bodyforce_df = pd.DataFrame(bodyforce_data)
bodyforce_count = bodyforce_df['Bodily Force Count']

#store start year and end year for subplots
startyear = bodyforce_df['year'].min()
endyear = bodyforce_df['year'].max()

#regress on this frame's own year column so the cell does not depend on a
#variable created by a different cell
b_slope, b_int, b_r, b_p, b_std_err = stats.linregress(bodyforce_df['year'], bodyforce_count)

b_fit = b_slope * bodyforce_df['year'] + b_int

#store bodily force incidents for 2020 (same count as bodilyforce2020, as a plain int)
bodyforce2020 = int(bodilyforce2020)

In [None]:
#find and store taser incidents for each year
#(.get with a default of 0 avoids a KeyError for a year with no taser incidents;
#the 2020 value is computed once instead of being assigned twice)
taser2020 = int(force_counts_2020.get('Taser', 0))
taser2019 = force_counts_2019.get('Taser', 0)
taser2018 = force_counts_2018.get('Taser', 0)
taser2017 = force_counts_2017.get('Taser', 0)
taser2016 = force_counts_2016.get('Taser', 0)
taser2015 = force_counts_2015.get('Taser', 0)

#create dictionary of taser incidents for each year
#(not called dict, so the built-in type is not shadowed)
taser_data = {'year':[2015,2016,2017,2018,2019], 
              'Taser Count': [taser2015,taser2016,taser2017,taser2018,taser2019]}

#pass the dictionary into pd.DataFrame to generate df
taser_df = pd.DataFrame(taser_data)
taser_count = taser_df['Taser Count']

#regress on this frame's own year column so the cell does not depend on a
#variable created by a different cell
t_slope, t_int, t_r, t_p, t_std_err = stats.linregress(taser_df['year'], taser_count)

t_fit = t_slope * taser_df['year'] + t_int

In [None]:
#find and store chemical irritant incidents for each year
#(.get with a default of 0 avoids a KeyError for a year with no such incidents;
#the 2020 value is computed once instead of being assigned twice)
chem2020 = int(force_counts_2020.get('Chemical Irritant', 0))
chem2019 = force_counts_2019.get('Chemical Irritant', 0)
chem2018 = force_counts_2018.get('Chemical Irritant', 0)
chem2017 = force_counts_2017.get('Chemical Irritant', 0)
chem2016 = force_counts_2016.get('Chemical Irritant', 0)
chem2015 = force_counts_2015.get('Chemical Irritant', 0)

#create dictionary of chemical irritant incidents for each year
#(not called dict, so the built-in type is not shadowed)
chem_data = {'year':[2015,2016,2017,2018,2019], 
             'Chemical Irritant Count': [chem2015,chem2016,chem2017,chem2018,chem2019]}

#pass the dictionary into pd.DataFrame to generate df
chem_df = pd.DataFrame(chem_data)
chem_count = chem_df['Chemical Irritant Count']

#regress on this frame's own year column so the cell does not depend on a
#variable created by a different cell
c_slope, c_int, c_r, c_p, c_std_err = stats.linregress(chem_df['year'], chem_count)

c_fit = c_slope * chem_df['year'] + c_int

In [None]:
#generate subplots of taser, bodily force, chemical irritant, and total incidents,
#each with its observed yearly counts (solid) and its linear fit (dashed)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(sharex=True, nrows=2, ncols=2, gridspec_kw={'width_ratios': [3, 3]})
#the figure covers three force types plus the overall total, not only bodily force
fig.suptitle("Use of Force Incidents by Type (2015-2019)", fontsize=13, fontweight="bold")

ax1.set_xlim(startyear, endyear)
#the x axis is shared, so whole-year ticks set here apply to all four panels
ax1.set_xticks(year)
ax1.plot(year, taser_count, linewidth=1, marker="o")
ax1.plot(year, t_fit, "b--", linewidth=1)
ax1.set_ylabel("Taser")

ax2.plot(year, bodyforce_count, linewidth=1, marker="o", color="r")
ax2.plot(year, b_fit, "r--", linewidth=1)
ax2.set_ylabel("Bodily Force")

ax3.plot(year, chem_count, linewidth=1, marker="o", color="b")
ax3.plot(year, c_fit, "b--", linewidth=1)
ax3.set_ylabel("Chemical Irritant")
ax3.set_xlabel("Year")

ax4.plot(year, tot_bodyforce, linewidth=1, marker="o", color="g")
ax4.plot(year, bf_fit, "g--", linewidth=1)
ax4.set_ylabel("Total Incidents")
ax4.set_xlabel("Year")

plt.tight_layout(pad=2)
plt.show()

In [None]:
#report, for each fitted line, the value it predicts for 2020 next to the observed 2020 count

#(label used in the sentence, fitted slope, fitted intercept, observed 2020 count)
prediction_rows = [
    ("taser", t_slope, t_int, taser2020),
    ("bodily force", b_slope, b_int, bodyforce2020),
    ("chemical irritant", c_slope, c_int, chem2020),
    ("force", bf_slope, bf_int, totbodyforce2020),
]
last_position = len(prediction_rows) - 1

for position, (label, fitted_slope, fitted_int, actual_2020) in enumerate(prediction_rows):
    predicted_2020 = round(fitted_slope * year2020 + fitted_int,0)
    #a blank line separates the pairs; nothing follows the final pair
    gap = "" if position == last_position else "\n"
    print(f"The total {label} incidents in 2020 will be {predicted_2020}.")
    print(f"The actual total {label} incidents in 2020 were {actual_2020}.{gap}")

# Linear Regression Analysis

•	The total incidents of force were calculated based on 10 “Type of Force” categories, while 3 were chosen at random for observational comparison:  “Chemical Irritant”, “Taser”, and “Bodily Force.”

•	Based on the subplots, Taser showed an overall increase, Bodily Force showed an overall decrease (with a slight increase from 2017 to 2019).  Chemical Irritant showed a sharp overall decrease.  From this we can observe that Taser and Bodily Force slowed the overall downward trend in total incidents, particularly from years 2017 – 2019, while Chemical Irritant followed the overall downward trend.

•	The total number of use-of-force incidents (all force types) between 2015 and 2020 was 11,355, with an average of 1,893 incidents per year.  A plot of total force incidents vs. year for 2015 – 2019 shows a downward trend in use of force.  A linear regression model follows this trend with an r-squared value of 0.750, meaning that about 75% of the year-to-year variation in total incidents is explained by the linear model.  

•	Total incidents for 2015 and 2020 were 2,038 and 1,718, respectively, showing a drop in use of force by 15.7%.  

•	Based on the regression model, the predicted use of force events in 2020 was 1,595, which is 7% lower than the actual use of force in 2020.

•	Overall, the regression model predicted all four analyzed categories of force to be lower than actual, and the observed data trends align closely with the predictions.
