In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

%matplotlib inline

These are the abbreviations used by the original dataset:
* Gender
    * M = Male
    * F = Female
    * None = Unknown
* Race
    * W = White, non-Hispanic
    * B = Black, non-Hispanic
    * A = Asian
    * N = Native American
    * H = Hispanic
    * O = Other
    * None = Unknown

In [None]:
# Read the fatal police shootings CSV into a DataFrame.
filepath = "../input/data-police-shootings/fatal-police-shootings-data.csv"
data = pd.read_csv(filepath_or_buffer=filepath)

In [None]:
# Column-name constants used to index `data` throughout the notebook.

# Manner of death and armament
MANNER, ARMED = "manner_of_death", "armed"
# Demographics
AGE, GENDER, RACE = "age", "gender", "race"
# Location
CITY, STATE = "city", "state"
# Circumstances of the encounter
SIGNS, THREAT, FLEE, BODY = "signs_of_mental_illness", "threat_level", "flee", "body_camera"

data.head()

# Part 1: Data Visualization
First, let's look at general characteristics of the data.

## Manner of death

In [None]:
# Count rows per manner of death with boolean sums. Unlike value_counts()[True],
# a sum yields 0 instead of raising KeyError when a category has no rows.
manner = data[MANNER]
x = [(manner == "shot").sum(), (manner == "shot and Tasered").sum()]
labels = ["Shot", "Shot and tasered"]
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Manner of death distribution")

## Armed

In [None]:
# Ten most frequent values of the armed column, computed once and reused for both axes.
top_armed = data[ARMED].value_counts().head(10)

plt.xlabel("Frequency")
plt.ylabel("Type of armament")
plt.title("Top 10 Most Frequently Recorded Armament Types")
plt.barh(top_armed.index, top_armed.values)

## Age

In [None]:
# Histogram of recorded ages; rows with a missing age are left out.
known_ages = data[AGE].dropna()
decade_ticks = [10 * i for i in range(10)]

plt.xlabel("Age")
plt.ylabel("Frequency")
plt.title("Age distribution histogram")
plt.xticks(decade_ticks)
plt.hist(known_ages)

## Gender

In [None]:
# Count rows per gender code with boolean sums. Unlike value_counts()[True],
# a sum yields 0 instead of raising KeyError when a code has no rows.
# Rows whose gender is missing are not counted in either wedge.
labels = ["M", "F"]
gender = data[GENDER]
x = [(gender == code).sum() for code in labels]
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Gender distribution of fatal police shootings")

## Race

In [None]:
# (dataset code, display label) pairs for every race code in the dataset legend.
# "N" (Native American) is included so that the wedges cover all rows and the
# percentages are computed over the full total.
race_categories = [
    ("W", "White"),
    ("B", "Black"),
    ("H", "Hispanic"),
    ("A", "Asian"),
    ("N", "Native American"),
    ("O", "Other"),
]

race = data[RACE]
# Boolean sums give 0 for an absent code, where value_counts()[True] would raise KeyError.
x = [(race == code).sum() for code, _ in race_categories]
x.append(race.isnull().sum())  # rows with no race recorded
labels = [name for _, name in race_categories] + ["Unlabeled"]

plt.pie(x, labels=labels, autopct="%.2f", radius=3)
plt.legend()
plt.title("Race distribution")

## City

In [None]:
# Number of distinct city names; missing values are not counted.
num_cities = data[CITY].nunique()
print(f"Fatal police shootings have occurred in {num_cities} cities")

In [None]:
# Ten most frequent city names, computed once and reused for both axes.
top_cities = data[CITY].value_counts().head(10)

plt.xlabel("Frequency")
plt.ylabel("City")
plt.title("Top 10 Cities with Most Fatal Police Shootings")
plt.barh(top_cities.index, top_cities.values)

## State

In [None]:
# Number of rows per state code, used for both the map locations and the color scale.
state_counts = data[STATE].value_counts()

fig = px.choropleth(
    locations=state_counts.index,
    locationmode="USA-states",
    color=state_counts.values,
    scope="usa",
    title="Fatal police shootings by state (color = number of fatalities)",
)
fig.show()

## Signs of mental illness

In [None]:
# value_counts() orders by frequency, so sort by index to guarantee the counts are
# ordered False, True and always line up with the hard-coded labels below.
# NOTE(review): assumes the column contains both False and True values - confirm.
x = data[SIGNS].value_counts().sort_index().values
labels = ["No signs of mental illness", "Signs of mental illness"]
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Distribution of mental illness in fatal police shootings")

## Types of threats

In [None]:
# Tally each threat level once; the tally supplies both wedge sizes and wedge labels.
threat_counts = data[THREAT].value_counts()
x = threat_counts.values
labels = threat_counts.index

plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Types of threats in fatal police shootings")

## Fleeing

In [None]:
# Tally each flee value once; the tally supplies both wedge sizes and wedge labels.
flee_counts = data[FLEE].value_counts()
x = flee_counts.values
labels = flee_counts.index

plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Flight in fatal police shootings")

## Body camera

In [None]:
# value_counts() orders by frequency, so sort by index to guarantee the counts are
# ordered False, True and always line up with the hard-coded labels below.
# NOTE(review): assumes the column contains both False and True values - confirm.
x = data[BODY].value_counts().sort_index().values
labels = ["Body camera not present", "Body camera present"]
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Presence of body cameras in fatal police shootings")

# Part 2: Case Study - Unarmed Fatal Police Shootings
Fatal police shootings in which the deceased were unarmed may receive high-profile coverage in the news. 
In this section, let's take a closer look at these cases to see if there are any characteristics unique to unarmed cases.

In [None]:
# Keep only the rows whose armed column is exactly "unarmed".
is_unarmed = data[ARMED].eq("unarmed")
unarmed_data = data.loc[is_unarmed]
unarmed_data.head()

## Unarmed Age Distribution

In [None]:
# Age histograms for the unarmed subset (figure 1) and for all cases (figure 2).
# Rows with a missing age are left out of both.
unarmed_known_ages = unarmed_data[AGE].dropna()
overall_known_ages = data[AGE].dropna()
decade_ticks = [10 * i for i in range(10)]

plt.figure(1)
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.title("Unarmed age distribution")
plt.xticks(decade_ticks)
plt.hist(unarmed_known_ages)

plt.figure(2)
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.title("Overall age distribution")
plt.xticks(decade_ticks)
plt.hist(overall_known_ages)

It seems that the age distribution for unarmed cases parallels that of cases overall.

## Unarmed Gender Distribution

In [None]:
# Gender pies for the unarmed subset (figure 1) and for all cases (figure 2).
# Boolean sums are used for the counts: unlike value_counts()[True], they yield 0
# instead of raising KeyError when a gender code has no rows in a subset.
labels = ["M", "F"]
gender_panels = [
    (1, unarmed_data, "Unarmed gender distribution"),
    (2, data, "Overall gender distribution"),
]

for fig_num, frame, fig_title in gender_panels:
    gender = frame[GENDER]
    x = [(gender == code).sum() for code in labels]
    plt.figure(fig_num)
    plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
    plt.legend()
    plt.title(fig_title)

There doesn't seem to be a significant difference between the unarmed and overall cases for gender distribution.

## Unarmed Race Distribution

In [None]:
# Race pies for the unarmed subset (figure 1) and for all cases (figure 2).

# (dataset code, display label) pairs for every race code in the dataset legend.
# "N" (Native American) is included so that the wedges cover all rows and the
# percentages are computed over the full total.
race_categories = [
    ("W", "White"),
    ("B", "Black"),
    ("H", "Hispanic"),
    ("A", "Asian"),
    ("N", "Native American"),
    ("O", "Other"),
]
labels = [name for _, name in race_categories] + ["Unlabeled"]
race_panels = [
    (1, unarmed_data, "Unarmed race distribution"),
    (2, data, "Overall race distribution"),
]

for fig_num, frame, fig_title in race_panels:
    race = frame[RACE]
    # Boolean sums give 0 for a code absent from a subset, where
    # value_counts()[True] would raise KeyError.
    x = [(race == code).sum() for code, _ in race_categories]
    x.append(race.isnull().sum())  # rows with no race recorded
    plt.figure(fig_num)
    plt.pie(x, labels=labels, autopct="%.2f", radius=3)
    plt.legend()
    plt.title(fig_title)

* In 35% of unarmed fatal police shootings, the deceased were Black, compared to 24% for overall cases. 
* In 41% of unarmed fatal police shootings, the deceased were White, compared to 46% for overall cases. 
* In 1% of unarmed fatal police shootings, the deceased were Unlabeled, compared to 9% for overall cases. 

Overall, there do seem to be substantial differences in the racial distribution of unarmed cases compared to
overall cases.

## Unarmed City

In [None]:
# Top-10 city bar charts for the unarmed subset (figure 1) and for all cases (figure 2).
top_unarmed_cities = unarmed_data[CITY].value_counts().head(10)
top_overall_cities = data[CITY].value_counts().head(10)

plt.figure(1)
plt.xlabel("Frequency")
plt.ylabel("City")
plt.title("Top 10 Cities with Most Unarmed Fatal Police Shootings")
plt.barh(top_unarmed_cities.index, top_unarmed_cities.values)

plt.figure(2)
plt.xlabel("Frequency")
plt.ylabel("City")
plt.title("Top 10 Cities with Most Fatal Police Shootings")
plt.barh(top_overall_cities.index, top_overall_cities.values)

## Unarmed State

In [None]:
# Per-state row counts for the unarmed subset and for all cases, one map each.
unarmed_state_counts = unarmed_data[STATE].value_counts()
overall_state_counts = data[STATE].value_counts()

fig = px.choropleth(
    locations=unarmed_state_counts.index,
    locationmode="USA-states",
    color=unarmed_state_counts.values,
    scope="usa",
    title="Unarmed fatal police shootings by state (color = number of fatalities)",
)
fig.show()

fig = px.choropleth(
    locations=overall_state_counts.index,
    locationmode="USA-states",
    color=overall_state_counts.values,
    scope="usa",
    title="Overall fatal police shootings by state (color = number of fatalities)",
)
fig.show()

It seems that states with a greater number of overall cases also tend to have more unarmed cases.

## Unarmed Signs of Mental Illness

In [None]:
# Mental-illness pies for the unarmed subset (figure 1) and for all cases (figure 2).
# value_counts() orders by frequency, which may differ between the two frames, so
# the counts are sorted by index to keep them ordered False, True and aligned with
# the hard-coded labels.
# NOTE(review): assumes the column contains both False and True values in each frame - confirm.
labels = ["No signs of mental illness", "Signs of mental illness"]

plt.figure(1)
x = unarmed_data[SIGNS].value_counts().sort_index().values
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Unarmed signs of mental illness")

plt.figure(2)
x = data[SIGNS].value_counts().sort_index().values
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
# Only the "Overall" title is set here; the original first set the "Unarmed"
# title on this figure and then immediately overwrote it.
plt.title("Overall signs of mental illness")

There doesn't appear to be a significant difference between unarmed and overall cases.

## Unarmed Threat Types

In [None]:
# Threat-level pies for the unarmed subset (figure 1) and for all cases (figure 2).
# Each tally supplies both the wedge sizes and the wedge labels.
unarmed_threat_counts = unarmed_data[THREAT].value_counts()
overall_threat_counts = data[THREAT].value_counts()

plt.figure(1)
x = unarmed_threat_counts.values
labels = unarmed_threat_counts.index
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Unarmed types of threats")

plt.figure(2)
x = overall_threat_counts.values
labels = overall_threat_counts.index
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Overall types of threats")

* In 39% of unarmed cases, the deceased were attacking.
* In 64% of overall cases, the deceased were attacking.
* It seems that the deceased were less likely to have been attacking if they were unarmed.

## Unarmed Fleeing

In [None]:
# Flee-status pies for the unarmed subset (figure 1) and for all cases (figure 2).
# Each tally supplies both the wedge sizes and the wedge labels.
plt.figure(1)
unarmed_flee_counts = unarmed_data[FLEE].value_counts()
x = unarmed_flee_counts.values
labels = unarmed_flee_counts.index
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
# This figure shows the unarmed subset; it was previously mistitled "Overall flight".
plt.title("Unarmed flight")

plt.figure(2)
overall_flee_counts = data[FLEE].value_counts()
x = overall_flee_counts.values
labels = overall_flee_counts.index
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Overall flight")

## Unarmed Body Camera

In [None]:
# Body-camera pies for the unarmed subset (figure 1) and for all cases (figure 2).
# value_counts() orders by frequency, which may differ between the two frames, so
# the counts are sorted by index to keep them ordered False, True and aligned with
# the hard-coded labels.
# NOTE(review): assumes the column contains both False and True values in each frame - confirm.
labels = ["Body camera not present", "Body camera present"]

plt.figure(1)
x = unarmed_data[BODY].value_counts().sort_index().values
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Unarmed presence of body cameras")

plt.figure(2)
x = data[BODY].value_counts().sort_index().values
plt.pie(x, labels=labels, autopct="%.2f", radius=1.2)
plt.legend()
plt.title("Overall presence of body cameras")