In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
print("Setup Complete")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere beneath the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Introduction
Recently, police killings, namely killings of black people for crimes or perceived crimes that were minor, have come to national attention again after the killing of George Floyd in Minneapolis and the killing of Breonna Taylor in Louisville. I wanted to look at the demographics of people killed by the police, including age, race, gender, and whether or not they were armed, in order to confirm that certain groups are disproportionately more likely to be killed by police. This notebook is based on data compiled by the Washington Post.

# Loading and checking out data

In [None]:
# Location of the fatal-police-shootings CSV, relative to the notebook's working directory.
police_shootings_filepath = "../input/fatal-police-shootings/fatal-police-shootings-data.csv"

# Load the CSV, using its "date" column as the index and asking pandas to parse it as dates.
police_shootings_data = pd.read_csv(
    police_shootings_filepath,
    index_col="date",
    parse_dates=True,
)

# Peek at the first and last rows.
# NOTE(review): when this runs as a single notebook cell, presumably only the final
# expression (tail) is rendered -- confirm whether the head() output is wanted too.
police_shootings_data.head()
police_shootings_data.tail()

# How old were people who were shot by police?
Based on the following chart, we can see that the vast majority of people are between roughly 15 and 60, with most being between 20 and 40 years old. Based on the US population pyramid (which can be found here: https://en.wikipedia.org/wiki/Demographics_of_the_United_States), the victims of police shootings are disproportionately young adults, which may be because children and the elderly are seen as less dangerous by cops.

In [None]:
# Kernel density estimate (KDE) plot of the 'age' column.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill` -- check the
# installed version before upgrading the environment.
victim_ages = police_shootings_data['age']
sns.kdeplot(data=victim_ages, shade=True)

# What is the gender breakdown of people shot by police?
As we can see, 95.5% of the people shot by police over the given time period were male, while only 4.4% were female. In other words, the ratio of males to females shot was roughly 22:1. This is clearly disproportionate: according to Statista (statista.com/statistics/737923/us-population-by-gender/), the US population was 51% female and 49% male over the last decade, but nearly all of the people shot by cops were male. This may be because males are more likely to commit crimes - according to Wikipedia, "Statistics have been consistent in reporting that men commit more criminal acts than women" (https://en.wikipedia.org/wiki/Sex_differences_in_crime). However, this alone is likely not enough to account for this discrepancy. I hypothesize that men are seen as more threatening than women (due to gender roles and other factors such as physical stature) and thus cops are more likely to feel the need to shoot a male suspect than a female one. It may be that men are conditioned by society to be more aggressive than women and therefore are more likely to be violent/aggressive, or perceived as such, both when committing crimes and when confronting officers.

In [None]:
# Tally the 'gender' column into male / female / unknown and report the shares.
# Codes are matched in either case: 'M'/'m' is male and 'F'/'f' is female. Every other
# value (including a missing entry, if the column has any) is counted as unknown.
male = 0
female = 0
unknown_gender = 0

for person in police_shootings_data['gender']:
    if person in ('M', 'm'):
        male += 1
    elif person in ('F', 'f'):
        female += 1
    else:
        unknown_gender += 1

# Every row lands in exactly one bucket, so the grand total is simply their sum.
total = male + female + unknown_gender

print('Total number of people shot:', total)
print('Total number of men shot:', male)
print('Total number of women shot:', female)
print('Total number of people shot with no gender listed:', unknown_gender)

# Guard the divisions so an empty dataset (or one with no women) reports cleanly
# instead of raising ZeroDivisionError.
if total:
    print('Percentage of people shot who were male:', ((male/total)*100),'%')
    print('Percentage of people shot who were female:', ((female/total)*100),'%')
else:
    print('No records found, so percentages cannot be computed.')

if female:
    print('Ratio of men to women who were shot:', (male/female), 'to 1')
else:
    print('Ratio of men to women who were shot: undefined (no women recorded)')

# What is the racial breakdown of people shot by police?
According to the US Census Bureau (https://www.census.gov/quickfacts/fact/table/US/PST045219), in 2019 the US population was estimated to be 60.4% non-Hispanic white, 13.4% black, 1.3% Native American, 5.9% Asian, 0.2% Native Hawaiian/Pacific Islander, 2.7% biracial or multiracial, and 18.3% Latinx/Hispanic.

In contrast, only 47% of those shot by police were white, 23.6% were black, 1.58% were Asian, 16.7% were Latinx, and 10.9% were other races (this last bucket contains every entry not coded as white, black, Latinx, or Asian). From this we can infer that white and Asian people are shot disproportionately less frequently, while black and Latinx people are disproportionately more likely to be shot by police. This could be due to racist stereotypes that paint black and Latinx people as more dangerous & violent (e.g. our dear president: "Sadly, the overwhelming amount of violent crime in our major cities is committed by blacks and hispanics-a tough subject-must be discussed..." (2013 Twitter); "When Mexico sends its people, they’re not sending their best...they’re bringing drugs. They’re bringing crime. They’re rapists. And some, I assume, are good people." (2015)), which makes cops see them as dangerous. White people obviously benefit from white privilege, and the perception of Asian people as a "model minority" may cause cops to see them as less dangerous and thus make cops less likely to shoot Asian people.

In [None]:
# Tally the 'race' column into white / black / Latinx / Asian / everything-else.
# Each single-letter code is accepted in either case; any value that is not one of the
# eight codes below falls into the 'other' bucket.
race_to_bucket = {
    'W': 'white', 'w': 'white',
    'B': 'black', 'b': 'black',
    'H': 'latinx', 'h': 'latinx',
    'A': 'asian', 'a': 'asian',
}
race_tally = {'white': 0, 'black': 0, 'latinx': 0, 'asian': 0, 'other': 0}

for race_code in police_shootings_data['race']:
    race_tally[race_to_bucket.get(race_code, 'other')] += 1

white = race_tally['white']
black = race_tally['black']
latinx = race_tally['latinx']
asian = race_tally['asian']
other_races = race_tally['other']
# Every row is counted in exactly one bucket, so the total is the sum of the buckets.
total = white + black + latinx + asian + other_races

print('Total number of people shot:', total)
print('Total number of white people shot:', white)
print('Total number of black people shot:', black)
print('Total number of Asian people shot:', asian)
print('Total number of Latinx people shot:', latinx)
print('Total number of people of other races shot:', other_races)
print('Percentage of people shot who were white:', white / total * 100, '%')
print('Percentage of people shot who were black:', black / total * 100, '%')
print('Percentage of people shot who were Asian:', asian / total * 100, '%')
print('Percentage of people shot who were Latinx:', latinx / total * 100, '%')
print('Percentage of people shot who were other races:', other_races / total * 100, '%')

# How many people killed by cops were armed, and if so with what?
Based on the data below, we see that about 7% of people killed by police were unarmed, leaving 93% armed. Specifically, 55.6% of all people killed by police were armed with guns, 14.6% were armed with knives, 7% were armed with vehicles, 3.6% were armed with toy weapons, and 12.2% were armed with other weapons. Without looking at race, most of the people killed by police were armed, so officers may have felt the need to use deadly force to defend themselves or others. However, it is also important to see if black/Latinx people killed by police were more likely to be unarmed than white/Asian people killed by police.

In [None]:
# Break the 'armed' column down into five named categories plus a catch-all.
# Any value that is not exactly one of the five named strings is counted under
# "other weapons".
named_weapon_counts = {'unarmed': 0, 'gun': 0, 'knife': 0, 'vehicle': 0, 'toy weapon': 0}
total_other_weapon = 0

for armed_with in police_shootings_data['armed']:
    if armed_with in named_weapon_counts:
        named_weapon_counts[armed_with] += 1
    else:
        total_other_weapon += 1

total_unarmed = named_weapon_counts['unarmed']
total_gun = named_weapon_counts['gun']
total_knife = named_weapon_counts['knife']
total_vehicle = named_weapon_counts['vehicle']
total_toy_weapon = named_weapon_counts['toy weapon']
# Each row was counted exactly once, so the head-count is the sum of all buckets.
total_ppl = sum(named_weapon_counts.values()) + total_other_weapon

print('Percentage of people shot who were unarmed:', total_unarmed / total_ppl * 100, '%')
print('Percentage of people shot who were armed with guns:', total_gun / total_ppl * 100, '%')
print('Percentage of people shot who were armed with knives:', total_knife / total_ppl * 100, '%')
print('Percentage of people shot who were armed with vehicles:', total_vehicle / total_ppl * 100, '%')
print('Percentage of people shot who were armed with toy weapons:', total_toy_weapon / total_ppl * 100, '%')
print('Percentage of people shot who were armed with other weapons:', total_other_weapon / total_ppl * 100, '%')

# What percentage of white, black, Asian, and Latinx people killed by cops were unarmed?
As we can see from above, in the general population of people killed by cops, 7% were unarmed. When we look at the figures for people of varying races killed by cops, 6% of white people were unarmed, 10.7% of black people were unarmed, 1.81% of Asians were unarmed, and 7.4% of Latinx people were unarmed. As can be seen, Latinx and black people killed by cops are more likely to be unarmed than white/Asian people killed by cops. 

In [None]:
# For white (W), black (B), Latinx (H) and Asian (A) people, count how many were shot
# and how many of those were unarmed, then print the unarmed share for each group.
# The per-race totals are counted from the data in the same pass as the unarmed counts,
# rather than typed in by hand, so the percentages stay correct if the CSV changes.
race_totals = {'W': 0, 'B': 0, 'H': 0, 'A': 0}
race_unarmed = {'W': 0, 'B': 0, 'H': 0, 'A': 0}

# Loop originally written with help from another CRLS student, Pratyush.
for race, armed in zip(police_shootings_data['race'], police_shootings_data['armed']):
    if race in race_totals:
        race_totals[race] += 1
        if armed == 'unarmed':
            race_unarmed[race] += 1

# Total number of people of each race shot
white_total = race_totals['W']
black_total = race_totals['B']
latinx_total = race_totals['H']
asian_total = race_totals['A']

# Total number of people of each race shot, unarmed
white_unarmed = race_unarmed['W']
black_unarmed = race_unarmed['B']
latinx_unarmed = race_unarmed['H']
asian_unarmed = race_unarmed['A']


def _percent(part, whole):
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    return (part / whole) * 100 if whole else 0.0


print('Percentage of white people shot who were unarmed:', _percent(white_unarmed, white_total),'%')
print('Percentage of black people shot who were unarmed:', _percent(black_unarmed, black_total),'%')
print('Percentage of Asian people shot who were unarmed', _percent(asian_unarmed, asian_total),'%')
print('Percentage of Latinx people shot who were unarmed', _percent(latinx_unarmed, latinx_total),'%')

# Conclusions
- People shot by cops are disproportionately young, generally between 20 and 40
- Men are extremely overrepresented in police shootings, making up 95% of people shot by police.
- White & Asian people are disproportionately unlikely to be shot by cops, while Latinx & black people are disproportionately more likely to be shot by cops
- Without looking at race, 93% of people killed by police were armed in some way, mostly with guns or knives. 
- Latinx/black people killed by cops are more likely to have been unarmed than white/Asian people killed by police.

Overall, young black or Latino men are disproportionately likely to be shot by the police, even if unarmed. This broadly confirms what many activists have been pointing out, but I think that going through the process of finding the numbers yourself can help sway anyone who is still skeptical of the role of structural racism in policing. 