# Understanding US Police Shooting Fatality

Let's get granular police shooting data to check whether what we hear in the news about police shooting fatalities is consistent with the actual data.

Source: Phil Mui

Reference: bit.ly/mui-asdrp

In [None]:
# Where the police-shooting records live: the online database pages and the
# local CSV exports, for both 2019 and 2020.
_DATABASE_URL = 'http://findmarley.org/the-data/database'
_DATA_DIR = '../data'

us_2019_url = _DATABASE_URL + '/2019'
us_2020_url = _DATABASE_URL + '/2020'

us_2019_file = _DATA_DIR + '/marley-police-fatality-2019.csv'
us_2020_file = _DATA_DIR + '/marley-police-fatality-2020june.csv'

In [None]:
import numpy as np
import pandas as pd

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns
import ssl

# NOTE(review): this swaps the ssl module's default HTTPS context factory for the
# unverified one, i.e. certificate verification is switched off for this kernel.
# Presumably added for the (now commented-out) pd.read_html download -- confirm
# it is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
%matplotlib inline

In [None]:
# df = pd.read_html(us_2020_url)

In [None]:
# Load the 2019 records, indexed by the dataset's ID column.
# parse_dates=True only asks pandas to try parsing the *index* (ID) as dates;
# naming the DATE column explicitly is what actually loads it as datetimes.
df = pd.read_csv(us_2019_file, encoding='utf-8', index_col='ID', parse_dates=['DATE'])

In [None]:
# (rows, columns) of the loaded table.
df.shape

In [None]:
# Summary statistics for the table's columns.
df.describe()

In [None]:
# Column labels available for the analysis below.
df.columns

In [None]:
# Peek at the first three records.
df.head(3)

In [None]:
# Convert DATE to datetimes and put the records in chronological order.
# sort_values returns a new, sorted frame (it does not sort in place), so the
# result has to be assigned back for the ordering to take effect.
df['DATE'] = pd.to_datetime(df.DATE)
df = df.sort_values(by='DATE')
df.head(5)

In [None]:
# Number of records on each date. size() counts every row in the group, whereas
# count() only counts rows whose AGE is non-null and would under-report any day
# that has a record with a missing AGE.
daily_count_series = df.groupby('DATE')['AGE'].size()
daily_count_series.shape

In [None]:
# First five dates and their record counts.
daily_count_series.head(5)

In [None]:
# Line chart of the number of fatal shootings recorded on each date.
fig = plt.figure(figsize=(14,6))
np.random.seed(123)

plt.title("2019 Daily Police Fatal Shooting in US")
plot_ = sns.lineplot(data=daily_count_series)

plt.ylabel('Daily Fatality')
plt.xlabel('Date')

# A bare plt.MaxNLocator(12) only constructs a locator and throws it away; it
# must be installed on the axis to actually limit the number of x ticks.
plot_.xaxis.set_major_locator(plt.MaxNLocator(12))

In [None]:
# Inspect the x-axis tick labels of the line chart drawn above.
print(plot_.get_xticklabels())

In [None]:
# Last five dates and their record counts.
daily_count_series.tail(5)

### Age distribution

In [None]:
# Number of records at each AGE value (size() counts the rows in each group).
age_death_input = df.groupby('AGE')['DATE'].size()

In [None]:
# Sanity check: show the container type and the first few (age, count) pairs.
print(type(age_death_input))
age_death_input.head(5)

In [None]:
# AGE == 0 is the dataset's marker for an unspecified age, so drop that bucket.
# drop(..., errors='ignore') can be re-run safely and also works when there is
# no 0 bucket; `del age_death_input[0]` raises KeyError in both of those cases.
age_death_input = age_death_input.drop(0, errors='ignore')
age_death_input.head(5)

In [None]:
# AGE=0 (which means unspecified AGE) was removed in the cell above;
# this shows how many distinct ages remain.
age_death_input.shape

In [None]:
# Largest age present in the data (the index holds the AGE values).
age_death_input.index.max()

In [None]:
# One zero-filled slot per age from 1 up to the oldest recorded age.
# np.zeros requires an integer length; index.max() is a float when the AGE
# column is float-typed (e.g. because of missing values), so cast explicitly.
zeros = np.zeros(int(age_death_input.index.max()))
len(zeros)

In [None]:
# Confirm the container type returned by np.zeros.
type(zeros)

In [None]:
# Empty per-age table: one row for every age 1..len(zeros), with a single
# integer DEATHS column initialised to 0.
age_labels = np.arange(len(zeros)) + 1
age_death = pd.DataFrame(
    data=zeros,
    index=age_labels,
    columns=['DEATHS'],
    dtype='int32',
)
age_death

In [None]:
# Copy each observed (age, count) pair into the per-age table; ages that never
# occur keep their initial 0.
for observed_age, death_count in zip(age_death_input.index, age_death_input.values):
    age_death.at[observed_age, 'DEATHS'] = death_count

In [None]:
# Per-age death counts after the fill loop above.
age_death

In [None]:
# Bar chart with one bar per age; bar height is the number of deaths at that age.
fig = plt.figure(figsize=(14,6))
np.random.seed(123)

ax = sns.barplot(x=age_death.index, y=age_death['DEATHS'])
ax.set_title("Age Distribution of 2019 Police Fatal Shooting in US")
ax.set_ylabel('# Deaths')
ax.set_xlabel('Age')

# Hide every other tick label so the age axis stays readable.
for tick_label in ax.get_xticklabels()[0::2]:
    tick_label.set_visible(False)

In [None]:
# Smoothed (kernel density) view of the age distribution.
sns.set(color_codes=True)
fig = plt.figure(figsize=(14,6))
np.random.seed(123)

plt.title("Age Distribution of 2019 Police Fatal Shooting in US")

# AGE == 0 marks an unspecified age in this dataset; leaving those rows in would
# add a spurious bump at zero and be inconsistent with the bar chart, which
# excludes them.
known_ages = df.loc[df.AGE > 0, 'AGE']
ax = sns.kdeplot(known_ages, shade=True)

plt.ylabel('Density')
plt.xlabel('Age')
plt.xlim(0, None)

In [None]:
# Number of records (non-null DATE) for each GENDER / RACE combination.
df.groupby(['GENDER', 'RACE'])['DATE'].count()

## US Population Characterization

Source: https://www.census.gov/quickfacts/fact/table/US/IPE120218

Population estimate, July 1, 2019 (V2019): 328,239,523

| Race and Ethnicity | Percentage | People |
| -------------------| -----------|--------|
| White                               | 76.5% | 251103235|
| Black or African American           | 13.4% | 43984096 |
| American Indian and Alaska Native   | 1.3%  | 4267114  |
| Asian                               | 5.9%  | 19366132 |
| Native Hawaiian Pacific Islander    | 0.2%  | 656479   |
| Two or More Races                   | 2.7%  | 8862467  |
| Hispanic or Latino                  | 18.3% | 60067833 |
| White alone, not Hispanic or Latino | 60.4% | 198256672|



In [None]:
# Number of records (non-null DATE) per RACE category.
race_death_count = df.groupby(['RACE'])['DATE'].count()
race_death_count

In [None]:
# Total US population: the Census estimate for July 1, 2019 quoted above.
US_POPULATION_2019 = 328239523

In [None]:
# Population of each race/ethnicity group, taken from the Census table above.
# Asian and Native Hawaiian/Pacific Islander are merged into a single group.
# NOTE(review): the White figure is the Census "White" row, which overlaps with
# the Hispanic/Latino row -- confirm whether "White alone, not Hispanic or
# Latino" is the better match for this dataset's categories.
_population_by_race = {
    'Asian/Pacific Islander': 19366132 + 656479,
    'European-American/White': 251103235,
    'Native American/Alaskan': 4267114,
    'Hispanic/Latino': 60067833,
    'African-American/Black': 43984096,
}
_n_groups = len(_population_by_race)

# Deaths and Rate start at zero and are filled in later.
race_data = pd.DataFrame({
    'Race': list(_population_by_race.keys()),
    'Population': list(_population_by_race.values()),
    'Deaths': np.zeros(_n_groups),
    'Rate': np.zeros(_n_groups),
})
race_data.set_index('Race')

In [None]:
# calculate deaths per 100,000 people
#
# Deaths: look up each group's count in race_death_count (0 when the group has
# no records). Rate: deaths / group population * 100,000.
# The earlier version divided by (population share of the US * 100,000), which
# does not give a per-100,000 rate even though the chart is labelled that way.
race_data['Deaths'] = race_data['Race'].map(race_death_count).fillna(0).astype(float)
race_data['Rate'] = race_data['Deaths'] / race_data['Population'] * 100000.0



In [None]:
# Inspect the Deaths and Rate columns filled in by the cell above.
race_data

In [None]:
# Bar chart comparing the Rate column across race/ethnicity groups.
fig = plt.figure(figsize=(14,6))
np.random.seed(123)

ax = sns.barplot(x=race_data['Race'], y=race_data['Rate'])
ax.set_title("2019 US Police Fatal Shooting Death per 100,000")
ax.set_ylabel('Death per 100,000')
ax.set_xlabel('')

In [None]:
# Mean age per RACE. AGE == 0 marks an unspecified age in this dataset, so those
# rows are excluded; otherwise they would pull every average downward.
df[df.AGE > 0].groupby('RACE')['AGE'].mean()

In [None]:
# Mean age per RACE / GENDER combination. AGE == 0 marks an unspecified age in
# this dataset, so those rows are excluded to avoid biasing the averages.
df[df.AGE > 0].groupby(['RACE', 'GENDER'])['AGE'].mean()