# 1. Load Packages and Data

In [None]:
# load packages
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# read the cleaned police-death records from the input directory
csv_path = '../input/clean_data.csv'
police_data = pd.read_csv(csv_path)

# preview the first four rows as a quick sanity check
police_data.head(n=4)

In [None]:
# check the data dimensions: `shape` is a (n_rows, n_columns) tuple
police_data.shape

In [None]:
# check if there is any missing data: info() prints each column's dtype and
# non-null count, so a count below the total number of rows would reveal gaps
police_data.info()

# 2. Data analysis

There are no missing values, so we can start the analysis.

## 2.1. Death of Human and Canine

In [None]:
# count the records for each value of the boolean `canine` flag
# (value_counts sorts by frequency, so the order is data-dependent)
death_sum = police_data.canine.value_counts()

fig = plt.figure(figsize=(6, 4))
death_sum.plot(kind='bar', color='skyblue')

plt.title("Summary of Death")
plt.ylabel("Total Cases")

# A pandas bar plot draws its bars at positions 0..n-1 in the order of the
# series, so ticks are placed by position rather than by the boolean index
# values. Each label is derived from the flag itself, which keeps the bars
# correctly labelled whichever category happens to be more frequent.
tick_labels = ['Canine' if is_canine else 'Police' for is_canine in death_sum.index]
plt.xticks(range(len(death_sum)), tick_labels, rotation=45)
plt.show()

There are about 23,000 police death cases. Since human (police) deaths dominate the data,
I will focus on police deaths in the following analysis.


## 2.2. Police Death by Cause

In [None]:
# keep human officers only (drop canine records), then tally deaths per cause
is_officer = police_data['canine'] == False
death_cause = police_data.loc[is_officer, 'cause_short'].value_counts()

# horizontal bars so the cause names stay readable
fig = plt.figure(figsize=(6, 8))
death_cause.plot.barh(color='skyblue')

plt.xlabel('Total Cases')
plt.title('Summary of Police Death by Cause')
plt.show()

From the figure above, we can see that gunfire is the leading cause of police deaths.

## 2.3. Police Death by Year

In [None]:
# yearly number of human-officer deaths, ordered chronologically
is_officer = police_data['canine'] == False
death_year = police_data.loc[is_officer, 'year'].value_counts().sort_index()

fig = plt.figure(figsize=(8, 6))
plt.plot(death_year, color='brown')

plt.title('Death by Year')
plt.xlabel("Year")
plt.ylabel("Total Cases")

# one tick every 30 years, starting one year before the first recorded year
# and extending slightly past the last one
first_year = death_year.index[0]
last_year = death_year.index[-1]
year_ticks = np.arange(first_year - 1, last_year + 15, 30)
plt.xticks(year_ticks, rotation=45)
plt.show()

There are two peaks, one in the 1910s-1940s and one in the 1960s-1970s. There has been a
significant drop in police deaths since 2000.

## 2.4. Police Death by State

In [None]:
# number of human-officer deaths per state (canine records excluded)
is_officer = police_data['canine'] == False
death_state = police_data.loc[is_officer, 'state'].value_counts()

# tall figure so every state gets its own readable horizontal bar
fig = plt.figure(figsize=(8, 12))
death_state.plot.barh(color='skyblue')

plt.xlabel('Total Cases')
plt.title('Summary of Police Death by State')
plt.show()

TX, CA and NY are the top 3 states with the most police deaths. However, they
are also among the most populous states.

## 2.5. 9/11 Related Death

In [None]:
# human-officer deaths attributed to 9/11 related illness, counted per year
is_911_illness = police_data['cause_short'] == '9/11 related illness'
is_officer = police_data['canine'] == False
death_911 = (
    police_data.loc[is_911_illness & is_officer, 'year']
    .value_counts()
    .sort_index()
)

fig = plt.figure(figsize=(8, 6))
death_911.plot.bar(color='skyblue')
plt.title('9/11 Related Death by Year')
plt.ylabel("Total Cases")
plt.show()

9/11 is one of the biggest tragedies in US history. More than 100 police deaths are related to this event, and its impact still exists today.