In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the fatal police shootings CSV into a DataFrame and print its column summary.
csv_path = '../input/data-police-shootings/fatal-police-shootings-data.csv'
shots = pd.read_csv(csv_path)
shots.info()

In [None]:
# Summary statistics for every column (include='all' covers non-numeric columns as well).
shots.describe(include='all')

In [None]:
# Preview the first rows of the loaded data.
shots.head()

# Finding Null/Missing values

In [None]:
# Number of missing values in each column.
shots.isna().sum()

# Some values are missing from the dataset, but the share of missing values is very small relative to its size, so let's drop the rows that contain null/missing values.

In [None]:
# Drop (in place) every row that is missing a value in any of the listed columns,
# then re-count the missing values left in each column.
required_columns = ['armed', 'age', 'gender', 'race', 'flee']
shots.dropna(subset=required_columns, inplace=True)
shots.isna().sum()

# Adding new column 'Year' in our dataset for broad analysis.

In [None]:
# Parse the 'date' column into datetimes and derive a 'Year' column from it.
parsed_dates = pd.to_datetime(shots['date'])
shots['date'] = parsed_dates
shots['Year'] = parsed_dates.dt.year
shots.head()

# Finding total number of shots with respect to each year and plotting it.

In [None]:
# Count the records per year and reshape the counts into a two-column table
# ('Year', 'No of shots') with a fresh integer index.
yshot = shots['Year'].value_counts()
yearlyshots = yshot.rename_axis('Year').reset_index(name='No of shots')
yearlyshots.head()

In [None]:
# Bar chart of the number of records per year.
# The axis labels are set by *calling* the label setters. Assigning strings to
# plt.xlabel / plt.ylabel would replace those pyplot functions and leave the
# axes unlabelled. Using the Axes methods also keeps this cell working in a
# kernel where plt.xlabel / plt.ylabel have already been overwritten.
fig, ax = plt.subplots()
ax.bar(yearlyshots['Year'], yearlyshots['No of shots'])
ax.set_xlabel('Year')
ax.set_ylabel('No of Shots');

The chart shows that the number of police shootings is almost the same for each year in the 2016-2019 period.

# Now, let's dive deeper and look at the shootings with respect to gender, race, and whether the person was armed or not.

In [None]:
# Count the records per gender and reshape the counts into a two-column table
# ('gender', 'shots') with a fresh integer index.
Gendershots = shots['gender'].value_counts()
gendershots = Gendershots.rename_axis('gender').reset_index(name='shots')
gendershots.head()

In [None]:
# Bar chart of the per-gender counts held in the gendershots table.
sns.barplot(x='gender',y='shots',data=gendershots)

As we can see, more of the people shot were male than female.

In [None]:
# Count the records per race and reshape the counts into a two-column table
# ('Race', 'shots') with a fresh integer index.
rshots = shots['race'].value_counts()
raceshots = rshots.rename_axis('Race').reset_index(name='shots')
raceshots.head()

In [None]:
# Bar chart of the per-race counts held in the raceshots table.
sns.barplot(x='Race',y='shots',data=raceshots)

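White citizens account for the largest number of people killed in police shootings in absolute terms (these counts are not adjusted for each group's share of the population).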

In [None]:
# Label each record 'F' when its 'armed' value is exactly 'unarmed' and 'T'
# otherwise, then count the records carrying each label.
# NOTE(review): every value other than 'unarmed' is counted as 'T' - confirm
# this is intended for any unknown/undetermined-style categories in 'armed'.
is_unarmed = shots['armed'].eq('unarmed')
shots['armed or not'] = is_unarmed.map({True: 'F', False: 'T'})
ashots = shots['armed or not'].value_counts()
armshots = ashots.rename_axis('Armed or not').reset_index(name='shots')
armshots.head()

In [None]:
# Bar chart of the 'T' / 'F' counts held in the armshots table.
sns.barplot(x='Armed or not',y='shots',data=armshots)

Most of the people shot by the police were armed rather than unarmed.

# Analysing the shootings done by the police based on the threat level.

In [None]:
# Grouped bar chart of age by gender, split by threat_level.
# NOTE(review): with kind="bar" seaborn aggregates `y` per group (default
# estimator: mean), so the bar heights are average ages, not counts of
# shootings - confirm this is the quantity the analysis intends to compare.
sns.catplot(x="gender", y="age", hue="threat_level", kind="bar", data=shots);

In general, the attempted-attack threat level was high in most cases, and when gender is considered, the females who were shot by police showed more of an attack threat than the males.

# Analysing the shootings done by the police based on the signs of mental illness.

In [None]:
# Grouped bar chart of age by gender, split by signs_of_mental_illness.
# NOTE(review): with kind="bar" seaborn aggregates `y` per group (default
# estimator: mean), so the bar heights are average ages, not counts of
# shootings - confirm this is the quantity the analysis intends to compare.
sns.catplot(x="gender", y="age", hue="signs_of_mental_illness", kind="bar", data=shots);

Females showed more signs of having a mental illness as compared to males, and overall the difference is not huge between the people shot by police who had no signs of mental illness and those who had such signs.

In [None]:
# Box plot of the age distribution for each gender.
sns.boxplot(x='gender',y='age',data=shots);

The boxplot shows that the age of these people was between 30-45 in both females and males.

# Overall, we can say that police shootings mostly involved white people as compared to other races.
# In terms of gender, males were shot more often than females.
# In addition, armed people who were a threat to police and other citizens were more often the target of police shootings.