# Data & Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
from plotly.offline import iplot
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import cufflinks as cf
%matplotlib inline

# Print the full path of every file found below the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, files in os.walk('/kaggle/input')
    for name in files
)
for path in input_paths:
    print(path)

In [None]:
# Initialise plotly's offline notebook mode for this session.
py.offline.init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well; the .iplot() calls on pandas
# objects later in the notebook are provided by cufflinks.
cf.go_offline()

In [None]:
# Path of the shootings CSV inside the Kaggle input directory.
filePath = r"/kaggle/input/us-police-shootings/shootings.csv"

# Load the CSV into the frame that every later cell reads from.
dataFrame = pd.read_csv(filePath)

### Feature Generation

In [None]:
# Convert the 'date' column to datetimes; the time-series cells below rely on
# its .dt accessor (to_period / strftime).
dataFrame['date'] = pd.to_datetime(dataFrame['date'])
# Disabled: derived calendar columns (year, month name, month number,
# weekday name, day of month). They are not used by the cells below.
#dataFrame['Year'] = pd.to_datetime(dataFrame['date']).dt.year.astype('category')
#dataFrame['Month'] = dataFrame['date'].dt.strftime('%B')
#dataFrame['month_num']=dataFrame['date'].dt.strftime('%m')
#dataFrame['Day of Week'] = dataFrame['date'].dt.strftime('%A')
#dataFrame['Date of Month'] = dataFrame['date'].dt.strftime('%d')


# Graphical Analysis

## 1. Most affected race

In [None]:
# Number of records per value of the `race` column, drawn with seaborn.
figure_size = (10, 7)
plt.figure(figsize=figure_size)
sns.countplot(data=dataFrame, x="race")
plt.show()

## 2. Gender proportion

In [None]:
# Number of records per value of the `gender` column, drawn via cufflinks' iplot.
gender_counts = dataFrame['gender'].value_counts()
gender_counts.iplot(
    kind='bar',
    xTitle='Gender',
    yTitle='Number of Victims',
    title='Death Toll/Gender',
)

## 3. Mental Health - Shooting relation

In [None]:
# Donut chart: share of records per value of `signs_of_mental_illness`.
illness_counts = dataFrame['signs_of_mental_illness'].value_counts()

illness_donut = go.Pie(
    labels=illness_counts.index,
    values=illness_counts,
    hole=0.5,
    textinfo='label + percent',
)

fig = go.Figure(data=[illness_donut])
fig.update_layout(title_text='Victim Mentally ill?', title_x=0.5)
fig.show()

## 4. Fleeing?

In [None]:
# Donut chart: share of records per value of the `flee` column.
flee_counts = dataFrame['flee'].value_counts()

flee_donut = go.Pie(
    labels=flee_counts.index,
    values=flee_counts,
    hole=0.5,
    textinfo='label + percent',
)

fig = go.Figure(data=[flee_donut])
fig.update_layout(title_text='Victim Flee?', title_x=0.5)
fig.show()

In [None]:
# Bar chart: number of records per value of `threat_level`.
threat_counts = dataFrame['threat_level'].value_counts()

fig = go.Figure(
    data=[
        go.Bar(
            x=threat_counts.index,
            y=threat_counts,
            marker={"color": "aquamarine"},
        )
    ],
    layout=go.Layout(
        title="Shooting-Threat level relation",
        title_font_size=30,
        plot_bgcolor="blanchedalmond",
        xaxis={"title": "Threat level", "title_font_size": 20},
        yaxis={"title": "Number of Victims", "title_font_size": 20},
    ),
)
fig.show()

In [None]:
# Bar chart: number of records per value of the `state` column.
state_counts = dataFrame['state'].value_counts()
state_counts.iplot(
    kind='bar',
    xTitle='States',
    yTitle='Number of Victims',
    title='Victims in each State',
)

In [None]:
# Horizontal count plot of `arms_category`, most frequent category first.
arms_order = dataFrame['arms_category'].value_counts().index
plt.figure(figsize=(20, 15))
statePlot = sns.countplot(data=dataFrame, y='arms_category', order=arms_order)

## Time Series Analysis
### Daily Report : Jan, 2015 - June, 2020

In [None]:
# Number of records per calendar day, indexed by timestamp.
daily_df = (
    dataFrame['date']
    .groupby(dataFrame['date'].dt.to_period('D'))  # 'D' is the canonical daily alias
    .agg('count')
    .to_frame(name='Count')
)
daily_df.index = daily_df.index.to_timestamp()

# groupby only produces the days that occur in the data, so days without any
# record would be absent from the series and from the histogram of daily
# counts. Re-insert them with an explicit count of 0.
if not daily_df.empty:
    daily_df = daily_df.asfreq('D', fill_value=0)

In [None]:
# Daily counts over time, drawn as markers only (no connecting line).
plt.figure(figsize=(25, 10))
plt.plot(daily_df['Count'], linestyle="", marker='.')
plt.title('Death Count per day')
plt.xlabel('Date')
plt.ylabel('Death Count')
plt.show()

In [None]:
# Histogram of the per-day counts.
daily_counts = daily_df['Count']
daily_counts.iplot(
    kind='hist',
    xTitle='Daily Death count',
    yTitle='Frequency',
    title='Death frequency distribution',
)

## Monthly Report

In [None]:
# Number of records per calendar month, indexed by the month's first day.
monthly_df = (
    dataFrame['date']
    .groupby(dataFrame['date'].dt.to_period('M'))
    .agg('count')
    .to_frame(name="Count")
)
monthly_df.index = monthly_df.index.to_timestamp()

# groupby drops months that have no record at all. Re-insert them with a
# count of 0 so the series is contiguous; the per-year charts further down
# label bars by position and would otherwise attach the wrong month name.
if not monthly_df.empty:
    monthly_df = monthly_df.asfreq('MS', fill_value=0)

In [None]:
# Line chart of the monthly counts, built with plotly graph objects.
fig = go.Figure(
    data=[
        go.Scatter(
            x=monthly_df.index,
            y=monthly_df['Count'],
            name="Victim count",
            line={"color": "#7F7F7F"},
        )
    ],
    layout=go.Layout(
        title="Monthly Death Count : Jan, 2015 - June, 2020",
        title_font_size=30,
        xaxis={"title": "Year", "title_font_size": 20},
        yaxis={"title": "Number of Victim", "title_font_size": 20},
    ),
)
fig.show()

In [None]:
# Same monthly series, drawn through cufflinks' iplot with its default chart kind.
monthly_line_options = dict(
    xTitle='Year',
    yTitle='Number of Victims',
    title='Monthly Death Count : Jan, 2015 - June, 2020',
)
monthly_df.iplot(**monthly_line_options)

In [None]:
# Box plot of the per-month counts.
monthly_counts = monthly_df['Count']
monthly_counts.iplot(
    kind='box',
    yTitle='Number of Victims',
    title='Death frequency distribution',
)

### Monthly Report every year

In [None]:
# Group the monthly counts into yearly bins; the per-year charts below fetch
# each year's rows with groups.get_group(key).
# NOTE(review): the 'A' frequency alias is reported as deprecated in recent
# pandas releases (2.2+) in favour of 'YE' — confirm against the installed
# version before changing it.
groups = monthly_df.groupby(pd.Grouper(freq = 'A'))

In [None]:
# One bar trace per calendar year, with a slider that shows one year at a time.
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Take the year labels from the grouped data instead of a hard-coded list so
# the cell keeps working when the dataset covers a different range of years.
# Yearly bins that contain no rows are skipped.
yearly = [(str(key.year), frame) for key, frame in groups if len(frame)]
year = [label for label, _ in yearly]

fig = go.Figure()

for position, (label, frame) in enumerate(yearly):
    fig.add_trace(
        go.Bar(
            # Label each bar by the month of its own timestamp rather than by
            # its position, so a year that does not start in January (or has
            # a gap) is still labelled correctly.
            x=[months[m - 1] for m in frame.index.month],
            y=frame.Count.values,
            name=label,
            # Traces are visible by default; hide all but the first so the
            # initial view matches the slider's active step (index 0).
            visible=(position == 0)))

steps = []
for position, label in enumerate(year):
    # Each step shows exactly one trace and updates the title to its year.
    visibility = [False] * len(year)
    visibility[position] = True
    steps.append(dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "Death count in " + label}],
        name=label,
        label=label,
        visible=True))

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Year: "},
    pad={"t": 10},
    steps=steps
)]

fig.update_layout(sliders=sliders)
if year:
    # Start with the same title the first slider step would set.
    fig.update_layout(title="Death count in " + year[0])

fig.show()

In [None]:
# One plotly bar subplot per calendar year, stacked vertically.
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Derive the years (and therefore the number of subplot rows) from the data
# rather than assuming exactly six years; empty yearly bins are skipped.
yearly = [(str(key.year), frame) for key, frame in groups if len(frame)]

# make_subplots needs at least one row even if there is nothing to draw.
fig = make_subplots(rows=max(len(yearly), 1), x_title="Months", y_title='Death Count')
for row, (label, frame) in enumerate(yearly, start=1):
    fig.add_bar(
        # Month label comes from each row's timestamp, not from its position.
        x=[months[m - 1] for m in frame.index.month],
        y=frame.Count.values,
        row=row,
        col=1,
        name=label,
        showlegend=True)

fig.update_layout(height=2000)
fig.show()

In [None]:
# One matplotlib bar subplot per yearly group, stacked in a 6x1 grid.
keys = groups.groups.keys()
year = ['2015', '2016', '2017', '2018', '2019', '2020']
months = ['Jan', 'Feb', 'March', 'April', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
color = ['b', 'g', 'r', 'c', 'm', 'y']

plt.figure(figsize=(15, 20))
for i, key in enumerate(keys, start=1):
    yearly_counts = groups.get_group(key)
    plt.subplot(6, 1, i)
    plt.xlabel(year[i - 1], fontsize=16)
    plt.ylabel('Death Count', fontsize=16)
    # Bars are labelled with the first N month names, N = rows in this year.
    plt.bar(
        months[:yearly_counts.index.size],
        yearly_counts.Count.values,
        color=color[i - 1],
        alpha=0.7,
    )

plt.tight_layout()
plt.show()


## Months with the most recorded deaths

In [None]:
# Total number of records per calendar month, summed over all years.
order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# dt.month_name() returns English names by default, whereas strftime('%B')
# follows the process locale and could produce labels that are not in `order`.
month_df = (
    dataFrame['date']
    .groupby(dataFrame['date'].dt.month_name())
    .agg('count')
    .to_frame(name="Count")
)

# reindex (unlike .loc) does not raise when a month has no records at all;
# such a month is plotted with a count of 0.
month_df.reindex(order, fill_value=0).iplot(
    xTitle='Month',
    yTitle='Number of Victims',
    title='Victims/Month from 2015 to 2020',
)

## Years with the most recorded deaths

In [None]:
# Total number of records per calendar year (year rendered as a 4-digit string).
year_labels = dataFrame['date'].dt.strftime('%Y')
year_df = dataFrame['date'].groupby([year_labels]).agg('count').to_frame(name="Count")
year_df.iplot(
    xTitle='Year',
    yTitle='Number of Victims',
    title='Victims/Year from 2015 to 2020',
)

## Days of the week with the most recorded deaths

In [None]:
# Total number of records per day of the week, summed over all years.
order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# dt.day_name() returns English names by default, whereas strftime('%A')
# follows the process locale and could produce labels that are not in `order`.
day_df = (
    dataFrame['date']
    .groupby(dataFrame['date'].dt.day_name())
    .agg('count')
    .to_frame(name="Count")
)

# reindex (unlike .loc) does not raise when a weekday has no records at all;
# such a weekday is plotted with a count of 0.
day_df.reindex(order, fill_value=0).iplot(
    xTitle='Day of Week',
    yTitle='Number of Victims',
    title='Victims/Day from 2015 to 2020',
)

In [None]:
#from sklearn.preprocessing import LabelEncoder

#label_encoder = LabelEncoder()

#dataFrame['Gender'] = label_encoder.fit_transform(dataFrame['gender'])
#dataFrame['Gender'] = dataFrame['Gender'].astype('category')
# Male = 1, Female = 0

#dataFrame['Mental_Health'] = label_encoder.fit_transform(dataFrame['signs_of_mental_illness'])
#dataFrame['Mental_Health'] = dataFrame['Mental_Health'].astype('category')
# True = 1, False = 0

#dataFrame['Body_Camera'] = label_encoder.fit_transform(dataFrame['body_camera'])
#dataFrame['Body_Camera'] = dataFrame['Body_Camera'].astype('category')
# True = 1, False =0

#dataFrame['Armed'] = dataFrame['arms_category'].replace(to_replace = 'Unarmed', value=0)
#dataFrame['Armed'] = dataFrame['Armed'].mask(dataFrame['Armed'] != 0, 1)
#dataFrame['Armed'] = dataFrame['Armed'].astype('category')
# Armed = 1, Unarmed = 0

#dataFrame['Fleeing'] = dataFrame['flee'].replace(to_replace = 'Not fleeing', value=0)
#dataFrame['Fleeing'] = dataFrame['Fleeing'].mask(dataFrame['Fleeing'] != 0, 1)
#dataFrame['Fleeing'] = dataFrame['Fleeing'].astype('category')
# Fleeing = 1, Not Fleeing = 0

#dataFrame['Threat'] = dataFrame['threat_level'].replace(to_replace = 'attack', value=1)
#dataFrame['Threat'] = dataFrame['Threat'].mask(dataFrame['Threat'] != 1, 0)
#dataFrame['Threat'] = dataFrame['Threat'].astype('category')
# Attack = 1, Other = 0

#race = pd.get_dummies(dataFrame['race'])
#dataFrame = pd.concat([dataFrame, race], axis = 1)
#dataFrame.drop(['id', 'name', 'manner_of_death', 'armed', 'race', 'gender', 'city', 'signs_of_mental_illness', 'threat_level', 'flee', 'body_camera', 'arms_category', 'Other'], axis = 1, inplace = True)
#dataFrame[['Asian', 'Black', 'Hispanic', 'Native', 'White']] = dataFrame[['Asian', 'Black', 'Hispanic', 'Native', 'White']].astype('category')