# Import Incidents data

In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import json
from json import loads

In [2]:
# File to read
incidentsData = 'fatal-police-shootings-data.csv'
# Read files
incidents = pd.read_csv(incidentsData, encoding='utf-8')
incidents

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5411,5921,William Slyter,2020-06-13,shot,gun,22.0,M,W,Kansas City,MO,False,other,Other,False
5412,5922,TK TK,2020-06-13,shot,undetermined,,M,,San Bernardino,CA,False,attack,Not fleeing,False
5413,5924,Nicholas Hirsh,2020-06-15,shot,gun,31.0,M,W,Lawrence,KS,False,attack,Car,False
5414,5926,TK TK,2020-06-16,shot,gun,24.0,M,,Beach Park,IL,False,attack,Not fleeing,False


### Prepare data for death by race analysis and shots by state

### Data for 3 level multiple Doughnut

In [3]:
# Filter columns for first analysis
raceIncidents = incidents[['race','gender']]
raceIncidents

Unnamed: 0,race,gender
0,A,M
1,W,M
2,H,M
3,W,M
4,H,M
...,...,...
5411,W,M
5412,,M
5413,W,M
5414,,M


In [4]:
# Replace codes with values
raceIncidents['race'] = raceIncidents['race'].replace('W', 'White non-Hispanic')
raceIncidents['race'] = raceIncidents['race'].replace('B', 'Black non-Hispanic')
raceIncidents['race'] = raceIncidents['race'].replace('A', 'Asian')
raceIncidents['race'] = raceIncidents['race'].replace('N', 'Native American')
raceIncidents['race'] = raceIncidents['race'].replace('H', 'Hispanic')
raceIncidents['race'] = raceIncidents['race'].replace('O', 'Other')
raceIncidents['race'] = raceIncidents['race'].replace('None', 'Unknown')
raceIncidents['gender'] = raceIncidents['gender'].replace('F', '2')
raceIncidents['gender'] = raceIncidents['gender'].replace('M', '1')
raceIncidents = raceIncidents.dropna(how='any')
raceIncidents = raceIncidents.rename({'race': 'Race', 'gender': 'Gender'}, axis=1)
raceIncidents

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_ind

Unnamed: 0,Race,Gender
0,Asian,1
1,White non-Hispanic,1
2,Hispanic,1
3,White non-Hispanic,1
4,Hispanic,1
...,...,...
5408,Black non-Hispanic,1
5409,Black non-Hispanic,1
5410,White non-Hispanic,2
5411,White non-Hispanic,1


In [5]:
# Count male
maleRace = raceIncidents.loc[raceIncidents['Gender'] == '1',:]
maleRace = maleRace.groupby(maleRace['Race']).count()
maleRace = maleRace.rename({'Gender': 'Male'}, axis=1)
maleRace = maleRace.sort_values(by='Race',ascending=True)
maleRace

Unnamed: 0_level_0,Male
Race,Unnamed: 1_level_1
Asian,89
Black non-Hispanic,1251
Hispanic,876
Native American,73
Other,44
White non-Hispanic,2339


In [6]:
# Count female
femaleRace = raceIncidents.loc[raceIncidents['Gender'] == '2',:]
femaleRace = femaleRace.groupby(femaleRace['Race']).count()
femaleRace = femaleRace.rename({'Gender': 'Female'}, axis=1)
femaleRace = femaleRace.sort_values(by='Race',ascending=True)
femaleRace

Unnamed: 0_level_0,Female
Race,Unnamed: 1_level_1
Asian,4
Black non-Hispanic,47
Hispanic,26
Native American,5
Other,4
White non-Hispanic,136


In [7]:
# Merge to have separated count
raceGender = pd.merge(maleRace,femaleRace,on='Race',how='outer')
raceGender

Unnamed: 0_level_0,Male,Female
Race,Unnamed: 1_level_1,Unnamed: 2_level_1
Asian,89,4
Black non-Hispanic,1251,47
Hispanic,876,26
Native American,73,5
Other,44,4
White non-Hispanic,2339,136


### Data for Radar Chart

In [8]:
# Filter columns for second analysis
deathIncidents = incidents[['date','armed']]
deathIncidents

Unnamed: 0,date,armed
0,2015-01-02,gun
1,2015-01-02,gun
2,2015-01-03,unarmed
3,2015-01-04,toy weapon
4,2015-01-04,nail gun
...,...,...
5411,2020-06-13,gun
5412,2020-06-13,undetermined
5413,2020-06-15,gun
5414,2020-06-16,gun


In [9]:
deathIncidents[['Year','Month','Day']] = deathIncidents.date.str.split('-',expand=True)
del deathIncidents['date']
del deathIncidents['Month']
del deathIncidents['Day']
deathIncidents = deathIncidents[['Year','armed']]
deathIncidents = deathIncidents.rename({'armed': 'Armed'}, axis=1)
deathIncidents

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,Year,Armed
0,2015,gun
1,2015,gun
2,2015,unarmed
3,2015,toy weapon
4,2015,nail gun
...,...,...
5411,2020,gun
5412,2020,undetermined
5413,2020,gun
5414,2020,gun


In [10]:
deathIncidents['Armed'].unique()

array(['gun', 'unarmed', 'toy weapon', 'nail gun', 'knife', nan, 'shovel',
       'hammer', 'hatchet', 'undetermined', 'sword', 'machete',
       'box cutter', 'metal object', 'screwdriver', 'lawn mower blade',
       'flagpole', 'guns and explosives', 'cordless drill', 'crossbow',
       'metal pole', 'Taser', 'metal pipe', 'metal hand tool',
       'blunt object', 'metal stick', 'sharp object', 'meat cleaver',
       'carjack', 'chain', "contractor's level", 'unknown weapon',
       'stapler', 'beer bottle', 'bean-bag gun',
       'baseball bat and fireplace poker', 'straight edge razor',
       'gun and knife', 'ax', 'brick', 'baseball bat', 'hand torch',
       'chain saw', 'garden tool', 'scissors', 'pole', 'pick-axe',
       'flashlight', 'vehicle', 'baton', 'spear', 'chair', 'pitchfork',
       'hatchet and gun', 'rock', 'piece of wood', 'bayonet', 'pipe',
       'glass shard', 'motorcycle', 'pepper spray', 'metal rake',
       'crowbar', 'oar', 'machete and gun', 'tire iron',
 

In [24]:
# Of the police shoot to death encounters by year, the agressor used different type of arms
# Filter by year and then apply count by year
armed2015 = deathIncidents.loc[deathIncidents['Year'] == '2015',:]
armed2015 = armed2015[['Armed','Year']]
armed2015['Armed'] = armed2015['Armed'].str.title()
armed2015 = armed2015.groupby(armed2015['Armed']).count()
armed2015 = armed2015.sort_values(by='Armed',ascending=True)
armed2015 = armed2015.rename({'Year': 'Cases'}, axis=1)
armed2015

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Ax,1
Baseball Bat,1
Baseball Bat And Fireplace Poker,1
Bean-Bag Gun,1
Beer Bottle,1
Blunt Object,2
Box Cutter,7
Brick,1
Carjack,1
Chain,1


In [25]:
armed2016 = deathIncidents.loc[deathIncidents['Year'] == '2016',:]
armed2016 = armed2016[['Armed','Year']]
armed2016['Armed'] = armed2016['Armed'].str.title()
armed2016 = armed2016.groupby(armed2016['Armed']).count()
armed2016 = armed2016.sort_values(by='Armed',ascending=True)
armed2016 = armed2016.rename({'Year': 'Cases'}, axis=1)
armed2016

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Ax,5
Baseball Bat,6
Baton,2
Bayonet,1
Blunt Object,1
Brick,1
Chain Saw,2
Chair,1
Crossbow,2
Crowbar,1


In [26]:
armed2017 = deathIncidents.loc[deathIncidents['Year'] == '2017',:]
armed2017 = armed2017[['Armed','Year']]
armed2017['Armed'] = armed2017['Armed'].str.title()
armed2017 = armed2017.groupby(armed2017['Armed']).count()
armed2017 = armed2017.sort_values(by='Armed',ascending=True)
armed2017 = armed2017.rename({'Year': 'Cases'}, axis=1)
armed2017

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Air Conditioner,1
Ax,4
Baseball Bat,2
Baseball Bat And Bottle,1
Baton,1
Beer Bottle,1
Blunt Object,2
Chainsaw,1
Chair,1
Crossbow,2


In [27]:
armed2018 = deathIncidents.loc[deathIncidents['Year'] == '2018',:]
armed2018 = armed2018[['Armed','Year']]
armed2018['Armed'] = armed2018['Armed'].str.title()
armed2018 = armed2018.groupby(armed2018['Armed']).count()
armed2018 = armed2018.sort_values(by='Armed',ascending=True)
armed2018 = armed2018.rename({'Year': 'Cases'}, axis=1)
armed2018

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Ax,7
Baseball Bat,3
Baton,1
Bb Gun,2
Bow And Arrow,1
Box Cutter,3
Chain,1
Claimed To Be Armed,1
Crossbow,3
Glass Shard,2


In [28]:
armed2019 = deathIncidents.loc[deathIncidents['Year'] == '2019',:]
armed2019 = armed2019[['Armed','Year']]
armed2019['Armed'] = armed2019['Armed'].str.title()
armed2019 = armed2019.groupby(armed2019['Armed']).count()
armed2019 = armed2019.sort_values(by='Armed',ascending=True)
armed2019 = armed2019.rename({'Year': 'Cases'}, axis=1)
armed2019

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Air Pistol,1
Airsoft Pistol,1
Ax,7
Barstool,1
Baseball Bat,5
Baseball Bat And Knife,1
Baton,1
Bb Gun,2
Bb Gun And Vehicle,1
Beer Bottle,1


In [29]:
armed2020 = deathIncidents.loc[deathIncidents['Year'] == '2020',:]
armed2020 = armed2020[['Armed','Year']]
armed2020['Armed'] = armed2020['Armed'].str.title()
armed2020 = armed2020.groupby(armed2020['Armed']).count()
armed2020 = armed2020.sort_values(by='Armed',ascending=True)
armed2020 = armed2020.rename({'Year': 'Cases'}, axis=1)
armed2020

Unnamed: 0_level_0,Cases
Armed,Unnamed: 1_level_1
Baseball Bat,1
Bb Gun,1
Chair,1
Crowbar,1
Gun,282
Gun And Car,3
Gun And Vehicle,1
Hammer,3
Knife,67
Machete,4


### Data for area chart with linear gradient police shots during encounters by year

In [14]:
shotYear = deathIncidents.groupby(['Year']).count()
shotYear = shotYear.rename({'Armed': 'Police_shot'}, axis=1)
shotYear

Unnamed: 0_level_0,Police_shot
Year,Unnamed: 1_level_1
2015,938
2016,899
2017,903
2018,966
2019,1002
2020,481


### Create csv files

In [30]:
# Create csv files
shotYear.to_csv('csv/shotYear.csv')
maleRace.to_csv('csv/maleRace.csv')
femaleRace.to_csv('csv/femaleRace.csv')
raceGender.to_csv('csv/raceGender.csv')
armed2015.to_csv('csv/armed2015.csv')
armed2016.to_csv('csv/armed2016.csv')
armed2017.to_csv('csv/armed2017.csv')
armed2018.to_csv('csv/armed2018.csv')
armed2019.to_csv('csv/armed2019.csv')
armed2020.to_csv('csv/armed2020.csv')