# Mortality / Deaths in the USA

Data from:
https://www.kaggle.com/mchirico/gun-vs-vehicle-deaths/notebook

Be advised all the data comes in letter and number codes. These codes are translated in the provided .json file.  
Most of the cleaning process will be about translating these codes.  
There is one dataset for each year.  Each year's data will be cleaned in sequence.

In [1]:
import numpy as np
import pandas as pd

Create two DataFrames: one for race, one for gun/vehicle deaths. Each year's counts are added as a new column, and the tables are transposed at the end.

In [2]:
# Two-row table of row labels; each year's homicide counts are added as a column.
Races = pd.DataFrame({'Year': ['notWhite', 'isWhite']})

In [3]:
# Two-row table of row labels; each year's death counts are added as a column.
GunCar = pd.DataFrame({'Year': ['Vehicle Death', 'Gun Homicide']})

Read only the columns we need, because the CSV files are too large to load in full.

In [4]:
# Columns loaded for the race analysis.
fields = [
    'manner_of_death',
    'race',
]

In [5]:
# Single column loaded for the gun/vehicle analysis: the cause-of-death recode.
MG_fields = [
    '358_cause_recode',
]

Filter Causes of Death.

In [6]:
# Firearm cause codes. Per the author's note: gun homicides and accidental
# discharges only; gun suicides are excluded.
GunCodes = [407, 435, 446]

# Cause codes 385 through 399 inclusive. Per the author's note these cover
# land vehicles and motorcycles.
VehicleCodes = list(range(385, 400))

# Both lists together, gun codes first; used to filter rows of each year's data.
GV_Codes = list(GunCodes)
GV_Codes.extend(VehicleCodes)

## 2005 Data

### Races

In [7]:
# Load only the columns listed in `fields` from the 2005 file.
data = pd.read_csv('2005_data.csv',usecols=fields)

Filter to show only rows with "manner of death" == homicide (number code 3)

In [8]:
# Keep only the rows whose manner_of_death code is 3.
data = data.loc[lambda frame: frame['manner_of_death'] == 3]

Make a binary column to compare Whites vs. everyone else. The race code for White is the integer 1.

In [9]:
# Boolean flag: True where the race code equals 1.
data = data.assign(isWhite=data['race'].eq(1))

In [10]:
# Show how many rows fall under each isWhite value.
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    9692
True     9543
Name: isWhite, dtype: int64

Add the 2005 counts to the Races table as a new column.

In [11]:
# Store the 2005 counts by position (row 0 = notWhite, row 1 = isWhite).
# Assigning the grouped Series directly depends on its False/True labels
# aligning with this frame's 0/1 integer index; pandas versions that do not
# match boolean labels against integers would leave the column as NaN.
# reindex() pins the False, True order and yields 0 for an absent group.
Races['2005'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [12]:
# Re-read the 2005 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2005_data.csv',usecols=MG_fields)

Filter to show only rows involving gun or vehicle deaths

In [13]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

Make binary column separating gun deaths and vehicle deaths

In [14]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [15]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    46515
True     13413
Name: Died by Gun, dtype: int64

Add the 2005 counts to the GunCar table as a new column.

In [16]:
# Store the 2005 counts by position (row 0 = vehicle, row 1 = gun).
# Assigning the grouped Series directly depends on its False/True labels
# aligning with this frame's 0/1 integer index; pandas versions that do not
# match boolean labels against integers would leave the column as NaN.
# reindex() pins the False, True order and yields 0 for an absent group.
GunCar['2005'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2006 Data

### Races

In [17]:
# Load the race columns for 2006 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2006_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [18]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    10026
True      9747
Name: isWhite, dtype: int64

In [19]:
# Store the 2006 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2006'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [20]:
# Re-read the 2006 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2006_data.csv',usecols=MG_fields)

In [21]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [22]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [23]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    46441
True     13709
Name: Died by Gun, dtype: int64

In [24]:
# Store the 2006 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2006'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2007 Data

### Races

In [25]:
# Load the race columns for 2007 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2007_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [26]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    9777
True     9724
Name: isWhite, dtype: int64

In [27]:
# Store the 2007 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2007'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [28]:
# Re-read the 2007 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2007_data.csv',usecols=MG_fields)

In [29]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [30]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [31]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    44958
True     13590
Name: Died by Gun, dtype: int64

In [32]:
# Store the 2007 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2007'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2008 Data

### Races

In [33]:
# Load the race columns for 2008 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2008_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [34]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    9233
True     9646
Name: isWhite, dtype: int64

In [35]:
# Store the 2008 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2008'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [36]:
# Re-read the 2008 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2008_data.csv',usecols=MG_fields)

In [37]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [38]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [39]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    40872
True     13094
Name: Died by Gun, dtype: int64

In [40]:
# Store the 2008 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2008'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2009 Data

### Races

In [41]:
# Load the race columns for 2009 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2009_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [42]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    8764
True     9075
Name: isWhite, dtype: int64

In [43]:
# Store the 2009 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2009'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [44]:
# Re-read the 2009 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2009_data.csv',usecols=MG_fields)

In [45]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [46]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [47]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    37143
True     12324
Name: Died by Gun, dtype: int64

In [48]:
# Store the 2009 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2009'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

In [49]:
# Display the gun/vehicle table accumulated so far (2005 through 2009).
GunCar

Unnamed: 0,Year,2005,2006,2007,2008,2009
0,Vehicle Death,46515,46441,44958,40872,37143
1,Gun Homicide,13413,13709,13590,13094,12324


## 2010 Data

### Races

In [50]:
# Load the race columns for 2010 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2010_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [51]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    8663
True     8617
Name: isWhite, dtype: int64

In [52]:
# Store the 2010 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2010'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [53]:
# Re-read the 2010 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2010_data.csv',usecols=MG_fields)

In [54]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [55]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [56]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    36203
True     11974
Name: Died by Gun, dtype: int64

In [57]:
# Store the 2010 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2010'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2011 Data

### Races

In [58]:
# Load the race columns for 2011 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2011_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [59]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    8768
True     8435
Name: isWhite, dtype: int64

In [60]:
# Store the 2011 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2011'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [61]:
# Re-read the 2011 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2011_data.csv',usecols=MG_fields)

In [62]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [63]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [64]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    36078
True     11946
Name: Died by Gun, dtype: int64

In [65]:
# Store the 2011 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2011'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2012 Data

### Races

In [66]:
# Load the race columns for 2012 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2012_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [67]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    9184
True     8694
Name: isWhite, dtype: int64

In [68]:
# Store the 2012 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2012'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [69]:
# Re-read the 2012 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2012_data.csv',usecols=MG_fields)

In [70]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [71]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [72]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    37283
True     12476
Name: Died by Gun, dtype: int64

In [73]:
# Store the 2012 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2012'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2013 Data

### Races

In [74]:
# Load the race columns for 2013 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2013_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [75]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    8910
True     8305
Name: isWhite, dtype: int64

In [76]:
# Store the 2013 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2013'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [77]:
# Re-read the 2013 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2013_data.csv',usecols=MG_fields)

In [78]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [79]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [80]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    36216
True     12018
Name: Died by Gun, dtype: int64

In [81]:
# Store the 2013 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2013'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2014 Data

### Races

In [82]:
# Load the race columns for 2014 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2014_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [83]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    8757
True     8083
Name: isWhite, dtype: int64

In [84]:
# Store the 2014 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2014'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [85]:
# Re-read the 2014 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2014_data.csv',usecols=MG_fields)

In [86]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [87]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [88]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    36233
True     11839
Name: Died by Gun, dtype: int64

In [89]:
# Store the 2014 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2014'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## 2015 Data

### Races

In [90]:
# Load the race columns for 2015 and keep only manner_of_death code 3.
data = (
    pd.read_csv('2015_data.csv', usecols=fields)
    .loc[lambda frame: frame['manner_of_death'] == 3]
)

In [91]:
# Flag rows whose race code equals 1, then show the count per flag value.
data = data.assign(isWhite=data['race'].eq(1))
data['isWhite'].groupby(data['isWhite']).count()

isWhite
False    10087
True      8798
Name: isWhite, dtype: int64

In [92]:
# Store the 2015 counts by position (row 0 = notWhite, row 1 = isWhite).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
Races['2015'] = (
    data.groupby('isWhite')['isWhite'].count()
    .reindex([False, True], fill_value=0)
    .values
)

### Guns and Vehicles

In [93]:
# Re-read the 2015 file, loading only the cause-of-death column (MG_fields).
data = pd.read_csv('2015_data.csv',usecols=MG_fields)

In [94]:
# Keep only the rows whose cause code appears in GV_Codes.
data = data.loc[lambda frame: frame['358_cause_recode'].isin(GV_Codes)]

In [95]:
# Boolean flag: True where the cause code is one of GunCodes.
data = data.assign(**{'Died by Gun': data['358_cause_recode'].isin(GunCodes)})

In [96]:
# Show how many rows fall under each 'Died by Gun' value.
data['Died by Gun'].groupby(data['Died by Gun']).count()

Died by Gun
False    38720
True     13796
Name: Died by Gun, dtype: int64

In [97]:
# Store the 2015 counts by position (row 0 = vehicle, row 1 = gun).
# Direct Series assignment depends on False/True labels aligning with the
# frame's 0/1 integer index, which pandas versions that do not match boolean
# labels against integers would turn into NaN. reindex() pins the order and
# yields 0 for an absent group.
GunCar['2015'] = (
    data.groupby('Died by Gun')['Died by Gun'].count()
    .reindex([False, True], fill_value=0)
    .values
)

## Export

In [98]:
# Display the finished race table (one column per year) before exporting.
Races

Unnamed: 0,Year,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,notWhite,9692,10026,9777,9233,8764,8663,8768,9184,8910,8757,10087
1,isWhite,9543,9747,9724,9646,9075,8617,8435,8694,8305,8083,8798


In [99]:
# Swap rows and columns so that each year becomes a row.
R2 = Races.transpose()

In [100]:
# Remove the row that came from the old 'Year' column, then name the two
# remaining columns.
R2 = R2.drop('Year', axis=0)
R2.columns = ['non_White', 'White']

In [101]:
# Write the per-year race table to CSV in the working directory.
R2.to_csv('Homicides_Race.csv')

In [102]:
# Display the finished gun/vehicle table (one column per year) before exporting.
GunCar

Unnamed: 0,Year,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,Vehicle Death,46515,46441,44958,40872,37143,36203,36078,37283,36216,36233,38720
1,Gun Homicide,13413,13709,13590,13094,12324,11974,11946,12476,12018,11839,13796


In [103]:
# Swap rows and columns so that each year becomes a row.
GC2 = GunCar.transpose()

In [104]:
# Remove the row that came from the old 'Year' column, then name the two
# remaining columns.
GC2 = GC2.drop('Year', axis=0)
GC2.columns = ['Vehicle Death', 'Gun Homicide']

In [105]:
# Write the per-year gun/vehicle table to CSV in the working directory.
GC2.to_csv('Gun_Vehicle_Deaths.csv')