# Introduction to the U.S. Gun Deaths Dataset

The following csv is pulled from the FiveThirtyEight <a href="https://github.com/fivethirtyeight/guns-data/blob/master/full_data.csv" target="_blank">dataset</a> of US gun deaths between 2012 and 2014. Each row represents a single fatality. It includes the columns:

<ul>
<li>Year</li>
<li>Month</li>
<li>Intent (includes Suicide, Homicide, Accidental, NA, Undetermined)</li>
<li>Police (whether a police officer was involved with the shooting - either 0 (False) or 1 (True))</li>
<li>Sex</li>
<li>Age</li>
<li>Race</li>
<li>Hispanic</li>
<li>Place</li>
<li>Education</li>
</ul>

## Read in the dataset and display the first five rows

In [1]:
import csv

# Parse the whole CSV into memory as a list of rows, each row a list of strings.
# The context manager closes the file handle as soon as parsing finishes, and
# newline="" is what the csv module documents for files handed to csv.reader
# (it lets the reader handle line endings inside quoted fields itself).
with open("guns.csv", "r", newline="") as f:
    data = list(csv.reader(f))
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

# Removing Headers from a List of Lists

In [2]:
# Peel the header row off the front and keep only the records in `data`.
# NOTE: re-running this cell would strip one more (real) row from `data`.
headers, data = data[0], data[1:]
print(headers)
data[:5]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

# Counting Gun Deaths by Year

In [3]:
# Tally fatalities per calendar year; column 1 of each row is the year string.
years = [record[1] for record in data]
year_counts = {}
for year in years:
    # .get() supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

# Exploring Gun Deaths by Year and Month

In [4]:
import datetime

# One datetime per fatality, built from the year (column 1) and month (column 2).
# The rows carry no day-of-month column, so every date is pinned to the 1st.
dates = [datetime.datetime(int(row[1]), int(row[2]), 1) for row in data]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [5]:
# Count fatalities per (year, month); each key is the first-of-month datetime.
date_counts = {}
for month_start in dates:
    date_counts[month_start] = date_counts.get(month_start, 0) + 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

# Exploring Gun Deaths by Sex

In [6]:
# Count fatalities by the value in column 5 (sex).
sex_counts = {}
for row in data:
    sex = row[5]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1
sex_counts

{'F': 14449, 'M': 86349}

# Exploring Gun Deaths by Race

In [7]:
# Count fatalities by the value in column 7 (race).
race_counts = {}
for row in data:
    race = row[7]
    race_counts[race] = race_counts.get(race, 0) + 1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

# Reading in a Second Dataset

Our previous analysis of race only gives us the total number of gun deaths in the U.S. by race. To compare the groups fairly, we need the number of gun deaths per 100,000 people in each race. We can get the size of the U.S. population that falls into each racial category from the U.S. census data and map it to the race labels used in the gun deaths dataset.

In [8]:
# Parse the census CSV into a list of rows (a header row plus the data rows).
# The context manager closes the file handle once parsing is done, and
# newline="" is what the csv module documents for files handed to csv.reader.
with open("census.csv", "r", newline="") as f:
    census = list(csv.reader(f))
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

# Computing Rates of Gun Deaths Per Race

In [9]:
# Population for each race label used in the gun-deaths data, copied by hand
# from the single data row of the census table (April 1 2010 Census).
mapping = {
    # "Race Alone - Asian" + "Race Alone - Native Hawaiian and Other Pacific Islander"
    "Asian/Pacific Islander": 15159516 + 674625,
    # "Race Alone - American Indian and Alaska Native"
    "Native American/Native Alaskan": 3739506,
    # "Race Alone - Black or African American"
    "Black": 40250635,
    # "Race Alone - Hispanic"
    "Hispanic": 44618105,
    # "Race Alone - White"
    "White": 197318956
}

# Gun deaths per 100,000 people for each race.
race_per_hundredk = {
    race: deaths / mapping[race] * 100000
    for race, deaths in race_counts.items()
}
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

# Filtering by Intent for Homicide

In [10]:
# Parallel columns: intent (column 3) and race (column 7) of every fatality.
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Count by race only those fatalities whose intent is "Homicide".
homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [11]:
# Homicide gun deaths per 100,000 people for each race.
homicide_race_per_hundredk = {
    race: homicides / mapping[race] * 100000
    for race, homicides in homicide_race_counts.items()
}
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

# General Function to Filter by Intent

In [12]:
def gun_death_intent(column_idx, intent_type, rows=None):
    """Count fatalities of one intent, grouped by the values of one column.

    Args:
        column_idx: Index of the column whose values become the keys of the
            result (for example 2 for month, 5 for sex, 7 for race).
        intent_type: Intent label to keep, compared for equality against
            column 3 of each row (for example "Homicide" or "Suicide").
        rows: Optional list of row lists to scan. When omitted, the
            module-level ``data`` list is used, as before.

    Returns:
        A dict mapping each distinct value found in ``column_idx`` to the
        number of matching rows that carry it. Empty when no row matches.
    """
    if rows is None:
        # Fall back to the notebook-wide dataset so existing two-argument
        # calls keep working unchanged.
        rows = data
    counts = {}
    for row in rows:
        if row[3] == intent_type:
            key = row[column_idx]
            counts[key] = counts.get(key, 0) + 1
    return counts

# Link between Month and Homicide Rate
Homicides tend to peak during the summer months (June through August) and again in December, around the winter holiday season.

In [13]:
# Column 2 is the zero-padded month string; counts are pooled across all years.
month_homicides = gun_death_intent(column_idx=2, intent_type="Homicide")
month_homicides

{'01': 2829,
 '02': 2178,
 '03': 2780,
 '04': 2845,
 '05': 2976,
 '06': 3130,
 '07': 3269,
 '08': 3125,
 '09': 2966,
 '10': 2968,
 '11': 2919,
 '12': 3191}

# Homicide rate by gender
There are significantly more male than female homicide victims.

In [14]:
# Column 5 is the sex of the person killed ('F' or 'M').
gender_homicides = gun_death_intent(column_idx=5, intent_type="Homicide")
gender_homicides

{'F': 5373, 'M': 29803}

# Suicide by Race
We can map this to total population by race like we did for homicide. Gun deaths by suicide are disproportionately high for Native American/Native Alaskan and White.

In [15]:
# Column 7 is the race label; keep only fatalities whose intent is "Suicide".
race_suicides = gun_death_intent(column_idx=7, intent_type="Suicide")
race_suicides

{'Asian/Pacific Islander': 745,
 'Black': 3332,
 'Hispanic': 3171,
 'Native American/Native Alaskan': 555,
 'White': 55372}

In [16]:
# Suicide gun deaths per 100,000 people for each race.
suicide_race_per_hundredk = {
    race: suicides / mapping[race] * 100000
    for race, suicides in race_suicides.items()
}
suicide_race_per_hundredk

{'Asian/Pacific Islander': 4.705023152187416,
 'Black': 8.278130270491385,
 'Hispanic': 7.106980451097149,
 'Native American/Native Alaskan': 14.841532544673013,
 'White': 28.06217969245692}