# Introducing US Gun Deaths Data

In [1]:
import csv
f = open("guns.csv", "r")
guns_object = csv.reader(f)
data = list(guns_object)
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


# Removing Headers From A List Of Lists

In [2]:
headers = data[0]
data = data[1:]
print(headers)
print(data[:5])

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


# Counting Gun Deaths By Year

In [3]:
years = [row[1] for row in data]
year_counts = {}
for year in years:
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

# Exploring Gun Deaths By Month And Year

In [4]:
import datetime
dates = [datetime.datetime(int(row[1]), int(row[2]), 1) for row in data] # create datetime.datetime object for each row
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [5]:
date_counts = {}
for date in dates:
    if date in date_counts:
        date_counts[date] += 1
    else:
        date_counts[date] = 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

# Exploring Gun Deaths By Race And Sex

In [6]:
sex_list = [row[5] for row in data]
sex_counts = {}
for sex in sex_list:
    if sex in sex_counts:
        sex_counts[sex] += 1
    else:
        sex_counts[sex] = 1
        
race_list = [row[7] for row in data]
race_counts = {}
for race in race_list:
    if race in race_counts:
        race_counts[race] += 1
    else:
        race_counts[race] = 1
        
print(sex_counts)
print(race_counts)

{'M': 86349, 'F': 14449}
{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}


Almost six times more male than female gun deaths. Majority of gun deaths occurred to people of white race. There were about the same number of deaths in years 2012-2014.

# Reading In A Second Dataset

Unless we know the proportion of each race in the US, we won't be able to meaningfully compare the numbers above. What we really want to get is a rate of gun deaths per 100000 people of each race. In order to do this, we need to read in data about what percentage of the US population falls into each racial category.

In [7]:
f = open("census.csv", "r")
census_object = csv.reader(f)
census = list(census_object)
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


# Computing Rates Of Gun Deaths Per Race

The racial categories are named slightly differently in census and in data. We need to manually construct a dictionary that allows us to map between them, and perform the division. 

In [8]:
mapping = {
    "Black" : census[1][12],
    "Hispanic" : census[1][11],
    "Native American/Native Alaskan" : census[1][13],
    "White" : census[1][10],
    "Asian/Pacific Islander" : census[1][14] + census[1][15]
}
print(mapping)

{'Black': '40250635', 'Hispanic': '44618105', 'Native American/Native Alaskan': '3739506', 'White': '197318956', 'Asian/Pacific Islander': '15159516674625'}


In [9]:
race_per_hundredk = {}
for race, count in race_counts.items():
    race_per_hundredk[race] = count/int(mapping[race])*100000
    
print(race_per_hundredk)

{'Asian/Pacific Islander': 8.746980714890115e-06, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


# Filtering By Intent

In [10]:
intents = [row[3] for row in data]
races = [row[7] for row in data]

In [11]:
homicide_race_counts = {}
for i, race in enumerate(races):
    if intents[i] == "Homicide":
        if race in homicide_race_counts:
            homicide_race_counts[race] += 1
        else:
            homicide_race_counts[race] = 1
print(homicide_race_counts) 

{'White': 9147, 'Asian/Pacific Islander': 559, 'Black': 19510, 'Native American/Native Alaskan': 326, 'Hispanic': 5634}


In [12]:
homicide_race_per_hundredk = {}
for race, count in homicide_race_counts.items():
    homicide_race_per_hundredk[race] = count/int(mapping[race])*100000
    
print(homicide_race_per_hundredk)

{'White': 4.6356417981453335, 'Asian/Pacific Islander': 3.687452654316421e-06, 'Black': 48.471284987180944, 'Native American/Native Alaskan': 8.717729026240365, 'Hispanic': 12.627161104219914}


People of black race die of homicide 12 times more often than people of white race and 4 times more often than people of Hispanic origin.

# Exploring the link, if any, between month and homicide rate

In [13]:
months_list = [row[2] for row in data]
homicide_month_counts = {}

for i, month in enumerate(months_list):
    if intents[i] == "Homicide":
        if month in homicide_month_counts:
            homicide_month_counts[month] += 1
        else:
            homicide_month_counts[month] = 1

print(homicide_month_counts)

{'03': 2780, '04': 2845, '06': 3130, '07': 3269, '08': 3125, '09': 2966, '10': 2968, '12': 3191, '01': 2829, '02': 2178, '05': 2976, '11': 2919}


Most homicide cases, over 3 thousand per month, happened in June, July, August and December. The lowest rate was in February.

# Exploring the homicide rate by gender

In [14]:
homicide_sex_counts = {}
for i, sex in enumerate(sex_list):
    if intents[i] == "Homicide":
        if sex in homicide_sex_counts:
            homicide_sex_counts[sex] += 1
        else:
            homicide_sex_counts[sex] = 1
            
print(homicide_sex_counts)

{'M': 29803, 'F': 5373}


Over 5.5 times more men than women die of homicide.

# Exploring the rates of other intents by gender and race

In [15]:
suicide_sex_counts = {}
for i, sex in enumerate(sex_list):
    if intents[i] == "Suicide":
        if sex in suicide_sex_counts:
            suicide_sex_counts[sex] += 1
        else:
            suicide_sex_counts[sex] = 1
            
print(suicide_sex_counts)

{'M': 54486, 'F': 8689}


In [16]:
accident_sex_counts = {}
for i, sex in enumerate(sex_list):
    if intents[i] == "Accidental":
        if sex in accident_sex_counts:
            accident_sex_counts[sex] += 1
        else:
            accident_sex_counts[sex] = 1
            
print(accident_sex_counts)

{'M': 1421, 'F': 218}


Men commit suicide over 6 times more often than women. Men also die in gun accidents over 6 times more often than women.

In [17]:
suicide_race_counts = {}
for i, race in enumerate(races):
    if intents[i] == "Suicide":
        if race in suicide_race_counts:
            suicide_race_counts[race] += 1
        else:
            suicide_race_counts[race] = 1
print(suicide_race_counts) 

{'Asian/Pacific Islander': 745, 'White': 55372, 'Native American/Native Alaskan': 555, 'Black': 3332, 'Hispanic': 3171}


In [18]:
suicide_race_per_hundredk = {}
for race, count in suicide_race_counts.items():
    suicide_race_per_hundredk[race] = count/int(mapping[race])*100000
    
print(suicide_race_per_hundredk)

{'Asian/Pacific Islander': 4.914404700296482e-06, 'White': 28.06217969245692, 'Native American/Native Alaskan': 14.841532544673013, 'Black': 8.278130270491385, 'Hispanic': 7.106980451097149}


White people commit suicide much more often than other races.

In [19]:
accident_race_counts = {}
for i, race in enumerate(races):
    if intents[i] == "Accidental":
        if race in accident_race_counts:
            accident_race_counts[race] += 1
        else:
            accident_race_counts[race] = 1
print(accident_race_counts) 

{'White': 1132, 'Black': 328, 'Hispanic': 145, 'Asian/Pacific Islander': 12, 'Native American/Native Alaskan': 22}


In [20]:
accident_race_per_hundredk = {}
for race, count in accident_race_counts.items():
    accident_race_per_hundredk[race] = count/int(mapping[race])*100000
    
print(accident_race_per_hundredk)

{'White': 0.5736904466492313, 'Black': 0.814893976206835, 'Hispanic': 0.3249801846133985, 'Asian/Pacific Islander': 7.915819651484268e-08, 'Native American/Native Alaskan': 0.5883130017708221}


Black people die in gun accidents more often than other races.

# Finding out if gun death rates correlate to location

In [21]:
location_list = [row[9] for row in data]
location_counts = {}

for location in location_list:
    if location in location_counts:
        location_counts[location] += 1
    else:
        location_counts[location] = 1
        
location_counts

{'Farm': 470,
 'Home': 60486,
 'Industrial/construction': 248,
 'NA': 1384,
 'Other specified': 13751,
 'Other unspecified': 8867,
 'Residential institution': 203,
 'School/instiution': 671,
 'Sports': 128,
 'Street': 11151,
 'Trade/service area': 3439}

The majority of homicides happen at home or on the street.