First, let's open the csv file and turn it into a list of lists.

In [1]:
import csv

f = open("guns.csv")
reader = csv.reader(f)
data = list(reader)

data[0:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

Next, we'll remove the header from the data since we don't need it.

In [2]:
headers = data[0]
data = data[1:len(data)]
print(headers)
print('\n')
print(data[0:5])

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


Is there a difference in the number of gun deaths by year? We can find out by counting up the number of deaths by each year.

In [3]:
years = []
year_counts = {}

for row in data:
    years.append(row[1])
for year in years:
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
        

print(year_counts)

{'2014': 33599, '2013': 33636, '2012': 33563}


There doesn't seem to be much of a difference between each of the three years, but maybe there's a difference by year and month.

In [4]:
import datetime
dates = [datetime.datetime(int(row[1]), int(row[2]), day = 1) for row in data]
print(dates[0:5])


[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]


In [5]:
date_counts = {}
for row in dates:
    if row in date_counts:
        date_counts[row] += 1
    else:
        date_counts[row] = 1
print(date_counts)

{datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2014, 2, 1, 0, 0): 2361, datetime.datetime(2014, 11, 1, 0, 0): 2756, datetime.datetime(2014, 5, 1, 0, 0): 2864, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2014, 4, 1, 0, 0): 2862, datetime.datetime(2014, 6, 1, 0, 0): 2931, datetime.datetime(2014, 3, 1, 0, 0): 2684, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2013, 3, 1, 0, 0): 2862, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2014, 1, 1, 0, 0): 2651, datetime.datetime(2013, 9, 1, 0, 0): 2742, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2013, 7, 1, 0, 0): 3079, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2014, 9, 1, 0, 0): 2914, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2012, 1, 1, 0, 0): 2758, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2013, 1, 1, 0, 0): 2864, date

There seems to be a little bit of variability when we look at months. What about sex and race?

In [6]:
def counter(index):
    trait_list = []
    trait_count = {}
    for row in data:
        trait_list.append(row[index])
    for trait in trait_list:
        if trait in trait_count:
            trait_count[trait] += 1
        else:
            trait_count[trait] = 1
    return trait_count

sex_counts = counter(5)
race_counts = counter(7)

print(sex_counts, race_counts)

{'F': 14449, 'M': 86349} {'Native American/Native Alaskan': 917, 'White': 66237, 'Black': 23296, 'Asian/Pacific Islander': 1326, 'Hispanic': 9022}


It seems that most gun deaths are by males and those with a white race. So far it doesn't seem like the time period matters much for when gun deaths occur compared to sex and race. We would probably need more information to make any inferences from the analysis. For example, are those that are white more likely to die by gunshot? Or are there more deaths because there are simply more whites?

In order to answer these questions, let's import data about the population of each race.

In [7]:
census_file = open('census.csv')
census = list(csv.reader(census_file))
c_header = census[0]
census = census[1]
print(c_header)
print(census)

['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races']
['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']


In [8]:
census_header_length = len(c_header)
census_length = len(census)
print(census_header_length)
print(census_length)

17
17


In [9]:
def find_index(string):
    counter = 0
    rounds = 0
    for key in c_header:
        if key == string and counter != 0:
            return counter - 1
        if key == string and counter == 0:
            return counter
        counter += 1
        
asia_index = find_index('Race Alone - Asian')
islander_index = find_index('Race Alone - Native Hawaiian and Other Pacific Islander')
black_index = find_index('Race Alone - Black or African American')
na_index = find_index('Race Alone - American Indian and Alaska Native')
white_index = find_index('Race Alone - White')
hispanic_index = find_index('Race Alone - Hispanic')

mapping = {
    'Asian/Pacific Islander': int(census[asia_index]) + int(census[islander_index]),
    'Black': int(census[black_index]),
    'Native American/Native Alaskan': int(census[na_index]),
    'White': int(census[white_index]),
    'Hispanic': int(census[hispanic_index])
}
print(mapping)


{'Native American/Native Alaskan': 40250635, 'Black': 44618105, 'Hispanic': 197318956, 'Asian/Pacific Islander': 18899022, 'White': 308745538}


In [10]:
race_per_hundredk = {}
for key in race_counts:
    number = (race_counts[key] / mapping[key])*100000
    race_per_hundredk[key] = number
    
print(race_per_hundredk)

{'Native American/Native Alaskan': 2.278224927383133, 'White': 21.453589395678975, 'Hispanic': 4.572292588047142, 'Black': 52.211988832784364, 'Asian/Pacific Islander': 7.016236078247858}


In [14]:
intents = [row[3] for row in data]
races = [row[7] for row in data]
homicide_race_per_hundredk = {}

for i, race in enumerate(races):
    if intents[i] == 'Homicide':
        if race not in homicide_race_per_hundredk:
            homicide_race_per_hundredk[race] = 1
        else:
            homicide_race_per_hundredk[race] += 1
print(homicide_race_per_hundredk)

{'Native American/Native Alaskan': 326, 'White': 9147, 'Black': 19510, 'Asian/Pacific Islander': 559, 'Hispanic': 5634}


In [15]:
for key in homicide_race_per_hundredk:
    number = (homicide_race_per_hundredk[key] / mapping[key])*100000
    homicide_race_per_hundredk[key] = number
print(homicide_race_per_hundredk)

{'Native American/Native Alaskan': 0.8099251104982568, 'White': 2.9626339085748987, 'Black': 43.7266441503959, 'Asian/Pacific Islander': 2.9578250133789994, 'Hispanic': 2.8552755975457322}


It seems that Blacks have a disproportionately high rate of gun deaths from homicide. Looking at counts alone would not have revealed this rate; only by looking at how the counts relate to the population and the intent can we see further meaning in gun death rates.