# Exploring Gun Deaths in the US
The data set contains information on gun deaths in the US from 2012 to 2014. Each row in the dataset represents a single fatality. Here's an explanation of each column:

-- this is an identifier column, which contains the row number. It's common in CSV files to include a unique identifier for each - row, but we can ignore it in this analysis.

- year -- the year in which the fatality occurred.
- month -- the month in which the fatality occurred.
- intent -- the intent of the perpetrator of the crime. This can be Suicide, Accidental, NA, Homicide, or Undetermined.
- police -- whether a police officer was involved with the shooting. Either 0 (false) or 1 (true).
- sex -- the gender of the victim. Either M or F.
- age -- the age of the victim.
- race -- the race of the victim. Either Asian/Pacific Islander, Native American/Native Alaskan, Black, Hispanic, or White.
- hispanic -- a code indicating the Hispanic origin of the victim.
- place -- where the shooting occurred. Has several categories, which you're encouraged to explore on your own.
- education -- educational status of the victim. Can be one of the following:
  1. -- Less than High School
  2. -- Graduated from High School or equivalent
  3. -- Some College
  4. -- At least graduated from College
  5. -- Not available

In [9]:
import csv
f = open("guns.csv")
csvreader = csv.reader(f)
data = list(csvreader)
print(data[0:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


In [15]:
headers = data[0]
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [22]:
data = data[1:]

In [25]:
year_counts = {}
for row in data:
    if row[1] in year_counts:
        year_counts[row[1]] += 1
    else:
        year_counts[row[1]] = 1
print(year_counts)

{'2012': 33561, '2013': 33636, '2014': 33599}


In [27]:
import datetime
dates = [datetime.datetime(year = int(row[1]), month = int(row[2]), day = 1) for row in data]
dates[:10]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 3, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0)]

In [28]:
date_counts = {}
for d in dates:
    if d in date_counts:
        date_counts[d] += 1
    else:
        date_counts[d] = 1
print(date_counts)

{datetime.datetime(2012, 1, 1, 0, 0): 2756, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2012, 4, 1, 0, 0): 2795, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2013, 1, 1, 0, 0): 2864, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2013, 3, 1, 0, 0): 2862, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2013, 7, 1, 0, 0): 3079, datetime.datetime(2013, 8, 1, 0, 0): 2859, datetime.datetime(2013, 9, 1, 0, 0): 2742, datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2013, 11, 1, 0, 0): 2758, datet

In [29]:
sex_counts = {}
for row in data:
    if row[5] in sex_counts:
        sex_counts[row[5]] += 1
    else:
        sex_counts[row[5]] = 1
print(sex_counts)

{'M': 86348, 'F': 14448}


In [10]:
race_counts = {}
for row in data:
    if row[7] in race_counts:
        race_counts[row[7]] += 1
    else:
        race_counts[row[7]] = 1
print(race_counts)

{'race': 1, 'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}


In [4]:
import csv
g = open("census.csv")
csvreader_g = csv.reader(g)
census = list(csvreader_g)
print(census[0])
print(census[1])

['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races']
['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']


mapping = {"Asian/Pacific Islander":census[1][14]+census[1][15], "Black":census[1][12], "Native American/Native Alaskan":census[1][13], "Hispanic":census[1][11], "White":census[1][10]}
print(mapping)

##### Here's a list of the race name in data, and the corresponding race name in census:

- Asian/Pacific Islander -- Race Alone - Asian plus Race Alone - Native Hawaiian and Other Pacific Islander.
- Black -- Race Alone - Black or African American.
- Hispanic -- Race Alone - Hispanic
- Native American/Native Alaskan -- Race Alone - American Indian and Alaska Native
- White -- Race Alone - White

In [12]:
mapping = {"Asian/Pacific Islander":census[1][14]+census[1][15], "Black":census[1][12], "Native American/Native Alaskan":census[1][13], "Hispanic":census[1][11], "White":census[1][10]}
print(mapping)

{'Asian/Pacific Islander': '15159516674625', 'Black': '40250635', 'Native American/Native Alaskan': '3739506', 'Hispanic': '44618105', 'White': '197318956'}


In [17]:
race_per_hundredk = {}

for k in race_counts:
    if k in mapping and int(mapping[k]) != 0:
        race_per_hundredk[k] = 100000*(race_counts[k])/(int(mapping[k]))
print(race_per_hundredk)

{'Asian/Pacific Islander': 8.746980714890115e-06, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.877347773519595, 'Hispanic': 20.220491210910907}


In [18]:
intents = [row[3] for row in data]
print(intents[0:10])

['intent', 'Suicide', 'Suicide', 'Suicide', 'Suicide', 'Suicide', 'Suicide', 'Undetermined', 'Suicide', 'Accidental']


In [19]:
races = [row[7] for row in data]
print(races[0:10])

['race', 'Asian/Pacific Islander', 'White', 'White', 'White', 'White', 'Native American/Native Alaskan', 'White', 'Native American/Native Alaskan', 'White']


In [20]:
homicide_race_counts = {}
for i,race in enumerate(races):
    if intents[i] == "Homicide":
        if race in homicide_race_counts:
            homicide_race_counts[race] += 1
        else:
            homicide_race_counts[race] = 1
print(homicide_race_counts)

{'White': 9147, 'Asian/Pacific Islander': 559, 'Black': 19510, 'Native American/Native Alaskan': 326, 'Hispanic': 5634}
