# Dataquest Project - Explore gun deaths in USA
### Data from FiveThirtyEight

#### This project looks at gun violence data from the United States from 2012 to 2014.

In [1]:
# Load in data
import csv

# Read every row of guns.csv into memory as a list of string lists.
# The context manager closes the file handle as soon as the rows are collected,
# and newline='' lets the csv module do its own newline handling (as the csv
# documentation recommends for file objects passed to csv.reader).
with open('guns.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)

# Preview the header row plus the first four records.
print(data[0:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


## Remove header row

In [2]:
# Peel the first row (column names) off the dataset; `data` keeps only records.
headers, data = data[0], data[1:]
print(headers)

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


## Calculate how many deaths happened each year

In [3]:
# Column 1 of each record is the year, kept as a string.
years = [record[1] for record in data]
print(years[:5])

['2012', '2012', '2012', '2012', '2012']


In [4]:
# Tally how many records fall in each year.
year_counts = {}
for year in years:
    # A year not seen before starts from 0.
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)

{'2014': 33599, '2013': 33636, '2012': 33563}


## Look at gun deaths by month

In [5]:
# First make datetime object for each entry
import datetime as dt

# Build one datetime per record from its year (column 1) and month (column 2).
# The day is fixed to 1, so each value identifies a (year, month) pair.
dates = [dt.datetime(int(record[1]), int(record[2]), 1) for record in data]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [6]:
# Tally records per month-start datetime.
date_counts = {}
for month_start in dates:
    date_counts[month_start] = date_counts.get(month_start, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Look at gun deaths by sex and race

In [7]:
# Pull out the sex (column 5) and race (column 7) of every record.
sexes = [record[5] for record in data]
races = [record[7] for record in data]

# Tally records per sex.
sex_counts = {}
for sex in sexes:
    sex_counts[sex] = sex_counts.get(sex, 0) + 1

# Tally records per race.
race_counts = {}
for race in races:
    race_counts[race] = race_counts.get(race, 0) + 1

sex_counts

{'F': 14449, 'M': 86349}

In [8]:
# Display the per-race totals tallied in the previous cell.
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

## Look at gun deaths as proportion of race population in the US

In [9]:
# Import census data
# Read census.csv into a list of rows (header row first).
# The context manager closes the file handle once the rows are collected, and
# newline='' lets the csv module do its own newline handling.
with open('census.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    census = list(reader)
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

## Look at number of gun deaths per 100,000 by race

In [10]:
# Population denominators keyed by the race labels used in the gun-death data.
# The figures are hard-coded and match the census row loaded earlier; the
# 'Asian/Pacific Islander' entry adds the census 'Asian' and
# 'Native Hawaiian and Other Pacific Islander' columns together.
mapping = {
    'Asian/Pacific Islander': 15159516 + 674625,
    'Black': 40250635,
    'Native American/Native Alaskan': 3739506,
    'Hispanic': 44618105,
    'White': 197318956,
}

# Gun deaths per 100,000 people for each race.
race_per_hundredk = {
    race: 100000 * count / mapping[race]
    for race, count in race_counts.items()
}

print(race_per_hundredk)

{'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Asian/Pacific Islander': 8.374309664161762, 'Black': 57.877347773519595, 'Hispanic': 20.220491210910907}


## Look at only homicides per 100k (murder rate)

In [11]:
# Intent (column 3) and race (column 7) for every record, in matching order.
intents = [record[3] for record in data]
races = [record[7] for record in data]

# Count homicide records per race.
homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != 'Homicide':
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

# Replace each raw count with a rate per 100,000 people of that race.
homicide_race_counts = {
    race: 100000 * count / mapping[race]
    for race, count in homicide_race_counts.items()
}

homicide_race_counts

{'Asian/Pacific Islander': 3.5303462309701548,
 'Black': 48.47128498718095,
 'Hispanic': 12.627161104219912,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

# Murder findings:
### African Americans are killed in gun homicides at a much higher rate than other races in the US (about 48 per 100k, counting all deaths from 2012 to 2014).
### African Americans are killed at roughly 10 times the rate of White Americans (about 4.6 per 100k).
### Asian Americans are killed at the lowest rate of any race in the US (about 3.5 per 100k).

# Look at suicide by race/gender

In [12]:
# Reuses the `intents` and `races` lists built earlier.
# Count suicide records per race.
suicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != 'Suicide':
        continue
    suicide_race_counts[race] = suicide_race_counts.get(race, 0) + 1
suicide_race_counts

{'Asian/Pacific Islander': 745,
 'Black': 3332,
 'Hispanic': 3171,
 'Native American/Native Alaskan': 555,
 'White': 55372}

In [13]:
# Convert each raw suicide count into a rate per 100,000 people of that race.
# Only values of existing keys are reassigned, so iterating the dict is safe.
for race in suicide_race_counts:
    suicide_race_counts[race] = 100000 * suicide_race_counts[race] / mapping[race]
suicide_race_counts

{'Asian/Pacific Islander': 4.705023152187416,
 'Black': 8.278130270491385,
 'Hispanic': 7.10698045109715,
 'Native American/Native Alaskan': 14.841532544673013,
 'White': 28.06217969245692}

In [28]:
# Count suicide records per sex, pairing each record's intent with its sex.
suicide_sex_counts = {}
for intent, sex in zip(intents, sexes):
    if intent != 'Suicide':
        continue
    suicide_sex_counts[sex] = suicide_sex_counts.get(sex, 0) + 1
suicide_sex_counts

{'F': 8689, 'M': 54486}

In [29]:
# Total US population from the census row (column 9, 'Total').
US_pop = int(census[1][9])

# Approximate the male and female populations as half of the total each.
mapping_sex = dict.fromkeys(('M', 'F'), US_pop/2)

# Convert each raw suicide count into a rate per 100,000 people of that sex.
for sex in suicide_sex_counts:
    suicide_sex_counts[sex] = 100000 * suicide_sex_counts[sex] / mapping_sex[sex]
suicide_sex_counts

{'F': 5.628583367575663, 'M': 35.29508497706613}

# Suicide Findings:
### Suicide by gun is most prominent in the White population in the US.
### Males are about 6 times as likely as females to die by suicide using a gun (about 35.3 vs. 5.6 per 100k).