In [1]:
# Title: US_gun_deaths.ipynb
# Desc: Python Intermediate project from Dataquest, helping to practice modules, 
#       enumeration, indexing, and scopes.
# Date: 3/1/2017
# Note: guns.csv contains information on gun deaths in the U.S. from 2012 to 
#       2014. 
#       The dataset came from FiveThirtyEight.

In [1]:
# read in the dataset as a list
import csv
import datetime

# Parse guns.csv into a list of rows (each row is a list of strings).
# The context manager closes the file as soon as parsing is done instead of
# leaving the handle open for the lifetime of the kernel; newline="" is what
# the csv module recommends for files handed to csv.reader.
with open("guns.csv", "r", newline="") as file:
    data = list(csv.reader(file))

In [2]:
# Separate the column names (first row) from the records (remaining rows).
# NOTE: `data` is overwritten here, so running this cell a second time would
# drop one more row; re-run the loading cell first if you need to repeat it.
headers, data = data[0], data[1:]

In [3]:
# Count how many records fall in each year (column 1 is "year").
years = [row[1] for row in data]
year_counts = {}
for year in years:
    # .get() supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1

In [4]:
# Explore the data by month: each record is mapped to a datetime for the first
# day of its month (day is fixed to 1), and records are then counted per month.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

In [5]:
# Explore the data by sex (column 5) and race (column 7).
# Both tallies are built in one pass over the records.
sex_counts = {}
race_counts = {}

for row in data:
    sex = row[5]
    race = row[7]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1
    race_counts[race] = race_counts.get(race, 0) + 1

In [6]:
# Display the number of records for each sex.
sex_counts

{'F': 14449, 'M': 86349}

In [7]:
# Display the number of records for each race.
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

What I've learned so far:
- A large share of the people who died by gun in the U.S. from 2012 to 2014 were male, and a large share were white.

Questions:
- Are these counts proportional to the general population? (i.e., are there correspondingly more men and white people in the U.S.?)
- What are the reasons for this trend?

In [8]:
# Read in the 2010 census data to find proportions by sex, race, etc.
# The context manager closes the file once it has been parsed instead of
# leaving the handle open; newline="" is what the csv module recommends for
# files handed to csv.reader.
with open("census.csv", "r", newline="") as file2:
    census = list(csv.reader(file2))

In [9]:
# Inspect the parsed census rows to find which column holds which population.
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [10]:
# Population per race category: keys are the race labels used in guns.csv,
# values are taken from the data row of census.csv (census[1]).
numbers = census[1]

mapping = {
    # 'Race Alone - Asian' + 'Race Alone - Native Hawaiian and Other Pacific Islander'
    "Asian/Pacific Islander": int(numbers[14]) + int(numbers[15]),
    # 'Race Alone - Black or African American'
    "Black": int(numbers[12]),
    # 'Race Alone - Hispanic'
    "Hispanic": int(numbers[11]),
    # 'Race Alone - American Indian and Alaska Native'
    "Native American/Native Alaskan": int(numbers[13]),
    # 'Race Alone - White'
    "White": int(numbers[10]),
}

In [11]:
# Gun deaths per 100,000 people for each race: the tally from race_counts
# divided by the matching census population, scaled by 100000.
# NOTE(review): the tallies cover every record in `data`, so this is a rate
# over the whole period of the dataset, not an annual rate — confirm before
# comparing with published yearly figures.
race_per_hundredk = {
    race: count / mapping[race] * 100000
    for race, count in race_counts.items()
}

In [12]:
# Show the per-race record counts again (the numerators of the rates).
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [13]:
# Show the per-race census populations (the denominators of the rates).
mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [14]:
# Display the per-race rates per 100,000 people.
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

In [15]:
# Show the guns.csv column names; a name's position is its column index in each row.
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [23]:
# Looking only at homicides: count, per race, the records whose intent
# (column 3) is "Homicide".
intents = [row[3] for row in data]
races = [row[7] for row in data]

homicide_race_counts = {}

# Walk the two parallel lists together instead of indexing one by position.
for intent, race in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

In [24]:
# Homicide records per 100,000 people for each race: the homicide tally
# divided by the matching census population, scaled by 100000.
homicide_race_per_hundredk = {
    race: count / mapping[race] * 100000
    for race, count in homicide_race_counts.items()
}

In [25]:
# Display the per-race homicide rates per 100,000 people.
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

New findings:
- Proportional to population, a lot of White and Black Americans are involved in gun deaths.
- Proportional to population, Black Americans are by far the most involved in gun homicides.

Questions:
- Where do these happen?
- Why do these happen?