# Import and format data

In [1]:
import csv

# Read every row of guns.csv into memory. The with-block closes the file
# handle as soon as parsing finishes instead of leaving it open for the life
# of the kernel; newline='' is what the csv module requires so that quoted
# fields containing line breaks are parsed correctly.
with open('guns.csv', newline='') as guns_file:
    data = list(csv.reader(guns_file))

# Extract Headers
# NOTE: data[:1] is a one-element list wrapping the header row, so the column
# names themselves are headers[0].
headers = data[:1]
data = data[1:]

# Count fatalities by year

In [2]:
# Tally deaths per year; column 1 of each row holds the year as a string.
years = [record[1] for record in data]
year_counts = {}
for year in years:
    # .get() supplies 0 the first time a year is seen
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)

{'2012': 33563, '2013': 33636, '2014': 33599}


The number of deaths appears to be consistent across 2012–2014.

---
# Count fatalities by month

In [3]:
import datetime

# The data carries no day-of-month, so every record is pinned to the 1st.
dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in data
]

# Frequency of each distinct date, keyed by its formatted label
date_counts = {}
for date in dates:
    date_string = date.strftime("%B %d, %Y")
    date_counts[date_string] = date_counts.get(date_string, 0) + 1

# Deaths per calendar month, pooled across all years in the data
month_counts = {}
for date in dates:
    try:
        month_counts[date.month] += 1
    except KeyError:
        # first death seen for this month
        month_counts[date.month] = 1

print(month_counts)

{1: 8273, 2: 7093, 3: 8289, 4: 8455, 5: 8669, 6: 8677, 7: 8989, 8: 8783, 9: 8508, 10: 8406, 11: 8243, 12: 8413}


Fatalities rise from April to a peak in July and then decline through November. It is known that crime increases during summer when temperatures are high, so this may be consistent with that. Why do deaths rise again in December?

---
# Count by gender

In [4]:
# Tally deaths by the sex recorded on each row (column 5).
# 'M' and 'F' are pre-seeded so both keys always appear in the result; any
# other value found in the column (for example a missing-data marker) gets
# its own bucket instead of aborting the cell with a KeyError.
sex_counts = {'M': 0, 'F': 0}
for row in data:
    sex_counts[row[5]] = sex_counts.get(row[5], 0) + 1

print(sex_counts)

{'M': 86349, 'F': 14449}


There are more male fatalities than female, consistent with other studies about male aggression/risk.

---
# Count by Race

In [5]:
# Tally deaths per race label; column 7 of each row holds the race.
race_counts = {}
for row in data:
    # setdefault() creates the bucket at 0 the first time a label appears
    race_counts.setdefault(row[7], 0)
    race_counts[row[7]] += 1

print(race_counts)

{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}


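The raw number of fatalities by race roughly follows the population size of each race, but raw counts cannot be compared across groups of different sizes; the next section normalizes them by population.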

# Deeper analysis of fatalities by race

In [6]:
# Load the census table. The with-block closes the file once it has been
# parsed rather than leaking the handle; newline='' is the csv module's
# required open mode for reader input.
with open('census.csv', newline='') as census_file:
    census = list(csv.reader(census_file))

## Creating a manual mapping to race data from the census

In [7]:
# Population for each race label used in the gun-death data, entered by hand
# from the census figures. Keys must match the race strings in the data
# exactly, because later cells look populations up by those strings.
mapping = dict([
    ('Asian/Pacific Islander', 15834141),
    ('Black', 40250635),
    ('Native American/Native Alaskan', 3739506),
    ('Hispanic', 44618105),
    ('White', 197318956),
])

## Fatalities by race per 100,000 people

In [8]:
# Deaths per 100,000 people: each race's death count divided by its
# population from mapping, then scaled by 100,000.
race_per_hundredk = {
    race: (race_counts[race] / mapping[race]) * 100000
    for race in race_counts
}

print(race_per_hundredk)

{'Asian/Pacific Islander': 8.374309664161762, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


## Fatalities by Homicide Intent

In [9]:
# Pull the intent (column 3) and race (column 7) columns out as parallel lists
intents = [record[3] for record in data]
races = [record[7] for record in data]

# Count homicide deaths per race by walking both columns in lockstep
homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != 'Homicide':
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

print(homicide_race_counts)

# Homicides per 100,000 people, using the populations in mapping
homicides_by_race_per_hundredk = {
    race: (homicide_race_counts[race] / mapping[race]) * 100000
    for race in homicide_race_counts
}

print(homicides_by_race_per_hundredk)

{'White': 9147, 'Asian/Pacific Islander': 559, 'Black': 19510, 'Native American/Native Alaskan': 326, 'Hispanic': 5634}
{'White': 4.6356417981453335, 'Asian/Pacific Islander': 3.530346230970155, 'Black': 48.471284987180944, 'Native American/Native Alaskan': 8.717729026240365, 'Hispanic': 12.627161104219914}


Data shows highest homicide rates per 100,000 amongst the Black/African American population

### Next possible routes to go
- Figure out the link, if any, between month and homicide rate.
- Explore the homicide rate by gender.
- Explore the rates of other intents, like Accidental, by gender and race.
- Find out if gun death rates correlate to location and education.

# Homicide rate by month

In [13]:
# Keep only the rows whose intent column (3) is 'Homicide'
homicides = [record for record in data if record[3] == 'Homicide']

# No day-of-month is available, so each date is anchored to the 1st
homicides_dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in homicides
]

# Homicides per calendar month, pooled across years.
# NOTE: this rebinds month_counts, replacing the all-intents tally that an
# earlier cell stored under the same name.
month_counts = {}
for homicide_date in homicides_dates:
    month = homicide_date.month
    month_counts[month] = month_counts.get(month, 0) + 1
print(month_counts)

{3: 2780, 4: 2845, 6: 3130, 7: 3269, 8: 3125, 9: 2966, 10: 2968, 12: 3191, 1: 2829, 2: 2178, 5: 2976, 11: 2919}


Homicides rise during summer, from May to a peak in July, and then start to dip again, until a steep rise in December.

In [15]:
# Split the homicide rows by the sex column (5)
homicides_gender_count = {}
for homicide in homicides:
    gender = homicide[5]
    try:
        homicides_gender_count[gender] += 1
    except KeyError:
        # first homicide row seen with this value
        homicides_gender_count[gender] = 1

print(homicides_gender_count)

{'M': 29803, 'F': 5373}


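Males account for around 84.7% of all homicide victims (each row in the data records a death, so the sex column describes the person killed, not the perpetrator).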

## Homicides instances with Police involved

In [24]:
# Homicide rows whose police column (4) holds the string '1'
homicides_with_police = [record for record in homicides if record[4] == '1']

# Count those rows per race (column 7)
homicide_with_police_race_counts = {}
for homicide in homicides_with_police:
    homicide_with_police_race_counts[homicide[7]] = (
        homicide_with_police_race_counts.get(homicide[7], 0) + 1
    )

# Scale each count by the race's population in mapping: per 100,000 people
homicides_with_police_by_race_per_hundredk = {
    race: (homicide_with_police_race_counts[race] / mapping[race]) * 100000
    for race in homicide_with_police_race_counts
}

print(homicides_with_police_by_race_per_hundredk)

{'White': 0.3593167196769478, 'Native American/Native Alaskan': 0.6685375020122979, 'Black': 0.8844580961269306, 'Hispanic': 0.6320304280067475, 'Asian/Pacific Islander': 0.18946401955117112}


## Homicides by education

### Reference
1 -- Less than High School
2 -- Graduated from High School or equivalent
3 -- Some College
4 -- At least graduated from College
5 -- Not available

In [27]:
# Tally homicide rows by the education code stored in column 10
education_counts = {}
for homicide in homicides:
    education = homicide[10]
    education_counts[education] = education_counts.get(education, 0) + 1
print(education_counts)

{'2': 15649, '3': 5640, '5': 455, '1': 11839, '4': 1559, 'NA': 34}


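Homicide counts are highest among those with a high school education or less and fall sharply at higher education levels. Note that these are raw counts, not rates: they are not normalized by the number of people at each education level.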

# Accidental Deaths

In [25]:
# Keep only the rows whose intent column (3) is 'Accidental'
accidental_deaths = [record for record in data if record[3] == 'Accidental']

# Split those rows by the sex column (5)
accidental_deaths_by_gender = {}
for death in accidental_deaths:
    gender = death[5]
    try:
        accidental_deaths_by_gender[gender] += 1
    except KeyError:
        # first accidental death seen with this value
        accidental_deaths_by_gender[gender] = 1
print(accidental_deaths_by_gender)

{'M': 1421, 'F': 218}


About 6.5 times as many men as women died in accidental shootings! **Possible routes to explore are the employment rate of men in hazardous environments, or whether men tend to lead riskier lives than women. How does education fit into all of this?**