In [33]:
import csv
import os

# Reading in the FiveThirtyEight Dataset

In [34]:
f = open(os.path.expanduser("~/Downloads/full_data.csv"), "r")
csv_reader_obj=csv.reader(f)
data=list(csv_reader_obj)
data[:5] #Printing first five rows to verify 

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  'BA+'],
 ['2',
  '2012',
  '01',
  'Suicide',
  '0',
  'F',
  '21',
  'White',
  '100',
  'Street',
  'Some college'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  'BA+'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', 'BA+']]

# Getting rid of the header row

In [35]:
headers=data[0]
data=data[1:]
print(headers)
data[:5] #Printing the first five rows to verify the removal of header row

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  'BA+'],
 ['2',
  '2012',
  '01',
  'Suicide',
  '0',
  'F',
  '21',
  'White',
  '100',
  'Street',
  'Some college'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  'BA+'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', 'BA+'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  'HS/GED']]

# Gun deaths per year (2012-2014)

In [36]:
years=[]
for ele in data:
    years.append(ele[1])
year_count={}
for ele in years:
    if ele in year_count:
        year_count[ele]+=1
    else:
        year_count[ele]=1
year_count

{'2012': 33563, '2013': 33636, '2014': 33599}

# Adding a timestamp column
---
**The day is the 1st of the month by default, since that data was unavailable.**

In [37]:
import datetime
for ele in data:
    date=datetime.datetime(year=int(ele[1]), month=int(ele[2]), day =1)
    ele.append(date)
data[:5] #Printing the first five rows to verify addition of timestamp column

[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  'BA+',
  datetime.datetime(2012, 1, 1, 0, 0)],
 ['2',
  '2012',
  '01',
  'Suicide',
  '0',
  'F',
  '21',
  'White',
  '100',
  'Street',
  'Some college',
  datetime.datetime(2012, 1, 1, 0, 0)],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  'BA+',
  datetime.datetime(2012, 1, 1, 0, 0)],
 ['4',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '64',
  'White',
  '100',
  'Home',
  'BA+',
  datetime.datetime(2012, 2, 1, 0, 0)],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  'HS/GED',
  datetime.datetime(2012, 2, 1, 0, 0)]]

# Computing the gun deaths per month of each year

In [41]:
date_count={}
for ele in data:
    if ele[11] in date_count:
        date_count[ele[11]]+=1
    else:
        date_count[ele[11]]=1
date_count

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

# Computing the gun deaths based on gender

In [42]:
sex_counts={}
for ele in data:
    if ele[5] in sex_counts:
        sex_counts[ele[5]]+=1
    else:
        sex_counts[ele[5]]=1
sex_counts

{'F': 14449, 'M': 86349}

# Computing the gun deaths based on race

In [43]:
race_counts={}
for ele in data:
    if ele[7] in race_counts:
        race_counts[ele[7]]+=1
    else:
        race_counts[ele[7]]=1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

# Reading in a CSV that contains the population information of the different races (as of April 2010)

In [51]:
f=open(os.path.expanduser("~/Downloads/census.csv"), "r")
csv_reader_obj=csv.reader(f)
census_data=list(csv_reader_obj)
census_data

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

**Manually creating a dictionary to map the race categories present in our data, to those present in the census data**

In [52]:
mapping={"White":census_data[1][10], "Native American/Native Alaskan": census_data[1][13], "Asian/Pacific Islander":census_data[1][14]+census_data[1][15],"Black":census_data[1][12], "Hispanic":census_data[1][11]}
mapping

{'Asian/Pacific Islander': '15159516674625',
 'Black': '40250635',
 'Hispanic': '44618105',
 'Native American/Native Alaskan': '3739506',
 'White': '197318956'}

# Computing rate of gun deaths per race (per 100,000 people)

In [57]:
race_per_hundredk={}
for ele in race_counts:
    race_per_hundredk[ele]=(race_counts[ele]/int(mapping[ele]))*100000
race_per_hundredk

{'Asian/Pacific Islander': 8.746980714890115e-06,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

# Computing homicidal gun deaths per race

In [60]:
intents=[]; races=[]
for ele in data:
    intents.append(ele[3])
    races.append(ele[7])
homicide_race_counts={}
for idx, value in enumerate(intents):
    if value=="Homicide":
        if races[idx] in homicide_race_counts:
            homicide_race_counts[races[idx]]+=1
        else:
            homicide_race_counts[races[idx]]=1
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

# Computing rate of homicidal gun deaths per race (per 100,000 people)

In [63]:
homicide_rate_perhundredk={}
for ele in homicide_race_counts:
    homicide_rate_perhundredk[ele]=(homicide_race_counts[ele]/int(mapping[ele]))*100000
homicide_rate_perhundredk

{'Asian/Pacific Islander': 3.687452654316421e-06,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

# Computing homicidal gun deaths per month (for all years)

In [64]:
homicides_per_month={}
months=[]
for ele in data:
    months.append(ele[2])
for idx, value in enumerate(intents):
    if value=="Homicide":
        if months[idx] in homicides_per_month:
            homicides_per_month[months[idx]]+=1
        else:
            homicides_per_month[months[idx]]=1
homicides_per_month

{'01': 2829,
 '02': 2178,
 '03': 2780,
 '04': 2845,
 '05': 2976,
 '06': 3130,
 '07': 3269,
 '08': 3125,
 '09': 2966,
 '10': 2968,
 '11': 2919,
 '12': 3191}

# Computing homicidal gun deaths based on gender

In [65]:
homicides_by_gender={}
genders=[]
for ele in data:
    genders.append(ele[5])
for idx, value in enumerate(intents):
    if value=="Homicide":
        if genders[idx] in homicides_by_gender:
            homicides_by_gender[genders[idx]]+=1
        else:
            homicides_by_gender[genders[idx]]=1
homicides_by_gender

{'F': 5373, 'M': 29803}

# Computing accidental gun deaths based on race

In [67]:
accidental_by_race={}
for idx, value in enumerate(races):
    if intents[idx]=="Accidental":
        if value in accidental_by_race:
            accidental_by_race[value]+=1
        else:
            accidental_by_race[value]=1
accidental_by_race

{'Asian/Pacific Islander': 12,
 'Black': 328,
 'Hispanic': 145,
 'Native American/Native Alaskan': 22,
 'White': 1132}

# Computing accidental gun deaths based on gender

In [69]:
accidental_by_gender={}
for idx, value in enumerate(genders):
    if intents[idx]=="Accidental":
        if value in accidental_by_gender:
            accidental_by_gender[value]+=1
        else:
            accidental_by_gender[value]=1
accidental_by_gender

{'F': 218, 'M': 1421}

# Computing gun deaths based on education of the deceased

In [70]:
edu=[]
for ele in data:
    edu.append(ele[10])
set(edu) #To find the different categories present

{'BA+', 'HS/GED', 'Less than HS', 'NA', 'Some college'}

In [72]:
deaths_by_edu={}
for ele in data:
    if ele[10] in deaths_by_edu:
        deaths_by_edu[ele[10]]+=1
    else:
        deaths_by_edu[ele[10]]=1
deaths_by_edu

{'BA+': 12946,
 'HS/GED': 42927,
 'Less than HS': 21823,
 'NA': 1422,
 'Some college': 21680}

# Computing gun deaths based on location of death

In [73]:
loc=[]
for ele in data:
    loc.append(ele[9])
set(loc) #To find the different categories present

{'Farm',
 'Home',
 'Industrial/construction',
 'NA',
 'Other specified',
 'Other unspecified',
 'Residential institution',
 'School/instiution',
 'Sports',
 'Street',
 'Trade/service area'}

In [74]:
deaths_by_loc={}
for ele in data:
    if ele[9] in deaths_by_loc:
        deaths_by_loc[ele[9]]+=1
    else:
        deaths_by_loc[ele[9]]=1
deaths_by_loc

{'Farm': 470,
 'Home': 60486,
 'Industrial/construction': 248,
 'NA': 1384,
 'Other specified': 13751,
 'Other unspecified': 8867,
 'Residential institution': 203,
 'School/instiution': 671,
 'Sports': 128,
 'Street': 11151,
 'Trade/service area': 3439}

In [80]:
ages=[]
for ele in data:
    if ele[6] != "NA":
        ages.append(int(ele[6]))
set(ages)

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 107}

# Computing Gun deaths by age

In [84]:
deaths_by_age={}
for i_index in range(-1, 101, 10):
    deaths_by_age[(i_index+1, i_index+10)]=0
for ele in ages:
    ans=(ele//10)*10
    deaths_by_age[(ans, ans+9)]+=1
deaths_by_age

{(0, 9): 456,
 (10, 19): 7252,
 (20, 29): 22816,
 (30, 39): 16448,
 (40, 49): 14998,
 (50, 59): 16169,
 (60, 69): 10582,
 (70, 79): 6822,
 (80, 89): 4415,
 (90, 99): 816,
 (100, 109): 6}

**From the computations made above, the following inferences can be drawn:**
 - The gun deaths per 100,000 people of a particular race is highest for "Blacks", and lowest for "Asian/Pacific Highlander"
 - The absolute number of male deaths is much larger than that of females.
 - Most gun death victims belonged to the age group 20-29 years, and it is alarming to see the deaths of 456 children aged 9 and below, and 7,252 victims aged between 10-19.
 
**More inferences can be drawn with the population values for all the different categories, such as those for different educational qualifications**