In [2]:
#read in dataset as list
import csv

with open("guns.csv", 'r') as f:
    gunreader = csv.reader(f)
    data = list(gunreader)

#save and strip header
header = data[0]
data = data[1:]
    
#view first 5 rows
for i in range(5):
    print(str(data[i]))

['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4']
['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3']
['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4']
['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']
['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']


In [3]:
#extract year data
years = [row[1] for row in data]

#count years
year_counts = {}
for year in years:
    if year not in year_counts: year_counts[year]=1
    else: year_counts[year] +=1

year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [9]:
#create dates for each row
import datetime

dates = [datetime.datetime(year=int(row[1]), month=int(row[2]), day=1) for row in data]

dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [10]:
#count dates
date_counts = {}
for date in dates:
    if date not in date_counts: date_counts[date]=1
    else: date_counts[date] +=1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [12]:
#count sex and race
#actually, tired of writing same code. 
#method to return counts for any column

#count_stats returns a dictionary of unique types
#for any stat in header of data

def count_stats(stat):
    col_index = header.index(stat)
    stat_counts = {}

    for row in data:
        if row[col_index] not in stat_counts: stat_counts[row[col_index]]=1
        else: stat_counts[row[col_index]] +=1

    return stat_counts

In [13]:
header

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [14]:
sex_counts = count_stats('sex')

sex_counts

{'F': 14449, 'M': 86349}

In [15]:
race_counts = count_stats('race')

race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [21]:
#read in census data to convert counts above to rate stats
with open("census.csv", 'r') as f:
    censusreader = csv.reader(f)
    census = list(censusreader)

census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [27]:
#method to return count given race keyword
def race_count(race):
    col_index = -1
    
    for col in range(len(census[0])):
        if race in census[0][col]:
            col_index=col
    
    try:
        return census[1][col_index]
    except Exception:
        return -1

#map gun data to census data
mapping = {}
for race in race_counts.keys():
    mapping[race] = int(race_count(race[:5]))

mapping

{'Asian/Pacific Islander': 15159516,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 674625,
 'White': 197318956}

In [28]:
#add native hawaiian to asian
mapping['Asian/Pacific Islander']+=int(race_count('Hawaii'))
mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 674625,
 'White': 197318956}

In [30]:
#give race count stats as rate stats
race_per_hundredk = {}

for race in race_counts:
    rate = race_counts[race]/mapping[race]*100000
    race_per_hundredk[race]=rate
    
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 135.92736705577173,
 'White': 33.56849303419181}

In [33]:
#filter gun deaths to only count Homicides

intents = [row[3] for row in data]
intents[:10]

['Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Undetermined',
 'Suicide',
 'Accidental',
 'Suicide']

In [34]:
races = [row[7] for row in data]
races[:5]

['Asian/Pacific Islander', 'White', 'White', 'White', 'White']

In [35]:
homicide_race_counts = {}
for i, race in enumerate(races):
    if intents[i] == "Homicide":
        if race not in homicide_race_counts:
            homicide_race_counts[race]=1
        else: homicide_race_counts[race]+=1
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [37]:
#give race count stats as rate stats
homicide_race_per_hundredk = {}

for race in homicide_race_counts:
    rate = homicide_race_counts[race]/mapping[race]*100000
    homicide_race_per_hundredk[race]=rate
    
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 48.32314248656661,
 'White': 4.6356417981453335}

According to this data, Blacks and Native Americans have homicide rates well over Asians and Whites, while Hispanic's homicide rates are somewhere in between.

"Here are some potential next steps:

    Figure out the link, if any, between month and homicide rate.
    Explore the homicide rate by gender.
    Explore the rates of other intents, like Accidental, by gender and race.
    Find out if gun death rates correlate to location and education."