## Importing dataset

In [1]:
import csv
f = open('guns.csv')
data = list(csv.reader(f))
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


## Removing headers

In [2]:
headers = data[0]
data = data[1:]
print(headers)

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


In [3]:
print(data[:5])

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


In [4]:
years = list()
for row in data:
    years.append(row[1])
years[:5]

['2012', '2012', '2012', '2012', '2012']

## Counting gun deaths by year

In [5]:
#Gun deaths by year
year_counts =dict()
for year in years:
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

## Counting gun deaths by month and year

In [6]:
#Gun deaths by month
import datetime
date = datetime.datetime(year=2014, month =12, day=1)

date

dates =list()
for row in data:
    dates.append(datetime.datetime(year=int(row[1]),month=int(row[2]),day=1))
dates[:5]

date_counts = dict()
for date in dates:
    if date in date_counts:
        date_counts[date] += 1
    else:
        date_counts[date] = 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Counting gun deaths by sex

In [7]:
#Gun deaths by sex
sex_counts = dict()
for row in data:
    if row[5] in sex_counts:
        sex_counts[row[5]] += 1
    else:
        sex_counts[row[5]] = 1
sex_counts

{'F': 14449, 'M': 86349}

## Counting gun deaths by race

In [8]:
#Gun deaths by race
race_counts = dict()
for row in data:
    if row[7] in race_counts:
        race_counts[row[7]] += 1
    else:
        race_counts[row[7]] = 1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

## Result so far

It is evident from the sex_counts that majority of gun death victims are male and from race_counts the majority of the gun deaths victims are white.

Now we need to use these two results and find a common parameter with which we can definitively say who are the majority victims.

## Importing Census dataset

In [9]:
census = list(csv.reader(open('census.csv','r')))
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


## Population of each race from census dataset

In [10]:
#dictionary where race name from data is the key and populatio from census is the value for the key
mapping = dict()
keys = ['White', 'Hispanic', 'Black','Native American/Native Alaskan','Asian/Pacific Islander']
values = [census[1][i] for i in range(10,16)]
values[4] = int(values[4])+int(values[5])
values = values[:5]

for key in keys:
    if key not in mapping:
        mapping[key] = values[keys.index(key)]
print('Population of a race in 2010 census:')
mapping

Population of a race in 2010 census:


{'Asian/Pacific Islander': 15834141,
 'Black': '40250635',
 'Hispanic': '44618105',
 'Native American/Native Alaskan': '3739506',
 'White': '197318956'}

## Computing rates of gun deaths per race

In [11]:
#Number of person-deaths of a given race per 100k
race_per_hundredk = dict()
for count in race_counts:
    if count not in race_per_hundredk:
        race_per_hundredk[count] = (race_counts[count]/int(mapping[count]))*100000
print('Number of deaths of a given race per 100k population:')
race_per_hundredk

Number of deaths of a given race per 100k population:


{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

## Filtering by Intent

In [12]:
intents = [row[3] for row in data]
races = [row[7] for row in data]
homicide_race_per_hundredk = dict()

In [13]:
for i,race in enumerate(races):
    if intents[i]=='Homicide':
        if race not in homicide_race_per_hundredk:
            homicide_race_per_hundredk[race] = 1
        else:
            homicide_race_per_hundredk[race] += 1
print('Homicide deaths in a given race between 2012-2014:')
homicide_race_per_hundredk

Homicide deaths in a given race between 2012-2014:


{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [14]:
for count in homicide_race_per_hundredk:
    if count in homicide_race_per_hundredk:
        homicide_race_per_hundredk[count] = (homicide_race_per_hundredk[count]/int(mapping[count]))*100000
print('Homicide deaths in a given race between 2012-2014 per 100k population:')
homicide_race_per_hundredk

Homicide deaths in a given race between 2012-2014 per 100k population:


{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

## Conclusion

It is evident from the above results from the intent column of the gun deaths dataset is that that for every 100k population of Black race there are 97 homicide deaths.

Further we can use multiple columns to find insights on gun deaths using following multiple parameters:
    - age and intent
    - sex and race
    - sex and age
    - age and place