# Exploration of US gun deaths data

## Reading in the US gun deaths data

In [4]:
# Reading the data set into a list of lists (one inner list per CSV row)

import csv

# The context manager closes the file handle as soon as the rows are loaded
# (the bare open() left it open for the life of the kernel). newline="" is
# what the csv module asks for when a file object is handed to csv.reader.
with open("guns.csv", "r", newline="") as file_object:
    data = list(csv.reader(file_object))

# Preview: the header row followed by the first four records.
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


## Removing header row from the dataset

In [5]:
# Splitting the header row off from the records.
# NOTE: `data` is rebound from itself, so running this cell a second time
# without reloading the CSV would strip the first real record as well.

headers, data = data[0], data[1:]
print(data[:5])


[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


## Counting the number of gun deaths by year

In [7]:
# Calculating the number of deaths in each year

# Column 1 of every row is the year, still a string such as '2012'.
years = [row[1] for row in data]

# Maps each year to the number of rows (one row per death) recorded for it.
year_counts = {}
for year in years:
    # dict.get supplies 0 the first time a year is seen, so no if/else is needed.
    year_counts[year] = year_counts.get(year, 0) + 1

year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

## Exploring gun deaths by month and year

In [11]:
# Building one datetime per row so deaths can be grouped by month and year

import datetime

# The data set has year and month columns (indexes 1 and 2) but no day,
# so every date is pinned to the first of its month.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]
dates[:5]


[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [12]:
# Tally of deaths keyed by the first-of-month datetime built for each row
date_counts = {}

for date in dates:
    # A month seen for the first time starts from 0.
    date_counts[date] = date_counts.get(date, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Counting the total number of gun deaths by sex

In [13]:
# Counting gun deaths by gender

# Column 5 of every row is the sex field ('M' or 'F' in the rows previewed above).
genders = [row[5] for row in data]

# Maps each sex value to the number of rows (deaths) recorded for it.
gender_counts = {}
for gender in genders:
    # dict.get supplies 0 the first time a value is seen, so no if/else is needed.
    gender_counts[gender] = gender_counts.get(gender, 0) + 1

gender_counts


{'F': 14449, 'M': 86349}

## Counting the total number of gun deaths by race

In [14]:
# Counting gun deaths by race

# Column 7 of every row is the race field.
races = [row[7] for row in data]

# Maps each race label to the number of rows (deaths) recorded for it.
race_counts = {}
for race in races:
    # dict.get supplies 0 the first time a label is seen, so no if/else is needed.
    race_counts[race] = race_counts.get(race, 0) + 1

race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

# Findings so far from the above basic data exploration:

1) The number of gun deaths among men over the three years (2012-14) was far higher than among women (almost 6 times: 86349 vs 14449).

2) The race with the highest number of gun deaths was White (66237), followed by Black (23296). The fewest deaths were among Native Americans/Native Alaskans (917). This may be because the Native population is very low and the majority of them prefer to live away from the urban population. However, we need to find the proportion of each race in the US population to get a deeper and more accurate understanding of this statistic.

3) The number of gun deaths did not differ much from one year to the next, so we decided to look at each month of each year. We found that July was the only month to cross the 3000-deaths mark, and it did so in two of the years (2012 and 2013).

Further data exploration of the same and new related data will provide more information and insights.



## Reading in a second dataset to better understand the findings

In [15]:
# Reading in the census data to better understand the gun deaths data by different race

# The context manager closes the file once the rows are loaded (the bare
# open() never closed it). newline="" follows the csv module's guidance
# for file objects passed to csv.reader.
with open("census.csv", "r", newline="") as file_object:
    census = list(csv.reader(file_object))

# Display the rows: a header row followed by the data.
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

## Computing rates of gun deaths per race


In [16]:
# Finding rates of gun deaths per race

# Pair each census column name with its value from the first data row, so the
# populations come from the loaded file instead of hand-copied numbers.
# (The repeated 'Id' column names collapse to one key; they are not used here.)
census_row = dict(zip(census[0], census[1]))

# Population for each race label used in the gun deaths data. The deaths data
# merges Asian and Pacific Islander, so those two census columns are summed.
mapping = {
    "Asian/Pacific Islander": (
        int(census_row["Race Alone - Asian"])
        + int(census_row["Race Alone - Native Hawaiian and Other Pacific Islander"])
    ),
    "Native American/Native Alaskan": int(
        census_row["Race Alone - American Indian and Alaska Native"]
    ),
    "Black": int(census_row["Race Alone - Black or African American"]),
    "Hispanic": int(census_row["Race Alone - Hispanic"]),
    "White": int(census_row["Race Alone - White"]),
}

# Deaths per 100,000 people of each race.
# NOTE: race_counts covers every year in the data set (2012-2014) while the
# population is the April 2010 census, so this is a cumulative three-year
# rate, not an annual one.
race_per_hundredk = {
    race: (count / mapping[race]) * 100000
    for race, count in race_counts.items()
}
print(race_per_hundredk)


{'Native American/Native Alaskan': 24.521955573811088, 'Hispanic': 20.220491210910907, 'Black': 57.8773477735196, 'White': 33.56849303419181, 'Asian/Pacific Islander': 8.374309664161762}


## Filtering by the recorded intent of each death


In [17]:
# Restricting the per-race rates to deaths whose intent is "Homicide"

intents = [record[3] for record in data]  # column 3 is the intent field

# Count homicide rows per race; races and intents are walked in step
# because both lists were built from the same rows of `data`.
homicide_race_counts = {}
for race, intent in zip(races, intents):
    if intent != "Homicide":
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

# NOTE: this rebinds race_per_hundredk, replacing the all-intent rates
# computed in the previous cell with homicide-only rates.
race_per_hundredk = {}
for race, homicides in homicide_race_counts.items():
    race_per_hundredk[race] = (homicides / mapping[race]) * 100000
print(race_per_hundredk)

        
    


{'Native American/Native Alaskan': 8.717729026240365, 'Hispanic': 12.627161104219914, 'Black': 48.471284987180944, 'White': 4.6356417981453335, 'Asian/Pacific Islander': 3.530346230970155}


# Findings

It appears that the rate of gun-related homicide deaths in the US is extremely high for people in the Black racial category, followed by Hispanic people.
Some areas to investigate further:

- Link between month and Homicide rate.
- Homicide rate by gender.
- The rates of other intents by gender and race.
- Gun death rates by location and education.