# US Gun Death Data
## 2012 to 2014
Data analysis and manipulation using basic Python
## Dataset : `guns.csv`

- identifier
- `year` 
- `month`
- `intent` : `Suicide`,`Accidental`,`NA`,`Homicide`,`Undetermined`
- `police` : `1` : a police officer was involved, `0` if not
- `sex` : `M` or `F`
- `age`
- `race` : `Asian/Pacific Islander`,`Native American/Native Alaskan`,`Black`,`Hispanic`,`White`
- `hispanic` : a code indicating the Hispanic origin of the victim
- `place` : where the shooting occurred
- `education` : `1`,`2`,`3`,`4`,`5`

In [2]:
import csv

# Read every row of guns.csv (header row included) into a list of lists of
# strings. The context manager closes the file as soon as the rows are read,
# and newline='' lets the csv module do its own line-ending handling.
with open('guns.csv', 'r', newline='') as f:
    data = list(csv.reader(f))

data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [3]:
# Peel the header row off so that `data` only contains the records.
# NOTE: running this cell again strips one more row from `data` each time.
headers, data = data[0], data[1:]

print(headers)
data[:5]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

## Counting gun deaths by year

In [4]:
# Tally the number of records per year (index 1 of each row, kept as a string).
years = [row[1] for row in data]
year_counts = {}
for year in years:
    year_counts[year] = year_counts.get(year, 0) + 1

print('Gun deaths by year : ')
year_counts

Gun deaths by year : 


{'2012': 33563, '2013': 33636, '2014': 33599}

In [5]:
import datetime

# One datetime per record, built from the year (index 1) and month (index 2)
# and pinned to the first day of the month.
dates = [
    datetime.datetime(int(row[1]), int(row[2]), 1)
    for row in data
]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

## Counting gun deaths by month and year

In [6]:
# Number of records for each (year, month), keyed by the first-of-month datetime.
date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Gun Deaths By Race And Sex

In [7]:
# Two tallies built in a single pass over the records:
# sex is read from index 5 and race from index 7 of each row.
sex_counts = {}
race_counts = {}
for el in data:
    sex, race = el[5], el[7]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1
    race_counts[race] = race_counts.get(race, 0) + 1

print('Gun deaths by race :\n')
print(race_counts)
print('\nGun deaths by sex :\n')
sex_counts

Gun deaths by race :

{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}

Gun deaths by sex :



{'F': 14449, 'M': 86349}

## Adding the proportion of each race in the US

File : `census.csv`

In [8]:
# Read census.csv (header row included) into a list of lists of strings.
# The context manager closes the file once the rows are read, and
# newline='' lets the csv module do its own line-ending handling.
with open('census.csv', newline='') as f2:
    census = list(csv.reader(f2))

census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

## Gun Deaths rate per 100000 by race

In [9]:
# Population used as the denominator for each race label found in the gun
# data. The 'Asian/Pacific Islander' entry is the sum of two population
# figures.
mapping = {
    'Asian/Pacific Islander': 15159516 + 674625,
    'Black': 40250635,
    'Hispanic': 44618105,
    'Native American/Native Alaskan': 3739506,
    'White': 197318956,
}

# Gun deaths per 100,000 people of each race, rounded to one decimal.
race_per_hundredk = {
    race: round((deaths / mapping[race]) * 100000, 1)
    for race, deaths in race_counts.items()
}
race_per_hundredk

{'Asian/Pacific Islander': 8.4,
 'Black': 57.9,
 'Hispanic': 20.2,
 'Native American/Native Alaskan': 24.5,
 'White': 33.6}

## Gun related murder rate per 100000

In [10]:
# Parallel per-record lists: intent (index 3) and race (index 7).
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Despite its name, this dict holds raw homicide counts per race here.
homicide_rate_per_hundredk = {}

for race, kind in zip(races, intents):
    if kind != 'Homicide':
        continue
    homicide_rate_per_hundredk[race] = homicide_rate_per_hundredk.get(race, 0) + 1
homicide_rate_per_hundredk

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [11]:
# Rescale the stored per-race values to a figure per 100,000 people, in place.
# NOTE: running this cell a second time rescales the values again.
homicide_rate_per_hundredk.update({
    race: round((count / mapping[race]) * 100000, 1)
    for race, count in homicide_rate_per_hundredk.items()
})
homicide_rate_per_hundredk

{'Asian/Pacific Islander': 3.5,
 'Black': 48.5,
 'Hispanic': 12.6,
 'Native American/Native Alaskan': 8.7,
 'White': 4.6}

Black people are far more likely to be killed in a gun homicide than people of any other race.

# Next steps

- Gun related Suicide,Accidental,etc. rate per 100000 by race
- Homicide rate by gender
- Do gun death rates correlate with location or education?
- Gun related murders committed, filtered by race, to understand the previous numbers and rates

## Gun related Suicide rate per 100000 by race

In [46]:
# Despite its name, this dict holds raw suicide counts per race here.
suicideRatePerRace = {}

for race, kind in zip(races, intents):
    if kind != 'Suicide':
        continue
    suicideRatePerRace[race] = suicideRatePerRace.get(race, 0) + 1

suicideRatePerRace

{'Asian/Pacific Islander': 745,
 'Black': 3332,
 'Hispanic': 3171,
 'Native American/Native Alaskan': 555,
 'White': 55372}

In [47]:
# Rescale the stored per-race values to a figure per 100,000 people, in place.
# NOTE: running this cell a second time rescales the values again.
suicideRatePerRace.update({
    race: round((count / mapping[race]) * 100000, 1)
    for race, count in suicideRatePerRace.items()
})
suicideRatePerRace

{'Asian/Pacific Islander': 4.7,
 'Black': 8.3,
 'Hispanic': 7.1,
 'Native American/Native Alaskan': 14.8,
 'White': 28.1}

We can see that white people are more likely to commit suicide (with a gun) than any other race.

## Gun related Suicide and Homicide counts by gender

In [27]:
# Per-gender tallies of suicides and homicides. The values are raw counts,
# not rates; records with any other intent are ignored.
genders = [row[5] for row in data]
suicideRatePerGender = {}
homicideRatePerGender = {}
for gender, kind in zip(genders, intents):
    if kind == 'Suicide':
        suicideRatePerGender[gender] = suicideRatePerGender.get(gender, 0) + 1
    elif kind == 'Homicide':
        homicideRatePerGender[gender] = homicideRatePerGender.get(gender, 0) + 1
print('Suicide rate per gender : ',suicideRatePerGender)
print('Homicide rate per gender : ',homicideRatePerGender)

Suicide rate per gender :  {'M': 54486, 'F': 8689}
Homicide rate per gender :  {'M': 29803, 'F': 5373}


# Correlation between location and gun death rates

In [30]:
# Place of each record (second-to-last column) and the number of records
# per place.
locations = [row[-2] for row in data]
deathsPerLocation = {}

for el in locations:
    deathsPerLocation[el] = deathsPerLocation.get(el, 0) + 1

deathsPerLocation

{'Farm': 470,
 'Home': 60486,
 'Industrial/construction': 248,
 'NA': 1384,
 'Other specified': 13751,
 'Other unspecified': 8867,
 'Residential institution': 203,
 'School/instiution': 671,
 'Sports': 128,
 'Street': 11151,
 'Trade/service area': 3439}

## Intent for each location : Correlation with location

In [36]:
def intentLocation(location, data):
    """Count records by intent among the rows whose place equals `location`.

    Rows are indexed positionally: the intent is read from index 3 and the
    place from the second-to-last column.

    Returns a dict mapping each intent to its number of matching rows; the
    dict is empty when no row matches.
    """
    tally = {}
    for row in data:
        intent = row[3]
        if row[-2] != location:
            continue
        tally[intent] = tally.get(intent, 0) + 1
    return tally

# Print the intent breakdown for every distinct place in the data.
# Sets are unordered, so the sections may print in a different order
# from one run to the next.
locationsSet = set(locations)
for location in locationsSet:
    breakdown = intentLocation(location, data)
    print('Intents in', location, ':\n', breakdown, '\n')
    

Intents in NA :
 {'Homicide': 1383, 'NA': 1} 

Intents in Sports :
 {'Suicide': 98, 'Homicide': 22, 'Undetermined': 3, 'Accidental': 5} 

Intents in School/instiution :
 {'Homicide': 361, 'Suicide': 295, 'Undetermined': 4, 'Accidental': 11} 

Intents in Home :
 {'Suicide': 45415, 'Undetermined': 535, 'Homicide': 13613, 'Accidental': 923} 

Intents in Other specified :
 {'Suicide': 8031, 'Accidental': 212, 'Homicide': 5429, 'Undetermined': 79} 

Intents in Trade/service area :
 {'Homicide': 1663, 'Suicide': 1723, 'Undetermined': 18, 'Accidental': 35} 

Intents in Other unspecified :
 {'Accidental': 350, 'Suicide': 4774, 'Homicide': 3604, 'Undetermined': 139} 

Intents in Street :
 {'Suicide': 2181, 'Homicide': 8867, 'Undetermined': 25, 'Accidental': 78} 

Intents in Industrial/construction :
 {'Homicide': 85, 'Suicide': 155, 'Accidental': 7, 'Undetermined': 1} 

Intents in Residential institution :
 {'Homicide': 73, 'Suicide': 125, 'Accidental': 5} 

Intents in Farm :
 {'Homicide': 76, 

We can see that the distribution of gun suicides vs. homicides at home is different from the one in the streets.
Also, aside from records with no recorded place (`NA`), streets and schools are the only locations where homicides outnumber suicides.

## Distribution of locations for each intent

In [50]:
def locationIntent(intent, data):
    """Count records by place among the rows whose intent equals `intent`.

    Rows are indexed positionally: the place is read from the
    second-to-last column and the intent from index 3.

    Returns a dict mapping each place to its number of matching rows; the
    dict is empty when no row matches.
    """
    tally = {}
    for row in data:
        place = row[-2]
        if row[3] != intent:
            continue
        tally[place] = tally.get(place, 0) + 1
    return tally

# Print the place breakdown for every distinct intent in the data.
# Sets are unordered, so the sections may print in a different order
# from one run to the next.
intentsSet = set(intents)
for intent in intentsSet:
    breakdown = locationIntent(intent, data)
    print('Location distribution for', intent, ':\n', breakdown, '\n')

Location distribution for NA :
 {'NA': 1} 

Location distribution for Suicide :
 {'Home': 45415, 'Street': 2181, 'Other specified': 8031, 'Other unspecified': 4774, 'Trade/service area': 1723, 'Sports': 98, 'School/instiution': 295, 'Farm': 378, 'Industrial/construction': 155, 'Residential institution': 125} 

Location distribution for Undetermined :
 {'Home': 535, 'Other specified': 79, 'Other unspecified': 139, 'Street': 25, 'Trade/service area': 18, 'School/instiution': 4, 'Farm': 3, 'Industrial/construction': 1, 'Sports': 3} 

Location distribution for Accidental :
 {'Other specified': 212, 'Home': 923, 'Other unspecified': 350, 'Street': 78, 'Industrial/construction': 7, 'Trade/service area': 35, 'School/instiution': 11, 'Farm': 13, 'Sports': 5, 'Residential institution': 5} 

Location distribution for Homicide :
 {'Home': 13613, 'Other specified': 5429, 'NA': 1383, 'Other unspecified': 3604, 'Street': 8867, 'Trade/service area': 1663, 'Farm': 76, 'Industrial/construction': 85, 'S

## Correlation with education

In [53]:
# Education code of each record, taken from the last column (kept as a string).
educationList = [row[-1] for row in data]

def intentEducation(education, data):
    """Count records by intent among the rows whose education equals `education`.

    Rows are indexed positionally: the intent is read from index 3 and the
    education code from the last column. The comparison is an exact match
    against the raw column value.

    Returns a dict mapping each intent to its number of matching rows; the
    dict is empty when no row matches.
    """
    tally = {}
    for row in data:
        intent = row[3]
        if row[-1] != education:
            continue
        tally[intent] = tally.get(intent, 0) + 1
    return tally

educationSet = set(educationList)

# Human-readable labels for the numeric education codes.
mapEducation = {
    1: 'Less than High School',
    2: 'Graduated from High School or equivalent',
    3: 'Some College',
    4: 'At least graduated from College',
    5: 'Not available',
}

# Print the intent breakdown for every distinct education value. Sets are
# unordered, so the sections may print in a different order between runs.
for education in educationSet:
    # 'NA' has no numeric code, so it is printed under its raw value.
    label = education if education == 'NA' else mapEducation[int(education)]
    print(label, ':\n', intentEducation(education, data), '\n')

NA :
 {'Homicide': 34, 'Accidental': 13, 'Suicide': 6} 

Graduated from High School or equivalent :
 {'Suicide': 26321, 'Undetermined': 324, 'Homicide': 15649, 'Accidental': 633} 

Less than High School :
 {'Suicide': 9292, 'Homicide': 11839, 'Undetermined': 200, 'Accidental': 492} 

Some College :
 {'Suicide': 15533, 'Accidental': 327, 'Homicide': 5640, 'Undetermined': 180} 

Not available :
 {'Suicide': 876, 'Homicide': 455, 'Accidental': 28, 'Undetermined': 10} 

At least graduated from College :
 {'Suicide': 11147, 'Undetermined': 93, 'Homicide': 1559, 'Accidental': 146, 'NA': 1} 



- The total number of people at each education level would help put those numbers into perspective. For now we can only compare the different intents within each education level, or how the distribution of intents changes with education.
- For people who graduated from college, `suicides` are about 7 times as frequent as `homicides` (11147 vs. 1559). The more educated the group, the more the ratio of `suicides` to `homicides` tilts towards `suicide`.