The dataset is stored in the guns.csv file. It contains information on gun deaths in the US from 2012 to 2014. Each row in the dataset represents a single fatality. The columns contain demographic and other information about the victim.

We are going to explore this data by computing the gun death rate by race and the homicide rate (deaths by homicide) by race.

Here are some potential next steps: 

   Explore the homicide rate by gender.
   
   Explore the rates of other intents, like Accidental, by gender and race.
   
 

In [1]:
import csv
import datetime
import re
from collections import Counter


In [2]:
# Load every row of the gun-deaths dataset into memory as a list of lists.
# newline="" is what the csv module asks for when handing it a file object,
# so that line endings inside quoted fields are parsed by the reader itself.
with open("guns.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    data = list(csvreader)

# Peek at the header row and the first four records
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


In [3]:
# Separate the column names (first row) from the records that follow it;
# from here on `data` holds only the fatality rows
header, data = data[0], data[1:]

# Confirm the header is no longer the first row
print(data[:5])

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


In [4]:
# Year of every fatality (column index 1), kept as the string read from the CSV
years = [row[1] for row in data]

# Gun deaths by year. Counter does the tallying; converting back to a plain
# dict keeps the printed form and the first-seen key order unchanged.
year_counts = dict(Counter(years))

print(year_counts)

{'2012': 33563, '2013': 33636, '2014': 33599}


In [5]:
# One datetime per fatality, built from the year (column index 1) and month
# (column index 2). The header has no day column, so the day is pinned to 1
# and each value effectively identifies a calendar month.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

print(dates[:5])
print()

# Gun deaths by month and year. Counter does the tallying; dict() keeps the
# printed form and first-seen key order of a plain dictionary.
date_counts = dict(Counter(dates))

print(date_counts)
    
    

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]

{datetime.datetime(2012, 1, 1, 0, 0): 2758, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2012, 4, 1, 0, 0): 2795, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2013, 1, 1, 0, 0): 2864, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2013, 3, 1, 0, 0): 2862, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2013, 7, 1, 0, 0)

In [6]:
# Sex and race counts: number of fatalities for each distinct value of the
# column. dict(Counter(...)) yields a plain dict in first-seen key order.
sex_counts = dict(Counter(row[5] for row in data))   # sex is column index 5
race_counts = dict(Counter(row[7] for row in data))  # race is column index 7

print(sex_counts)
print()
print(race_counts)

{'M': 86349, 'F': 14449}

{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}


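### . The number of gun deaths per year is almost the same across the three years
### . Men account for roughly six times as many gun deaths as women
### . White people account for the largest number of gun deaths in absolute terms; relative to population size, Black people are the most affected (see the per-100,000 rates below)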


In [7]:
# Census file: read every row (header plus data) into a list of lists.
# newline="" is what the csv module asks for when handing it a file object,
# so that line endings inside quoted fields are parsed by the reader itself.
with open("census.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    census = list(csvreader)

# Display the parsed rows
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [8]:
# Population of each race category, keyed by the race labels that appear in
# race_counts. The figures are copied by hand from the census row shown above.
mapping = {
    # 'Race Alone - Asian' + 'Race Alone - Native Hawaiian and Other Pacific Islander'
    'Asian/Pacific Islander': 15_159_516 + 674_625,
    # 'Race Alone - Black or African American'
    'Black': 40_250_635,
    # 'Race Alone - Hispanic'
    'Hispanic': 44_618_105,
    # 'Race Alone - American Indian and Alaska Native'
    'Native American/Native Alaskan': 3_739_506,
    # 'Race Alone - White'
    'White': 197_318_956,
}
mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [9]:
# Re-display the raw gun-death counts per race computed earlier; its keys are
# the same race labels used as keys in `mapping`
race_counts

{'Asian/Pacific Islander': 1326,
 'White': 66237,
 'Native American/Native Alaskan': 917,
 'Black': 23296,
 'Hispanic': 9022}

In [10]:
# Gun deaths per 100,000 people for each race: the raw count divided by the
# population recorded for that race in `mapping`, then scaled by 100,000.
# The counts come from every row of the dataset, so the rate covers the whole
# period in the file rather than a single year.
race_per_hundredk = {
    race: (deaths / mapping[race]) * 100000
    for race, deaths in race_counts.items()
}

print("Deaths rate by race")
race_per_hundredk

Deaths rate by race


{'Asian/Pacific Islander': 8.374309664161762,
 'White': 33.56849303419181,
 'Native American/Native Alaskan': 24.521955573811088,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907}

In [11]:
# Intent (column index 3) and race (column index 7) of every fatality,
# as two parallel lists in row order
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Number of homicide deaths per race. The intent is tested once per row and
# Counter does the tallying; dict() keeps a plain dictionary whose keys appear
# in the order each race is first seen among the homicide rows.
homicide_race_counts = dict(
    Counter(race for intent, race in zip(intents, races) if intent == "Homicide")
)

homicide_race_counts
    

{'White': 9147,
 'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Native American/Native Alaskan': 326,
 'Hispanic': 5634}

In [12]:
# Homicide deaths per 100,000 people for each race: the homicide count divided
# by the population recorded for that race in `mapping`, then scaled by 100,000
homicide_race_per_hundredk = {
    race: (homicides / mapping[race]) * 100000
    for race, homicides in homicide_race_counts.items()
}

print("Homicide rate by race")
homicide_race_per_hundredk

Homicide rate by race


{'White': 4.6356417981453335,
 'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Native American/Native Alaskan': 8.717729026240365,
 'Hispanic': 12.627161104219914}