# Gun Deaths Exploration using pandas

## The guns.csv file contains information on gun deaths in the US from 2012 to 2014. Each row in the dataset represents a single fatality. The columns contain demographic and other information about the victim.

First load the file using the .csv module and simply explore the .csv file

In [1]:
import csv
raw_file = open('guns.csv')
data = list(csv.reader(raw_file))
#Display the first 5 rows of data to verify everything
print(data[0:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


In [2]:
#Extract the first row of data, and assign it to the variable headers
headers = data[0]
#Remove the first row from data
data = data[1:]
print(headers)
print(data[0:5])

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


# Use a list comprehension to extract the year column from data
print(years) is going to lead us down the rabbit hole of a never-ending output statements. So let's skip it for now

In [3]:
years = [year[1] for year in data]
# print(years)

# Next let's explore how many gun deaths occur in each year. 
Remember the gun_deaths.csv file that we loaded contains only the records for the years 2012, 2013, 2014

In [4]:
years_count = {}
for year in years:
    if year in years_count:
        years_count[year] += 1
    else:
        years_count[year] = 1
print(years_count)

{'2012': 33563, '2013': 33636, '2014': 33599}


## Next, it's time to convert the 'years' in each row of the .csv file into datetime.datetime format


In [5]:
#create a datetime.datetime object for each row
import datetime
dates = [(datetime.datetime(year=int(row[1]), 
                                month=int(row[2]),
                               day=1)) for row in data]   
# Calculate each unique date occuring in dates variable
date_counts = {}
for date in dates:
    if date in date_counts:
        date_counts[date] += 1
    else:
        date_counts[date] = 1


## Let's try to calculate the no. of times each item in the sex column occurs

In [6]:
sex = [item[5] for item in data]
sex_counts = {}
for element in sex:
    if element in sex_counts:
        sex_counts[element] += 1
    else:
        sex_counts[element] = 1

print(sex_counts)

{'M': 86349, 'F': 14449}


## Calculating the No. of times each item in the race column occurs


In [7]:
race = [item[7] for item in data]
race_counts = {}
for element in race:
    if element in race_counts:
        race_counts[element] += 1
    else:
        race_counts[element] = 1

print(race_counts)

{'Asian/Pacific Islander': 1326, 'White': 66237, 'Native American/Native Alaskan': 917, 'Black': 23296, 'Hispanic': 9022}


## It would be more insightful to split the Male&Female gun deaths by race. Not surprisingly (since the report is about gun deaths in the U.S., white people have reported more deaths)

In [8]:
# Census.csv data contains information 
# on the total population of the US, 
# as well as the total population of each racial group in the US

raw_census = open('census.csv')
census = list(csv.reader(raw_census))
print(census[1][14])

15159516


In [9]:
# Manually create a dictionary, mapping that maps each key
# from race_counts to the population count of the race from census
mapping = {}

mapping['Asian/Pacific Islander'] = int(census[1][14] + census[1][15])
mapping['Black'] = int(census[1][12])
mapping['Native American/Native Alaskan'] = int(census[1][13])
mapping['Hispanic'] = int(census[1][11])
mapping['White'] = int(census[1][10])


In [10]:
# Convert each race's population into a more readable format
race_per_hundredk = {}
for key in race_counts:
    race_per_hundredk[key] = (race_counts[key] / mapping[key]) * 100000
print(race_per_hundredk)

{'Asian/Pacific Islander': 8.746980714890115e-06, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


In [11]:
intents = [item[3] for item in data]
races = [item[7] for item in data]
homicide_race_counts = {}

for i, race in enumerate(races):
    if intents[i] == 'Homicide':
        if race not in homicide_race_counts:
            homicide_race_counts[race] = 0
        else:
            homicide_race_counts[race] += 1

race_per_hundredk = {}
for key in race_counts:
    race_per_hundredk[key] = (race_counts[key] / mapping[key]) * 100000
print(race_per_hundredk)

{'Asian/Pacific Islander': 8.746980714890115e-06, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}
