In [1]:
import csv 
import datetime

with open('guns.csv','r') as csvfile:
    read = csv.reader(csvfile)
    data = list(read)

In [2]:
header = data[:1]
data = data[1:]


### Next: I want to get some unique data. I want the years in the data. Which should be 2012, 2013, 2014 and how many deaths per year.

In [3]:
years = [row[1] for row in data]
year_counts = {}
for i in years:
    if i not in year_counts:
        year_counts[i] = 1
    year_counts[i] += 1
year_counts

{'2012': 33564, '2013': 33637, '2014': 33600}

### Next: I want to break it down even more. I want to see deaths per month. Since day is not given in the original data we are just assigning a 1 to the day.

In [4]:
dates = [datetime.datetime(year=int(row[1]),month=int(row[2]),day=1) for row in data]
date_counts = {}
for i in dates:
    if i not in date_counts:
        date_counts[i] = 1
    date_counts[i] += 1
# date_counts

### Next: I want to see how many deaths: Male vs. Female. Also breaking it down to race specific categories and how many deaths in each.

In [5]:
sex = [row[5] for row in data]
race = [row[7] for row in data]

sex_counts = {}
race_counts = {}

for i in sex:
    if i not in sex_counts:
        sex_counts[i] = 1
    sex_counts[i] += 1

for i in race:
    if i not in race_counts:
        race_counts[i] = 1
    race_counts[i] += 1
    
sex_counts, race_counts


({'M': 86350, 'F': 14450},
 {'Asian/Pacific Islander': 1327,
  'White': 66238,
  'Native American/Native Alaskan': 918,
  'Black': 23297,
  'Hispanic': 9023})

## Next: This entire next section is comparing the data from gun deaths to the total population of each race in the U.S. The goal is to find the percent of death per 100,000 in the U.S. relative to the specific category of race.

In [6]:
with open('census.csv','r') as csvfile:
    read = csv.reader(csvfile)
    census = list(read)
    header = census[:1]
    census = census[1:]
    

### Next: Created a custom dict because the verbage of both csv files, in relation to the race, are different.

In [7]:
mapping = {
    'Asian/Pacific Islander': 15159516 + 674625,
    'Black': 40250635,
    'Native American/Native Alaskan': 3739506,
    'Hispanic': 44618105,
    'White': 197318956,
}

race_per_hundredk = {}

for key, value in race_counts.items():
    race_per_hundredk[key] = '{}%'.format(round((value / mapping[key]) * 100000))
    

### Next: I wanted to extract the data that specifically addressed homicide victims. Then try to find the unique data. > How many homicide victims per race category.

In [8]:
homicide_data = []
for row in data:
    if row[3] == "Homicide":
        homicide_data.append(row)

homicide_victim_by_race = {}

for row in homicide_data:
    if row[7] not in homicide_victim_by_race:
        homicide_victim_by_race[row[7]] = 1
    homicide_victim_by_race[row[7]] += 1
homicide_victim_by_race

{'White': 9148,
 'Asian/Pacific Islander': 560,
 'Black': 19511,
 'Native American/Native Alaskan': 327,
 'Hispanic': 5635}

### Next: I wanted to find the percent per 100,000, per race category, of homicide victims.

In [9]:
race_per_hundredtk = {}
for key, value in homicide_victim_by_race.items():
    race_per_hundredtk[key] = '{}%'.format(round((value / mapping[key]) * 100000))
race_per_hundredtk

{'White': '5%',
 'Asian/Pacific Islander': '4%',
 'Black': '48%',
 'Native American/Native Alaskan': '9%',
 'Hispanic': '13%'}

### Next: I just wanted to find the % of the population each race occupies in the U.S.

In [10]:
percent_pop_by_race = {}
for key, value in race_counts.items():
    percent_pop_by_race[key] = '{}%'.format(round((mapping[key] / 316200000) * 100))
percent_pop_by_race

{'Asian/Pacific Islander': '5%',
 'White': '62%',
 'Native American/Native Alaskan': '1%',
 'Black': '13%',
 'Hispanic': '14%'}

### Next: this data is seeing all deaths related to guns per race category. Not homicide specific. (per 100,000)

In [11]:
race_per_hundredk = {}

for key, value in race_counts.items():
    race_per_hundredk[key] = '{}%'.format(round((value / mapping[key]) * 100000))

race_per_hundredk

{'Asian/Pacific Islander': '8%',
 'White': '34%',
 'Native American/Native Alaskan': '25%',
 'Black': '58%',
 'Hispanic': '20%'}

### Next: This was the way the homicide extraction was done, but I like my way from previous. I could just be noob.

In [12]:
intents = [row[3] for row in data]
races = [row[7] for row in data]

homicide_race_counts = {}
for i,race in enumerate(races):
    if race not in homicide_race_counts:
        homicide_race_counts[race] = 0
    if intents[i] == "Homicide":
        homicide_race_counts[race] += 1

race_per_hundred_thou = {}
for k,v in homicide_race_counts.items():
    race_per_hundred_thou[k] = '{}%'.format(round((v / mapping[k]) * 100000))

race_per_hundred_thou


{'Asian/Pacific Islander': '4%',
 'White': '5%',
 'Native American/Native Alaskan': '9%',
 'Black': '48%',
 'Hispanic': '13%'}

### Next: I wanted to find the difference in suicides between males and females.

In [21]:
death_by_suicide = []
for row in data:
    if row[3] == "Suicide":
        death_by_suicide.append(row)

male_v_female_suicide = {}
for row in death_by_suicide:
    if row[5] not in male_v_female_suicide:
        male_v_female_suicide[row[5]] = 1
    male_v_female_suicide[row[5]] += 1

male_v_female_suicide
    


{'M': 54487, 'F': 8690}

In [22]:
total_suicides = len(death_by_suicide)
total_suicides

63175

In [23]:
percent_per_gender = {}

for key, value in male_v_female_suicide.items():
    percent_per_gender[key] = '{}%'.format(round((value / total_suicides) * 100))

percent_per_gender


{'M': '86%', 'F': '14%'}

### As you can see... Males dominate this category with 86%, whereas Females are 14% of the total commited in this time frame.