### 1. Reading in a first dataset.

In [1]:
import csv
f = open('guns.csv')
data = list(csv.reader(f))

In [2]:
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

### 2. Extracting header row from data, displaying header and the first 5 rows of data to verify that header is not in data anymore.

In [3]:
headers = data[:1]
data = data[1:]
print(headers)
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']]
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


### 3. Counting gun deaths by year.

In [4]:
years = [each[1] for each in data]
year_counts = {}
for year in years:
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

### Findings:
Difference between the total number of gun deaths per each year is insignificant and the results are insufficient for a full analysis.

### 4. Exploring gun deaths by month and year.

In [19]:
import datetime
dates = [datetime.datetime(year=int(each[1]), month=int(each[2]), day=1) for each in data]
date_counts = {}
for date in dates:
    if date in sorted(date_counts):
        date_counts[date] += 1
    else:
        date_counts[date] = 1
        
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### 5.1. Exploring gun deaths by sex.

In [6]:
genders = [each[5] for each in data]
gender_counts = {}
for gender in genders:
    if gender in gender_counts:
        gender_counts[gender] += 1
    else:
        gender_counts[gender] = 1

gender_counts

{'F': 14449, 'M': 86349}

### Findings:

Gun deaths are much more common among men (86% of total quality). Women represent 14% of the total quality.

### 5.2 Exploring gun deaths by race.

In [7]:
races = [each[7] for each in data]
race_counts = {}
for race in races:
    if race in race_counts:
        race_counts[race] += 1
    else:
        race_counts[race] = 1
        
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

### Findings:

According to the сalculation above, people of white race had the highest number of gun death. Considering the fact that people of White race are the majority of the US population, I will continue my analysis.

### 6. Reading in a second dataset. 

In [8]:
import csv
with open('census.csv') as f:
    reader = csv.reader(f)
    census = list(reader)

In [9]:
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

### 7. Computing rates of gun deaths per race.

In [10]:
mapping = {
    "Asian/Pacific Islander": 15159516 + 674625,
    "Native American/Native Alaskan": 3739506,
    "Black": 40250635,
    "Hispanic": 44618105,
    "White": 197318956 
}
race_per_hundredk = {}
for k,v in race_counts.items():
    race_per_hundredk[k] = (v/mapping[k])*100000
    
    
race_per_hundredk
    
    

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

### Findings:
The calculation above provides a deeper analysis of gun death by race and here we took into account the total population of the US. The results show us that the the highest rate of gun death belong to people of Black race (58 per 100000). White people have the second highest result (34 per 100000).

### 8. Filtering by intent.

In [11]:
intents = [each[3] for each in data]
races = [each[7] for each in data]
homicide_race_counts = {}

for i, race in enumerate(races):
        if race not in homicide_race_counts:
            homicide_race_counts[race] = 0
        if intents[i] == 'Homicide':
            homicide_race_counts[race] += 1

race_per_hundredk = {}
for k,v in homicide_race_counts.items():
    race_per_hundredk[k] = (v/mapping[k])*100000

race_per_hundredk


{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

### Findings:

Gun related homicides disproportionally affect people of Black and Hispanic races (1st and 2nd places respectively)

### 8. Finding a link between month and homicide rate

In [12]:
months = [each[2] for each in data]
homicide_rate_per_month = {}

for i, month in enumerate(months):
    if month not in homicide_rate_per_month:
        homicide_rate_per_month[month] = 0
    if intents[i] == 'Homicide':
        homicide_rate_per_month[month] += 1

total_population = sum(mapping[k] for k in mapping)
for k in homicide_rate_per_month:
    homicide_rate_per_month[k] = (homicide_rate_per_month[k]/total_population)*100000

homicide_rate_per_month

{'01': 0.9374958276216315,
 '02': 0.721762429324819,
 '03': 0.9212578299003661,
 '04': 0.9427980309591875,
 '05': 0.9862098207854278,
 '06': 1.037243527909405,
 '07': 1.083306419404423,
 '08': 1.0355865893664187,
 '09': 0.9828959436994553,
 '10': 0.9835587191166497,
 '11': 0.9673207213953843,
 '12': 1.0574581781338372}

### Findings:

Gun deaths are the more frequent during summer months and at the end of the year. The lowest rate is in February. 

### 10. Exploring:
#### - homicide rate by gender
#### - accidental rate by gender
#### - accidental rate by race

In [20]:
def calc_rate_intent(column_num, intent):
    column_lst = [each[column_num] for each in data]
    rate_dict = {}
    for i, item in enumerate(column_lst):
        if intents[i] == intent:
            if item in rate_dict:
                rate_dict[item] += 1
            else:
                rate_dict[item] = 1
    
    for key in rate_dict:
        rate_dict[key] = (rate_dict[key]/total_population)*1000000
    return rate_dict

homicide_rate_gender = calc_rate_intent(5,'Homicide')
accidental_rate_gender = calc_rate_intent(5, 'Accidental')
accidental_rate_race = calc_rate_intent(7, 'Accidental')

homicide_rate_gender

{'F': 17.805461582930455, 'M': 98.7634787932396}

In [21]:
accidental_rate_gender

{'F': 0.7224252047420135, 'M': 4.7090193391669795}

In [22]:
accidental_rate_race

{'Asian/Pacific Islander': 0.039766525031670476,
 'Black': 1.086951684198993,
 'Hispanic': 0.48051217746601826,
 'Native American/Native Alaskan': 0.07290529589139588,
 'White': 3.751308861320915}

### Findings:
1. Gun deaths are much more frequent among men despite the difference in intent (homicide or accidental).
2. Accidental gun death are more frequent among people of White race. People of Black race have the second greatest result.

### 11. Exploring the correlation of:
#### - gun death rate and location
#### - gun death rate and education

In [16]:
def calc_rate(column_num):
    column_lst = [each[column_num] for each in data]
    rate_dict = {}
    for i, item in enumerate(column_lst):
            if item in rate_dict:
                rate_dict[item] += 1
            else:
                rate_dict[item] = 1
    
    for key in sorted(rate_dict):
        rate_dict[key] = (rate_dict[key]/total_population)*100000
    return rate_dict

gun_death_location = calc_rate(9)
gun_death_education = calc_rate(10)

In [17]:
gun_death_location

{'Farm': 0.15575222304070938,
 'Home': 20.044316942213502,
 'Industrial/construction': 0.08218415173211897,
 'NA': 0.45864058869859947,
 'Other specified': 4.556912380920839,
 'Other unspecified': 2.938414812131851,
 'Residential institution': 0.06727170484524256,
 'School/instiution': 0.2223611524687574,
 'Sports': 0.042417626700448505,
 'Street': 3.695304338567979,
 'Trade/service area': 1.1396423298659564}

### Findings:
Gun deaths are the most frequent at homes (20 cases per 100000 people)

In [18]:
gun_death_education

{'1': 7.231873964717873,
 '2': 14.225480166954322,
 '3': 7.184485522388465,
 '4': 4.29014527550005,
 '5': 0.4536697730696407,
 'NA': 0.017563548555654458}

#### Findings:

Gun deaths are the most frequent among people with High School education.