In [1]:
import csv

Import CSV to data variable

In [2]:
with open("guns.csv", "r") as f:
    data = list(csv.reader(f))

Dump data header and first row

In [29]:
data[:2]

[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3']]

In [4]:
headers = data[:1] # Create headers

In [67]:
headers

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education']]

In [5]:
data = data[1:] # Remove headers from data

In [30]:
data[:1] # Check if headers were removed

[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4']]

In [82]:
column_year = 1
column_month = 2
column_intent = 3
column_sex = 5
column_race = 7

In [83]:
years = [y[column_year] for y in data]

In [84]:
def cnt_unique_items_in_list(list_with_items):
    dic = {}
    for item in list_with_items:
        if item not in dic:
            dic[item] = 1
        else:
            dic[item] +=1
    return dic


In [85]:
year_counts = cnt_unique_items_in_list(years)

In [86]:
import datetime
dates =  [datetime.datetime(month=int(r[column_month]), year=int(r[column_year]), day=1)  for r in data]

In [87]:
date_counts = cnt_unique_items_in_list(dates)

In [88]:
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [89]:
sex_counts = cnt_unique_items_in_list([item[column_sex] for item in data])

In [90]:
sex_counts


{'F': 14449, 'M': 86349}

In [91]:
race_counts = cnt_unique_items_in_list([item[column_race] for item in data])

In [92]:
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [93]:
with open("census.csv", "r") as census_file:
    census = list(csv.reader(census_file))

In [94]:
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [95]:
# Total number of residents based on the race
mapping = {
    "Asian/Pacific Islander": (674625 + 15159516) ,
    "Black": 40250635,
    "Native American/Native Alaskan": 3739506,
    "Hispanic": 44618105,
    "White": 197318956
    
}

In [96]:
race_per_hundredk = {}

In [97]:
for key, value in mapping.items():
    print(key)
    print(value)
    race_per_hundredk[key] = race_counts[key]/value * 100000

Asian/Pacific Islander
15834141
Black
40250635
Native American/Native Alaskan
3739506
Hispanic
44618105
White
197318956


In [98]:
race_per_hundredk # e.g. ~33 white persons die because of guns :(

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

In [72]:
intents = [ r[column_intent] for r in data]

In [73]:
races = [ r[column_race] for r in data]


In [99]:
homicide_race_counts = {}

In [100]:
for i, race in enumerate(races):
    if intents[i] in "Homicide":
        if race not in homicide_race_counts:
            homicide_race_counts[race] = 0
        else:
            homicide_race_counts[race] += 1
         

In [102]:
homicide_race_counts   

{'Asian/Pacific Islander': 558,
 'Black': 19509,
 'Hispanic': 5633,
 'Native American/Native Alaskan': 325,
 'White': 9146}

In [103]:
homicide_race_counts_per_hundredk = {}
for key, value in mapping.items():
    print(key)
    print(value)
    homicide_race_counts_per_hundredk[key] = homicide_race_counts[key]/value * 100000

Asian/Pacific Islander
15834141
Black
40250635
Native American/Native Alaskan
3739506
Hispanic
44618105
White
197318956


In [104]:
homicide_race_counts_per_hundredk 

{'Asian/Pacific Islander': 3.5240307636517825,
 'Black': 48.468800554326656,
 'Hispanic': 12.624919861567406,
 'Native American/Native Alaskan': 8.690987526159873,
 'White': 4.635135004464548}