In [2]:
import csv
import datetime

###### function to read the csv files and make lists from them

In [3]:
def csv_reader(file_name):
    """Read a CSV file and return all of its rows.

    Parameters
    ----------
    file_name : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list of list of str
        One inner list per row in file order; a header row, if the file has
        one, is returned as the first element.
    """
    # newline="" hands line-ending handling to the csv module, so line breaks
    # embedded inside quoted fields are kept exactly as written in the file.
    with open(file_name, "r", newline="") as file:
        return list(csv.reader(file))

###### the main lists 

In [4]:
# Load both input tables as raw lists of rows (every cell is still a string).
data, census = csv_reader('guns.csv'), csv_reader('census.csv')

###### extract the header from the main data

In [5]:
# Peel the column-name row off the front of the table.
# NOTE: this rebinds `data`, so running the cell a second time without
# reloading the CSV would strip a real record and store it in `headers`.
headers, data = data[0], data[1:]

In [6]:
# Show the column names; their positions are the row indices used in later cells.
print(headers)

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


###### headers explanation:
* '' -- this is an identifier column, which contains the row number. It's common in CSV files to include a unique identifier for each row, but we can ignore it in this analysis.
* year -- the year in which the fatality occurred.
* month -- the month in which the fatality occurred.
* intent -- the intent of the perpetrator of the crime. This can be Suicide, Accidental, NA, Homicide, or Undetermined.
* police -- whether a police officer was involved with the shooting. Either 0 (false) or 1 (true).
* sex -- the gender of the victim. Either M or F.
* age -- the age of the victim.
* race -- the race of the victim. 
  - Either Asian/Pacific Islander, Native American/Native Alaskan, Black, Hispanic, or White.
* hispanic -- a code indicating the Hispanic origin of the victim.
* place -- where the shooting occurred. Has several categories, which you're encouraged to explore on your own.
* education -- educational status of the victim. Can be one of the following:
  1. -- Less than High School
  2. -- Graduated from High School or equivalent
  3. -- Some College
  4. -- At least graduated from College
  5. -- Not available


###### get the datetime format from the provided year and month columns

In [7]:
# One datetime per record, built from the year (column 1) and month (column 2).
# Only year and month are read from the row, so the day is fixed to the 1st.
dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in data
]

###### make the date_counts dict to count the number of shootings in each month in each year 

In [8]:
# Tally records per calendar month, keyed by a 'YYYY,MM' string.
date_counts = {}
for moment in dates:
    month_key = moment.strftime('%Y,%m')
    date_counts[month_key] = date_counts.get(month_key, 0) + 1


# Number of shootings in US for each month from 2012 to 2014

In [9]:
# Display the per-month totals (keys are 'YYYY,MM' strings).
print (date_counts)

{'2012,01': 2758, '2012,02': 2357, '2012,03': 2743, '2012,04': 2795, '2012,05': 2999, '2012,06': 2826, '2012,07': 3026, '2012,08': 2954, '2012,09': 2852, '2012,10': 2733, '2012,11': 2729, '2012,12': 2791, '2013,01': 2864, '2013,02': 2375, '2013,03': 2862, '2013,04': 2798, '2013,05': 2806, '2013,06': 2920, '2013,07': 3079, '2013,08': 2859, '2013,09': 2742, '2013,10': 2808, '2013,11': 2758, '2013,12': 2765, '2014,01': 2651, '2014,02': 2361, '2014,03': 2684, '2014,04': 2862, '2014,05': 2864, '2014,06': 2931, '2014,07': 2884, '2014,08': 2970, '2014,09': 2914, '2014,10': 2865, '2014,11': 2756, '2014,12': 2857}


###### make the sex_counts dict to count the number of shootings for each sex 

In [10]:
# Tally records by the value in the 'sex' column (index 5).
sex_counts = {}
for record in data:
    sex = record[5]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1

# Number of shootings in US for each sex from 2012 to 2014

In [11]:
# Display the totals per value of the 'sex' column.
print(sex_counts)

{'M': 86349, 'F': 14449}


###### make the race_counts dict to count the number of shootings for each race 

In [12]:
# Tally records by the value in the 'race' column (index 7).
race_counts = {}
for record in data:
    race = record[7]
    race_counts[race] = race_counts.get(race, 0) + 1

###### make the census_dict to have the total number of each race in US
###### make the mapping dict from census file based on the names in data file


In [13]:
# Pair each census column name (row 0) with the value at the same position
# in the row that follows it (row 1). Values stay as strings.
census_dict = {
    column: census[1][position]
    for position, column in enumerate(census[0])
}
print(census_dict)

{'Id': '0100000US', 'Year': 'April 1, 2010 Census', 'Sex': 'Both Sexes', 'Hispanic Origin': 'Total', 'Id2': '', 'Geography': 'United States', 'Total': '308745538', 'Race Alone - White': '197318956', 'Race Alone - Hispanic': '44618105', 'Race Alone - Black or African American': '40250635', 'Race Alone - American Indian and Alaska Native': '3739506', 'Race Alone - Asian': '15159516', 'Race Alone - Native Hawaiian and Other Pacific Islander': '674625', 'Two or More Races': '6984195'}


In [14]:
# Population for each race label used in the shootings data, taken from the
# census columns. The census has separate Asian and Pacific Islander columns,
# which are added together to match the combined label.
# Every value is converted to int so the mapping holds a single numeric type
# (the census values themselves are read from the CSV as strings).
mapping = {
    'Asian/Pacific Islander': (
        int(census_dict['Race Alone - Asian'])
        + int(census_dict['Race Alone - Native Hawaiian and Other Pacific Islander'])
    ),
    'Black': int(census_dict['Race Alone - Black or African American']),
    'Native American/Native Alaskan': int(census_dict['Race Alone - American Indian and Alaska Native']),
    'Hispanic': int(census_dict['Race Alone - Hispanic']),
    'White': int(census_dict['Race Alone - White']),
}

###### make the race_per_hundredk dict to count the number of shootings for each race per 100,000 people


In [15]:
# Rate per 100,000 people: a race's record count divided by its population
# from `mapping`, then scaled by 100000.
race_per_hundredk = {
    race: (int(count) / int(mapping[race])) * 100000
    for race, count in race_counts.items()
}


# Number of shootings in US for each race per 100,000 people from 2012 to 2014

In [16]:
# Display the per-race rates (count / population * 100000).
print(race_per_hundredk)

{'Asian/Pacific Islander': 8.374309664161762, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


###### make the education_counts and education_mapping dict to count the number of shootings based on education

In [26]:
# Tally records by the raw code in the 'education' column (index 10).
education_counts = {}
for record in data:
    code = record[10]
    education_counts[code] = education_counts.get(code, 0) + 1

# Re-key the tallies with readable labels. Code '5' and the literal 'NA' are
# both counted under 'Not available'.
# NOTE: the variable name is misspelled ("eduaction"); it is kept because a
# later cell prints it under this exact name.
eduaction_mapping = {
    'Less than High School': education_counts['1'],
    'Graduated from High School or equivalent': education_counts['2'],
    'Some College': education_counts['3'],
    'At least graduated from College': education_counts['4'],
    'Not available': education_counts['5'] + education_counts['NA'],
}

# Number of shootings in US based on education from 2012 to 2014

In [30]:
# Display the totals per education label.
print (eduaction_mapping)

{'Less than High School': 21823, 'Graduated from High School or equivalent': 42927, 'Some College': 21680, 'At least graduated from College': 12946, 'Not available': 1422}
