In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
from datetime import datetime

In [2]:
#Load the raw data
mass_2014 = pd.read_csv("Original_Mass_Shooting_Data/2014.csv")
mass_2015 = pd.read_csv("Original_Mass_Shooting_Data/2015.csv")
mass_2016 = pd.read_csv("Original_Mass_Shooting_Data/2016.csv")
mass_2017 = pd.read_csv("Original_Mass_Shooting_Data/2017.csv")

In [3]:
#add column for total victims from killed and injured
mass_2014['Total_Victims'] = mass_2014['# Killed'] + mass_2014['# Injured']
mass_2015['Total_Victims'] = mass_2015['# Killed'] + mass_2015['# Injured']
mass_2016['Total_Victims'] = mass_2016['# Killed'] + mass_2016['# Injured']
mass_2017['Total_Victims'] = mass_2017['# Killed'] + mass_2017['# Injured']

In [4]:
#totals 2014
total2014 = mass_2014['Total_Victims'].sum()
total_killed_2014 = mass_2014['# Killed'].sum()
total_injured_2014 = mass_2014['# Injured'].sum()

In [5]:
#totals 2015
total2015 = mass_2015['Total_Victims'].sum()
total_killed_2015 = mass_2015['# Killed'].sum()
total_injured_2015 = mass_2015['# Injured'].sum()

In [6]:
#totals 2016
total2016 = mass_2016['Total_Victims'].sum()
total_killed_2016 = mass_2016['# Killed'].sum()
total_injured_2016 = mass_2016['# Injured'].sum()

In [7]:
#totals 2017
total2017 = mass_2017['Total_Victims'].sum()
total_killed_2017 = mass_2017['# Killed'].sum()
total_injured_2017 = mass_2017['# Injured'].sum()

In [8]:
#combine all totals for visualizations
totals = {'Year': ['2014', '2015','2016','2017'], 'Total Injured or Killed': [total2014, total2015, total2016, total2017], 
          'Total Killed': [total_killed_2014, total_killed_2015, total_killed_2016, total_killed_2017],
          'Total Injured': [total_injured_2014, total_injured_2015, total_injured_2016, total_injured_2017]}

In [9]:
#Review dataframe
totals_df = pd.DataFrame(data = totals)
totals_df

Unnamed: 0,Year,Total Injured or Killed,Total Killed,Total Injured
0,2014,1340,262,1078
1,2015,1705,368,1337
2,2016,1989,451,1538
3,2017,2240,437,1803


In [10]:
#export as excel for visualization
totals_df.to_csv("totals_df.csv", header=True)

# Total incident count

In [11]:
#groupby State & convert to df for later merge
mass_2014_states = mass_2014.groupby(['State']).count()['Incident Date']
mass_2014_states = pd.DataFrame(data = mass_2014_states)
mass_2015_states = mass_2015.groupby(['State']).count()['Incident Date']
mass_2015_states = pd.DataFrame(data = mass_2015_states)
mass_2016_states = mass_2016.groupby(['State']).count()['Incident Date']
mass_2016_states = pd.DataFrame(data = mass_2016_states)
mass_2017_states = mass_2017.groupby(['State']).count()['Incident Date']
mass_2017_states = pd.DataFrame(data = mass_2017_states)


In [12]:
#Merge all years and rename columns
state_count_1 = pd.merge(mass_2014_states, mass_2015_states, how = 'outer', on = 'State')
state_count_2 = pd.merge(mass_2016_states,mass_2017_states, how = 'outer', on = 'State')
state_count_all = pd.merge(state_count_1, state_count_2,how = 'outer', on = 'State')
state_count_all.columns = ['2014', '2015', '2016', '2017']
#fill na values with 0
state_count_all = state_count_all.fillna(0)
state_count_all.to_csv("total_count_by_state.csv", header=True)

In [26]:
#state totals
state_count_all['total'] = state_count_all['2014'] + state_count_all['2015'] + state_count_all['2016'] + state_count_all['2017']

#sort table for basic stats
state_count_all.sort_values(by=['total'])

In [14]:
#Rename States & reset index for later merge with census data
state_count_all.index.names = ['States']
state_count_all = state_count_all.reset_index()
mass_2014_states.index.names = ['States']
mass_2014_states = mass_2014_states.reset_index()
mass_2015_states.index.names = ['States']
mass_2015_states = mass_2015_states.reset_index()
mass_2016_states.index.names = ['States']
mass_2016_states = mass_2016_states.reset_index()
mass_2017_states.index.names = ['States']
mass_2017_states = mass_2017_states.reset_index()

In [15]:
state_count_all.sum() #total count over the years

States    AlabamaAlaskaArizonaArkansasCaliforniaColorado...
2014                                                    268
2015                                                    335
2016                                                    382
2017                                                    346
dtype: object

In [16]:
total_mass_shootings = state_count_all.sum()  #get total of mass shootings
#create a data frame for visualization
years = ['2014', '2015', '2016', '2017']
data = [268, 335, 382, 346]
total_mass_shootings = {'years': years, 'total_shootings' : data}
total_mass_shootings = pd.DataFrame(data = total_mass_shootings)

#export as excel for visualization
total_mass_shootings.to_csv("total_mass_shootings.csv", index = False, header=True)

# Pull US Census Data (2018 Data Not Available)

In [17]:
# Census & gmaps API Keys
from config import (census_key, gkey)
c = Census(census_key, year=2017)

# Run Census Search to retrieve data on all states
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"),{"for": "state:*"})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rates"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

#Add in Unemployment Rate
census_pd["Unemployment Rates"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd_2017 = census_pd[["State","Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rates", "Unemployment Count", "Unemployment Rates"]]

census_pd_2017.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rates,Unemployment Count,Unemployment Rates
0,72,Puerto Rico,3468963.0,40.1,19775.0,12081.0,1543220.0,44.486494,220597.0,6.359163
1,1,Alabama,4850771.0,38.7,46472.0,25746.0,849699.0,17.516782,165391.0,3.409582
2,2,Alaska,738565.0,33.9,76114.0,35065.0,73380.0,9.935483,29548.0,4.000731
3,4,Arizona,6809946.0,37.2,53510.0,27964.0,1128046.0,16.564683,225911.0,3.317368
4,5,Arkansas,2977944.0,37.9,43813.0,24426.0,524211.0,17.603118,85128.0,2.858617


In [18]:
# Census & gmaps API Keys
c = Census(census_key, year=2016)

# Configure gmaps
gmaps.configure(api_key="AIzaSyDH3ugUF6TPqs5vaqe5M03_9DXKzT8WAe8")
# Run Census Search to retrieve data on all states
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"),{"for": "state:*"})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rates"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

#Add in Unemployment Rate
census_pd["Unemployment Rates"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd_2016 = census_pd[["State","Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rates", "Unemployment Count", "Unemployment Rates"]]

census_pd_2016.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rates,Unemployment Count,Unemployment Rates
0,1,Alabama,4841164.0,38.6,44758.0,24736.0,868666.0,17.943329,184479.0,3.810633
1,2,Alaska,736855.0,33.6,74444.0,34191.0,72826.0,9.883356,30139.0,4.090221
2,4,Arizona,6728577.0,37.1,51340.0,26686.0,1165636.0,17.323663,249972.0,3.71508
3,5,Arkansas,2968472.0,37.7,42336.0,23401.0,542431.0,18.273071,93190.0,3.139326
4,6,California,38654206.0,36.0,63783.0,31458.0,6004257.0,15.533257,1683726.0,4.355868


In [19]:
# Census & gmaps API Keys
c = Census(census_key, year=2015)

# Run Census Search to retrieve data on all states
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"),{"for": "state:*"})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rates"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

#Add in Unemployment Rate
census_pd["Unemployment Rates"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd_2015 = census_pd[["State","Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rates", "Unemployment Count", "Unemployment Rates"]]

census_pd_2015.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rates,Unemployment Count,Unemployment Rates
0,1,Alabama,4830620.0,38.4,43623.0,24091.0,887260.0,18.367415,207097.0,4.287172
1,2,Alaska,733375.0,33.4,72515.0,33413.0,72957.0,9.948117,31285.0,4.265894
2,4,Arizona,6641928.0,36.8,50255.0,25848.0,1180690.0,17.776314,275712.0,4.151084
3,5,Arkansas,2958208.0,37.7,41371.0,22798.0,553644.0,18.71552,104943.0,3.547519
4,6,California,38421464.0,35.8,61818.0,30318.0,6135142.0,15.968007,1891081.0,4.921939


In [20]:
# Census & gmaps API Keys
c = Census(census_key, year=2014)

# Configure gmaps
gmaps.configure(api_key="AIzaSyDH3ugUF6TPqs5vaqe5M03_9DXKzT8WAe8")
# Run Census Search to retrieve data on all states
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"),{"for": "state:*"})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "state": "State"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rates"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

#Add in Unemployment Rate
census_pd["Unemployment Rates"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd_2014 = census_pd[["State","Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rates", "Unemployment Count", "Unemployment Rates"]]

census_pd_2014.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rates,Unemployment Count,Unemployment Rates
0,1,Alabama,4817678.0,38.2,43511.0,23936.0,889710.0,18.46761,228716.0,4.747432
1,2,Alaska,728300.0,33.4,71829.0,33129.0,71866.0,9.867637,32097.0,4.407112
2,4,Arizona,6561516.0,36.5,49928.0,25537.0,1169309.0,17.820714,304067.0,4.634097
3,5,Arkansas,2947036.0,37.6,41264.0,22595.0,549303.0,18.639168,115090.0,3.90528
4,6,California,38066920.0,35.6,61489.0,29906.0,6115244.0,16.064457,2084564.0,5.476051


In [21]:
#rename columns for later merging
census_pd_2014 = census_pd_2014.rename(columns = {'Name':'States'})
census_pd_2015 = census_pd_2015.rename(columns = {'Name':'States'})
census_pd_2016 = census_pd_2016.rename(columns = {'Name':'States'})
census_pd_2017 = census_pd_2017.rename(columns = {'Name':'States'})

In [22]:
#merge Census and mass shooting data
merged_2014 = pd.merge(mass_2014_states, census_pd_2014, on = 'States')
merged_2015 = pd.merge(mass_2015_states, census_pd_2015, on = 'States')
merged_2016 = pd.merge(mass_2016_states, census_pd_2016, on = 'States')
merged_2017 = pd.merge(mass_2017_states, census_pd_2017, on = 'States')

In [23]:
#load in coordinates for map
coordinates = pd.read_csv('Original_Centroid/state_centroids.csv')
coordinates = pd.DataFrame(data = coordinates)
#rename column for merge with full df
coordinates = coordinates.rename(columns = {'State':'States'})

In [24]:
#merge census & mass shooting data
merged_2014 = pd.merge(merged_2014, coordinates, on = 'States')
merged_2015 = pd.merge(merged_2015, coordinates, on = 'States')
merged_2016 = pd.merge(merged_2016, coordinates, on = 'States')
merged_2017 = pd.merge(merged_2017, coordinates, on = 'States')

#rename columns to tidy up the dataset
merged_2014 = merged_2014.rename(columns = {'Incident Date': 'Mass Shooting Count'})
merged_2015 = merged_2015.rename(columns = {'Incident Date': 'Mass Shooting Count'})
merged_2016 = merged_2016.rename(columns = {'Incident Date': 'Mass Shooting Count'})
merged_2017 = merged_2017.rename(columns = {'Incident Date': 'Mass Shooting Count'})

#add column for per capita
merged_2014['Per Capita Mass Shooting'] = merged_2014['Mass Shooting Count'] / merged_2014['Population']
merged_2015['Per Capita Mass Shooting'] = merged_2015['Mass Shooting Count'] / merged_2015['Population']
merged_2016['Per Capita Mass Shooting'] = merged_2016['Mass Shooting Count'] / merged_2016['Population']
merged_2017['Per Capita Mass Shooting'] = merged_2017['Mass Shooting Count'] / merged_2017['Population']



In [25]:
#export as excel for visualization
merged_2014.to_csv("merged_2014.csv", index = False, header=True)
merged_2015.to_csv("merged_2015.csv", index = False, header=True)
merged_2016.to_csv("merged_2016.csv", index = False, header=True)
merged_2017.to_csv("merged_2017.csv", index = False, header=True)
