# Analysis of State Firearm Law Provisions and Gun Deaths in the United States from 1999 to 2016
## The objective of this project is to utilize data mining techniques and tools to analyze relationships among state firearm law provisions and gun deaths in the U.S. from 1999-2016. 
### The data for state firearm law provisions comes from https://www.statefirearmlaws.org/resources.
### The data for gun deaths comes from https://wonder.cdc.gov/controller/saved/D76/D46F404.

## 1) Import packages

In [1]:
#import packages
import os
import numpy as np
import pandas as pd
import bokeh
from bokeh.io import output_notebook, export_png
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show, output_file, save
from bokeh.layouts import column, row, gridplot
from bokeh.sampledata.us_states import data as states

## 2) Read in data

In [2]:
#show the current working directory (the data files below are read with paths relative to it)
os.getcwd()

'/Users/mgibbs/Documents/George Washington University 2018-2019/DATS 6103 Introduction to Data Mining /DATS 6103 - Research Paper - Mary Gibbs'

In [3]:
#read in data
#sheet_name is the current spelling of the keyword; the older sheetname spelling was deprecated and later removed from pandas
#index_col = 0 uses the first column of each sheet as the index
cb = pd.read_excel('firearm_provisions.xlsx', sheet_name = 'Codebook', index_col = 0)
fp = pd.read_excel('firearm_provisions.xlsx', sheet_name = 'Database', index_col = 0)
#the gun deaths file is tab separated; index_col = 1 uses its second column as the index
gd = pd.read_csv('gun_deaths.txt', sep = '\t', index_col = 1)

In [4]:
#preview the first rows of the codebook data frame - cb
cb.head()
#cb.tail()

Unnamed: 0_level_0,Category,Sub-Category,Variable Name,Brief Description of Provision,Detailed Description of Provision,Coding Notes,Coding Instructions,Notes,Data Source and Attribution
Category Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Dealer regulations,Licensing,dealer,State dealer license required for sale of all ...,All firearm dealers are required to have a sta...,State requires all persons engaged in the busi...,If all firearm dealers are required to have a ...,,"Coded by Michael Siegel, MD, MPH, Boston Unive..."
1,Dealer regulations,Licensing,dealerh,State dealer license required for sale of hand...,All firearm dealers that sell handguns are req...,State requires all persons engaged in the busi...,If only dealers of handguns or handguns AND as...,,"Coded by Michael Siegel, MD, MPH, Boston Unive..."
1,Dealer regulations,Recordkeeping,recordsdealer,Licensed dealers are required to keep and reta...,Licensed dealers are required to keep and reta...,Recordkeeping refers to the recording of sales...,If record keeping and retention is required on...,Federal law requires licensed dealers to keep ...,"Coded by Michael Siegel, MD, MPH, Boston Unive..."
1,Dealer regulations,Recordkeeping,recordsdealerh,Licensed dealers are required to keep and reta...,Licensed dealers are required to keep and reta...,Recordkeeping refers to the recording of sales...,If record keeping and retention is required on...,Federal law requires licensed dealers to keep ...,"Coded by Michael Siegel, MD, MPH, Boston Unive..."
1,Dealer regulations,Recordkeeping,recordsall,All private sellers and licensed dealers are r...,Both private sellers and licensed dealers are ...,Recordkeeping refers to the recording of sales...,If all private sellers and licensed dealers ar...,Federal law requires licensed dealers to keep ...,"Coded by Michael Siegel, MD, MPH, Boston Unive..."


In [5]:
#preview the first rows of the firearm provisions data frame - fp
fp.head()
#fp.tail()

Unnamed: 0_level_0,year,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,1991,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,15
Alaska,1991,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,10
Arizona,1991,0,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,12
Arkansas,1991,1,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,15
California,1991,1,1,0,1,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,58


In [6]:
#preview the first rows of the gun deaths data frame - gd
gd.head()
#gd.tail()

Unnamed: 0_level_0,Notes,State Code,Year,Year Code,Injury Intent,Injury Intent Code,Cause of death,Cause of death Code,Deaths,Population,Crude Rate
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alabama,,1.0,1999.0,1999.0,Unintentional,1.0,Discharge from other and unspecified firearms,W34,47.0,4430141.0,1.1
Alabama,,1.0,1999.0,1999.0,Suicide,2.0,Intentional self-harm by handgun discharge,X72,94.0,4430141.0,2.1
Alabama,,1.0,1999.0,1999.0,Suicide,2.0,"Intentional self-harm by rifle, shotgun and la...",X73,63.0,4430141.0,1.4
Alabama,,1.0,1999.0,1999.0,Suicide,2.0,Intentional self-harm by other and unspecified...,X74,278.0,4430141.0,6.3
Alabama,,1.0,1999.0,1999.0,Homicide,3.0,Assault by handgun discharge,X93,37.0,4430141.0,0.8


## 3) Clean state firearm law provisions data frame

In [7]:
#select year values 1999-2016
#filter on the year column instead of on row positions so the selection does not depend on the
#row order of the spreadsheet and gives the same result when the cell is run again
#.copy() gives fp its own data so that the columns added later are not written into a slice
fp = fp[fp['year'].between(1999, 2016)].copy()

In [8]:
#create region dictionary (region name -> list of the states in that region)
region = {'New England': ['Maine', 'Vermont', 'New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island'],
          'Middle Atlantic': ['New York', 'Pennsylvania', 'New Jersey'],
          'South Atlantic': ['West Virginia', 'Maryland', 'Delaware', 'Virginia', 'North Carolina', 'South Carolina', 'Georgia', 'Florida'],
          'East North Central': ['Wisconsin', 'Michigan', 'Illinois', 'Indiana', 'Ohio'],
          'East South Central': ['Kentucky', 'Tennessee', 'Mississippi', 'Alabama'],
          'West North Central': ['North Dakota', 'South Dakota', 'Minnesota', 'Nebraska', 'Iowa', 'Kansas', 'Missouri'],
          'West South Central': ['Oklahoma', 'Arkansas', 'Texas', 'Louisiana'],
          'Mountain': ['Idaho', 'Montana', 'Wyoming', 'Nevada', 'Utah', 'Colorado', 'Arizona', 'New Mexico'],
          'Pacific': ['Alaska', 'Hawaii', 'Washington', 'Oregon', 'California']}
#invert the dictionary into a state -> region lookup
state_region = {state_name: region_name
                for region_name, state_names in region.items()
                for state_name in state_names}
#create region column from the state index
#a direct lookup only produces the new column, whereas calling DataFrame.replace once per region
#searched and rewrote matching values in every column of fp
#index values that are not in the lookup are kept unchanged, as before
fp = fp.assign(region = [state_region.get(state_name, state_name) for state_name in fp.index])

In [9]:
#build the firearm provisions dictionary (category -> list of provision variable names)
#codebook column names are lowercased and their spaces replaced by underscores first
cb.columns = cb.columns.str.lower().str.replace(' ', '_')
fps = cb['variable_name'].tolist()
#(category, start, stop): positions of each category's provisions within fps
category_slices = [('dealer_regulations', 0, 17), ('buyer_regulations', 17, 34), ('high_risk_gun_possession_prohibitions', 34, 45),
                   ('background_checks', 45, 56), ('ammunition_regulations', 56, 63), ('possession_regulations', 63, 75),
                   ('concealed_carry_permits', 75, 82), ('assault_weapons_large_capacity_magazines_bans', 82, 90), ('child_access_preventions', 90, 101),
                   ('gun_trafficking', 101, 108), ('stand_your_ground', 108, 109), ('preemption_', 109, 112), ('immunity_', 112, 113),
                   ('domestic_violence', 113, 134)]
fps_cat = {category: fps[start:stop] for category, start, stop in category_slices}
#add one column per category holding the row-wise sum of that category's provision columns
for category, provisions in fps_cat.items():
    fp[category] = fp[provisions].sum(axis = 1)
#drop the individual provision columns now that they are summarised per category
fp = fp.drop(fps, axis = 1)

In [10]:
#fix fp column names (index = str also converts the index labels to strings)
fp = fp.rename(index = str, columns = {'preemption_':'preemption', 'immunity_': 'immunity', 'lawtotal': 'law_total'})
#order the columns by name: region first, law_total last, all other columns in their existing order
#moving the two columns by name instead of by position keeps the result correct
#even if the order in which the earlier cells created the columns changes
fp_columns = fp.columns.tolist()
fp_columns = ['region'] + [name for name in fp_columns if name not in ('region', 'law_total')] + ['law_total']
#recreate fp
fp = fp[fp_columns]

In [11]:
#check whether any value in fp is missing (True if at least one is)
fp.isnull().values.any()

False

In [12]:
#check the data type of every fp column
fp.dtypes

region                                           object
year                                              int64
dealer_regulations                                int64
buyer_regulations                                 int64
high_risk_gun_possession_prohibitions             int64
background_checks                                 int64
ammunition_regulations                            int64
possession_regulations                            int64
concealed_carry_permits                           int64
assault_weapons_large_capacity_magazines_bans     int64
child_access_preventions                          int64
gun_trafficking                                   int64
stand_your_ground                                 int64
preemption                                        int64
immunity                                          int64
domestic_violence                                 int64
law_total                                         int64
dtype: object

In [13]:
#show summary statistics for fp
fp.describe()

Unnamed: 0,year,dealer_regulations,buyer_regulations,high_risk_gun_possession_prohibitions,background_checks,ammunition_regulations,possession_regulations,concealed_carry_permits,assault_weapons_large_capacity_magazines_bans,child_access_preventions,gun_trafficking,stand_your_ground,preemption,immunity,domestic_violence,law_total
count,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0,900.0
mean,2007.5,2.681111,2.278889,2.852222,1.968889,0.557778,2.7,3.89,0.545556,1.576667,0.717778,0.751111,0.533333,0.377778,3.353333,24.784444
std,5.191012,3.824498,3.391575,2.507685,3.224822,1.195094,2.152423,1.650196,1.606831,2.523683,1.263994,0.43261,1.10515,0.485101,4.760053,23.640809
min,1999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
25%,2003.0,0.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,10.0
50%,2007.5,1.0,1.0,2.0,0.0,0.0,2.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,15.0
75%,2012.0,5.0,3.0,4.0,4.0,1.0,3.0,5.0,0.0,3.0,2.0,1.0,0.0,1.0,5.0,26.75
max,2016.0,14.0,15.0,10.0,11.0,6.0,9.0,7.0,8.0,11.0,6.0,1.0,3.0,1.0,18.0,104.0


In [14]:
#save the cleaned firearm provisions data frame as csv
fp.to_csv('clean_firearm_provisions.csv')

## 4) Clean gun deaths data frame

In [15]:
#name the index, tidy the column names (lowercase, underscores instead of spaces),
#and keep only the year, injury_intent, cause_of_death, deaths, and population columns
gd.index.name = 'state'
gd.columns = gd.columns.str.lower().str.replace(' ', '_')
gd_columns = ['year', 'injury_intent', 'cause_of_death', 'deaths', 'population']
#remove the District of Columbia rows while selecting the columns
gd = gd[gd_columns].drop('District of Columbia')
#make injury_intent and cause_of_death values lowercase
for text_column in ['injury_intent', 'cause_of_death']:
    gd[text_column] = gd[text_column].str.lower()

In [16]:
#create region column from the state index
#a state -> region lookup only produces the new column, whereas calling DataFrame.replace once per
#region searched and rewrote matching values in every column of gd
state_region = {state_name: region_name
                for region_name, state_names in region.items()
                for state_name in state_names}
#index values that are not in the lookup (for example missing ones) are kept unchanged, as before
gd = gd.assign(region = [state_region.get(state_name, state_name) for state_name in gd.index])

In [17]:
#get gd column names
gd_columns = list(gd.columns)
#rotate the last column (region) to the front
gd_columns = gd_columns[-1:] + gd_columns[:-1]
#recreate gd with the new column order
gd = gd[gd_columns]

In [18]:
#check whether any value in gd is missing (True if at least one is)
gd.isnull().values.any()

True

In [19]:
#remove every gd row that contains at least one missing value
gd = gd.dropna(axis = 0)
#confirm that no missing values remain
gd.isnull().values.any()

False

In [20]:
#check the data type of every gd column
gd.dtypes

region             object
year              float64
injury_intent      object
cause_of_death     object
deaths            float64
population        float64
dtype: object

In [21]:
#change year, deaths, and population values from float to int in a single cast
gd = gd.astype({'year': int, 'deaths': int, 'population': int})
#show the resulting data types
gd.dtypes

region            object
year               int64
injury_intent     object
cause_of_death    object
deaths             int64
population         int64
dtype: object

In [22]:
#show summary statistics for the numeric gd columns
gd.describe()

Unnamed: 0,year,deaths,population
count,4270.0,4270.0,4270.0
mean,2007.383372,128.664871,7495301.0
std,5.189847,189.472738,7615873.0
min,1999.0,10.0,491780.0
25%,2003.0,22.0,2874554.0
50%,2007.0,50.0,5273477.0
75%,2012.0,142.0,9118037.0
max,2016.0,1571.0,39250020.0


In [23]:
#save the cleaned gun deaths data frame as csv
gd.to_csv('clean_gun_deaths.csv')

## 5) Analysis

### State Firearm Law Provisions and Gun Deaths in the U.S. from 1999-2016

In [24]:
#get total state firearm law provisions in the U.S. from 1999-2016
fp_total = fp.groupby('year').sum()
#get total gun deaths in the U.S. from 1999-2016
gd_total = gd.groupby('year')[['deaths']].sum()
#get the population covered by gd for each year
#gd holds one row per state, year, injury intent, and cause of death, and each of those rows repeats
#the state's population, so summing the population column over all rows counts every state several times;
#keep one row per state and year before summing so that each state's population is counted once
state_year_population = gd.reset_index().drop_duplicates(subset = ['state', 'year'])
gd_total['population'] = state_year_population.groupby('year')['population'].sum()
#get gun deaths per 1,000,000 people in the U.S. from 1999-2016
gd_total['rate'] = (gd_total['deaths']/gd_total['population'])*1000000

In [25]:
#function for line plot
def line_plot(df, y, title, xlabel, ylabel, color):
    """
    draw one data frame column against the data frame index as a line with circle markers

    args:
        df (DataFrame): data frame whose index provides the x values
        y (str): name of the column that provides the y values
        title (str): title
        xlabel (str): x-axis label (its lowercase form labels x in the hover tooltip)
        ylabel (str): y-axis label (its lowercase form labels y in the hover tooltip)
        color (str): color of the line and the markers

    returns:
        line plot
    """
    x_values = df.index.values
    y_values = df[y]
    font_size = '8pt'
    #tooltip showing the integer-formatted x and y values under the cursor
    hover = HoverTool(tooltips = [(xlabel.lower(), '$x{int}'), (ylabel.lower(), '$y{int}')])
    plot = figure(height = 375, width = 550, tools = 'pan, wheel_zoom, box_zoom, reset')
    plot.line(x_values, y_values, line_width = 3, color = color)
    plot.circle(x_values, y_values, size = 6, color = color)
    #title and axis labels share one font size
    plot.title.text = title
    plot.title.text_font_size = font_size
    plot.xaxis.axis_label = xlabel
    plot.xaxis.axis_label_text_font_size = font_size
    plot.yaxis.axis_label = ylabel
    plot.yaxis.axis_label_text_font_size = font_size
    plot.add_tools(hover)
    return plot

In [26]:
#function for scatter plot
def scatter_plot(df_x, df_y, x, y, title, xlabel, ylabel, color):
    """
    draw a scatter plot of one data frame column against another together with a
    degree-1 least squares fit line

    args:
        df_x (DataFrame): data frame for x
        df_y (DataFrame): data frame for y
        x (str): name of the df_x column that provides the x values
        y (str): name of the df_y column that provides the y values
        title (str): title
        xlabel (str): x-axis label (its lowercase form labels x in the hover tooltip)
        ylabel (str): y-axis label (its lowercase form labels y in the hover tooltip)
        color (str): color of the markers

    returns:
        scatter plot
    """
    x_values = df_x[x]
    y_values = df_y[y]
    font_size = '8pt'
    plot = figure(height = 375, width = 550, tools = 'pan, wheel_zoom, box_zoom, reset')
    plot.circle(x_values, y_values, size = 6, color = color)
    #np.polyfit returns the coefficients from the highest degree down: slope, then intercept
    slope, intercept = np.polyfit(x_values, y_values, deg = 1)
    plot.line(x_values, slope*x_values + intercept, line_width = 3, color = 'darkgrey')
    plot.title.text = title
    plot.title.text_font_size = font_size
    plot.xaxis.axis_label = xlabel
    plot.xaxis.axis_label_text_font_size = font_size
    plot.yaxis.axis_label = ylabel
    plot.yaxis.axis_label_text_font_size = font_size
    #tooltip showing the integer-formatted x and y values under the cursor
    hover = HoverTool(tooltips = [(xlabel.lower(), '$x{int}'), (ylabel.lower(), '$y{int}')])
    plot.add_tools(hover)
    return plot

In [27]:
#plot total state firearm law provisions in the U.S. from 1999-2016
fp_total_plot = line_plot(fp_total, 'law_total', 'Total State Firearm Law Provisions in the U.S. from 1999-2016', 
                          'Year', 'Number of State Firearm Law Provisions', 'navy') 
#save plot as a standalone html file
output_file('fp_total_plot.html')
save(fp_total_plot)
#switch bokeh output to the notebook so that the show() calls render inline
output_notebook()
#show plot
show(fp_total_plot)
#export plot as png (disabled; uncomment to write the file)
#export_png(fp_total_plot, filename = 'fp_total_plot.png')

In [28]:
#plot total gun deaths in the U.S. from 1999-2016
gd_total_plot = line_plot(gd_total, 'deaths', 'Total Gun Deaths in the U.S. from 1999-2016', 
                          'Year', 'Number of Gun Deaths', 'darkred')
#plot gun deaths per 1,000,000 people in the U.S. from 1999-2016
gd_rate_total_plot = line_plot(gd_total, 'rate', 'Gun Deaths per 1,000,000 People in the U.S. from 1999-2016', 
                               'Year', 'Number of Gun Deaths per 1,000,000 People', 'forestgreen')
#show the two plots side by side
show(row(gd_total_plot, gd_rate_total_plot))
#save plots as html (disabled; uncomment to write the files)
#output_file('gd_total_plot.html')
#save(gd_total_plot)
#output_file('gd_rate_total_plot.html')
#save(gd_rate_total_plot)
#export plots as png (disabled; uncomment to write the files)
#export_png(gd_total_plot, filename = 'gd_total_plot.png')
#export_png(gd_rate_total_plot, filename = 'gd_rate_total_plot.png')

In [29]:
#plot total gun deaths vs. total state firearm law provisions in the U.S. from 1999-2016
#(x comes from fp_total and y from gd_total; both are indexed by year)
fp_gd_total_plot = scatter_plot(fp_total, gd_total, 'law_total', 'deaths', 'Total Gun Deaths vs. Total State Firearm Law Provisions in the U.S. from 1999-2016', 
                                'Number of State Firearm Law Provisions', 'Number of Gun Deaths', 'indigo')
#plot gun deaths per 1,000,000 people in the U.S. vs. total state firearm law provisions in the U.S. from 1999-2016
fp_gd_rate_total_plot = scatter_plot(fp_total, gd_total, 'law_total', 'rate', 'Gun Deaths per 1,000,000 People vs. Total State Firearm Law Provisions in the U.S. from 1999-2016', 
                                     'Number of State Firearm Law Provisions', 'Number of Gun Deaths per 1,000,000 People', 'darkorange')
#show the two plots side by side
show(row(fp_gd_total_plot, fp_gd_rate_total_plot))
#save plots as html (disabled; uncomment to write the files)
#output_file('fp_gd_total_plot.html')
#save(fp_gd_total_plot)
#output_file('fp_gd_rate_total_plot.html')
#save(fp_gd_rate_total_plot)
#export plots as png (disabled; uncomment to write the files)
#export_png(fp_gd_total_plot, filename = 'fp_gd_total_plot.png')
#export_png(fp_gd_rate_total_plot, filename = 'fp_gd_rate_total_plot.png')

### State Firearm Law Provisions and Gun Deaths for Each Region in the U.S. from 1999-2016

In [30]:
#pick U.S. region colors (one per region)
region_colors = ['royalblue', 'darkblue', 'salmon', 'crimson', 'lightgreen', 'mediumseagreen', 'mediumorchid', 'mediumslateblue', 'orange']
#get data for U.S. region map: one row per state, indexed by state name, without the sample data's own region column
state = pd.DataFrame(states).transpose()
state = state.rename(columns = {'name': 'state'})
state = state.reset_index(drop = True).set_index('state')
state = state.drop('region', axis = 1)
#get total state firearm law provisions for each region in the U.S. from 1999-2016
fp_region_total = fp.groupby(['region', 'year']).sum()
#get total gun deaths for each region in the U.S. from 1999-2016
gd_region_total = gd.groupby(['region', 'year'])[['deaths']].sum()
#get the population of each region for each year
#gd holds one row per state, year, injury intent, and cause of death, and each of those rows repeats
#the state's population, so summing the population column over all rows counts every state several times;
#keep one row per state and year before summing so that each state's population is counted once
state_year_population = gd.reset_index().drop_duplicates(subset = ['state', 'year'])
gd_region_total['population'] = state_year_population.groupby(['region', 'year'])['population'].sum()
#get gun deaths per 1,000,000 people for each region in the U.S. from 1999-2016
gd_region_total['rate'] = (gd_region_total['deaths']/gd_region_total['population'])*1000000

In [31]:
#function for region map
def region_map(region_dict, states_df, title, colors):
    """
    create a region map with one group of colored state outlines per region;
    Alaska and Hawaii are not drawn

    args:
        region_dict (dictionary): dictionary with regions as keys and lists of states as values
        states_df (DataFrame): data frame indexed by state name with 'lats' and 'lons' columns
            that hold the latitudes and longitudes of each state's outline
        title (str): title
        colors (list): colors, assigned to the regions in alphabetical order of region name

    returns:
        region map
    """
    not_drawn = ('Alaska', 'Hawaii')
    plot = figure(height = 400, width = 850, tools = 'pan, wheel_zoom, box_zoom, reset')
    for num, reg in enumerate(sorted(region_dict)):
        drawn_states = [state_name for state_name in region_dict[reg] if state_name not in not_drawn]
        #look the coordinates up by column label: longitudes are the x values and latitudes the y values
        #(the positional [1] / [0] lookups used before depended on the column order of states_df)
        xs = [states_df.loc[state_name, 'lons'] for state_name in drawn_states]
        ys = [states_df.loc[state_name, 'lats'] for state_name in drawn_states]
        plot.patches(xs, ys, fill_color = colors[num], line_color = 'black', line_width = 2, legend = reg)
    plot.title.text = title
    #detach the legend from the plot area and add it again to the right of the plot
    plot.legend[0].plot = None
    legend = plot.legend[0]
    plot.add_layout(legend, 'right')
    plot.legend.location = 'center'
    #clicking a legend entry hides the corresponding region
    plot.legend.click_policy = 'hide'
    plot.toolbar_sticky = False
    return plot

In [32]:
#function for index multiple line plot
def idx_multi_line_plot(df, y, title, xlabel, ylabel, colors):
    """
    draw one line with circle markers per value of the first index level of a data frame;
    the unique values of the second index level are x and the values of column y are y

    args:
        df (DataFrame): data frame with (at least) two index levels
        y (str): name of the column that provides the y values
        title (str): title
        xlabel (str): x-axis label (its lowercase form labels x in the hover tooltip)
        ylabel (str): y-axis label (its lowercase form labels y in the hover tooltip)
        colors (list): colors, paired in order with the first-level index values

    returns:
        multiple line plot
    """
    line_labels = df.index.get_level_values(0).unique()
    x_values = df.index.get_level_values(1).unique()
    font_size = '8pt'
    plot = figure(height = 350, width = 750, tools = 'pan, wheel_zoom, box_zoom, reset')
    for lab, lab_color in zip(line_labels, colors):
        y_values = df.loc[lab, y]
        plot.line(x_values, y_values, line_width = 3, color = lab_color, legend = lab)
        plot.circle(x_values, y_values, size = 6, color = lab_color, legend = lab)
    plot.title.text = title
    plot.title.text_font_size = font_size
    plot.xaxis.axis_label = xlabel
    plot.xaxis.axis_label_text_font_size = font_size
    plot.yaxis.axis_label = ylabel
    plot.yaxis.axis_label_text_font_size = font_size
    #detach the legend from the plot area and add it again to the right of the plot
    legend = plot.legend[0]
    legend.plot = None
    plot.add_layout(legend, 'right')
    plot.legend.location = 'center'
    #clicking a legend entry hides the corresponding line
    plot.legend.click_policy = 'hide'
    hover = HoverTool(tooltips = [(xlabel.lower(), '$x{int}'), (ylabel.lower(), '$y{int}')])
    plot.add_tools(hover)
    plot.toolbar_sticky = False
    return plot

In [33]:
#function for multiple scatter plots
def multi_scatter_plot(df_x, df_y, x, y, title, xlabel, ylabel, colors):
    """
    create one scatter plot with a degree-1 least squares fit line for every value of the
    first index level of df_x

    args:
        df_x (DataFrame): data frame for x
        df_y (DataFrame): data frame for y, looked up with the same first-level index values as df_x
        x (str): name of the df_x column that provides the x values
        y (str): name of the df_y column that provides the y values
        title (str): title, prefixed with the first-level index value of each plot
        xlabel (str): x-axis label (its lowercase form labels x in the hover tooltip)
        ylabel (str): y-axis label (its lowercase form labels y in the hover tooltip)
        colors (list): colors, paired in order with the first-level index values

    returns:
        list of scatter plots
    """
    font_size = '4.75pt'
    group_labels = df_x.index.get_level_values(0).unique()
    plots = []
    for lab, lab_color in zip(group_labels, colors):
        x_values = df_x.loc[lab, x]
        y_values = df_y.loc[lab, y]
        plot = figure(height = 280, width = 430, tools = 'pan, wheel_zoom, box_zoom, reset')
        plot.circle(x_values, y_values, size = 6, color = lab_color, legend = False)
        #np.polyfit returns the coefficients from the highest degree down: slope, then intercept
        slope, intercept = np.polyfit(x_values, y_values, deg = 1)
        plot.line(x_values, slope*x_values + intercept, line_width = 3, color = 'darkgrey')
        plot.title.text = lab + ': ' + title
        plot.title.text_font_size = font_size
        plot.xaxis.axis_label = xlabel
        plot.xaxis.axis_label_text_font_size = font_size
        plot.yaxis.axis_label = ylabel
        plot.yaxis.axis_label_text_font_size = font_size
        hover = HoverTool(tooltips = [(xlabel.lower(), '$x{int}'), (ylabel.lower(), '$y{int}')])
        plot.add_tools(hover)
        plot.toolbar_sticky = False
        plots.append(plot)
    return plots

In [34]:
#create U.S. region map
#the Pacific region includes Alaska and Hawaii, which region_map does not draw
us_region_map = region_map(region, state, 'U.S. Region Map', region_colors)
#show map
show(us_region_map)
#save map as html (disabled; uncomment to write the file)
#output_file('us_region_map.html')
#save(us_region_map)
#export map as png (disabled; uncomment to write the file)
#export_png(us_region_map, filename = 'us_region_map.png')

In [35]:
#plot total state firearm law provisions for each region in the U.S. from 1999-2016 (one line per region)
fp_region_total_plot = idx_multi_line_plot(fp_region_total, 'law_total', 'Total State Firearm Law Provisions for Each Region in the U.S. from 1999-2016', 
                                           'Year', 'Number of State Firearm Law Provisions', region_colors)
#show plot 
show(fp_region_total_plot)
#save plot as html (disabled; uncomment to write the file)
#output_file('fp_region_total_plot.html')
#save(fp_region_total_plot)
#export plot as png (disabled; uncomment to write the file)
#export_png(fp_region_total_plot, filename = 'fp_region_total_plot.png')

In [36]:
#plot total gun deaths for each region in the U.S. from 1999-2016 (one line per region)
gd_region_total_plot = idx_multi_line_plot(gd_region_total, 'deaths', 'Total Gun Deaths for Each Region in the U.S. from 1999-2016',
                                           'Year', 'Number of Gun Deaths', region_colors)
#plot gun deaths per 1,000,000 people for each region in the U.S. from 1999-2016
gd_region_rate_total_plot = idx_multi_line_plot(gd_region_total, 'rate', 'Gun Deaths per 1,000,000 People for Each Region in the U.S. from 1999-2016',
                                                'Year', 'Number of Gun Deaths per 1,000,000 People', region_colors)
#show the two plots side by side
show(row(gd_region_total_plot, gd_region_rate_total_plot))
#save plots as html (disabled; uncomment to write the files)
#output_file('gd_region_total_plot.html')
#save(gd_region_total_plot)
#output_file('gd_region_rate_total_plot.html')
#save(gd_region_rate_total_plot)
#export plots as png (disabled; uncomment to write the files)
#export_png(gd_region_total_plot, filename = 'gd_region_total_plot.png')
#export_png(gd_region_rate_total_plot, filename = 'gd_region_rate_total_plot.png')

In [37]:
#plot total gun deaths vs. total state firearm law provisions for each region in the U.S. from 1999-2016
#(multi_scatter_plot returns a list with one plot per region)
fp_gd_region_total_plot = multi_scatter_plot(fp_region_total, gd_region_total, 'law_total', 'deaths', 'Total Gun Deaths vs. Total State Firearm Law Provisions from 1999-2016',
                                             'Number of State Firearm Law Provisions', 'Number of Gun Deaths', region_colors)
#show the plots in a grid with three plots per row
show(gridplot([fp_gd_region_total_plot[0:3], fp_gd_region_total_plot[3:6], fp_gd_region_total_plot[6:10]]))
#save plots as html (disabled; uncomment to write the file)
#output_file('fp_gd_region_total_plot.html')
#save(gridplot([fp_gd_region_total_plot[0:3], fp_gd_region_total_plot[3:6], fp_gd_region_total_plot[6:10]]))
#export plots as png (disabled; uncomment to write the file)
#export_png(gridplot([fp_gd_region_total_plot[0:3], fp_gd_region_total_plot[3:6], fp_gd_region_total_plot[6:10]]), filename = 'fp_gd_region_total_plot.png')

In [38]:
#plot gun deaths per 1,000,000 people vs. total state firearm law provisions for each region in the U.S. from 1999-2016
#(multi_scatter_plot returns a list with one plot per region)
fp_gd_region_rate_total_plot = multi_scatter_plot(fp_region_total, gd_region_total, 'law_total', 'rate', 'Gun Deaths per 1,000,000 People vs. Total State Firearm Law Provisions from 1999-2016',
                                                  'Number of State Firearm Law Provisions', 'Number of Gun Deaths per 1,000,000 People', region_colors)
#show the plots in a grid with three plots per row
show(gridplot([fp_gd_region_rate_total_plot[0:3], fp_gd_region_rate_total_plot[3:6], fp_gd_region_rate_total_plot[6:10]]))
#save plots as html (disabled; uncomment to write the file)
#output_file('fp_gd_region_rate_total_plot.html')
#save(gridplot([fp_gd_region_rate_total_plot[0:3], fp_gd_region_rate_total_plot[3:6], fp_gd_region_rate_total_plot[6:10]]))
#export plots as png (disabled; uncomment to write the file)
#export_png(gridplot([fp_gd_region_rate_total_plot[0:3], fp_gd_region_rate_total_plot[3:6], fp_gd_region_rate_total_plot[6:10]]), filename = 'fp_gd_region_rate_total_plot.png')

### State Firearm Law Provision Categories and Gun Death Categories in the U.S. from 1999-2016

In [39]:
#pick state firearm law provision category colors
fp_cat_upper_colors = ['darkturquoise','cornflowerblue', 'mediumblue', 'midnightblue', 'lightgreen', 'darkseagreen', 'darkgreen']
fp_cat_lower_colors = ['mediumorchid', 'indigo', 'mediumslateblue', 'salmon', 'mediumvioletred', 'crimson', 'darkred']
fp_cat_colors = fp_cat_upper_colors + fp_cat_lower_colors
#get total state firearm law provisions per category in the U.S. from 1999-2016
#the category columns are selected by name so that region, year, and law_total are left out
#regardless of the column order and of how the installed pandas version sums text columns
fp_cat_columns = [name for name in fp.columns if name not in ('region', 'year', 'law_total')]
fp_cat_total = fp.groupby('year')[fp_cat_columns].sum()
#split fp_cat_total into upper (>= 100 in 2016) and lower (< 100 in 2016)
fp_2016 = fp_cat_total[fp_cat_total.index == 2016]
#every category column is classified; none is skipped by position
upper = [name for name in fp_cat_columns if fp_2016.iloc[0][name] >= 100]
lower = [name for name in fp_cat_columns if fp_2016.iloc[0][name] < 100]
fp_cat_upper_total = fp_cat_total[upper]
fp_cat_lower_total = fp_cat_total[lower]
#pick total gun death category colors
gd_cat_total_colors = ['salmon', 'limegreen', 'slateblue']
#pick gun deaths per 1,000,000 people category colors
gd_cat_rate_colors = ['crimson', 'darkgreen', 'darkslateblue']
#get total gun deaths per category in the U.S. from 1999-2016
gd_cat_total = gd.groupby(['injury_intent', 'year'])[['deaths']].sum()
#get the population covered by gd for each year
#gd holds one row per state, year, injury intent, and cause of death, and each of those rows repeats
#the state's population, so summing the population column over all rows counts every state several times;
#keep one row per state and year before summing so that each state's population is counted once
state_year_population = gd.reset_index().drop_duplicates(subset = ['state', 'year'])
year_population = state_year_population.groupby('year')['population'].sum()
#every injury intent of a year is divided by the same yearly population
gd_cat_total['population'] = year_population.reindex(gd_cat_total.index.get_level_values('year')).values
#get gun deaths per 1,000,000 people per category in the U.S. from 1999-2016
gd_cat_total['rate'] = (gd_cat_total['deaths']/gd_cat_total['population'])*1000000

In [40]:
#function for column multiple line plot
def col_multi_line_plot(df, y, title, xlabel, ylabel, colors):
    """
    draw one line with circle markers per data frame column; the unique values of the
    first index level are x and the values of each column are y

    args:
        df (DataFrame): data frame whose columns are drawn as lines
        y (str): not used by this function
        title (str): title
        xlabel (str): x-axis label (its lowercase form labels x in the hover tooltip)
        ylabel (str): y-axis label (its lowercase form labels y in the hover tooltip)
        colors (list): colors, paired in order with the data frame columns

    returns:
        multiple line plot
    """
    x_values = df.index.get_level_values(0).unique()
    font_size = '8pt'
    plot = figure(height = 350, width = 910, tools = 'pan, wheel_zoom, box_zoom, reset')
    for lab, lab_color in zip(df.columns, colors):
        y_values = df[lab]
        plot.line(x_values, y_values, line_width = 3, color = lab_color, legend = lab)
        plot.circle(x_values, y_values, size = 6, color = lab_color, legend = lab)
    plot.title.text = title
    plot.title.text_font_size = font_size
    plot.xaxis.axis_label = xlabel
    plot.xaxis.axis_label_text_font_size = font_size
    plot.yaxis.axis_label = ylabel
    plot.yaxis.axis_label_text_font_size = font_size
    #detach the legend from the plot area and add it again to the right of the plot
    legend = plot.legend[0]
    legend.plot = None
    plot.add_layout(legend, 'right')
    plot.legend.location = 'center'
    #clicking a legend entry hides the corresponding line
    plot.legend.click_policy = 'hide'
    hover = HoverTool(tooltips = [(xlabel.lower(), '$x{int}'), (ylabel.lower(), '$y{int}')])
    plot.add_tools(hover)
    plot.toolbar_sticky = False
    return plot

In [41]:
#plot total state firearm law provisions per category in the U.S. from 1999-2016
#(one plot for the upper categories and one for the lower categories; one line per category column)
fp_cat_upper_total_plot = col_multi_line_plot(fp_cat_upper_total, 'law_total', 'Upper Total State Firearm Law Provisions per Category in the U.S. from 1999-2016', 
                                              'Year', 'Number of State Firearm Law Provisions', fp_cat_upper_colors)
fp_cat_lower_total_plot = col_multi_line_plot(fp_cat_lower_total, 'law_total', 'Lower Total State Firearm Law Provisions per Category in the U.S. from 1999-2016', 
                                              'Year', 'Number of State Firearm Law Provisions', fp_cat_lower_colors)
#show the two plots side by side
show(row(fp_cat_upper_total_plot, fp_cat_lower_total_plot))
#save plots as html (disabled; uncomment to write the files)
#output_file('fp_cat_upper_total_plot.html')
#save(fp_cat_upper_total_plot)
#output_file('fp_cat_lower_total_plot.html')
#save(fp_cat_lower_total_plot)
#export plots as png (disabled; uncomment to write the files)
#export_png(fp_cat_upper_total_plot, filename = 'fp_cat_upper_total_plot.png')
#export_png(fp_cat_lower_total_plot, filename = 'fp_cat_lower_total_plot.png')

In [42]:
#plot total gun deaths per category in the U.S. from 1999-2016 (one line per injury intent)
gd_cat_total_plot = idx_multi_line_plot(gd_cat_total, 'deaths', 'Total Gun Deaths per Category in the U.S. from 1999-2016', 
                                        'Year', 'Number of Gun Deaths', gd_cat_total_colors)
#plot gun deaths per 1,000,000 people per category in the U.S. from 1999-2016
gd_cat_rate_total_plot = idx_multi_line_plot(gd_cat_total, 'rate', 'Gun Deaths per 1,000,000 People per Category in the U.S. from 1999-2016', 
                                             'Year', 'Number of Gun Deaths per 1,000,000 People', gd_cat_rate_colors)
#show the two plots side by side
show(row(gd_cat_total_plot, gd_cat_rate_total_plot))
#save plots as html (disabled; uncomment to write the files)
#output_file('gd_cat_total_plot.html')
#save(gd_cat_total_plot)
#output_file('gd_cat_rate_total_plot.html')
#save(gd_cat_rate_total_plot)
#export plots as png (disabled; uncomment to write the files)
#export_png(gd_cat_total_plot, filename = 'gd_cat_total_plot.png')
#export_png(gd_cat_rate_total_plot, filename = 'gd_cat_rate_total_plot.png')

### State Firearm Law Provision Categories and Gun Death Categories for Each Region in the U.S. in 2016

In [43]:
#get total state firearm law provisions per category for each region in the U.S. in 2016
fp_reg_cat_total_2016 = fp[fp['year'] == 2016].groupby('region').sum()
#keep only the category columns; year and law_total are dropped by name rather than by position
fp_reg_cat_total_2016 = fp_reg_cat_total_2016.drop(['year', 'law_total'], axis = 1)
#get total gun deaths per category for each region in the U.S. in 2016
gd_2016 = gd.loc[gd['year'] == 2016]
gd_reg_cat_total_2016 = gd_2016.groupby(['year', 'region', 'injury_intent'])[['deaths']].sum()
#get the 2016 population of each region
#gd holds one row per state, year, injury intent, and cause of death, and each of those rows repeats
#the state's population, so summing the population column over all rows counts every state several times;
#keep one row per state before summing so that each state's population is counted once
region_population_2016 = gd_2016.reset_index().drop_duplicates(subset = ['state']).groupby('region')['population'].sum()
#every injury intent of a region is divided by the same regional population
gd_reg_cat_total_2016['population'] = region_population_2016.reindex(gd_reg_cat_total_2016.index.get_level_values('region')).values
#get gun deaths per 1,000,000 people per category for each region in the U.S. in 2016
gd_reg_cat_total_2016['rate'] = (gd_reg_cat_total_2016['deaths']/gd_reg_cat_total_2016['population'])*1000000

In [44]:
#function for column multiple bar plots
def col_multi_bar_plot(df, title, ylabel, colors):
    """
    create multiple bar plots with data frame columns as bars and data frame column values as y
    
    one plot is created per data frame index label; on top of the colored bars each plot draws 
    light gray, mostly transparent reference bars whose heights are len(region.get(label)) 
    multiplied by the length of each value in fps_cat
    
    note: this function reads the notebook-level names region and fps_cat, which are not passed 
    in as arguments and must be defined before it is called; presumably region maps each index 
    label to its states and fps_cat maps each category to its provisions, in the same order as 
    the data frame columns - TODO confirm where they are defined
    
    args:
        df (DataFrame): data frame (index labels must be strings, they are prepended to the title)
        title (str): title
        ylabel (str): y-axis label (its lowercase form is used as the hover tooltip label)
        colors (list): colors
    
    returns:
        list of plots, one per data frame index label
    """
    #length of each fps_cat value; identical for every plot, so computed once outside the loop
    cat_sizes = np.array([len(value) for value in fps_cat.values()])
    plots = []
    for lab in df.index:
        plot = figure(x_range = list(df.columns), height = 450, width = 410, tools = 'pan, wheel_zoom, box_zoom, reset')
        #colored bars: actual values for this index label
        plot.vbar(df.columns, top = df.loc[lab].values, width = 0.25, color = colors, legend = False)
        #transparent gray bars: reference heights scaled by the number of entries region holds for this label
        plot.vbar(df.columns, top = list(len(region.get(lab))*cat_sizes), width = 0.25, color = 'lightgray', fill_alpha = 0.2, legend = False)
        plot.title.text = lab + ': ' + title
        plot.title.text_font_size = '5.75pt'
        plot.xaxis.major_label_orientation = 0.96
        plot.xaxis.axis_label_text_font_size = '5.75pt'
        plot.yaxis.axis_label = ylabel
        plot.yaxis.axis_label_text_font_size = '5.75pt'
        #$y is the y coordinate under the cursor, displayed as an integer
        plot.add_tools(HoverTool(tooltips = [(str.lower(ylabel), '$y{int}')]))
        plot.toolbar_sticky = False
        plots.append(plot)
    return plots

In [45]:
#function for index multiple bar plots
def idx_multi_bar_plot(df, y, title, ylabel, colors):
    """
    build one bar plot for every label found in the second index level of a data frame
    
    within each plot the bars are the third index level entries belonging to that label and 
    the bar heights are the matching values of column y; every plot uses the full set of third 
    index level labels on its x-axis and a y-axis running from 0 to the column maximum rounded 
    to the nearest ten plus 25
    
    args:
        df (DataFrame): data frame with an index of at least three levels
        y (str): name of the data frame column plotted as bar heights
        title (str): title (each plot title is prefixed with its second index level label)
        ylabel (str): y-axis label (its lowercase form is used as the hover tooltip label)
        colors (list): colors (the first len(bars) entries are used in each plot)
    
    returns:
        list of plots, one per second index level label
    """
    figures = []
    for label in list(df.index.get_level_values(1).unique()):
        fig = figure(
            x_range = list(df.index.get_level_values(2).unique()),
            y_range = [0, round(df.loc[:, y].max(), -1) + 25],
            height = 450,
            width = 410,
            tools = 'pan, wheel_zoom, box_zoom, reset',
        )
        #rows of this label only, sliced once and reused for both bar positions and heights
        label_rows = df.loc[(slice(None),[label], slice(None)), :]
        bar_positions = list(label_rows.index.get_level_values(2))
        bar_heights = list(label_rows[y])
        fig.vbar(bar_positions, top = bar_heights, width = 0.25, color = colors[:len(bar_positions)], legend = False)
        #title
        fig.title.text = label + ': ' + title
        fig.title.text_font_size = '5.75pt'
        #axes
        fig.xaxis.major_label_orientation = 0.96
        fig.xaxis.axis_label_text_font_size = '5.75pt'
        fig.yaxis.axis_label = ylabel
        fig.yaxis.axis_label_text_font_size = '5.75pt'
        #hover and toolbar
        hover = HoverTool(tooltips = [(str.lower(ylabel), '$y{int}')])
        fig.add_tools(hover)
        fig.toolbar_sticky = False
        figures.append(fig)
    return figures

In [46]:
#plot total state firearm law provisions per category for each region in the U.S. in 2016
#transparent bars represent possible total number of state firearm law provisions per category for each region in the U.S. in 2016
fp_region_cat_total_2016_plot = col_multi_bar_plot(fp_reg_cat_total_2016, 'Total State Firearm Law Provisions per Category in 2016', 
                                                   'Number of State Firearm Law Provisions', fp_cat_colors)
#plot total gun deaths per category for each region in the U.S. in 2016
gd_region_cat_total_2016_plot = idx_multi_bar_plot(gd_reg_cat_total_2016, 'deaths', 'Total Gun Deaths per Category in 2016', 
                                                   'Number of Gun Deaths', gd_cat_total_colors)
#plot gun deaths per 1,000,000 people per category for each region in the U.S. in 2016
gd_region_cat_rate_total_2016_plot = idx_multi_bar_plot(gd_reg_cat_total_2016, 'rate', 'Gun Deaths per 1,000,000 People per Category in 2016', 
                                                        'Number of Gun Deaths per 1,000,000 People', gd_cat_rate_colors)
#show plots
#one grid row per region: provisions, total gun deaths, gun deaths per 1,000,000 people
#the three lists are paired by position, so the number of rows follows the data instead of a hard-coded 0-8
#note: zip stops at the shortest list, so all three lists are expected to have one plot per region in the same order
grid = gridplot([list(region_row) for region_row in zip(fp_region_cat_total_2016_plot, 
                                                        gd_region_cat_total_2016_plot, 
                                                        gd_region_cat_rate_total_2016_plot)])
show(grid)
#save plots as html
#output_file('fp_gd_region_cat_plot.html')
#save(grid)
#export plots as png
#export_png(grid, filename = 'fp_gd_region_cat_plot.png')