In [143]:
import plotly.offline as py
import plotly.graph_objs as go
import cufflinks as cf
import pandas as pd
import numpy as np

In [144]:
# Initialise plotly's offline notebook mode so that figures passed to
# py.iplot are rendered inline in the notebook output.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead
# of embedding it, which would require network access when viewing — confirm.
py.init_notebook_mode(connected=True)

In [145]:
# Input CSV files, keyed by dataset name. The paths are relative, so the
# notebook expects both files next to it (in the working directory).
files = {
    'gun-violence': 'gun-violence.csv',
    'population': 'population-estimates.csv'
}

In [146]:
# Create the population data frame and keep everything from the third row on.
# NOTE(review): the first two rows are presumably non-state rows in the
# population export — confirm against the CSV.
#
# `.ix` has been removed from pandas; `.iloc` is the positional equivalent and
# selects the same rows here because read_csv assigns a default 0..n-1 index.
pop_data = pd.read_csv(files['population'])
pop_data = pop_data.iloc[2:]

In [147]:
# Reduce the population table to a single numeric Series: indexed by the
# 'GEO.display-label' column and holding the 'respop72015' estimate.
# Values that cannot be parsed as numbers become NaN (errors='coerce').
pop_data = pd.to_numeric(
    pop_data.set_index('GEO.display-label')['respop72015'],
    errors='coerce',
)

In [148]:
# Load the incident records and derive two helper columns:
#   incident_year - the last two characters of 'incident_date'
#   mci           - 'y' when more than one person was killed, otherwise 'n'
data = pd.read_csv(files['gun-violence'])
data['incident_year'] = data['incident_date'].str.slice(start=-2)
data['mci'] = np.where(data['num_killed'].gt(1), 'y', 'n')

In [149]:
# Count incidents per state, split into MCI ('y') and non-MCI ('n') columns.
#
# The incident id is counted, not summed: adding identifiers together does
# not yield a number of incidents.
# NOTE(review): assumes 'gva_id' is a per-incident identifier — confirm.
#
# fill_value=0 makes a state with no incidents of one kind show 0 rather than
# NaN, so its total is still defined and the state is not discarded later by
# a dropna().
plot_df = (
    data.groupby(['state', 'mci'])['gva_id']
    .count()
    .unstack('mci', fill_value=0)
)
plot_df['total_incidents'] = plot_df['n'] + plot_df['y']

In [150]:
# Join the per-state incident figures with the population figures.
# The two inputs are aligned side by side on their index labels, keeping only
# labels present in both (join='inner'); rows with any missing value are then
# dropped.
plot_df = pd.concat([plot_df, pop_data], axis=1, join='inner').dropna()

# helper functions

In [151]:
def annotate(data, col):
    """Build plotly annotation dicts that label each value with its rounded integer.

    Parameters
    ----------
    data : mapping from column name to a sequence of values
        Typically a pandas DataFrame; only ``data[col]`` is read.
    col : str
        Name of the column whose values position and caption the labels.

    Returns
    -------
    list of dict
        One annotation per non-missing value. ``x`` is the value's 0-based
        position within ``data[col]``, ``y`` is the value nudged up by 0.05%,
        and ``text`` is the value rounded to the nearest integer.
    """
    labels = []
    # Positions come from the column being annotated (not from a module-level
    # frame), so the number of labels always matches the data that was passed.
    for xi, yi in enumerate(data[col]):
        if pd.isnull(yi):
            # int() cannot convert NaN, so missing values get no label.
            continue
        labels.append(dict(
            x=xi,
            y=yi * 1.0005,
            text=str(int(round(yi, 0))),
            yanchor='bottom',
            showarrow=False,
            font=dict(
                size=9
            )))
    return labels

# Per Capita Plotting

In [152]:
# Per-capita rates: each count column is cast to float and divided by the
# population column 'respop72015'.
plot_df['mci-per-capita'] = plot_df['y'].astype(float).div(plot_df['respop72015'])
plot_df['sci-per-capita'] = plot_df['n'].astype(float).div(plot_df['respop72015'])
plot_df['incidents_per_capita'] = (
    plot_df['total_incidents'].astype(float).div(plot_df['respop72015'])
)

In [153]:
# Sort states by total incidents per capita, highest first, so the bars in
# the chart below run from the highest rate to the lowest.
plot_df = plot_df.sort_values('incidents_per_capita', ascending=False)

In [154]:
# Stacked bar chart of per-capita incidents by state: one bar segment for
# incidents flagged as MCI and one for the rest.
trace1 = go.Bar(
    x = plot_df.index,
    y = plot_df['mci-per-capita'],
    name = 'Multi-Casualty Incidents'
)
trace2 = go.Bar(
    x = plot_df.index,
    y = plot_df['sci-per-capita'],
    name = 'Single-Casualty Incidents'
)
# The trace list gets its own name: binding it to `data` would replace the
# incident DataFrame that the stats section further down still reads from
# `data`.
traces = [trace1, trace2]
layout = go.Layout(
    title='State Gun Violence per Capita',
    yaxis=dict(title='Incidents per capita'),
    barmode='stack',
    # Labels are placed at the stacked total of each bar.
    annotations=annotate(plot_df, 'incidents_per_capita')
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='State Gun Violence per Capita')

# Stats Plotting

In [None]:
# Per-state maxima of 'num_killed' and 'num_injured', split by the 'mci' flag.
# `data` must be the incident DataFrame when this cell runs.
stats_df = data.groupby(['state', 'mci'])['num_killed'].max().unstack()
tmp_df = data.groupby(['state', 'mci'])['num_injured'].max().unstack()
# Both frames carry the same column labels from the unstacked 'mci' level, so
# the suffixes keep them apart (e.g. 'y_killed' vs 'y_injured').
stats_df = stats_df.merge(
    tmp_df,
    how='inner',
    left_index=True,
    right_index=True,
    sort=True,
    suffixes=('_killed', '_injured'),
)
del tmp_df

In [140]:
# Sum of the four per-state maxima (killed / injured, non-MCI / MCI).
# Plain `+` propagates NaN, so a state missing any of the four values gets a
# NaN total.
stats_df['total_values'] = stats_df['n_killed'] + stats_df['y_killed'] + stats_df['n_injured'] + stats_df['y_injured']

In [158]:
# Sort states by the maximum number killed in an MCI ('y_killed'), highest
# first, matching the column plotted below.
stats_df = stats_df.sort_values('y_killed', ascending=False)

In [160]:
# Bar chart of the per-state maximum of 'num_killed' among incidents flagged
# as MCI.
trace3 = go.Bar(
    x = stats_df.index,
    y = stats_df['y_killed'],
    name = 'Max Killed in MCI'
)
# The trace list gets its own name: binding it to `data` would replace the
# incident DataFrame, and re-running the cell that builds stats_df from
# `data` would then fail.
traces = [trace3]
layout = go.Layout(
    title='Max Killed in MCI by State',
    yaxis=dict(title='Max killed'),
    barmode='stack',
    annotations=annotate(stats_df, 'y_killed')
)

fig2 = go.Figure(data=traces, layout=layout)
py.iplot(fig2, filename='US Gun Violence')