In [11]:
import pandas as pd
import numpy as np
import datetime

import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [35]:
# Bar fill colour for each race/ethnicity label, looked up by label when
# the traces are built.
marker_color = {
    "Black": "#fd7f6f",
    "Not known": "#7eb0d5",
    "White": "#b2e061",
    "Hispanic": "#bd7ebe",
    "American Indian": "#ffb55a",
    "Asian": "#ffee65",
}

# Shared theme colours for figure chrome (background, text, outlines).
color_dark = "#000000"
color_light = "#FFFFFF"
color_dark_accent = "#14213D"
color_light_accent = "#E5E5E5"
color_highlight = "#FCA311"
color_dark_accent_2 = "#3d3d3d"

In [13]:
def convert_timestring_to_time(date_string):
    """Parse a clock-time string into a ``datetime.datetime``.

    Three layouts are recognised:

    * 24-hour ``"14:05"``                -> ``%H:%M``
    * 12-hour with a space ``"9:30 AM"`` -> ``%I:%M %p``
    * 12-hour without one ``"9:30AM"``   -> ``%I:%M%p``

    Parameters
    ----------
    date_string : str or missing value
        The raw time text. Missing values (``None``, ``np.nan``, any other
        NaN, ``pd.NA``) are returned unchanged.

    Returns
    -------
    datetime.datetime
        The parsed time; ``strptime`` fills the date part with 1900-01-01.
        Missing input is passed straight through instead.

    Raises
    ------
    ValueError
        If the text matches none of the three layouts.
    """
    # pd.isna recognises every missing-value marker; an identity test against
    # np.nan misses NaN objects that are not that exact singleton.
    if pd.isna(date_string):
        return date_string

    text = date_string.strip()
    # Look for the 'M' of AM/PM regardless of case (strptime's %p is itself
    # case-insensitive).
    marker = text.upper().find('M')
    if marker == -1:
        return datetime.datetime.strptime(text, '%H:%M')

    # The character two places before the 'M' is either the separating blank
    # ("9:30 AM") or the last minute digit ("9:30AM"). Guard the index so a
    # very short string cannot wrap around to the end of the text.
    if marker >= 2 and text[marker - 2].isspace():
        return datetime.datetime.strptime(text, '%I:%M %p')
    return datetime.datetime.strptime(text, '%I:%M%p')

In [14]:
# Read the raw incident table from disk.
df = pd.read_csv("../../data/school_shooting_data.csv")

# Turn the textual 'time' values into datetimes and parse 'date', which is
# stored as month/day/year.
df['time'] = df['time'].apply(convert_timestring_to_time)
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')

# Keep only rows where 'long' is present, then renumber the index from zero.
df = df.loc[df['long'].notna()].reset_index(drop=True)

In [15]:
# Short race/ethnicity codes mapped to their display labels.
races = {
    'a': 'Asian',
    'ai': 'American Indian',
    'b': 'Black',
    'h': 'Hispanic',
    'w': 'White',
}


def assign_race(race):
    """Translate a race/ethnicity code into its display label.

    Any value that is not a key of ``races`` (including NaN) is reported
    as ``"Not known"``.
    """
    return races.get(race, "Not known")

In [16]:
def _count_by_age_and_ethnicity(shooters):
    """Count shooters per (age, ethnicity) pair, pivoted to one row per age.

    Parameters
    ----------
    shooters : pandas.DataFrame
        Must contain the 'age_shooter1' and 'race_ethnicity_shooter' columns.

    Returns
    -------
    pandas.DataFrame
        An 'age_shooter1' column followed by one column per ethnicity label
        found in ``shooters``; combinations that never occur hold 0.
    """
    return (
        shooters.loc[:, ['age_shooter1', 'race_ethnicity_shooter']]
        .value_counts(dropna=False)
        .unstack()
        .fillna(0)
        .reset_index()
    )


# assign race from dictionary
df['race_ethnicity_shooter'] = df['race_ethnicity_shooter1'].apply(assign_race)
# remove rows where the age of the shooter is not known; take an explicit copy
# so the column assignment below works on an independent frame rather than on
# a selection of the previous one
df = df[df['age_shooter1'].notna()].copy()
# convert age to int
df['age_shooter1'] = df['age_shooter1'].astype(int)
# separate data for men and women
women = df[df['gender_shooter1'] == 'f']
men = df[df['gender_shooter1'] == 'm']
# aggregate each group into an age x ethnicity count table
women_ethnicity = _count_by_age_and_ethnicity(women)
men_ethnicity = _count_by_age_and_ethnicity(men)

In [131]:
def _add_stacked_bars(fig, counts, col, offsetgroup, name_prefix, showlegend):
    """Add one horizontal bar trace per ethnicity column, stacked by hand.

    Parameters
    ----------
    fig : plotly figure created with ``make_subplots``
        Receives the traces in row 1 of the given column.
    counts : pandas.DataFrame
        First column is 'age_shooter1'; every further column holds the counts
        for one ethnicity label (the label must be a key of ``marker_color``).
    col : int
        1-based subplot column to draw into.
    offsetgroup : int
        Passed to every trace so all bars of one panel share a slot.
    name_prefix : str
        Text put in front of the ethnicity label to form the trace name.
    showlegend : bool
        Whether these traces get legend entries.
    """
    ages = counts['age_shooter1']
    # Running total, per age row, of the bars already drawn; it becomes the
    # starting offset ("base") of the next ethnicity's bars.
    base = pd.Series(np.zeros(shape=counts.shape[0]))
    for ethnicity in counts.columns[1:]:
        # One hover string per bar (a Series, aligned with x and y).
        hover_text = (
            'Ethnicity: ' + ethnicity
            + '<br>Count: ' + counts[ethnicity].map('{:,.0f}'.format)
            + '<br>Age: ' + ages.map('{:,.0f}'.format)
        )
        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(
            go.Bar(
                name=name_prefix + ethnicity,
                x=counts[ethnicity],
                y=ages,
                hovertemplate=hover_text,
                offsetgroup=offsetgroup,
                orientation='h',
                width=0.7,
                marker_color=marker_color[ethnicity],
                showlegend=showlegend,
                base=base,
            ),
            row=1,
            col=col,
        )
        base = base.add(counts[ethnicity])


# Two bar panels sharing the age axis and touching each other: women in the
# narrow left panel, men in the wide right panel.
fig = make_subplots(
    rows=1,
    cols=2,
    column_widths=[0.3, 0.7],
    specs=[[{"type": "bar"}, {"type": "bar"}]],
    shared_xaxes=False,
    shared_yaxes=True,
    horizontal_spacing=0,
)

# Row 1, column 1: women. Kept out of the legend; only the men's traces
# provide legend entries.
_add_stacked_bars(fig, women_ethnicity, col=1, offsetgroup=0,
                  name_prefix="Ethnicity ", showlegend=False)
# Row 1, column 2: men.
_add_stacked_bars(fig, men_ethnicity, col=2, offsetgroup=1,
                  name_prefix="", showlegend=True)

# The left axis runs from 20 down to 0 so the women's bars grow leftwards,
# away from the shared centre line. NOTE(review): 20 is a fixed upper bound;
# totals above it would be clipped.
fig.update_xaxes(showticklabels=True, range=[20, 0], row=1, col=1)
fig.update_xaxes(showticklabels=True, row=1, col=2)

fig.update_layout(
    title_text="Distribution of shooters vs age with ethnicity distinguished<br><br><sup>Female | Male</sup>",
    title_x=.5,
    title_y=.96,
    width=630,
    height=1000,
    barmode='relative',
    legend=dict(
        yanchor="top",
        y=1,
        xanchor="right",
        x=1
    ),
    # Both count axes sit on top and every axis gets a black frame line that
    # is mirrored to the opposite side of its panel.
    xaxis=dict(
        side='top',
        showline=True,
        mirror=True,
        linecolor="#000000",
    ),
    xaxis2=dict(
        side='top',
        showline=True,
        mirror=True,
        linecolor="#000000",
    ),
    yaxis=dict(
        showline=True,
        mirror=True,
        linecolor="#000000",
        title=dict(
            text="Age"
        )
    ),
    yaxis2=dict(
        showline=True,
        mirror=True,
        linecolor="#000000"
    ),
    plot_bgcolor=color_light,
    font=dict(family='Rubik', size=12, color=color_dark),
    modebar=dict(
        orientation='v',
    )
)

# Outline every bar in the dark accent colour.
fig.update_traces(
    marker_line_color=color_dark_accent_2,
)

fig.show()


In [130]:
# Render the figure to an HTML <div> string instead of opening a browser.
chart_div = plotly.offline.plot(fig, output_type='div')
# Write with an explicit UTF-8 encoding: the platform default encoding may be
# unable to represent every character of the generated HTML.
with open("../docs/_includes/shooters.html", 'w', encoding='utf-8') as f:
    f.write(chart_div)