In [276]:
import pandas as pd
import numpy as np
import datetime
import math
import calendar

import folium

import matplotlib.pyplot as plt
import matplotlib as mpl
import plotly
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline

In [247]:
def convert_timestring_to_time(date_string):
    if date_string is np.nan:
        return date_string
    idx = date_string.find('M')
    if( idx ==-1):
        return datetime.datetime.strptime(date_string, '%H:%M')
    idx -=2
    if date_string[idx] == ' ':
        return datetime.datetime.strptime(date_string, '%I:%M %p')
    return datetime.datetime.strptime(date_string, '%I:%M%p')

In [248]:
df = pd.read_csv("./school-shootings-data.csv")
df['time'] = df['time'].apply(lambda x: convert_timestring_to_time(x))
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')

In [240]:
df = df[df['long'].notna()]
df.reset_index(drop=True, inplace=True)

# Aggregate by time

In [263]:
# hour
df['hour'] = df['time'].dt.hour
shootings_per_hour = df.loc[:, ['hour']]
shootings_per_hour = shootings_per_hour.groupby(['hour']).aggregate(count=('hour', 'count')).reset_index()
# shootings_per_hour.head(20)
fig = px.bar(shootings_per_hour, x="hour", y="count", height=400)
# fig.for_each_trace(lambda t: t.update(name = labels[t.name],
#                                       legendgroup = labels[t.name],
#                                       hovertemplate = t.hovertemplate.replace(t.name, labels[t.name])
#                                      )
                #   )
fig.update_traces(hovertemplate='Hour: %{x} <br> Incidents: %{y}')
fig.update_layout(legend_title="Incidents", 
                    xaxis_range=[0,23],
                    xaxis = dict(
                    tickmode = 'linear',
                    tick0 = 0,
                    dtick = 1
                ),
                    xaxis_title="Hour", 
                    yaxis_title="Count")
fig.show()

In [267]:
# month
df['month'] = df['date'].dt.month
shootings_per_month = df.loc[:, ['month']]
shootings_per_month = shootings_per_month.groupby(['month']).aggregate(count=('month', 'count')).reset_index()
fig = px.bar(shootings_per_month, x="month", y="count", height=400)
fig.update_traces(hovertemplate='Month: %{x} <br> Incidents: %{y}')
fig.update_layout(legend_title="Incidents", 
                    # xaxis_range=[0,13],
                    xaxis = dict(
                    tickmode = 'linear',
                    tick0 = 1,
                    dtick = 1
                ),
                    xaxis_title="Month", 
                    yaxis_title="Count")
fig.show()

In [285]:
# weekday
cat = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# df['weekday'] = df['date'].dt.day_of_week
shootings_per_weekday = df.loc[:, ['day_of_week']]
shootings_per_weekday = shootings_per_weekday.groupby(['day_of_week']).aggregate(count=('day_of_week', 'count')).reindex(cat)
shootings_per_weekday['weekday'] = shootings_per_weekday.index

fig = px.bar(shootings_per_weekday, x="weekday", y="count", height=400)
fig.update_traces(hovertemplate='Weekday: %{x} <br> Incidents: %{y}')
fig.update_layout(  xaxis_title="Weekday", 
                    yaxis_title="Count")
fig.show()

# Shooter characteristic

In [327]:
races= {'a': 'Asian', 'ai':'American Indian','b':'Black','h':'Hispanic','w':'White'}
def assign_race(race):
    if race in races:
        return races[race]
    return race

In [333]:
df['gender_shooter1'].isna().sum()
df['race_ethnicity_shooter1'] = df['race_ethnicity_shooter1'].apply(lambda x: assign_race(x))
women = df[df['gender_shooter1'] == 'f']
men = df[df['gender_shooter1'] == 'm']
# aggregate women
# women_ethnicity = women.loc[:,['age_shooter1','race_ethnicity_shooter1']].value_counts(dropna=False).unstack() 
# women_ethnicity = women_ethnicity.reset_index()
# women_ethnicity.head(20)
#aggregate men
men_ethnicity = men.groupby(['age_shooter1', 'race_ethnicity_shooter1'], dropna=False).agg(ethnicity=('race_ethnicity_shooter1', 'count'), age=('age_shooter1', 'count')).reset_index()
men_ethnicity.head(20)

Unnamed: 0,age_shooter1,race_ethnicity_shooter1,ethnicity,age
0,6.0,Black,1,1
1,6.0,,0,2
2,7.0,White,1,1
3,8.0,,0,3
4,9.0,White,1,1
5,11.0,,0,2
6,12.0,Hispanic,2,2
7,12.0,White,2,2
8,12.0,,0,3
9,13.0,American Indian,1,1


In [337]:
from plotly.subplots import make_subplots

# fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=False,
#                     shared_yaxes=True, horizontal_spacing=0)

# # fig = px.bar(shootings_per_year, x="year", y=["sum_killed", "sum_injured"], height=400)
# fig.append_trace(px.bar(women_ethnicity,x='age_shooter1',
#                      y=['Black', 'White', 'NaN'], 
#                     #  text=df["Male"].map('{:,.0f}'.format), #Display the numbers with thousands separators in hover-over tooltip 
#                     #  textposition='inside',
#                      orientation='h', 
#                      width=0.7, 
#                     #  showlegend=False, 
#                     #  marker_color='#4472c4'), 
#  ), 1, 1) # 1,1 represents row 1 column 1 in the plot grid

# fig.append_trace(go.Bar(x=df['Female'],
#                      y=df['Industry'], 
#                      text=df["Female"].map('{:,.0f}'.format),
#                      textposition='inside',
#                      orientation='h', 
#                      width=0.7, 
#                      showlegend=False, 
#                      marker_color='#ed7d31'), 
#                      1, 2) # 1,2 represents row 1 column 2 in the plot grid
# women_ethnicity.reset_index()
fig = px.bar(men_ethnicity, x="age_shooter1",
                     y="ethnicity", color='race_ethnicity_shooter1',
                    #  text=df["Male"].map('{:,.0f}'.format), #Display the numbers with thousands separators in hover-over tooltip 
                    #  textposition='inside',
                     orientation='h', 
                    #  width=0.7,
                    #  showlegend=False, 
                    #  marker_color='#4472c4'), 
 )
fig.show()

In [178]:
def generate_color(killed, injured):
    if killed ==0 and injured==0:
        return 'blue'
    if killed ==0 :
        return 'gray'
    return 'crimson'

def calculate_circle_radius(max_affected, current_affected):
    return math.log(current_affected/ max_affected * 100 + 1.) * 2 + 4

In [None]:
max_killed_and_injured = max(df['killed']) + max(df['injured'])
# Make an empty map
m = folium.Map(location=[40,-100], tiles="OpenStreetMap", zoom_start=4.5)
for i in range(0,df.shape[0]):
   killed_and_injured = df.iloc[i]['killed'] + df.iloc[i]['injured']
   radius = calculate_circle_radius(max_killed_and_injured, killed_and_injured)
   folium.CircleMarker(
      location=[df.iloc[i]['lat'], df.iloc[i]['long']],
      popup=df.iloc[i]['school_name'],
      radius=radius,
      color=generate_color(df.iloc[i]['killed'], df.iloc[i]['injured']),
      fill=True,
      fill_color=generate_color(df.iloc[i]['killed'], df.iloc[i]['injured'])
   ).add_to(m)

# Show the map
m


In [142]:
accidents_per_year = df.groupby(['year']).agg(
     sum_killed = ('killed','count'),
     sum_injured = ('injured','sum'),
     sum=('year', 'count')
     ).reset_index()
accidents_per_year.head()

Unnamed: 0,year,sum_killed,sum_injured,sum
0,1999,7,7,7
1,2000,12,12,12
2,2001,13,13,13
3,2002,5,5,5
4,2003,12,12,12


# Shootings per year

In [320]:

shootings_per_year = df.groupby(['year']).agg(
     sum_killed = ('killed','sum'),
     sum_injured = ('injured','sum'),
     sum=('year', 'count')
     ).reset_index()
shootings_per_year.head()


Unnamed: 0,year,sum_killed,sum_injured,sum
0,1999,14,35,7
1,2000,4,3,12
2,2001,5,22,13
3,2002,1,7,5
4,2003,5,8,12


In [321]:
labels={"sum_killed": "Killed", "sum_injured":"Injured"}
fig = px.bar(shootings_per_year, x="year", y=["sum_killed", "sum_injured"], height=400)
fig.for_each_trace(lambda t: t.update(name = labels[t.name],
                                      legendgroup = labels[t.name],
                                      hovertemplate = t.hovertemplate.replace(t.name, labels[t.name])
                                     )
                  )
fig.update_traces(hovertemplate='Year: %{x} <br> Victimes: %{y}')
fig.update_layout(legend_title="Victims", xaxis_title="Year", yaxis_title="Count")
fig.show()

In [43]:
fig = go.Figure(
    data = [
        go.Bar(x=shootings_per_year['year'], y=shootings_per_year['sum_injured'], offsetgroup=0, name='People injured', hovertemplate='Year: %{x} <br> Injured: %{y}', hovertextsrc=''),
        go.Bar(x=shootings_per_year['year'], y=shootings_per_year['sum_killed'], offsetgroup=0, base=shootings_per_year['sum_injured'],  name='People killed', hovertemplate='Year: %{x} <br> Killed: %{y}'),
        go.Line(x=shootings_per_year['year'], y=shootings_per_year['sum'], name='All incidents', hovertemplate='Year: %{x} <br> Incidents count: %{y}'),
    ]
)
fig.update_layout(width=1000, xaxis_title="Year", yaxis_title="Count")
fig.show()  


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




# Plot student's ethnicity distribution in schools

In [83]:
def plot_student_race_distribution(races, column_names, school_name, file_path):
    races = races.fillna(0)
    fig = px.pie(values=races.values[0], names=column_names, title='Population of '+school_name, width=700)
    fig.update_layout(legend_title="Ethnicity")
    fig.update_traces(hovertemplate='Ethnicity: %{label} <br>  Count: %{value}')
    plotly.offline.plot(fig, filename=file_path)

In [127]:
df.tail()
df.reset_index(drop=True, inplace=True)

In [128]:
column_names = ['White', 'Black', 'Hispanic', 'Asian',
       'American/Indian/Alaska native', 'Hawaiian native/Pacific islander']
df['ethnicity_diagram_path'] = np.nan
for i in range(0,df.shape[0]):
    races = df.loc[[i], ['white', 'black', 'hispanic', 'asian',
       'american_indian_alaska_native', 'hawaiian_native_pacific_islander']]
    races = races._convert(numeric=True)
    schoolname = df.iloc[i]['school_name']
    filepath = './images/ethinicity_diagrams/' + schoolname.replace('/', '_') +'_ethinicity.html'
    plot_student_race_distribution(races, column_names, df.iloc[i]['school_name'], filepath)
    df.at[i,'ethnicity_diagram_path']= filepath

# Map with student's ethnicity in school

In [130]:
# Make an empty map
m = folium.Map(location=[40,-100], tiles="OpenStreetMap", zoom_start=4.5)
for i in range(0,df.shape[0]):
   html="""
    <iframe src=\"""" + df['ethnicity_diagram_path'][i] + """\" width="850" height="400"  frameborder="0">    
    """
   diagram = folium.Popup(folium.Html(html, script=True))
   folium.Circle(
      location=[df.iloc[i]['lat'], df.iloc[i]['long']],
      radius=6000,
      popup=diagram,
      color=generate_color(df.iloc[i]['killed'], df.iloc[i]['injured']),
      fill=True,
      fill_color=generate_color(df.iloc[i]['killed'], df.iloc[i]['injured'])
   ).add_to(m)

# Show the map
# m
m.save('index.html')
