In [73]:
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go

### Read data and display one row of data

The data covers events from 1966 to 2016.

In [63]:
# Load the Stanford MSA release and discard rows in which every column is empty.
df = pd.read_csv(
    "mass_shooting_events_stanford_msa_release_06142016.csv"
).dropna(how='all')

In [9]:
# Show the first event as a Series (one entry per column).
df.iloc[0]

CaseID                                                                                  1
Title                                                       University of Texas at Austin
Location                                                                    Austin, Texas
City                                                                               Austin
State                                                                               Texas
Latitude                                                                          30.1989
Longitude                                                                        -97.8442
Number of Victim Fatalities                                                            16
Total Number of Fatalities                                                             17
Number of Victims Injured                                                              32
Total Number of Victims                                                                48
Descriptio

### 1. Interactive US map of mass shootings. Description as hover text. Color represents number of victims

We will include the description of each mass shooting as hover text. However, the description is too long to fit on one line.

So we create a helper function here that adds a `<br>` (line break) every ten words in the description.

In [64]:
def split_line(text, words_per_line=10):
    """Wrap ``text`` for plotly hover labels by inserting ``<br>`` markers.

    The text is split on single spaces and regrouped into lines of at most
    ``words_per_line`` words; consecutive lines are joined with ``' <br> '``.
    A break is only ever placed *between* two lines, so the result never ends
    with a dangling ``<br>`` and every line except possibly the last holds
    exactly ``words_per_line`` words.

    Parameters
    ----------
    text : str
        The description to wrap. Non-string values (for example the float NaN
        that pandas uses for a missing cell) yield an empty string.
    words_per_line : int, optional
        Maximum number of words per line (default 10).

    Returns
    -------
    str
        The wrapped text.

    Raises
    ------
    ValueError
        If ``words_per_line`` is smaller than 1.
    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be at least 1")
    # Missing descriptions arrive as NaN (a float), which has no .split().
    if not isinstance(text, str):
        return ''
    word_list = text.split(' ')
    # Slice fixed-size chunks instead of inserting into the list: inserting
    # shifts every later index, which makes subsequent lines one word short.
    lines = [
        ' '.join(word_list[start:start + words_per_line])
        for start in range(0, len(word_list), words_per_line)
    ]
    return ' <br> '.join(lines)

Plot the US map

In [65]:
# Hover text: each event description with <br> markers added by split_line.
df['text'] = df['Description'].apply(split_line)

# Colour ramp as [fraction, colour] stops, running from dark blue to light grey.
# The marker sets reversescale=True, which flips the direction of this ramp.
scl = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# One scattergeo trace: a square marker per event, placed by longitude/latitude
# and coloured by the total number of victims.
data = [dict(
    type='scattergeo',
    locationmode='USA-states',
    lon=df['Longitude'],
    lat=df['Latitude'],
    text=df['text'],
    mode='markers',
    marker=dict(
        size=8,
        opacity=0.8,
        reversescale=True,
        autocolorscale=False,
        symbol='square',
        line=dict(
            width=1,
            # 'rgba(...)' needs four components; with only three the string is
            # not a valid colour, so the outline colour is given as 'rgb(...)'.
            color='rgb(102, 102, 102)'
        ),
        colorscale=scl,
        cmin=0,
        color=df['Total Number of Victims'],
        cmax=df['Total Number of Victims'].max(),
        colorbar=dict(
            title="Number of Victims"
        )
    )
)]

# The colour bar is configured on the marker above; the layout itself has no
# `colorbar` attribute, so none is set here.
layout = dict(
    title='Mass Shoots of most Victims <br>(Hover for Description)',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showland=True,
        landcolor="rgb(250, 250, 250)",
        subunitcolor="rgb(217, 217, 217)",
        countrycolor="rgb(217, 217, 217)",
        countrywidth=0.5,
        subunitwidth=0.5
    ),
)

fig = dict(data=data, layout=layout)
# Writes a standalone HTML file; validate=False skips plotly's figure validation.
plotly.offline.plot(fig, validate=False, filename='worldmap.html')

'file:///Users/QIAN/Documents/dataVisualization/project_proposal/beta/worldmap.html'

### 2. Interactive radar graph of shooters' motives. Each value is the number of mass shootings

Display the frequency of the different motives behind the mass shootings

In [68]:
# Count events per general motive, ignoring letter case, most frequent first.
(
    df['Possible Motive - General']
    .str.lower()
    .value_counts()
    .sort_values(ascending=False)
)

unknown                            84
social dispute                     37
mental illness                     33
multiple motives                   32
domestic dispute                   30
terminated/denied/reprimanded      22
political/religious ideals         11
financial dispute                  10
rejection                           7
legal dispute                       6
race                                6
failure                             6
harassment                          5
expulsion/suspension                5
financial difficulties              5
drug use                            3
drug use/financial difficulties     2
robbery                             1
gender                              1
Name: Possible Motive - General, dtype: int64

I'm only going to include motives that are linked to five or more mass shootings in the radar plot.

In [75]:
# Each radar axis is kept as a (label, count) pair so that a label can never be
# left without a value. The counts are taken by hand from the motive frequency
# table displayed above.
#
# 'Drug Use' is not listed: it had a label but no count, which attached Race's
# count to 'Drug Use' and left 'Race' without a value. Drug use on its own
# accounts for 3 events, below the 5+ cutoff used for this plot.
# NOTE(review): 'harassment' (5) and 'multiple motives' (32) are not represented
# on any axis - confirm that this is intentional.
motive_axes = [
    ('Unknown', 84),
    ('Social Dispute', 37),
    ('Mental Illness', 33),
    ('Domestic Dispute', 30),
    ('Financial Dispute/Difficulties', 17),            # 10 + 5 + 2
    ('Suspension/Reprimanded/Failure/Rejection', 40),  # 22 + 7 + 6 + 5
    ('Legal Dispute', 6),
    ('Political/religious Ideals', 11),
    ('Race', 6),
]

data = [go.Scatterpolar(
    r=[count for _, count in motive_axes],
    theta=[label for label, _ in motive_axes],
    fill='toself'
)]

layout = go.Layout(
    title='Possible Motives for shooters and number of mass shoots caused',
    polar=dict(
        radialaxis=dict(
            visible=True,
            # Fixed radial range; the largest count plotted is 84.
            range=[0, 90]
        )
    ),
    showlegend=False
)

fig = go.Figure(data=data, layout=layout)
# Writes the chart to a standalone HTML file.
plotly.offline.plot(fig, filename="radar.html")

'file:///Users/QIAN/Documents/dataVisualization/project_proposal/beta/radar.html'