In [1]:
import os

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.tools as tls

In [35]:
# Plotly credentials are read from the environment so that no secret is stored
# in the notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the
# kernel; a missing variable raises KeyError naming the variable.
# NOTE(review): an API key was previously committed in this cell -- it should be
# treated as leaked and regenerated in the Plotly account settings.
tls.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

### Read data and display one row of data

The data covers events from 1966 through 2016.

In [2]:
# Load the Stanford MSA release and discard rows in which every column is empty.
df = (
    pd.read_csv("mass_shooting_events_stanford_msa_release_06142016.csv")
    .dropna(how='all')
)

In [107]:
# Build [column name, value] pairs for the first row of the frame.
# `iloc[0]` selects the first remaining row by position, so it still works if
# the row labelled 0 was removed by dropna, and `.items()` avoids looking up
# values by integer position on a Series that is indexed by column name.
first_row = df.iloc[0]
table = [[column, value] for column, value in first_row.items()]

In [82]:
from plotly.figure_factory import create_table

# Render the [column, value] rows as a Plotly table. The figure gets its own
# name so that `table` (the list of rows) is not overwritten; this keeps the
# cell safe to re-run.
preview_fig = create_table(table)
py.iplot(preview_fig, filename='data-preview')

### 1. Interactive US map of mass shootings. Description as hover text; color represents the number of victims

We will include the description of each mass shooting as hover text. However, the descriptions are too long to fit on one line.

So we define a helper function that adds a `<br>` (line break) tag every ten words in the description.

In [6]:
def split_line(text, words_per_line=10):
    """Wrap ``text`` for Plotly hover labels by inserting ``<br>`` tags.

    The text is split on single spaces and re-joined so that a ``<br>`` token
    (surrounded by spaces) separates every group of ``words_per_line`` words.
    No break is added after the final group, so the result never ends with a
    dangling ``<br>``.

    Parameters
    ----------
    text : str
        The text to wrap. Non-string values (for example NaN from a missing
        DataFrame cell) yield an empty string.
    words_per_line : int, optional
        Number of words per line; must be positive. Defaults to 10.

    Returns
    -------
    str
        The wrapped text.
    """
    if not isinstance(text, str):
        return ''
    words = text.split(' ')
    # Slice into fixed-size groups rather than inserting into the list in
    # place: each insertion would shift the later indices and shorten every
    # following line by one word.
    lines = [
        ' '.join(words[start:start + words_per_line])
        for start in range(0, len(words), words_per_line)
    ]
    return ' <br> '.join(lines)

Plot the US map

In [12]:
# Hover text: the event description wrapped with <br> tags by split_line.
df['text'] = df['Description'].apply(split_line)

# Blue-to-grey colour scale as [position, colour] pairs. The marker sets
# reversescale=True, so the scale is applied in reverse order.
scl = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# One square marker per event, coloured by the total number of victims.
data = [ dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = df['Longitude'],
        lat = df['Latitude'],
        text = df['text'],
        mode = 'markers',
        marker = dict(
            size = 8,
            opacity = 0.8,
            reversescale = True,
            autocolorscale = False,
            symbol = 'square',
            line = dict(
                width=1,
                # 'rgba(...)' needs four components; with only three the
                # string is not a valid colour, so plain 'rgb(...)' is used.
                color='rgb(102, 102, 102)'
            ),
            colorscale = scl,
            cmin = 0,
            color = df['Total Number of Victims'],
            cmax = df['Total Number of Victims'].max(),
            colorbar=dict(
                title="Number of Victims"
            )
        ))]

# The colour bar is configured on the marker above; `colorbar` is not a layout
# attribute, so it is not set here.
layout = dict(
        title = 'Mass Shoots of most Victims <br>(Hover for Description)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showland = True,
            landcolor = "rgb(250, 250, 250)",
            subunitcolor = "rgb(217, 217, 217)",
            countrycolor = "rgb(217, 217, 217)",
            countrywidth = 0.5,
            subunitwidth = 0.5
        ),
    )

fig = dict( data=data, layout=layout )
py.iplot(fig, validate=False, filename='map')

### 2. Interactive radar graph of shooters' motives. Each value is the number of mass shootings

Display how frequently each motive appears across mass shootings

In [8]:
# Re-read the CSV so this section starts from a fresh frame, then drop the rows
# that have no values in any column.
raw_events = pd.read_csv("mass_shooting_events_stanford_msa_release_06142016.csv")
df = raw_events.dropna(how='all')

In [15]:
from datetime import datetime

In [29]:
def get_year(date, pivot=18):
    """Extract the year from a slash-separated date string.

    The third ``/``-separated field is taken as the year, so both
    ``'6/12/2016'`` and ``'11/12/72'`` are accepted. Two-digit years are
    expanded with a pivot: values greater than ``pivot`` map to the 1900s, the
    rest to the 2000s.

    Parameters
    ----------
    date : str
        Date text with the year in the third ``/``-separated field.
    pivot : int, optional
        Largest two-digit year that is read as 20xx. Defaults to 18.

    Returns
    -------
    int or None
        The four-digit year, or ``None`` when ``date`` is not a string, has
        fewer than three fields, or its year field is not 2 or 4 characters
        long.

    Raises
    ------
    ValueError
        If the year field has 2 or 4 characters but is not an integer.
    """
    # Missing cells arrive as NaN (a float); there is no year to extract.
    if not isinstance(date, str):
        return None
    fields = date.strip().split('/')
    if len(fields) < 3:
        return None
    # Strip so that stray whitespace around the year does not defeat the
    # length checks below.
    year = fields[2].strip()
    if len(year) == 4:
        return int(year)
    if len(year) == 2:
        two_digit = int(year)
        century = 1900 if two_digit > pivot else 2000
        return century + two_digit
    return None

In [33]:
# Add a numeric Year column derived from each event's Date value.
df['Year'] = df['Date'].apply(get_year)

In [90]:
# Case-insensitive tally of the general motive for events in 2013, most frequent first.
motives_2013 = df.loc[df['Year'] == 2013, 'Possible Motive - General']
motives_2013.str.lower().value_counts().sort_values(ascending=False)

mental illness                   4
domestic dispute                 3
legal dispute                    2
multiple motives                 1
social dispute                   1
harassment                       1
political/religious ideals       1
terminated/denied/reprimanded    1
unknown                          1
financial dispute                1
Name: Possible Motive - General, dtype: int64

In [89]:
# Case-insensitive tally of the general motive for events in 2014, most frequent first.
motives_2014 = df.loc[df['Year'] == 2014, 'Possible Motive - General']
motives_2014.str.lower().value_counts().sort_values(ascending=False)

social dispute                4
mental illness                3
multiple motives              2
rejection                     1
legal dispute                 1
political/religious ideals    1
unknown                       1
Name: Possible Motive - General, dtype: int64

In [88]:
# Case-insensitive tally of the general motive for events in 2015, most frequent first.
motives_2015 = df.loc[df['Year'] == 2015, 'Possible Motive - General']
motives_2015.str.lower().value_counts().sort_values(ascending=False)

unknown                            23
domestic dispute                   15
social dispute                      7
multiple motives                    3
drug use                            2
rejection                           2
political/religious ideals          2
drug use/financial difficulties     2
race                                2
financial difficulties              2
financial dispute                   2
terminated/denied/reprimanded       1
mental illness                      1
gender                              1
Name: Possible Motive - General, dtype: int64

In [87]:
# Case-insensitive tally of the general motive for events in 2016, most frequent first.
motives_2016 = df.loc[df['Year'] == 2016, 'Possible Motive - General']
motives_2016.str.lower().value_counts().sort_values(ascending=False)

unknown             48
social dispute      13
domestic dispute     5
robbery              1
drug use             1
multiple motives     1
Name: Possible Motive - General, dtype: int64

#### Draw the radar plot, where the user can select which year to display

In [106]:
# Radar axes shared by every yearly trace: one spoke per motive group.
motive_labels = [
    'Mental Illness',
    'Multiple Motives',
    'Social Dispute/Rejection/Harassment',
    'Domestic Dispute',
    'Financial Dispute/Difficulties',
    'Drug Use',
    'Race',
    'Political/religious Ideals',
    'Legal Dispute',
]

# Number of shootings per motive group for each year, tallied by hand from the
# per-year value_counts outputs earlier in this section. Entries follow the
# order of `motive_labels`.
yearly_motive_counts = [
    (2013, [4, 1, 2, 3, 1, 0, 0, 1, 2]),
    (2014, [3, 2, 5, 0, 0, 0, 0, 1, 1]),
    # 2015 corrected against the 2015 tally: social dispute (7) + rejection (2)
    # = 9, and political/religious ideals = 2.
    # NOTE(review): the financial group is left at 5; 'financial dispute' (2) +
    # 'financial difficulties' (2) gives 4, and it is unclear how the combined
    # 'drug use/financial difficulties' (2) rows were meant to count -- confirm.
    (2015, [1, 3, 9, 15, 5, 2, 2, 2, 0]),
    (2016, [0, 1, 13, 5, 0, 1, 0, 0, 0]),
]

# Position of the year displayed when the figure first loads (the latest year).
initial_index = len(yearly_motive_counts) - 1

# One filled polar trace per year; only the initially selected year is visible.
data = [
    go.Scatterpolar(
        r = counts,
        theta = motive_labels,
        fill = 'toself',
        name = 'Year {}'.format(year),
        visible = (index == initial_index),
    )
    for index, (year, counts) in enumerate(yearly_motive_counts)
]

# One slider step per year. Each step restyles the `visible` flag of all traces
# so that exactly one year's trace is shown.
steps = []
for index, (year, _counts) in enumerate(yearly_motive_counts):
    visibility = [False] * len(data)
    visibility[index] = True
    steps.append(dict(
        method = 'restyle',
        args = ['visible', visibility],
        label = str(year),
    ))

sliders = [dict(
    active = initial_index,  # must be a valid index into `steps`
    currentvalue = {"prefix": "Year: "},
    pad = {"t": 50},
    steps = steps
)]

layout = dict(sliders=sliders)

fig = dict(data=data, layout=layout)

py.iplot(fig, validate=False, filename='radarplot/multiple')