# Exploring Bokeh
Sean Wade

In [1]:
import pickle
import os
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_notebook, output_file, show, Figure
from bokeh.models import HoverTool, ColumnDataSource, WMTSTileSource
from pyproj import Proj, transform
import warnings
from __future__ import division
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline

## Problem 1

In [3]:
# Import the data
def _load_pickle(file_path):
    """Return the object stored in the pickle file at `file_path`.

    The file is opened in binary mode ('rb'): pickles are byte streams, and
    text mode can corrupt them on Windows and fails outright on Python 3.
    NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    """
    with open(file_path, 'rb') as in_file:
        return pickle.load(in_file)


def _load_pickles(directory):
    """Unpickle every file in `directory` and return the objects as a list.

    File names are sorted first because os.listdir() returns entries in
    arbitrary order, while the accident / person / vehicle lists built here
    are later paired up by position.
    NOTE(review): this assumes the three folders use file names that sort the
    same way (e.g. one file per year) -- confirm against the data layout.
    """
    return [_load_pickle(os.path.join(directory, name))
            for name in sorted(os.listdir(directory))]


accidents_list = _load_pickles('./fars_data/Accidents/')
person_list = _load_pickles('./fars_data/Person/')
vehicle_list = _load_pickles('./fars_data/Vehicle/')

# Lookup tables: numeric state id -> state string, and per-state border data
id_to_state = _load_pickle('Pickle/id_to_state.pickle')
us_states = _load_pickle('Pickle/us_states.pickle')

In [4]:
a_list = []

# Pair each accident table with the vehicle table stored at the same position.
for acc_raw, veh_raw in zip(accidents_list, vehicle_list):

    # Remove unnecessary columns. Copy so the assignments below modify
    # independent frames instead of slices of the raw tables.
    veh = veh_raw[["ST_CASE", "SPEEDREL"]].copy()
    acc = acc_raw[["ST_CASE", "STATE", "LATITUDE", "LONGITUD", "FATALS", "HOUR", "DAY", "MONTH", "YEAR", "DRUNK_DR"]].copy()

    # Treat the placeholder coordinate values as missing, then drop null rows
    acc["LONGITUD"] = acc["LONGITUD"].replace([777.7777, 888.8888, 999.9999], np.nan)
    acc["LATITUDE"] = acc["LATITUDE"].replace([77.7777, 88.8888, 99.9999], np.nan)
    acc = acc.dropna().copy()

    # Write state string
    # NOTE(review): state id 0 is remapped to 49 before the lookup; the reason
    # is not visible in this notebook -- confirm against the data dictionary.
    acc["STATE"] = acc["STATE"].replace(0, 49)
    acc["STATE"] = [id_to_state[x] for x in acc["STATE"]]

    # SPEEDREL codes of 8 and above are zeroed so they do not count as speeding
    veh["SPEEDREL"] = np.where((veh["SPEEDREL"] >= 8), 0, veh["SPEEDREL"])

    # Create speeding column: an accident is speeding-related when the
    # SPEEDREL codes of its vehicles sum to something non-zero. The per-case
    # sums are looked up by ST_CASE rather than assigned by position, so the
    # result does not depend on row order or on every case having vehicles
    # (cases without vehicle rows count as not speeding).
    speed_by_case = veh.groupby("ST_CASE")["SPEEDREL"].sum()
    case_speed = acc["ST_CASE"].map(speed_by_case).fillna(0)
    acc["SPEEDING"] = (case_speed != 0).astype(int)
    a_list.append(acc)

accidents = pd.concat(a_list)

In [5]:
# Preview the first rows of the combined accidents table
accidents.head()

Unnamed: 0,ST_CASE,STATE,LATITUDE,LONGITUD,FATALS,HOUR,DAY,MONTH,YEAR,DRUNK_DR,SPEEDING
0,10001,AL,32.641064,-85.354692,1,4,15,1,2010,1,0
1,10002,AL,31.430447,-86.956694,1,6,11,1,2010,0,0
2,10003,AL,30.691631,-88.085778,1,15,14,1,2010,0,1
3,10004,AL,33.8687,-86.291164,1,1,21,1,2010,0,0
4,10005,AL,33.309742,-86.787222,1,6,4,1,2010,0,0


In [6]:
# Sanity check: number of speeding-flagged accidents and total rows kept.
# The parenthesised single-argument form prints identically on Python 2 and 3.
print("Speeding: %d" % accidents["SPEEDING"].sum())
print("Length: %d" % len(accidents))

Speeding: 44223
Length: 149698


## Problem 2

In [7]:
def convert(longitudes, latitudes):
    """Project longitude/latitude pairs from EPSG:4326 to EPSG:3857.

    Inputs:
    longitudes (array-like) : array of longitudes
    latitudes (array-like) : array of latitudes
    Returns:
    (x_vals, y_vals) : two lists with the projected x and y values, one
        entry per input pair, in input order
    Example:
    x,y = convert(accidents.LONGITUD, accidents.LATITUDE)
    """
    source_proj = Proj(init="epsg:4326")
    target_proj = Proj(init="epsg:3857")

    # Project pair by pair; zip stops at the shorter of the two inputs.
    projected = [transform(source_proj, target_proj, lon, lat)
                 for lon, lat in zip(longitudes, latitudes)]

    # Split the (x, y) results into two parallel lists.
    x_vals = [x for x, _ in projected]
    y_vals = [y for _, y in projected]
    return x_vals, y_vals

# Store the projected coordinates returned by convert() as new "x" and "y" columns
accidents["x"], accidents["y"] = convert(accidents.LONGITUD, accidents.LATITUDE)

## Problem 3


In [8]:
d_list = []
# Walk the person, vehicle and accident tables in lockstep.
# NOTE(review): relies on the three lists holding matching files in the same
# order, and on each accident table covering a single year -- confirm.
for persons, vehicles, acc in zip(person_list, vehicle_list, accidents_list):
    p = persons[["ST_CASE", "VEH_NO", "PER_TYP", "AGE", "DRINKING"]]
    v = vehicles[["SPEEDREL", "ST_CASE", "VEH_NO"]]

    # Attach each person's vehicle speeding code via (case, vehicle number)
    d = pd.merge(p, v, on=["ST_CASE", "VEH_NO"])

    # Take the year from the matching accident table instead of stamping
    # every file with 2010.
    d["YEAR"] = int(acc["YEAR"].iloc[0])
    d_list.append(d)

# NOTE(review): despite the name, rows are not filtered on PER_TYP, so every
# person matched to a vehicle is kept -- confirm whether only drivers are wanted.
drivers = pd.concat(d_list)

In [9]:
# Preview the first rows of the merged person/vehicle table
drivers.head()

Unnamed: 0,ST_CASE,VEH_NO,PER_TYP,AGE,DRINKING,SPEEDREL,YEAR
0,10001,1,1,51,9,0,2010
1,10001,1,2,999,8,0,2010
2,10002,1,1,44,0,0,2010
3,10003,1,1,27,9,1,2010
4,10003,2,1,45,0,0,2010


## Problem 4

In [10]:
# Base figure with scroll-wheel zoom and panning.
# NOTE(review): the x/y ranges are presumably in the same projected units that
# convert() produces (EPSG:3857) -- confirm they frame the intended region.
fig = Figure(plot_width=1100, plot_height=650,
    x_range=(-13000000, -7000000), y_range=(2750000, 6250000),
    tools=["wheel_zoom", "pan"], active_scroll="wheel_zoom", webgl=True)

# Hide the axes; the map tiles provide the visual reference instead.
fig.axis.visible = False

# Background map tiles. The attribution pieces are joined by implicit string
# concatenation, so each piece carries its own trailing space.
STAMEN_TONER_BACKGROUND = WMTSTileSource(
    url='http://tile.stamen.com/toner-background/{Z}/{X}/{Y}.png',
    attribution=(
        'Map tiles by <a href="http://stamen.com">Stamen Design</a>, '
        'under <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a>. '
        'Data by <a href="http://openstreetmap.org">OpenStreetMap</a>, '
        'under <a href="http://www.openstreetmap.org/copyright">ODbL</a>')
)
fig.add_tile(STAMEN_TONER_BACKGROUND)

show(fig)

## Problem 5

In [11]:
# One entry per state, in us_states iteration order: that state's border
# longitudes and latitudes.
state_xs = [border["lons"] for border in us_states.values()]
state_ys = [border["lats"] for border in us_states.values()]

In [12]:
# Project each state's border coordinates with convert(); the result keeps one
# entry per state, in the same order as state_xs / state_ys.
x_vals, y_vals = convert(state_xs, state_ys)

In [13]:
# Split the accidents by contributing factor. The speeding and drinking
# subsets may overlap; "other" holds rows flagged as neither.
speeding_accidents = accidents.loc[accidents["SPEEDING"].ne(0)]
drinking_accidents = accidents.loc[accidents["DRUNK_DR"].ne(0)]
other_accidents = accidents.loc[
    accidents["DRUNK_DR"].eq(0) & accidents["SPEEDING"].eq(0)
]

In [14]:
# State codes in us_states iteration order. A real list is needed because
# dict.keys() is only a view on Python 3.
state_codes = list(us_states)


def _count_by_state(frame):
    """Return the number of rows of `frame` per state, ordered like state_codes.

    Uses a single value_counts() pass instead of filtering the frame once per
    state; states absent from `frame` get a count of 0.
    """
    counts = frame["STATE"].value_counts()
    return [int(counts.get(code, 0)) for code in state_codes]


def _as_percent(part, whole):
    """Format part / whole as a percentage string with one decimal place.

    Returns "0.0%" when `whole` is 0, so a state with no accidents does not
    raise ZeroDivisionError.
    """
    share = 100.0 * part / whole if whole else 0.0
    return "%.1f%%" % share


total_acc = _count_by_state(accidents)
total_drunk = _count_by_state(drinking_accidents)
total_speeding = _count_by_state(speeding_accidents)

# convert to strings for tooltip
# (explicit formatting instead of slicing str(x*100), which produced text
# such as "100.%" for whole numbers)
total = [str(x) for x in total_acc]
perc_drunk = [_as_percent(d, t) for d, t in zip(total_drunk, total_acc)]
perc_speeding = [_as_percent(s, t) for s, t in zip(total_speeding, total_acc)]

# One row per state: border coordinates plus the values shown in the tooltip
border_source = ColumnDataSource(dict(
        xs=x_vals,
        ys=y_vals,
        total=total_acc,
        state=state_codes,
        perc_drunk=perc_drunk,
        perc_speeding=perc_speeding
    ))

In [15]:
# Draw one patch per state from border_source; the hover_* options restyle
# the patch under the cursor. The renderer is kept in `states` for later use.
states = fig.patches(
    "xs", "ys",
    source=border_source,
    alpha=.5,
    line_color="red",
    hover_color="green",
    hover_alpha=.8,
    hover_line_color='black',
)

In [16]:
# Display the figure again, now including the state patches
show(fig)

## Problem 6

In [17]:
def _xy_source(frame):
    """Wrap the 'x' and 'y' columns of `frame` in a ColumnDataSource."""
    return ColumnDataSource({"x": frame['x'], "y": frame['y']})


speeding_source = _xy_source(speeding_accidents)
drinking_source = _xy_source(drinking_accidents)
other_source = _xy_source(other_accidents)

# One small marker per accident: red = speeding, green = drinking, blue = other
fig.circle('x', 'y', source=speeding_source, size=2,
           fill_color="red", line_color="red")
fig.circle('x', 'y', source=drinking_source, size=2,
           fill_color="green", line_color="green")
fig.circle('x', 'y', source=other_source, size=2,
           fill_color="blue", line_color="blue")

<bokeh.models.renderers.GlyphRenderer at 0x14fc03e50>

In [18]:
# Display the figure again, now including the accident markers
show(fig)

## Problem 7
Done above...

## Problem 8

Done in Problem 5.

## Problem 9

In [19]:
# Attach a hover tool limited to the state patches; each "@name" field reads
# the column of that name from the patches' data source.
fig.add_tools(HoverTool(
    renderers=[states],
    tooltips=[
        ("State", "@state"),
        ("Total", "@total"),
        ("Drunk Percent", "@perc_drunk"),
        ("Speeding Percent",  "@perc_speeding"),
    ]
))

In [20]:
# Display the figure again, now with the hover tool attached
show(fig)

## Problem 10
See the accompanying `bokeh_problem10.py` file.