In [3]:
import numpy as np
import pandas as pd
from subprocess import call
from bokeh.plotting import figure
from bokeh.layouts import column, layout, row
from bokeh.charts import Bar, Line, TimeSeries,BoxPlot, Histogram, Scatter, output_file, show, save, output_notebook
from bokeh.charts.attributes import cat, color
from bokeh.charts.operations import blend
from bokeh.client import push_session

from bokeh.io import output_file, show
from bokeh.models import (
    CustomJS, Slider, RangeSlider, ColumnDataSource, WidgetBox, Paragraph, HoverTool, Div, Plot,
    GMapPlot, GMapOptions, ColumnDataSource, Circle, CircleCross, Range1d, PanTool, Line, 
    WheelZoomTool, BoxSelectTool, ResetTool, ZoomInTool, ZoomOutTool
)
from bokeh.models.widgets import Toggle
from bokeh.models.layouts import Column
from bokeh.models import BasicTicker
from bokeh.driving import cosine
from bokeh.plotting import figure, curdoc
from bokeh.models.glyphs import ImageURL
import pickle
#from subset_data import subset_all, subset_day_night

output_notebook()


In [4]:
path = 'C:/Users/Samir Patel/Desktop/DATA_515/Final_Project/Ax-Wx/data/merged.p'
raw_data = pd.DataFrame(pickle.load(open(path, 'rb')))

#List of contributing factors of interest to subset out of the dataset.
factors = ['Driver Not Distracted', 'Follow Too Closely', 'Under Influence of Alcohol',
          'Unknown Driver Distraction']

In [5]:
def addnewcategories(raw_data):
    """
    Function which adds new columns for rain bin, severity of accident and unique id for accident.
    
    :param data: DataFrame
        A Pandas dataframe input of raw data used to create new columns for visualization.
    """
    
    #Column to bin by precip criterion
    raw_data['rain_bin'] = ['< 0.05 in' if x < 0.05 else 'greater than 0.25' if x > 0.25 else '0.05 - 0.25 in' for x in raw_data['wx_PrecipRate_inhr_last_1hr']]

    #Column to classify severe injuries
    inj_list = ['Dead on Arrival', 'Died at Hospital', 'Dead at Scene', 'Serious Injury', 'Evident Injury']
    raw_data['severe_ax'] = ['yes' if x in inj_list else 'no' for x in raw_data['driver_injury']]
    
    #Column to create unique accident id
    raw_data['accident_id'] = raw_data['date'].astype(str) + raw_data['time_of_day'].astype(str) + raw_data['lat'].astype(str) + raw_data['lon'].astype(str)

    return raw_data


def chart_data_split(data, factors):
    """
    Function to subset the data for each chart type for both day and night categories.
    
    :param data: DataFrame
        A Pandas dataframe input to subset on.
    :param factors: list
        A list of accident types from the 'contributing_factor_' columns to subset the data.
    :return: DataFrame
        The output is three DataFrame objects, for each chart type.

    """
    
    #subset based off factors
    ix = data['contributing_factor_1'].isin(factors)
    sub_data = data.loc[ix]
    
    #change to string and sort by totals
    sub_data['contributing_factor_1'] = sub_data['contributing_factor_1'].astype('str')
    sub_data = sub_data.sort_values(by='contributing_factor_1',ascending=False)
    
    #creating dataset for accident totals by type for frequency plot
    crash_type = pd.DataFrame()
    crash_type['total'] = sub_data.groupby(sub_data['contributing_factor_1'], as_index = False).size()
    crash_type = crash_type.reset_index()
    crash_type = crash_type.sort_values(by='total',ascending=False)
    
    #creating dataset for accident totals by date/type for timeseries plot
    crash_typeday = pd.DataFrame()
    crash_typeday['total'] = sub_data.groupby(['contributing_factor_1', 'date'],as_index = False).size()
    crash_typeday = crash_typeday.reset_index()
    #crash_typeday['contributing_factor_1'] = crash_type['contributing_factor_1'].astype(int)
    crash_typeday = crash_typeday.sort_values(by='total',ascending=False)
    crash_typeday
    
    return sub_data, crash_type, crash_typeday

def makebardata(data):
    """
    Function to create subset aggregate data for barplot with accident count
    normalized by # days.
    
    param data: DataFrame
        A Pandas dataframe input of raw dataset.
    """
    ix = data['contributing_factor_1'].isin(factors)
    sub_data = data.loc[ix]

    #change to string and sort by totals
    sub_data['contributing_factor_1'] = sub_data['contributing_factor_1'].astype('str')
    sub_data = sub_data.sort_values(by='contributing_factor_1',ascending=False)

    #creating dataset for accident totals by type for frequency plot
    crash_type = pd.DataFrame()
    crash_type['total'] = sub_data.groupby(['contributing_factor_1', 'rain_bin'], as_index = False).size()
    crash_type = crash_type.reset_index()
    crash_type = crash_type.sort_values(by='total',ascending=False)
    
    crash_typeday = pd.DataFrame()
    crash_typeday['total'] = sub_data.groupby(['contributing_factor_1', 'rain_bin', 'date'],as_index = False).size()
    crash_typeday = crash_typeday.reset_index()
    crash_typeday = crash_typeday.sort_values(by='total',ascending=False)
    day_cnts = pd.DataFrame(crash_typeday.groupby(['contributing_factor_1', 'rain_bin'],as_index = False).size())
    day_cnts = day_cnts.reset_index()
    day_cnts.rename(columns={0: '#days'}, inplace=True)
    day_cnts
    
    data_bar = crash_type.merge(day_cnts, left_on=['contributing_factor_1', 'rain_bin'], right_on=['contributing_factor_1', 'rain_bin'], how='outer')
    data_bar['Accidents Per Day'] = data_bar['total']/data_bar['#days']
    data_bar
    
    return data_bar

In [6]:
def get_logo():
    """
    Function to import AxWx logo and output in Bokeh Plot object to output on dashboard.
    
    :return: Bokeh plot object
        The output is a Bokeh plot object containing the AxWx logo
    """
    
    url = "dashboard_title4.jpg"

    source = ColumnDataSource(dict(
        url = [url],
        x1  = [0],
        y1  = [0],
        w1  = [400],
        h1  = [400],
        x2  = [0],
        y2  = [0],
    ))

    xdr = Range1d(start=-200, end=200)
    ydr = Range1d(start=-200, end=200)

    plot = Plot(
        title=None, x_range=xdr, y_range=ydr, plot_width=1200, plot_height=340,
        h_symmetry=False, v_symmetry=False, min_border=0, toolbar_location=None)

    image1 = ImageURL(url="url", x="x1", y="y1", w="w1", h="h1", anchor="center")
    plot.add_glyph(source, image1)
    plot.border_fill_color = "white"
    plot.outline_line_width = 1
    plot.outline_line_alpha = 0.1
    plot.outline_line_color = "white"
    
    return [plot]

In [7]:
def bar_chart(data_bar):
    """
    Function to create Bokeh bar plots for day and night categories.
    
    :param data_bar: DataFrame
        A Pandas dataframe input of data used to create the bar plot.
    :return: list
        The outputs are a list containing two Bokeh BoxPlot objects
        
    """
    
    b1 = Bar(data_bar, 
            #'Location', 
            values='Accidents Per Day',
            group = 'rain_bin',
            #stack=cat(sort=False),
            label=cat(columns='contributing_factor_1',sort=False),
            color = 'rain_bin',
            title="Accidents by Category and Precipitation Bin",
            xlabel = 'Contributing Factor',
            ylabel = 'Accidents per Day',
            plot_width=700, plot_height = 700)
    
    b1.legend.location = "top_right"
    b1.title.text_font_size = '12pt'
    b1.xaxis.axis_label_text_font_size = '12pt'
    b1.yaxis.axis_label_text_font_size = '12pt'
    b1.axis.major_label_text_font_size = '11pt'


    #show(b1)
    return b1

In [8]:
def create_map(data, title):
    """
    Function to create Bokeh map plots using Google Maps base for day and night categories.
    
    :param data: DataFrame
        A Pandas dataframe input of the dataset used for plotting
    :param title: string
        A string input to title the plot.
    :return: Bokeh map plot
        The output is a Bokeh map plot object
    """
    
    map_style = """
    [{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},{"featureType":"landscape","elementType":"all","stylers":[{"color":"#f2e5d4"}]},{"featureType":"poi.park","elementType":"geometry","stylers":[{"color":"#c5dac6"}]},{"featureType":"poi.park","elementType":"labels","stylers":[{"visibility":"on"},{"lightness":20}]},{"featureType":"road","elementType":"all","stylers":[{"lightness":20}]},{"featureType":"road.highway","elementType":"geometry","stylers":[{"color":"#c5c6c6"}]},{"featureType":"road.arterial","elementType":"geometry","stylers":[{"color":"#e4d7c6"}]},{"featureType":"road.local","elementType":"geometry","stylers":[{"color":"#fbfaf7"}]},{"featureType":"water","elementType":"all","stylers":[{"visibility":"on"},{"color":"#acbcc9"}]}]
    """
    map_options = GMapOptions(lat=47.6101, lng=-122.3421, map_type="roadmap", zoom=11, scale_control = True,
                             styles= map_style)
    API_KEY = "AIzaSyDYri9kA5L5jKhyiNsl5YI2wIilZBmW92c"

    s1 = ColumnDataSource(data)

    hover = HoverTool(
        tooltips="""
        <div>
            <div>
                <img
                    src="https://github.com/rexthompson/axwx/blob/master/images/axwx-logo.jpg?raw=true"
                    height="50" width="50"
                    style="float: left; margin: 0px 15px 15px 0px;"
                    border="0"
                ></img>
            </div>
            <div>
                <span style="font-size: 12px;">Time:</span>
                <span style="font-size: 12px; color: #966;">@time_of_day</span>
            </div>
            <div>
                <span style="font-size: 12px;">Injury:</span>
                <span style="font-size: 12px; color: #696;">@driver_injury</span>
            </div>
            <div>
                <span style="font-size: 12px;">Factor:</span>
                <span style="font-size: 12px; color: #696;">@contributing_factor_1</span>
            </div>
        </div>
        """
    )

    plot = GMapPlot(x_range=Range1d(), y_range=Range1d(), map_options=map_options, api_key= API_KEY, webgl=True)
    plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool(), ResetTool(), ZoomInTool(), ZoomOutTool(), hover)
    
    plot.plot_height = 750
    plot.plot_width = 720

    circle1 = Circle(x="lon", y="lat", size=10, fill_color="blue", fill_alpha=0.2, line_color=None)
    plot.add_glyph(s1, circle1)
    plot.title.text = title + '- Map Plot of Accidents in Seattle'
    plot.title.text_font_size = '12pt'



    #show(plot)
    return plot


In [9]:
def histogram1(raw_data):
    """
    Function to create Bokeh histogram plot of Temperature.
    
    :param raw_data: DataFrame
        A Pandas dataframe input of the dataset used for plotting.
    :return: Bokeh histogram plot
        The output is a Bokeh histogram plot object.
    """
    
    #Data preparation specifically for plot 
    raw_data['temp_round'] = np.round(raw_data['wx_TemperatureF_latest'])
    hist = pd.DataFrame()
    hist['temp'] = raw_data['temp_round']
    hist = hist.dropna(axis=0, how='any')
    
    h1 = Histogram(hist, values='temp', legend='top_right',title="Histogram of accidents by Temperature", 
                   plot_width=700, density = True)
    h1.title.text_font_size = '12pt'
    h1.xaxis.axis_label_text_font_size = '12pt'
    h1.yaxis.axis_label_text_font_size = '12pt'
    h1.axis.major_label_text_font_size = '11pt'

    #show(h1)
    return h1

In [10]:
def histogram2(raw_data):
    """
    Function to create Bokeh histogram plot of Wind Speed.
    
    :param raw_data: DataFrame
        A Pandas dataframe input of the dataset used for plotting.
    :return: Bokeh histogram plot
        The output is a Bokeh histogram plot object.
    """
    
    #Data preparation specifically for plot 
    hist2 = pd.DataFrame()
    hist2['wind_latest'] = np.round(raw_data['wx_WindSpeedMPH_latest'], 1)
    hist2 = hist2.dropna(axis=0, how='any')

    h2 = Histogram(hist2, values='wind_latest', legend='top_right',title="Histogram of accidents by Wind Latest", 
                   bins = 20, plot_width=700, density = True)
    h2.title.text_font_size = '12pt'
    h2.xaxis.axis_label_text_font_size = '12pt'
    h2.yaxis.axis_label_text_font_size = '12pt'
    h2.axis.major_label_text_font_size = '11pt'

    #show(h2)
    
    return h2

In [11]:
def boxplot1(raw_data):
    """
    Function to create Bokeh box plot of Precip Rate in the last 1 hour 
    vs Roadway Surface Condition (all).
    
    :param raw_data: DataFrame
        A Pandas dataframe input of the dataset used for plotting.
    :return: Bokeh box plot
        The output is a Bokeh box plot object
    
    """
    box1 = BoxPlot(raw_data, values='wx_PrecipRate_inhr_last_1hr', label='roadway_surface_condition', 
                  title="Precip Rate in last 1hr by WSP road classification (ALL)", plot_width=700, legend = False,
                  color = 'contributing_factor_1')

    box1.y_range = Range1d(-0.2,3)
    box1.title.text_font_size = '12pt'
    box1.xaxis.axis_label_text_font_size = '12pt'
    box1.yaxis.axis_label_text_font_size = '12pt'
    box1.axis.major_label_text_font_size = '11pt'

    #show(box1)
    
    return box1

In [12]:
def boxplot2(raw_data):
    """
    Function to create Bokeh box plot of of Precip Rate in the last 1 hour 
    vs Roadway Surface Condition (wet and dry)
    
    :param raw_data: DataFrame
        A Pandas dataframe input of the dataset used for plotting.
    :return: Bokeh box plot
        The output is a Bokeh box plot object
    """
    
    #Data preparation specifically for plot 
    roadcondlist = ['Wet', 'Dry', 'Icy']
    raw_data['roadway_surface_condition'] = raw_data['roadway_surface_condition'].astype('str')
    ix = raw_data['roadway_surface_condition'].isin(roadcondlist)
    sub_data = raw_data.loc[ix]


    box2 = BoxPlot(sub_data, values='wx_PrecipRate_inhr_last_1hr', label='roadway_surface_condition', 
                  title="Precip Rate in last 1hr  by WSP road classification (Wet vs Dry)", plot_width=700, legend = False,
                  color = 'contributing_factor_1')

    box2.y_range = Range1d(-0.2,3)
    box2.title.text_font_size = '12pt'
    box2.xaxis.axis_label_text_font_size = '12pt'
    box2.yaxis.axis_label_text_font_size = '12pt'
    box2.axis.major_label_text_font_size = '11pt'

    #show(box2)
    
    return box2

In [13]:
def run_dash(raw_data, factors):
    """
    Function to run functions for data preparation and Bokeh plot creation
    and outputs to html.
    
    :param path: DataFrame
        A Pandas dataframe input of location of the data.
    :param factors: list
        A list input of contributing factors of accidents. 
        
    """
    
    #Data preparation - creating new columns, splits for each plot
    raw_data = addnewcategories(raw_data)
    map_data, bar_data, ts_data = chart_data_split(raw_data, factors)
    data_bar = makebardata(raw_data)
    
    #Bokeh Plot object initialization and push to HTML
    barp = bar_chart(data_bar)
    hp1 = histogram1(raw_data)
    hp2 = histogram2(raw_data)
    box1 = boxplot1(raw_data)
    box2 = boxplot2(raw_data)
    map1 = create_map(map_data, 'Accidents in Seattle')
    
    lo_input = [get_logo(),[map1, barp], [hp1, hp2], [box1, box2]]

    output_file('AxWx_dashboard.html')
                                                                
    l = layout(lo_input, sizing_mode='fixed')  
    
    show(l)

In [14]:
run_dash(raw_data, factors)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
