# Plotting a weekly time series line chart
This workbook goes through the steps of creating a simple line plot with Bokeh, to show multiple different series on the same chart. It shows how to wrap up the Bokeh settings into a simple function to quickly produce nice line plots from specified columns of a pandas dataframe object.

In [1]:
# Dependencies
import bokeh.models
import bokeh.plotting
import bokeh.palettes
import bokeh.layouts
import bokeh.core.properties
import bokeh.io

# Show the Bokeh plots inline in the notebook
bokeh.io.output_notebook()

import pandas as pd
import dateutil
import datetime

## Get the example data
The data is a fake example of a weekly time-series showing values recorded by footfall cameras and public wifi signups.

In [2]:
# Get the data
# Import the parser submodule explicitly: a bare `import dateutil` does not
# guarantee that `dateutil.parser` has been loaded.
import dateutil.parser

week_df = pd.read_csv("./example_weekly_data.csv")

# Parse the "datetime" strings into datetime objects. Iterating over the
# column values directly avoids relying on the frame having a default
# 0..n-1 index. dayfirst = True makes ambiguous dates resolve as day/month.
week_df["datetime"] = [dateutil.parser.parse(raw_date, dayfirst = True)
                       for raw_date in week_df["datetime"]]

# Use the parsed dates as the index; set_index also removes the column,
# and the index keeps the name "datetime".
week_df = week_df.set_index("datetime")
week_df.tail()

Unnamed: 0_level_0,Total Wifi Users,New Wifi Users,The Pub,The Local Store,Total Weekly Footfall,Pizza,Estimated new visitors,display_date
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-09-04,557,129,1618,6953,4858,232,3210,Monday 04 September 2017
2017-09-11,1691,314,1241,4647,12951,264,8347,Monday 11 September 2017
2017-09-18,1392,265,2892,949,11179,2880,7220,Monday 18 September 2017
2017-09-25,405,114,4599,4818,11410,2271,7361,Monday 25 September 2017
2017-10-02,377,130,1395,6720,10022,619,6450,Monday 02 October 2017


## Bokeh
Create the figure object with various parameters:

In [3]:
# Figure dimensions, in pixels
plot_width_in_pixels, plot_height_in_pixels = 500, 500

# Tools offered to the user in the figure. The hover tool is created as an
# object (it has lots of options that can also be configured); the other
# tools are requested by name.
hover_tool = bokeh.models.HoverTool()
plot_tools = [hover_tool] + ["box_zoom", "reset"]

# Title and axis labels
title = "My plot"
x_axis_label, y_axis_label = "Date", "Value"

Set the ranges for the axes:

In [4]:
# A plain numeric range can be given as a simple (start, end) tuple:
y_range = (0, 200)

# More complex ranges can be set using one of the range objects. Here a
# Range1d spans 1 January 2017 to 31 January 2017:
x_range = bokeh.models.ranges.Range1d(
    start = datetime.datetime(year = 2017, month = 1, day = 1),
    end = datetime.datetime(year = 2017, month = 1, day = 31),
)

Set the tick intervals:

In [5]:
# Minor tick settings. This is perhaps not 100% intuitive: the value is the
# number of ticks appearing between each major tick, and it has no relation
# to the scale of the actual values being plotted.
# Note that the x setting doesn't work for datetime axes.
x_minor_ticks = y_minor_ticks = 10

Create the figure:

In [6]:
# Choose whether to have the Bokeh logo on the figure (as a general rule - don't!)
logo = None

# Build the figure from the size, tool, label, range and tick settings
# chosen in the previous cells.
fig_out = bokeh.plotting.figure(plot_width = plot_width_in_pixels,
                                plot_height = plot_height_in_pixels,
                                tools = plot_tools,
                                x_axis_label = x_axis_label,
                                y_axis_label = y_axis_label,
                                title = title,
                                y_range = y_range,
                                x_range = x_range,
                                x_minor_ticks = x_minor_ticks,
                                y_minor_ticks = y_minor_ticks,
                                x_axis_type = "datetime",
                                logo = logo)

# More granular control of axis properties can be achieved with the axis methods
fig_out.xaxis.axis_label = "New x axis label"
fig_out.xaxis.axis_line_width = 1
fig_out.xaxis.axis_line_color = "blue"

# These are set through `axis` rather than `xaxis`/`yaxis`, so they are not
# limited to a single axis.
fig_out.axis.minor_tick_in = -5
fig_out.axis.minor_tick_out = 5

# Manually set the xaxis tickers: one candidate tick per day of the month.
# Days of the month run from 1 to 31, so the list starts at 1 rather than 0.
x_ticks = list(range(1, 32))
fig_out.xaxis.ticker = bokeh.models.tickers.DaysTicker(days = x_ticks)

# Set the orientation of the labels (pi/4 radians, i.e. 45 degrees)
from math import pi
fig_out.xaxis.major_label_orientation = pi/4

# Set the format of the labels
x_formatter = bokeh.models.formatters.DatetimeTickFormatter()
fig_out.xaxis.formatter = x_formatter

#### Now that the figure has been created it can be put in a layout and displayed.
(Although this is pointless at the moment as it contains no data!)

In [7]:
# Add the figure to a layout: wrap it in a row, which is the object that
# gets displayed below.
layout_row = bokeh.layouts.row(fig_out)

# Show the plot. This will create a warning (W-1001, NO_DATA_RENDERERS)
# as the figure contains no data yet.
bokeh.plotting.show(layout_row)

W-1001 (NO_DATA_RENDERERS): Plot has no data renderers: Figure(id='77f979bb-84f1-4d8d-a2e7-e4e908086190', ...)


## Adding data glyphs to a figure

In [8]:
# Build a second figure whose ranges suit the example data: the x-axis
# spans 1 January 2017 to 1 October 2017 and the y-axis spans 0 to 20000.
new_x_range = bokeh.models.ranges.Range1d(
    start = datetime.datetime(year = 2017, month = 1, day = 1),
    end = datetime.datetime(year = 2017, month = 10, day = 1),
)

new_fig = bokeh.plotting.figure(
    title = "Weekly Footfall",
    plot_width = 700,
    plot_height = 350,
    tools = plot_tools,
    x_axis_type = "datetime",
    x_axis_label = x_axis_label,
    y_axis_label = y_axis_label,
    x_range = new_x_range,
    y_range = (0, 20000),
    logo = logo,
)

# The whole pandas DataFrame is handed over as a Column Data Source ...
source = bokeh.models.ColumnDataSource(week_df)

# ... so that individual columns can be referred to by name when adding a
# line glyph to the figure.
new_fig.line(
    x = "datetime",
    y = "Total Weekly Footfall",
    source = source,
    legend = "Total Footfall",
    line_color = "#3385ff",
    line_width = 4,
    line_join = "bevel",
)

# Put the figure in a row layout and display it.
new_layout_row = bokeh.layouts.row(new_fig)
bokeh.plotting.show(new_layout_row)

This is fine for creating a basic plot with everything hard-coded manually, but we can be a bit more clever than that and start to determine things programmatically.

In [9]:
# Choose a group of columns to plot on the same plot
columns_to_plot = list(week_df.columns.drop("display_date"))

# Find the maximum value in any of the columns
max_value = max(week_df[col].max() for col in columns_to_plot)

# Use this to set the y-axis maximum to the next multiple of 500 above
# the largest value.
y_axis_max = (max_value // 500 + 1) * 500

# Set the X-axis range
x_min = week_df.index.min()
x_max = week_df.index.max()

# Since we know we are using weekly data we can also use this to create
# weekly tick positions for the x-axis. The "+ 1" makes the list include
# the final week as well as the first.
# NOTE: the positions are only computed here; nothing in this cell applies
# them to the figure.
num_weeks = (x_max - x_min).days // 7
x_ticks = [x_min + datetime.timedelta(weeks = week) for week in range(num_weeks + 1)]

# Now we can put all this together to create a better plot
x_range_2 = bokeh.models.ranges.Range1d(start = x_min, end = x_max)

new_fig_2 = bokeh.plotting.figure(plot_width = 750,
                                  plot_height = 350,
                                  tools = plot_tools,
                                  x_axis_label = "Week Start Date",
                                  y_axis_label = "Total Weekly Footfall",
                                  title = "Weekly Footfall",
                                  y_range = (0, y_axis_max),
                                  x_range = x_range_2,
                                  x_axis_type = "datetime",
                                  logo = None)

# We can then add specific columns as lines to the figure object we created.
# We can iterate through a list of different colour values to make the plot
# look better.
colours = [bokeh.core.properties.value(colour) for colour in
           ['#3385ff', '#00cc66', '#ff66b3', '#ffcc80', '#ffff99', '#00b3b3', '#b380ff']]

for colour_index, col in enumerate(columns_to_plot):

    # Pay attention to the way legend names have to be specified, which
    # is completely unintuitive and likely to cause massive amounts of
    # grief when creating line plots, but the reasons for which are
    # explained here: https://github.com/bokeh/bokeh/issues/5365

    # The modulo wraps back to the first colour if there are more columns
    # than colours, instead of running off the end of the list.
    new_fig_2.line(source = source,
                   x = "datetime",
                   y = col,
                   line_width = 4,
                   line_join = "bevel",
                   line_color = colours[colour_index % len(colours)],
                   legend = dict(value = col))

# Set the location of the legend.
new_fig_2.legend.location = "top_left"

# This allows the user to click on labels in the legend to hide/show
# the lines on the figure.
new_fig_2.legend.click_policy = "hide"

layout_row = bokeh.layouts.row(new_fig_2)
bokeh.plotting.show(layout_row)

We can then wrap it all up into a quick function to produce the desired line plots:

In [10]:
def date_time_line_plot(df, columns_to_plot, x_axis_column_name = None, **kwargs):
    """Return a bokeh figure plotting the supplied DataFrame columns as lines.

    One line is drawn per entry of ``columns_to_plot`` on a figure with a
    datetime x-axis. The legend is placed top-left and clicking a legend
    entry hides/shows the corresponding line.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to plot. The whole frame is used as the figure's data source.
    columns_to_plot : iterable of str
        Names of the columns of ``df`` to draw. Must not be empty.
    x_axis_column_name : str, optional
        Name of the column to use for the x-axis; it must refer to a column
        of DateTime data type. By default the DataFrame index is used.
    **kwargs
        Optional overrides; any other keyword is ignored.

        - ``plot_width`` (default 1000) and ``plot_height`` (default 500)
        - ``title`` (default "Weekly Footfall")
        - ``x_axis_label`` (default "Date")
        - ``y_axis_label`` (default "Total Weekly Footfall")
        - ``colours``: list of colours, reused cyclically if there are
          more columns than colours (default: a built-in 7-colour list)
        - ``tools``: list of bokeh tools (default: hover, box zoom, reset)

    Returns
    -------
    The bokeh figure, ready to be placed in a layout and shown.

    Raises
    ------
    ValueError
        If ``columns_to_plot`` is empty.
    """
    # Materialise once so that any iterable can be traversed twice below
    # and so that emptiness can be checked up front.
    columns_to_plot = list(columns_to_plot)
    if not columns_to_plot:
        raise ValueError("columns_to_plot must contain at least one column name")

    if x_axis_column_name is None:
        # Fall back to the index; an unnamed index is looked up in the data
        # source under the name "index".
        x_axis_column_name = "index" if df.index.name is None else df.index.name
        x_values = df.index
    else:
        x_values = df[x_axis_column_name]

    # The x-axis spans exactly the first to the last x value.
    x_range = bokeh.models.ranges.Range1d(start = x_values.min(), end = x_values.max())

    # The y-axis runs from 0 to the next multiple of 500 above the largest
    # value found in any of the plotted columns.
    max_value = max(df[col].max() for col in columns_to_plot)
    y_axis_max = (max_value // 500 + 1) * 500

    # Over-ride defaults with any kwargs supplied.
    plot_width = kwargs.get("plot_width", 1000)
    plot_height = kwargs.get("plot_height", 500)
    title = kwargs.get("title", "Weekly Footfall")
    x_axis_label = kwargs.get("x_axis_label", "Date")
    y_axis_label = kwargs.get("y_axis_label", "Total Weekly Footfall")

    # Default and user-supplied colours are wrapped the same way, so both
    # are always treated as literal colour values. An empty colour list
    # falls back to the defaults.
    default_colours = ['#3385ff', '#00cc66', '#ff66b3', '#ffcc80', '#ffff99', '#00b3b3', '#b380ff']
    palette = list(kwargs.get("colours", default_colours)) or default_colours
    colours = [bokeh.core.properties.value(colour) for colour in palette]

    # The default tools are only created when the caller supplies none.
    if "tools" in kwargs:
        plot_tools = kwargs["tools"]
    else:
        plot_tools = [bokeh.models.HoverTool(), "box_zoom", "reset"]

    # Create the figure
    fig_out = bokeh.plotting.figure(plot_width = plot_width,
                                    plot_height = plot_height,
                                    tools = plot_tools,
                                    x_axis_label = x_axis_label,
                                    y_axis_label = y_axis_label,
                                    title = title,
                                    y_range = (0, y_axis_max),
                                    x_range = x_range,
                                    x_axis_type = "datetime",
                                    logo = None)

    # Set the data source
    source = bokeh.models.ColumnDataSource(df)

    for position, col in enumerate(columns_to_plot):
        # The legend is given as an explicit value so the column name is
        # used as the label text itself; the modulo cycles through the
        # colours when there are more columns than colours.
        fig_out.line(source = source,
                     x = x_axis_column_name,
                     y = col,
                     line_width = 4,
                     line_join = "bevel",
                     line_color = colours[position % len(colours)],
                     legend = dict(value = col),
                     name = col)

    # Set the location of the legend.
    fig_out.legend.location = "top_left"

    # This allows the user to click on labels in the legend to hide/show
    # the lines on the figure.
    fig_out.legend.click_policy = "hide"

    return fig_out

Now using the function we can quickly create multiple line plots and put them all into one display.

In [11]:
# Both figures share the same dimensions.
figure_size = dict(plot_width = 750, plot_height = 350)

# Overall footfall, drawn with the function's default colours.
fig_1 = date_time_line_plot(
    week_df,
    ["Total Weekly Footfall", "Estimated new visitors"],
    title = "Overall Footfall",
    **figure_size
)

# Footfall per retailer, with one custom colour per line.
fig_2 = date_time_line_plot(
    week_df,
    ["The Pub", "The Local Store", "Pizza"],
    title = "Retailer Footfall",
    colours = ["#e6e600", "#e600ac", "#ff6600"],
    **figure_size
)

# Combine both figures in a single column layout and display it.
layout_row = bokeh.layouts.column(fig_1, fig_2)
bokeh.plotting.show(layout_row)

## Customising tools

In [12]:
# Hover tool: the tooltip shows the row's "display_date" column and the
# y value formatted with the "0,000" pattern. It is configured with
# mode = "vline".
hover_tooltips = [
    ("Week commencing", "@display_date"),
    ("Footfall", "$y{0,000}"),
]
hover = bokeh.models.HoverTool(tooltips = hover_tooltips, mode = "vline")

# Box zoom tool restricted to the "width" dimension. (Despite the variable
# name, this is a BoxZoomTool rather than a box select tool.)
box_select = bokeh.models.BoxZoomTool(dimensions = "width")

# Build the figure with the customised tools in place of the defaults.
tool_fig = date_time_line_plot(
    week_df,
    ["Total Weekly Footfall", "Estimated new visitors"],
    plot_width = 800,
    plot_height = 400,
    tools = [hover, box_select, "reset"],
)

# Put the figure in a column layout and display it.
tool_row = bokeh.layouts.column(tool_fig)
bokeh.plotting.show(tool_row)