## Plotting 'arm' of Bokeh (a simple scatter diagram)

In [2]:
# import
import numpy as np
import pandas as pd
from bokeh.plotting import figure, output_file, show

In [5]:
# fake data
# --+ reproducibility: seed the global NumPy generator so a fresh
#     Restart & Run All draws exactly the same points every time
SEED = 42
N_POINTS = 50   # observations per group
np.random.seed(SEED)
# --+ x: three independent standard-normal samples of length N_POINTS
x0 = np.random.normal(loc=0, scale=1, size=N_POINTS)
x1 = np.random.normal(loc=0, scale=1, size=N_POINTS)
x2 = np.random.normal(loc=0, scale=1, size=N_POINTS)
# --+ y = 1 + slope * x
# NOTE(review): the normal() calls below carry no `size=`, so each one returns a
# single scalar; every group therefore lies exactly on a straight line whose
# slope is that one random draw. If per-point noise was intended, the draw
# needs `size=N_POINTS` -- confirm with the author before changing it.
y0 = 1 + x0 * 1 * np.random.normal(loc=0, scale=0.1)
y1 = 1 + x1 * 1 * np.random.normal(loc=0, scale=0.2)
y2 = 1 + x2 * 1 * np.random.normal(loc=0, scale=0.4)
# --+ get a df: one frame per group; 'z' is the group label (0 / 1 / 5)
df0 = pd.DataFrame({'x': x0, 'y': y0, 'z': np.repeat(0, N_POINTS)})
df1 = pd.DataFrame({'x': x1, 'y': y1, 'z': np.repeat(1, N_POINTS)})
df2 = pd.DataFrame({'x': x2, 'y': y2, 'z': np.repeat(5, N_POINTS)})
# stack the groups on top of each other; ignore_index gives the combined frame
# a clean 0..149 index instead of repeating the labels 0..49 three times
df = pd.concat([df0, df1, df2], ignore_index=True)

# colour lookup: one fixed colour for each group label stored in column 'z'
colormap = {
    0: 'orange',
    1: 'black',
    5: 'purple',
}
# one colour per row, in row order
colors = [colormap[label] for label in df['z'].tolist()]

# dump the chart to a html file
output_file("at_the_pub.html", title="at_the_pub.py example")

# create the figure with its title and both axis labels in a single call
p = figure(
    title="Some fake data",
    x_axis_label='Consumption of vinegar crisps (stones)',
    y_axis_label='Consumption of carbonated drink (gallons)',
)

# scatter layer: one translucent circle per row, coloured by its 'z' group
p.circle(x=df["x"], y=df["y"], size=10, fill_alpha=0.2, color=colors)

# render the chart
show(p)

## Model 'arm' of Bokeh

In [6]:
# minimal setup
import numpy as np
from bokeh.io import show
from bokeh.layouts import column   # allow us to include multiple visual forms into a same chart
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.plotting import figure

In [8]:
# sample data
# Bring the sample-data subpackage into scope so its downloader can be called.
import bokeh.sampledata
# Fetch Bokeh's sample files; the log printed under this cell shows them being
# written to a `.bokeh\data` directory under the user's profile folder.
bokeh.sampledata.download()
# Apple (AAPL) stock data; a later cell reads its 'date' and 'adj_close' entries.
# Presumably this import needs the AAPL.csv fetched above -- keep it after download().
from bokeh.sampledata.stocks import AAPL

Creating C:\Users\Kar Whing\.bokeh directory
Creating C:\Users\Kar Whing\.bokeh\data directory
Using data directory: C:\Users\Kar Whing\.bokeh\data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 

In [17]:
# data manipulation
# convert the 'date' entries to numpy datetime64 so they can sit on a datetime axis
dates = np.array(AAPL['date'], dtype=np.datetime64)
# single shared data source: both panels below draw 'close' against 'date' from it
source = ColumnDataSource(data={
    'date': dates,
    'close': AAPL['adj_close'],
})

# upper panel: the price line, initially showing only part of the date range
# NOTE(review): plot_height / plot_width are the pre-3.0 Bokeh spellings --
# confirm the Bokeh version this notebook is pinned to
p = figure(
    plot_height=300,                      # plot size
    plot_width=800,
    tools="xpan",                         # horizontal pan only
    toolbar_location=None,                # hide the toolbar
    x_axis_type="datetime",
    x_axis_location="above",              # draw the x-axis at the top of the chart
    x_range=(dates[1000], dates[2500]),   # the time frame to inspect initially
    background_fill_color="#efefef",      # background
)

# decorations
p.yaxis.axis_label = 'Price'

# plot data
p.line('date', 'close', source=source)

# data selector: bound to the upper chart's x-range, so the same range object
# is shared between the selection box and the upper panel
range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_alpha = 0.2
range_tool.overlay.fill_color = "navy"

# lower panel: same series, sharing the upper panel's y-range
select = figure(
    title="Drag the middle and edges of the selection box to change the range above",
    plot_height=130,
    plot_width=800,
    y_range=p.y_range,
    x_axis_type="datetime",
    y_axis_type=None,
    tools="",
    toolbar_location=None,
    background_fill_color="#efefef",
)
select.ygrid.grid_line_color = None
select.line('date', 'close', source=source)

# attach the selector to the lower panel and make it the active tool
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

# show both panels stacked vertically
show(column(p, select))