# Bokeh is an interactive visualization library
- bokeh.models: low-level interface
- bokeh.plotting: high-level interface centered around composing visual glyphs

# Documentation

In [1]:
from IPython.display import IFrame

# Embed the Bokeh documentation below this cell.
# docs.bokeh.org is the documentation's current host (bokeh.pydata.org is the legacy address).
documentation = IFrame(src="https://docs.bokeh.org/en/latest/", width=1000, height=450)
display(documentation)

# Imports

In [2]:
# bokeh notebook output / display helpers
from bokeh.io import output_notebook, show, reset_output

# core bokeh package and high-level plotting interface
import bokeh
from bokeh.plotting import figure

# other imports
import numpy as np
import pandas as pd
from vega_datasets import data as vds

# Troubleshooting
- If plots open in a new browser tab instead of the notebook, call reset_output() and then output_notebook() so that they are displayed inline again

# Sample Data Sets

In [3]:
# Load Bokeh's bundled iris sample as a DataFrame.
# The frame is imported directly so that the name `iris` is not bound to the
# sampledata module here and then re-bound to a DataFrame further down the notebook.
from bokeh.sampledata.iris import flowers as iris_dataset

iris_dataset.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


# Display Plots in Notebook
- In Colab, output_notebook() has to be called in every cell that displays a plot

In [4]:
# direct Bokeh output to the notebook so that show() displays plots inline
output_notebook()

# Common Steps to create plots
1. Create a figure - the container that houses the plot
2. Call a glyph method on the figure (e.g. line, vbar, scatter)
3. Show the figure

# ColumnDataSource
The ColumnDataSource is the data source used throughout Bokeh. It is often created automatically, but you can also create one explicitly.

The ColumnDataSource is a (dictionary) mapping of column names (strings) to sequences of values.

In [5]:
from bokeh.models import ColumnDataSource

# column name -> sequence of values
column_values = {
    'A': [1, 2, 3],
    'B': [4, 5, 3],
    'C': ['a', 'b', 'c'],
}
column_data_source = ColumnDataSource(data=column_values)

# .data exposes the column dictionary held by the source
column_data_source.data

{'A': [1, 2, 3], 'B': [4, 5, 3], 'C': ['a', 'b', 'c']}

# Line Plot

In [6]:
from bokeh.models import HoverTool

# data: ten random y-values over x = 0..9
x_line = np.arange(10)
y_line = np.random.rand(10)

# line plot
# `width`/`height` are used instead of the deprecated `plot_width`/`plot_height`
# keywords, matching the figures in the "Linking Plots" section
line_plot = figure(
    width=500,
    height=325,
    title='Line Plot',
    x_axis_label='x',
    y_axis_label='y',
)
line_plot.line(x_line, y_line, legend_label='line', line_width=2)

# add hover tool
line_plot.add_tools(HoverTool())

# the axis labels can also be set after the figure has been created:
# line_plot.xaxis.axis_label = 'x-axis'
# line_plot.yaxis.axis_label = 'y-axis'

show(line_plot)

# Multi-Line

In [7]:
output_notebook()

# data: three random series over the same x values
multi_line_x = np.arange(10)
multi_line_y1 = np.random.rand(10)
multi_line_y2 = np.random.rand(10)
multi_line_y3 = np.random.rand(10)

# plot
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
multi_line_plot = figure(width=500, height=300, toolbar_location="below")

# one line glyph per series, all drawn on the same figure
multi_line_plot.line(multi_line_x, multi_line_y1, color='red', line_width=3)
multi_line_plot.line(multi_line_x, multi_line_y2, color='blue', line_width=3)
multi_line_plot.line(multi_line_x, multi_line_y3, color='yellow', line_width=3)

show(multi_line_plot)

# Bar Chart

In [8]:
x_bar = ['category1', 'category2', 'category3', 'category4']
y_bar = np.random.rand(4) * 10

# order the categories by descending bar height; pairing every category with
# its value avoids one list.index() scan per element
sorted_categories = [
    category
    for category, _ in sorted(zip(x_bar, y_bar), key=lambda pair: pair[1], reverse=True)
]

# plot
# the categorical x_range fixes the left-to-right order of the bars;
# `height` replaces the deprecated `plot_height` keyword
bar_chart = figure(
    x_range=sorted_categories,
    title='Bar Plot',
    x_axis_label='x',
    y_axis_label='y',
    height=300,
)
bar_chart.vbar(x=x_bar, top=y_bar, color='blue', width=0.5)

# start the value axis at zero
bar_chart.y_range.start = 0

show(bar_chart)

# Stacked Bar Chart

In [9]:
stacked_bar_df = pd.DataFrame({
        'y': [1, 2, 3, 4, 5],
        'x1': [1, 2, 4, 3, 4],
        'x2': [1, 4, 2, 2, 3]
    }
)

# explicit ColumnDataSource built from the frame
cds_stacked_bar_df = ColumnDataSource(stacked_bar_df)

# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
stacked_bar_chart = figure(
        width=600,
        height=300,
        title='Stacked Bar Chart'
    )

# stack the 'x1' and 'x2' columns horizontally for every 'y' position
stacked_bar_chart.hbar_stack(
    ['x1', 'x2'],
    y='y',
    height=0.8,
    color=('grey', 'lightgrey'),
    source=cds_stacked_bar_df       # the DataFrame itself (stacked_bar_df) is accepted as well
)

show(stacked_bar_chart)

# Grouped Bar Chart

In [10]:
from bokeh.core.properties import value
from bokeh.transform import dodge

# data: one row per category, one column per year
categories = ['category1', 'category2', 'category3']
grouped_bar_df = pd.DataFrame({
    'categories': categories,
    '2015': [2, 1, 4],
    '2016': [5, 3, 3],
    '2017': [3, 2, 4]
})

# plot
# `height` replaces the deprecated `plot_height` keyword
grouped_bar = figure(x_range=categories, y_range=(0, 10), height=250)

# one bar series per year: (column, x offset within the category, colour)
bar_series = [
    ('2015', -0.25, 'gray'),
    ('2016', 0.0, 'blue'),
    ('2017', 0.25, 'green'),
]

# dodge() offsets each series so the bars sit side by side on the x axis
for year_column, x_offset, bar_color in bar_series:
    grouped_bar.vbar(
        x=dodge('categories', x_offset, range=grouped_bar.x_range),
        top=year_column,
        width=0.2,
        source=grouped_bar_df,
        color=bar_color,
        legend_label=year_column,
    )

# format legend
grouped_bar.legend.location = 'top_left'
grouped_bar.legend.orientation = 'horizontal'

show(grouped_bar)

# Stacked Area Chart

In [11]:
stacked_area_df = pd.DataFrame({
        'x': [1, 2, 3, 4, 5],
        'y1': [1, 2, 4, 3, 4],
        'y2': [1, 4, 2, 2, 3]
    }
)

# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
stacked_area_plot = figure(width=500, height=300)

# stack the 'y1' and 'y2' columns vertically over the shared 'x' column
stacked_area_plot.varea_stack(
    ['y1', 'y2'],
    x='x',
    color=('green', 'lightgreen'),
    source=stacked_area_df,
)

show(stacked_area_plot)

# Scatter Plots

In [12]:
# load the cars data set from vega_datasets and preview its last rows
cars = vds.cars()
cars.tail()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA
405,chevy s-10,31.0,4,119.0,82.0,2720,19.4,1982-01-01,USA


In [13]:
# scatter plot: vehicle weight against fuel economy

# data
x_scatter = cars.Weight_in_lbs
y_scatter = cars.Miles_per_Gallon

# plot
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
scatter_plot = figure(
    width=500,
    height=300,
    x_axis_label='Weight_in_lbs',
    y_axis_label='Miles_per_Gallon',
)
scatter_plot.circle(
    x_scatter,
    y_scatter,
    size=5,
    line_color='navy',
    fill_color='orange',
    fill_alpha=0.5,
)

show(scatter_plot)

You can use a cross, x, diamond_cross, circle_x, circle_cross, etc.

# Cluster on Scatter

In [14]:
# load the iris data set from vega_datasets and preview its last rows
iris = vds.iris()
iris.tail()

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [15]:
from bokeh.transform import factor_cmap, factor_mark

# one marker shape per species; both lists are matched by position
species = ['setosa', 'versicolor', 'virginica']
markers = ['hex', 'cross', 'triangle']

# plot
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
scatter_plot_subgroups = figure(
    width=600,
    height=400,
    title='Iris',
    x_axis_label='petalLength',
    y_axis_label='petalWidth',
)
scatter_plot_subgroups.scatter(
    x='petalLength',
    y='petalWidth',
    source=iris,
    # legend_field builds the legend from the values of the column (one entry
    # per species); legend_label would add a single entry literally named "species"
    legend_field='species',
    fill_alpha=0.5,
    size=15,
    # Dark2_3 because there are 3 categories
    color=factor_cmap(field_name='species', palette='Dark2_3', factors=species),
    marker=factor_mark('species', markers, species),
)

# move legend
scatter_plot_subgroups.legend.location = 'top_left'
show(scatter_plot_subgroups)


# Grid and Subplots

In [16]:
from bokeh.layouts import gridplot

output_notebook()

# each of these columns is plotted against fuel economy in its own subplot
subplot_x_columns = ['Acceleration', 'Cylinders', 'Horsepower', 'Weight_in_lbs']
subplot_y_column = 'Miles_per_Gallon'

# one figure per x column; the axis labels tell the otherwise identical-looking
# subplots apart. `width`/`height` replace the deprecated `plot_width`/`plot_height`.
subplots = []
for x_column_name in subplot_x_columns:
    subplot = figure(
        width=300,
        height=300,
        x_axis_label=x_column_name,
        y_axis_label=subplot_y_column,
    )
    subplot.circle(cars[x_column_name], cars[subplot_y_column])
    subplots.append(subplot)

# arrange the flat list of figures into a grid with two columns
grid = gridplot(subplots, ncols=2)

show(grid)

# Linking Plots

In [18]:
from bokeh.layouts import gridplot

linked_data_x = np.arange(10)
linked_data_y = np.random.rand(10)

# base plot: the other plots re-use its range objects
linked_plot1 = figure(width=250, height=250)
linked_plot1.circle(linked_data_x, linked_data_y)

# both ranges of the base plot, passed unchanged to every further figure
shared_ranges = dict(x_range=linked_plot1.x_range, y_range=linked_plot1.y_range)

# same data as a line
linked_plot2 = figure(width=250, height=250, **shared_ranges)
linked_plot2.line(linked_data_x, linked_data_y)

# same data as vertical bars
linked_plot3 = figure(width=250, height=250, **shared_ranges)
linked_plot3.vbar(linked_data_x, top=linked_data_y, width=0.5)

# a single row holding the three plots
linked_gridplot = gridplot([[linked_plot1, linked_plot2, linked_plot3]])

show(linked_gridplot)

# Linked Selection - Box Select, Lasso Select

In [20]:
# load the Seattle weather data set from vega_datasets and preview its last rows
seattle_weather = vds.seattle_weather()
seattle_weather.tail()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
1456,2015-12-27,8.6,4.4,1.7,2.9,fog
1457,2015-12-28,1.5,5.0,1.7,1.3,fog
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun
1460,2015-12-31,0.0,5.6,-2.1,3.5,sun


In [31]:
from bokeh.transform import factor_cmap, factor_mark

# tools offered by both plots, including the two selection tools
TOOLS = 'box_select', 'lasso_select', 'reset', 'wheel_zoom', 'pan'

# one marker shape per weather type; both lists are matched by position
weather_types = ['drizzle', 'rain', 'sun', 'snow', 'fog']
weather_markers = ['hex', 'cross', 'triangle', 'square', 'circle_x']

# Always use the SAME ColumnDataSource for linked interactions:
# the shared data between the plots is what makes the linked selection work
seattle_weather_source = ColumnDataSource(seattle_weather)

# scatter plot 1
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
weather_scatter = figure(
    width=900,
    height=300,
    y_axis_label='Temp',
    x_axis_type='datetime',
    tools=TOOLS,
)
weather_scatter.circle('date', 'temp_max', size=15, fill_alpha=0.1, source=seattle_weather_source)

# scatter plot 2: re-uses the ranges of plot 1
weather_scatter_zoom = figure(
    width=900,
    height=300,
    y_axis_label='Temp',
    x_axis_type='datetime',
    tools=TOOLS,
    x_range=weather_scatter.x_range,
    y_range=weather_scatter.y_range,
)

# colour and marker are both driven by the 'weather' column
weather_scatter_zoom.scatter(
    'date', 'temp_max', size=15, fill_alpha=0.1,
    color=factor_cmap(field_name='weather', palette='Dark2_5', factors=weather_types),
    marker=factor_mark('weather', weather_markers, weather_types),
    legend_field='weather',
    source=seattle_weather_source,
)

# format legend
weather_scatter_zoom.legend.location = 'top_left'
weather_scatter_zoom.legend.orientation = 'horizontal'

# stack the two plots vertically
weather_grid = gridplot([[weather_scatter], [weather_scatter_zoom]])
show(weather_grid)

# Labels and Annotations

In [36]:
from bokeh.models.annotations import Arrow, Label, LabelSet
from bokeh.models.arrow_heads import NormalHead

output_notebook()

# data
label_columns = {
    'x': np.arange(10),
    'y': [4, 7, 5, 5, 9, 2, 3, 4, 3, 4],
}
fig_with_label_data = ColumnDataSource(label_columns)

# plot
fig_with_label = figure()
fig_with_label.line(x='x', y='y', source=fig_with_label_data)

# single label placed at (4, 9), shifted 10 units along x
label = Label(x=4, y=9, x_offset=10, text='Highest Point', text_baseline='middle')
fig_with_label.add_layout(label)

# one label per data point, showing the 'y' column
labels = LabelSet(x='x', y='y', level='glyph', text='y', source=fig_with_label_data)
fig_with_label.add_layout(labels)

# arrow annotation with an orange head, drawn from (5, 7.5) to (4.5, 8.8)
arrow_head = NormalHead(fill_color="orange")
highest_point_arrow = Arrow(end=arrow_head, x_start=5, y_start=7.5, x_end=4.5, y_end=8.8)
fig_with_label.add_layout(highest_point_arrow)

show(fig_with_label)

# Color Bar

In [37]:
# preview the first rows of the cars data used for the color bar example
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [38]:
from bokeh.models import LinearColorMapper, ColorBar
from bokeh.transform import transform

output_notebook()

# map numbers in the range [low, high] linearly onto the colours of a palette;
# the range spans the observed vehicle weights
color_mapper = LinearColorMapper(
    palette='Viridis256',
    low=cars.Weight_in_lbs.min(),
    high=cars.Weight_in_lbs.max(),
)

# plot
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
colorbar_fig = figure(
    width=600,
    height=400,
    x_axis_label='Horsepower',
    y_axis_label='Miles_per_Gallon',
)
colorbar_fig.circle(
    x='Horsepower',
    y='Miles_per_Gallon',
    source=cars,
    color=transform('Weight_in_lbs', color_mapper),    # colour each point by its weight
    size=15,
    alpha=0.5,
)

# render a color bar based on the same color mapper
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0, 0), title='Weight')
colorbar_fig.add_layout(color_bar, 'right')

show(colorbar_fig)

# Maps

In [43]:
!pip install pyproj
import warnings
warnings.filterwarnings('ignore')




In [44]:
# convert longitude, latitude to mercator coordinates
# example - Detroit Michigan 42.334197, -83.047752

# Transformer is used instead of pyproj's deprecated `transform` function.
# Not importing `transform` from pyproj also keeps that name bound to
# bokeh.transform.transform, which the color bar cell uses.
from pyproj import Transformer

# built once and re-used; always_xy=True fixes the argument order to (longitude, latitude)
_WGS84_TO_WEB_MERCATOR = Transformer.from_crs('EPSG:4326', 'EPSG:3857', always_xy=True)


def create_coordinates(long_arg, lat_arg):
    """Print the EPSG:3857 (web mercator) x and y of a longitude/latitude pair.

    Parameters
    ----------
    long_arg : float
        Longitude in the EPSG:4326 coordinate system.
    lat_arg : float
        Latitude in the EPSG:4326 coordinate system.

    Returns
    -------
    None
        The two coordinates are printed, separated by a space.
    """
    mercator_x, mercator_y = _WGS84_TO_WEB_MERCATOR.transform(long_arg, lat_arg)
    print(mercator_x, mercator_y)


# Detroit
create_coordinates(-83.047752, 42.334197)

-9244833.464166068 5211172.739903524


In [45]:
# Cleveland - arguments are passed as (longitude, latitude)
create_coordinates(-81.694703,41.499437)

-9094212.73846772 5086289.9693058105


In [46]:
# Chicago - arguments are passed as (longitude, latitude)
create_coordinates(-87.629849,41.878111)

-9754910.168971453 5142738.513793045


In [50]:
from bokeh.tile_providers import get_provider, Vendors

tile_provider = get_provider(Vendors.CARTODBPOSITRON)
# tile_provider = get_provider(Vendors.STAMEN_TONER_BACKGROUND)

# range bounds supplied in web mercator coordinates
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
m = figure(
    width=800,
    height=400,
    x_range=(-12000000, 9000000),
    y_range=(-1000000, 7000000),
    x_axis_type='mercator',
    y_axis_type='mercator',
)

m.add_tile(tile_provider)

# city markers, using the (truncated) mercator coordinates printed by create_coordinates
m.circle(x=-9244833, y=5211172, size=10, color='red')      # Detroit
m.circle(x=-9094212, y=5086289, size=10, color='blue')     # Cleveland
m.circle(x=-9754910, y=5142738, size=10, color='orange')   # Chicago

show(m)

# Interactive Widgets

In [51]:
# change size of scatter plot circles
from bokeh.layouts import column
from bokeh.models import Slider

# the circles and the slider start from the same radius, so the slider shows
# the radius that is actually drawn before it is moved for the first time
initial_radius = 0.1

# create figure and plot
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
change_plot_size = figure(width=600, height=300)
change_plot_size_r = change_plot_size.circle(
    [1, 2, 3, 4, 5],
    [3, 2, 5, 6, 4],
    radius=initial_radius,
    alpha=0.5,
)

# create widget and link its value to the radius of the circle glyph
slider = Slider(start=0.1, end=1, step=0.01, value=initial_radius)
slider.js_link('value', change_plot_size_r.glyph, 'radius')

show(column(change_plot_size, slider))

In [53]:
!pip install sklearn

Successfully installed joblib-1.0.1 scikit-learn-0.24.1 scipy-1.6.2 sklearn-0.0 threadpoolctl-2.1.0


In [54]:
# Using toggle button and minor Linear Regression
from sklearn import linear_model
from bokeh.layouts import layout
from bokeh.models import Toggle
import numpy as np

output_notebook()

# data; the reshape(-1, 1) calls turn the flat lists into single-column 2-D arrays
x = [1,2,3,4,5,6,7,8,9,10]
X = np.array(x).reshape(-1, 1)
y = [2,2,4,1,5,6,8,2,3,7]
Y = np.array(y).reshape(-1, 1)

# linear regression object
regr = linear_model.LinearRegression()

# fit linear model
regr.fit(X, Y)

# make predictions
pred = regr.predict(X)

# plot with regression line
# `width`/`height` replace the deprecated `plot_width`/`plot_height` keywords
regr_plot = figure(width=500, height=300)
regr_plot.scatter(x, y, size=10)
# the predictions are flattened back to one dimension for the line glyph
regr_line = regr_plot.line(x, pred.flatten(), line_color='red')

# the button's `active` state drives the `visible` property of the line
toggle_button = Toggle(label='line of best fit', button_type='success', active=True)
toggle_button.js_link('active', regr_line, 'visible')

show(layout([regr_plot], [toggle_button]))

In [None]:
!pip install ipywidgets

# Interactive Widgets with ipywidgets

In [56]:
# add a 'year' column holding the calendar year of every 'date' value
seattle_weather['year'] = pd.to_datetime(seattle_weather['date']).dt.year
seattle_weather.tail()

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
1456,2015-12-27,8.6,4.4,1.7,2.9,fog,2015
1457,2015-12-28,1.5,5.0,1.7,1.3,fog,2015
1458,2015-12-29,0.0,7.2,0.6,2.6,fog,2015
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun,2015
1460,2015-12-31,0.0,5.6,-2.1,3.5,sun,2015


In [58]:
import ipywidgets
from bokeh.io import push_notebook
from bokeh.models import Range1d

# work on a copy of the weather frame
sw = seattle_weather.copy()

# widget: drop-down with the selectable years, starting at 2012
drop_down = ipywidgets.Dropdown(
    options=[2012, 2013, 2014, 2015],
    value=2012,
    description='years:',
    disabled=False,
)

# data: mean of every measurement for the initially selected year;
# the year filter is evaluated once instead of once per measurement
x_bar_data_ipyw = ['precipitation', 'temp_max', 'temp_min', 'wind']
sw_initial_year = sw[sw.year == 2012]
y_bar_data_ipyw = [sw_initial_year[measurement].mean() for measurement in x_bar_data_ipyw]

# figure and plot
# `height` replaces the deprecated `plot_height` keyword
bar_chart_interactive = figure(x_range=x_bar_data_ipyw, height=300)
bar_ipyw = bar_chart_interactive.vbar(x_bar_data_ipyw, top=y_bar_data_ipyw, color='green', width=0.5)

# fixed value range for the bars
bar_chart_interactive.y_range = Range1d(0, 18)

# function - bar chart
def weather_averages(year):
    """Set the bar heights to the averages of the given year and refresh the plot.

    Parameters
    ----------
    year : int
        Year to display. The rows of `sw` whose `year` column equals this
        value are averaged, one mean per entry of `x_bar_data_ipyw`.

    Notes
    -----
    A year without any rows in `sw` leaves the bars unchanged.
    `push_notebook()` is called in every case.
    """
    rows_for_year = sw[sw.year == year]

    # only replace the bar heights when the year actually occurs in the data
    if not rows_for_year.empty:
        bar_ipyw.data_source.data['top'] = [
            rows_for_year[measurement].mean() for measurement in x_bar_data_ipyw
        ]

    # send the changed data source to the plot shown in the notebook
    push_notebook()

# display the chart with a notebook handle so that push_notebook() can update it
show(bar_chart_interactive, notebook_handle=True)
        
# interaction
# interact between the data, function, and widget:
# weather_averages is called with the drop-down's value as `year`
ipywidgets.interact(weather_averages, year=drop_down)

interactive(children=(Dropdown(description='years:', options=(2012, 2013, 2014, 2015), value=2012), Output()),…

<function __main__.weather_averages(year)>