# Section 23


## Data Viz with Bokeh

### Basic Line Plot

In [1]:
from bokeh.plotting import figure, output_file, show


In [2]:
# Sample data for the line plot: x runs from 10 to 90 in steps of 10,
# y holds the matching value for each x position.
x = list(range(10, 100, 10))
y = [
    11, 12, 14,
    16, 17, 18,
    19, 21, 23,
]

In [5]:
# Route the rendered plot to line.html; the file is produced when show() runs.
output_file("line.html")

# Axis captions are collected first so the figure() call stays short.
axis_labels = dict(x_axis_label = 'x-axis', y_axis_label = 'y-axis')
p = figure(title = 'Basic Line Plot', **axis_labels)

# Single line glyph through the (x, y) points, labelled "Price" in the legend.
p.line(x, y, line_width = 3, legend_label = "Price")

show(p)

### Bar Plot

In [6]:
#Importing the necessary modules and tools
from pathlib import Path

from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral10
from bokeh.plotting import figure, output_file, show
from bokeh.transform import factor_cmap

import pandas

#Folder that holds the input spreadsheet. It is resolved from the current user's home directory
#instead of a hard-coded /Users/<name>/... path, so the cell is not tied to one machine.
#Edit this line if the data lives somewhere else.
DATA_DIR = Path.home() / "Downloads"

#Reading the Excel data into a Pandas dataframe
top10 = pandas.read_excel(DATA_DIR / "top10.xlsx")

#Configuring Bokeh to write the plot to bar.html (relative to the current folder) when show() is called
output_file("bar.html")

#Referencing the two columns that contain the necessary data
language = top10["Language"]
rating = top10["Ratings"]

#At the most basic level, a ColumnDataSource is simply a mapping between column names and lists of data.
#The ColumnDataSource takes a data parameter which is a dict,
#with string column names as keys and lists (or arrays) of data values as values.
#If one positional argument is passed in to the ColumnDataSource initializer, it will be taken as data.
#Once the ColumnDataSource has been created, it can be passed into the source parameter of plotting methods
#which allows you to pass a column’s name as a stand in for the data values
#Source: https://bokeh.pydata.org/en/latest/docs/user_guide/data.html#columndatasource
source = ColumnDataSource(data = dict(language = language, rating = rating))

#Creating a new plot with various optional parameters
#The language names are used as the categorical x-axis range.
#Ranges: https://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html#setting-ranges
#NOTE(review): plot_height is the pre-3.0 keyword; Bokeh 3.x only accepts height - update when upgrading.
p = figure(x_range = language, plot_height = 800, toolbar_location = None, title = "Top 10 Programming Languages")

#Drawing the vertical bars and setting visual properties
#'language' and 'rating' below are column names looked up in source, not the variables above.
#factor_cmap assigns each language its own colour from the Spectral10 palette.
#vbar: https://bokeh.pydata.org/en/latest/docs/reference/models/glyphs/vbar.html
#factor_cmap: https://bokeh.pydata.org/en/latest/docs/reference/transform.html#bokeh.transform.factor_cmap
p.vbar(x = 'language', top = 'rating', width = 0.7, source = source, legend_label = "Languages",
       line_color = 'white', fill_color = factor_cmap('language', palette = Spectral10, factors = language))

#Setting other optional parameters
p.xgrid.grid_line_color = None
p.y_range.start = 0
#Fixed upper bound for the y-axis - presumably chosen to sit above the largest rating; confirm against the data.
p.y_range.end = 16
p.legend.orientation = "horizontal"
p.legend.location = "top_right"

#Displaying the final result
show(p)

### Pie Chart

In [7]:
#Importing the necessary modules and tools
from math import pi
from pathlib import Path

import pandas

from bokeh.palettes import Category20c
from bokeh.plotting import figure, output_file, show
from bokeh.transform import cumsum

#Configuring Bokeh to write the plot to pie.html (relative to the current folder) when show() is called
output_file("pie.html")

#Folder that holds the input CSV. It is resolved from the current user's home directory
#instead of a hard-coded /Users/<name>/... path, so the cell is not tied to one machine.
#Edit this line if the data lives somewhere else.
DATA_DIR = Path.home() / "Downloads"

#Reading the CSV data into a Pandas dataframe
data = pandas.read_csv(DATA_DIR / "countries.csv")

#Referencing the two columns that contain the necessary data
country = data["Country"]
population = data["Population"]

#Configuring the pie wedge size based on the Population value:
#each row's share of the total population, expressed in radians of a full circle
data['angle'] = data['Population'] / data['Population'].sum() * (2 * pi)

#Configuring the colors to use for each wedge
#Category20c is keyed by palette size, so this picks one colour per row;
#the lookup raises KeyError when the row count has no matching palette size.
data['color'] = Category20c[len(data)]

#Creating a new plot with various optional parameters
#The hover tooltip shows the Country and Population columns of the hovered wedge.
#NOTE(review): plot_height is the pre-3.0 keyword; Bokeh 3.x only accepts height - update when upgrading.
p = figure(plot_height = 400, title = "Top 10 Countries by Population", toolbar_location = None,
           tools = "hover", tooltips = "@Country: @Population", x_range = (-0.5, 1.0))

#Configuring wedge visual properties
#Each wedge starts where the running total of 'angle' stood before its row (include_zero = True)
#and ends at the running total including its row, so the wedges tile the circle without gaps.
#wedge: https://bokeh.pydata.org/en/latest/docs/reference/models/glyphs/wedge.html
#cumsum: https://bokeh.pydata.org/en/latest/docs/reference/transform.html#bokeh.transform.cumsum
p.wedge(x = 0, y = 1, radius = 0.4,
        start_angle = cumsum('angle', include_zero = True), end_angle = cumsum('angle'),
        line_color = "white", fill_color = 'color', legend_field = 'Country', source = data)

#Setting other optional parameters (axes and grid are hidden for the pie chart)
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

#Displaying the final result
show(p)

### Stock Price Visualizations

In [9]:
#Stock price visualizations
#Fetching Bokeh's sample datasets (including the stock CSVs imported below) into the local Bokeh data directory.
#NOTE(review): this call runs on every execution of the cell; once the data is present
#it can likely be skipped - confirm for the installed Bokeh version.
import bokeh.sampledata
bokeh.sampledata.download()
 
#Importing the necessary modules and tools
import numpy as np
 
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
 
#Function for converting dates to the proper format
def datetime(x):
    """Convert a sequence of date values into a NumPy datetime64 array.

    x: the date values to convert (the cell below passes the 'date' column
       of each stock dataset).
    Returns a new numpy array with a datetime64 dtype.

    Note: this function shares its name with the standard-library
    ``datetime`` module; the name is kept because other cells call it.
    """
    datetime64_dtype = np.dtype("datetime64")
    as_dates = np.array(x, dtype = datetime64_dtype)
    return as_dates
 
#Building an empty figure whose x-axis is rendered as dates
p = figure(title = "Stock Prices", x_axis_type = "datetime")

#Cosmetic tweaks: axis captions plus a faint grid
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Price'
p.grid.grid_line_alpha = 0.3

#One (legend label, dataset, line colour) entry per ticker, listed in drawing order
stock_series = [
    ('AAPL', AAPL, '#A6CEE3'),
    ('GOOG', GOOG, '#B2DF8A'),
    ('IBM', IBM, '#33A02C'),
    ('MSFT', MSFT, '#FB9A99'),
]

#Drawing the adjusted close of every ticker against its converted dates
for ticker, quotes, line_color in stock_series:
    p.line(datetime(quotes['date']), quotes['adj_close'], color = line_color, legend_label = ticker)

#Anchoring the legend in the upper-left corner of the plot
p.legend.location = "top_left"

#Directing the output to stocks.html, with the given page title
output_file("stocks.html", title = "Stocks Comparison")

#Displaying the final result
show(p)


Creating /Users/lorisegovia/.bokeh directory
Creating /Users/lorisegovia/.bokeh/data directory
Using data directory: /Users/lorisegovia/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 