## 高级接口bokeh.charts的转型
* 对标seaborn，能够用最少的代码实现常用统计分析需求
* DataFrame或者table-like数据，内部实现时会自动转化成df

## 柱状图

In [1]:
from bokeh.charts import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Bar chart of `mpg` where the `cyl` column supplies the group labels.
bar_chart = Bar(
    df,
    label='cyl',
    values='mpg',
    title="Total MPG by CYL",
)

output_notebook()

show(bar_chart)

You can access Timestamp as pandas.Timestamp
  if pd and isinstance(obj, pd.tslib.Timestamp):


In [2]:
# Preview the first rows of the autompg sample DataFrame.
df.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


### 设置聚合操作
* sum
* mean
* count
* nunique
* median
* min
* max

In [4]:
from bokeh.charts  import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Bar chart of `mpg` per `yr`, aggregated with agg='mean'.
mean_mpg_chart = Bar(
    df,
    label='yr',
    values='mpg',
    agg='mean',
    title="Average MPG by YR",
)

output_notebook()

show(mean_mpg_chart)

### 设置柱图宽度

In [6]:
from bokeh.charts  import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Bar chart of `displ` per `yr`, with the bar width set to 0.4.
narrow_bar_chart = Bar(
    df,
    label='yr',
    values='displ',
    title="Total DISPL by YR",
    bar_width=0.4,
)

output_notebook()

show(narrow_bar_chart)

### 设置柱图颜色

In [7]:
from bokeh.charts  import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Bar chart of `displ` per `yr`, drawn in the fixed colour "wheat".
wheat_bar_chart = Bar(
    df,
    label='yr',
    values='displ',
    title="Total DISPL by YR",
    color="wheat",
)

output_notebook()

show(wheat_bar_chart)

### 绘制分组柱图

In [8]:
from bokeh.charts  import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Grouped bar chart: agg='median' of `mpg` per `yr`, grouped by `origin`,
# with the legend placed at the top right.
grouped_bar_chart = Bar(
    df,
    label='yr',
    values='mpg',
    agg='median',
    group='origin',
    title="Median MPG by YR, grouped by ORIGIN",
    legend='top_right',
)

output_notebook()

show(grouped_bar_chart)

### 绘制堆积柱图

In [9]:
from bokeh.charts  import Bar, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Stacked bar chart: agg='mean' of `mpg` per `origin`, stacked by `cyl`,
# with the legend placed at the top right.
stacked_bar_chart = Bar(
    df,
    label='origin',
    values='mpg',
    agg='mean',
    stack='cyl',
    title="Avg MPG by ORIGIN, stacked by CYL",
    legend='top_right',
)

output_notebook()

show(stacked_bar_chart)

## 箱式图

In [10]:
from bokeh.charts  import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg`, one box per `cyl` label; an empty string is passed
# as `legend`.
box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    legend='',
    title="MPG Summary (grouped by CYL)",
)

output_notebook()

show(box_chart)

### 组合分组箱图

In [12]:
from bokeh.charts  import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` labelled by the combination of `cyl` and `origin`.
label_columns = ['cyl', 'origin']
combined_box_chart = BoxPlot(
    df,
    values='mpg',
    label=label_columns,
    legend='',
    title="MPG Summary (grouped by CYL, ORIGIN)",
)

output_notebook()

show(combined_box_chart)

### 设置箱体颜色

In [13]:
from bokeh.charts  import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl`, using the single hex colour '#00cccc'.
colored_box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    color='#00cccc',
    legend='',
    title="MPG Summary (grouped by CYL)",
)

output_notebook()

show(colored_box_chart)

In [14]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl`; the `cyl` column name is also passed as
# `color` (the title describes the boxes as shaded by CYL).
shaded_box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    color='cyl',
    legend='',
    title="MPG Summary (grouped and shaded by CYL)",
)

output_notebook()

show(shaded_box_chart)

### 设置须线颜色

In [15]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl`, with whisker_color set to 'goldenrod'.
whisker_box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    whisker_color='goldenrod',
    legend='',
    title="MPG Summary (grouped by CYL, shaded whiskers)",
)

output_notebook()

show(whisker_box_chart)

In [16]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl`; the `cyl` column name is passed as
# `whisker_color` (the title describes the whiskers as shaded by CYL).
whisker_by_cyl_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    whisker_color='cyl',
    legend='',
    title="MPG Summary (grouped and whiskers shaded by CYL)",
)

output_notebook()

show(whisker_by_cyl_chart)

### 设置离群点属性

In [17]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl` with outliers=False.
no_outlier_box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    outliers=False,
    legend='',
    title="MPG Summary (grouped by CYL, no outliers)",
)

output_notebook()

show(no_outlier_box_chart)

In [18]:
from bokeh.charts import BoxPlot, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Box plot of `mpg` per `cyl` with marker='square'.
square_marker_box_chart = BoxPlot(
    df,
    values='mpg',
    label='cyl',
    marker='square',
    legend='',
    title="MPG Summary (grouped by CYL, square marker)",
)

output_notebook()

show(square_marker_box_chart)

## 直方图

In [19]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram built directly from the `mpg` column selected out of df.
mpg_values = df['mpg']
mpg_histogram = Histogram(mpg_values, title="MPG Distribution")

output_notebook()

show(mpg_histogram)

In [20]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram of `hp`: the whole DataFrame is passed and the column name is
# given as the second positional argument.
hp_histogram = Histogram(
    df,
    'hp',
    title="HP Distribution",
)

output_notebook()

show(hp_histogram)

In [21]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram of `displ`, with the column selected through the `values` keyword.
displ_histogram = Histogram(
    df,
    values='displ',
    title="DISPL Distribution",
)

output_notebook()

show(displ_histogram)

### 设置bins

In [22]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram of `mpg` with bins=50.
binned_histogram = Histogram(
    df,
    values='mpg',
    bins=50,
    title="MPG Distribution (50 bins)",
)

output_notebook()

show(binned_histogram)

### 设置颜色

In [23]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram of `hp` drawn in the fixed colour 'navy'.
navy_histogram = Histogram(
    df,
    values='hp',
    color='navy',
    title="HP Distribution",
)

output_notebook()

show(navy_histogram)

### 分组直方图

In [24]:
from bokeh.charts import Histogram, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Histogram of `hp`; the `cyl` column name is passed as `color` (the title
# describes this as colour-grouping by CYL), with the legend at the top right.
grouped_histogram = Histogram(
    df,
    values='hp',
    color='cyl',
    title="HP Distribution (color grouped by CYL)",
    legend='top_right',
)

output_notebook()

show(grouped_histogram)

## 散点图

In [25]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` (y) against `mpg` (x) with explicit axis labels.
scatter_chart = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)

output_notebook()

show(scatter_chart)

### 设置颜色

In [26]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `mpg`, drawn in the fixed colour "navy".
navy_scatter_chart = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    color="navy",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)

output_notebook()

show(navy_scatter_chart)

### 分组散点图

In [27]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `mpg`; the `cyl` column name is passed as
# `color` (the title describes the points as shaded by CYL).
shaded_scatter_chart = Scatter(
    df,
    x='mpg',
    y='hp',
    color='cyl',
    title="HP vs MPG (shaded by CYL)",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
)

output_notebook()

show(shaded_scatter_chart)

### 设置图例

In [28]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `displ`, coloured via the `cyl` column name.
# The legend is placed at the top left and sorted on the 'color' field in
# ascending direction.
legend_scatter_chart = Scatter(
    df,
    x='displ',
    y='hp',
    color='cyl',
    title="HP vs DISPL (shaded by CYL)",
    legend="top_left",
    legend_sort_field='color',
    legend_sort_direction='ascending',
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_notebook()

show(legend_scatter_chart)

In [29]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `displ`, coloured via the `cyl` column name,
# with a top-left legend sorted on the 'color' field in ascending direction.
# NOTE(review): these arguments are identical to the preceding cell's;
# confirm whether a different sort setting was intended here.
sorted_legend_scatter_chart = Scatter(
    df,
    x='displ',
    y='hp',
    color='cyl',
    title="HP vs DISPL (shaded by CYL)",
    legend="top_left",
    legend_sort_field='color',
    legend_sort_direction='ascending',
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_notebook()

show(sorted_legend_scatter_chart)

### 设置标记

In [30]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `displ` with marker='square' for every point.
square_scatter_chart = Scatter(
    df,
    x='displ',
    y='hp',
    marker='square',
    title="HP vs DISPL",
    legend="top_left",
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_notebook()

show(square_scatter_chart)

In [31]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `displ`; the `cyl` column name is passed as
# `marker` (the title describes the points as marked by CYL).
marked_scatter_chart = Scatter(
    df,
    x='displ',
    y='hp',
    marker='cyl',
    title="HP vs DISPL (marked by CYL)",
    legend="top_left",
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_notebook()

show(marked_scatter_chart)

In [32]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Scatter plot of `hp` against `displ`; the `cyl` column name is passed as
# both `marker` and `color`.
marked_colored_scatter_chart = Scatter(
    df,
    x='displ',
    y='hp',
    marker='cyl',
    color='cyl',
    title="HP vs DISPL (marked by CYL)",
    legend="top_left",
    xlabel="Displacement",
    ylabel="Horsepower",
)

output_notebook()

show(marked_colored_scatter_chart)

In [33]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# (display label, '@column' reference) pairs handed to the chart as tooltips.
hover_fields = [
    ('Cylinders', '@cyl'),
    ('Displacement', '@displ'),
    ('Weight', '@weight'),
    ('Acceleration', '@accel'),
]

# Scatter plot of `hp` against `mpg` with the tooltip pairs attached.
tooltip_scatter_chart = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    tooltips=hover_fields,
)

output_notebook()

show(tooltip_scatter_chart)

### 设置数据提示
* 适用于散点图和折线图

In [34]:
from bokeh.charts import Scatter, output_notebook, show
from bokeh.sampledata.autompg import autompg as df

# Each entry pairs a display label with an '@column' reference.
tooltip_pairs = [
    ('Cylinders', '@cyl'),
    ('Displacement', '@displ'),
    ('Weight', '@weight'),
    ('Acceleration', '@accel'),
]

# Scatter plot of `hp` against `mpg`, passing the pairs via `tooltips`.
hover_scatter_chart = Scatter(
    df,
    x='mpg',
    y='hp',
    title="HP vs MPG",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    tooltips=tooltip_pairs,
)

output_notebook()

show(hover_scatter_chart)

## 面积图

In [35]:
from bokeh.charts import Area, show, output_notebook

# Example data: three integer series of equal length, keyed by name.
series = {
    'python': [2, 3, 7, 5, 26, 221, 44, 233, 254, 265, 266, 267, 120, 111],
    'pypy': [12, 33, 47, 15, 126, 121, 144, 233, 254, 225, 226, 267, 110, 130],
    'jython': [22, 43, 10, 25, 26, 101, 114, 203, 194, 215, 201, 227, 139, 160],
}

# Area chart of the series with a top-left legend and labelled axes.
area_chart = Area(
    series,
    title="Area Chart",
    legend="top_left",
    xlabel='time',
    ylabel='memory',
)

output_notebook()
show(area_chart)

## 弦图
* 用于刻画二维数据的连接关系

In [36]:
import pandas as pd
from bokeh.charts import Chord
from bokeh.io import show, output_notebook
from bokeh.sampledata.les_mis import data

# Pull the node and link records out of the les_mis sample data.
nodes = data['nodes']
links = data['links']

nodes_df = pd.DataFrame(nodes)
links_df = pd.DataFrame(links)

# Attach node attributes to each link twice: first for the link's `source`
# id, then for its `target` id, matching against the node frame's index.
# The overlapping node columns come out suffixed _x (source) and _y (target).
source_data = links_df.merge(nodes_df, how='left', left_on='source', right_index=True)
source_data = source_data.merge(nodes_df, how='left', left_on='target', right_index=True)
source_data = source_data[source_data["value"] > 5]  # Keep only links whose value is greater than 5

# Chord diagram connecting source names to target names, weighted by `value`.
chord_from_df = Chord(source_data, source="name_x", target="name_y", value="value")
output_notebook()
show(chord_from_df)

In [37]:
# Preview the first rows of the merged and filtered link table.
source_data.head()

Unnamed: 0,source,target,value,group_x,name_x,group_y,name_y
1,2,0,8,1,Mlle.Baptistine,1,Myriel
2,3,0,10,1,Mme.Magloire,1,Myriel
3,3,2,6,1,Mme.Magloire,1,Mlle.Baptistine
47,23,11,9,3,Fantine,2,Valjean
49,24,11,7,4,Mme.Thenardier,2,Valjean


In [38]:
# Preview the first rows of the raw links table (source, target, value).
links_df.head()

Unnamed: 0,source,target,value
0,1,0,1
1,2,0,8
2,3,0,10
3,3,2,6
4,4,0,1


In [39]:
# Preview the first rows of the nodes table (group, name).
nodes_df.head()

Unnamed: 0,group,name
0,1,Myriel
1,1,Napoleon
2,1,Mlle.Baptistine
3,1,Mme.Magloire
4,1,CountessdeLo


## 环形图（Donut）
* 用于刻画多级表格

In [41]:
from bokeh.charts import Donut, show, output_notebook
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data

import pandas as pd

# Use the bokeh helper to convert the json/dict sample data into a DataFrame.
df = df_from_json(data)

# Keep only countries with more than eight total medals, sorted by total
# medals in descending order.
df = df[df['total'] > 8]
df = df.sort_values(by="total", ascending=False)
# Reshape to long form: one row per (abbr, medal) pair, with the count in
# `medal_count`.
df = pd.melt(df, id_vars=['abbr'],
             value_vars=['bronze', 'silver', 'gold'],
             value_name='medal_count', var_name='medal')

# Two-level donut: labelled by `abbr` and `medal`, valued by `medal_count`.
d = Donut(df, label=['abbr', 'medal'], values='medal_count',
          text_font_size='8pt', hover_text='medal_count')

output_notebook()

show(d)

  df.sortlevel(inplace=True)


## 热力图

In [52]:
from bokeh.charts import HeatMap, output_file, show

# (dict, OrderedDict, lists, arrays and DataFrames are valid inputs)
# Nine rows: three fruits crossed with three sample numbers.
data = {'fruit': ['apples']*3 + ['bananas']*3 + ['pears']*3,
        'fruit_count': [4, 5, 8, 1, 2, 4, 6, 5, 4],
        'sample': [1, 2, 3]*3}

# Heat map with `fruit` on x, `sample` on y and `fruit_count` as the values;
# stat=None is passed explicitly.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'pandas._libs.interval.Interval' object has no attribute
# 'split'" -- presumably an incompatibility between this bokeh.charts
# release and the installed pandas; verify the library versions.
hm = HeatMap(data, x='fruit', y='sample', values='fruit_count',
             title='Fruits', stat=None)

# Unlike most cells in this notebook, this one writes to an HTML file.
output_file('heatmap.html')
show(hm)

AttributeError: 'pandas._libs.interval.Interval' object has no attribute 'split'

## 折线图

In [46]:
import numpy as np
from bokeh.charts import Line, output_notebook, show

# Dicts, OrderedDicts, lists, arrays and DataFrames are all valid inputs;
# here a 2-D array is built from three rows of five values each.
rows = [
    [2, 3, 7, 5, 26],
    [12, 33, 47, 15, 126],
    [22, 43, 10, 25, 26],
]
xyvalues = np.array(rows)

line_chart = Line(
    xyvalues,
    title="line",
    legend="top_left",
    ylabel='Languages',
)

output_notebook()
show(line_chart)

## 阶梯图

In [51]:
from bokeh.charts import Step, show, output_file

# Dataset in which several columns measure the same quantity.
rates = {
    'stamp': [.33, .33, .34, .37, .37, .37, .37, .39, .41, .42,
              .44, .44, .44, .45, .46, .49, .49],
    'postcard': [.20, .20, .21, .23, .23, .23, .23, .24, .26, .27,
                 .28, .28, .29, .32, .33, .34, .35],
}

# Step chart in which every measured column gets its own colour and dash style.
step_chart = Step(
    rates,
    y=['stamp', 'postcard'],
    dash=['stamp', 'postcard'],
    color=['stamp', 'postcard'],
    title="U.S. Postage Rates (1999-2015)",
    ylabel='Rate per ounce',
    legend=True,
)

output_file("steps.html")

show(step_chart)