In [1]:
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

In [2]:
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

# Overview
We've seen how Bokeh can work well with Python lists, NumPy arrays, Pandas series, etc. At lower levels, these inputs are converted to a Bokeh ColumnDataSource. This data type is the central data source object used throughout Bokeh. Although Bokeh often creates them for us transparently, there are times when it is useful to create them explicitly.

In later sections we will see features like hover tooltips, computed transforms, and CustomJS interactions that make use of the ColumnDataSource, so let's take a quick look now.

# Creating with Python Dicts
The ColumnDataSource can be imported from bokeh.models:

In [3]:
from bokeh.models import ColumnDataSource

The ColumnDataSource is a mapping of column names (strings) to sequences of values. Here is a simple example. The mapping is provided by passing a Python dict with string keys and simple Python lists as values. The values could also be NumPy arrays or Pandas Series.

In [4]:
# Each keyword is a column name; each value is that column's sequence of values.
source = ColumnDataSource(
    data=dict(
        x=[1, 2, 3, 4, 5],
        y=[3, 7, 8, 5, 1],
    )
)

Up until now we have called functions like p.circle by passing in literal lists or arrays of data directly. When we do this, Bokeh automatically creates a ColumnDataSource for us. But it is also possible to specify a ColumnDataSource explicitly by passing it as the source argument to a glyph method. Whenever we do this, if we want a property (like "x" or "y" or "fill_color") to take a sequence of values, we pass the name of the column that we would like to use for that property:

In [5]:
# "x" and "y" are column names looked up in `source`, not literal data.
p = figure(plot_width=400, plot_height=400)
p.circle(x="x", y="y", size=20, source=source)
show(p)

# Creating with Pandas DataFrames
It's also simple to create ColumnDataSource objects directly from Pandas data frames. To do this, just pass the data frame to ColumnDataSource when you create it:

In [10]:
# Load the iris sample data frame and hand it to ColumnDataSource as its data.
from bokeh.sampledata.iris import flowers as df

source = ColumnDataSource(data=df)

In [35]:
# Scatter petal width against petal length; both strings name columns in `source`.
p = figure(plot_width=400, plot_height=400)
p.circle("petal_length", "petal_width", source=source)
# Label the axes with the plotted column names so the figure reads on its own.
p.xaxis.axis_label = "petal_length"
p.yaxis.axis_label = "petal_width"
show(p)

In [12]:
# Preview the first rows of the iris data frame.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [14]:
# Summary statistics (count, mean, std, quartiles) for the numeric iris columns.
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [15]:
# Show each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [24]:
# Mean sepal length per species. `df` is already a pandas DataFrame, so it can
# be pivoted directly without being rebound.
df.pivot_table(values="sepal_length", index="species", aggfunc="mean")

Unnamed: 0_level_0,sepal_length
species,Unnamed: 1_level_1
setosa,5.006
versicolor,5.936
virginica,6.588


In [39]:
# Petal length against sepal length, drawn with translucent firebrick markers.
p = figure(plot_width=400, plot_height=400)
p.circle(
    "sepal_length", "petal_length",
    source=source, color="firebrick", fill_alpha=0.2,
)
# Label the axes with the plotted column names so the figure reads on its own.
p.xaxis.axis_label = "sepal_length"
p.yaxis.axis_label = "petal_length"
show(p)

In [40]:
# Exercise: create a ColumnDataSource from the autompg sample data frame
# (the plots follow in later cells).
# Note: this rebinds `df` and `source`, which previously held the iris data.
from bokeh.sampledata.autompg import autompg_clean as df

source = ColumnDataSource(data=df)



In [41]:
# Preview the first rows of the autompg data frame.
df.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name,mfr
0,18.0,8,307.0,130,3504,12.0,70,North America,chevrolet chevelle malibu,chevrolet
1,15.0,8,350.0,165,3693,11.5,70,North America,buick skylark 320,buick
2,18.0,8,318.0,150,3436,11.0,70,North America,plymouth satellite,plymouth
3,16.0,8,304.0,150,3433,12.0,70,North America,amc rebel sst,amc
4,17.0,8,302.0,140,3449,10.5,70,North America,ford torino,ford


In [46]:
# Mean of accel, hp and yr for each (origin, mfr) pair.
df.pivot_table(
    index=["origin", "mfr"],
    values=["hp", "yr", "accel"],
    aggfunc="mean",
)

Unnamed: 0_level_0,Unnamed: 1_level_0,accel,hp,yr
origin,mfr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asia,datsun,16.408696,83.826087,76.869565
Asia,honda,15.992308,67.846154,78.923077
Asia,mazda,16.091667,77.666667,78.75
Asia,nissan,14.5,88.0,82.0
Asia,subaru,16.925,73.5,78.0
Asia,toyota,16.038462,83.961538,76.346154
Europe,audi,15.942857,86.714286,75.714286
Europe,bmw,12.65,111.5,73.5
Europe,fiat,15.65,73.375,74.25
Europe,mercedes,19.533333,88.0,78.333333


In [47]:
# NOTE(review): repeats the earlier autompg df.head() preview; likely a leftover cell.
df.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name,mfr
0,18.0,8,307.0,130,3504,12.0,70,North America,chevrolet chevelle malibu,chevrolet
1,15.0,8,350.0,165,3693,11.5,70,North America,buick skylark 320,buick
2,18.0,8,318.0,150,3436,11.0,70,North America,plymouth satellite,plymouth
3,16.0,8,304.0,150,3433,12.0,70,North America,amc rebel sst,amc
4,17.0,8,302.0,140,3449,10.5,70,North America,ford torino,ford


In [62]:
# autompg scatter: "mpg" on the x axis, "weight" on the y axis.
p = figure(plot_width=400, plot_height=400)
p.circle("mpg", "weight", source=source)
p.xaxis.axis_label = "mpg"
p.yaxis.axis_label = "weight"
show(p)

In [59]:
# autompg scatter: "weight" on the x axis, "displ" on the y axis.
p = figure(plot_height=400, plot_width=400)
p.circle(x="weight", y="displ", source=source)
p.xaxis.axis_label = "weight"
p.yaxis.axis_label = "displ"
show(p)

In [63]:
# autompg scatter: "weight" on the x axis, "hp" on the y axis.
p = figure(plot_height=400, plot_width=400)
p.circle(x="weight", y="hp", source=source)
p.xaxis.axis_label = "weight"
p.yaxis.axis_label = "hp"
show(p)

In [65]:
# Summary statistics for the "hp" column.
df["hp"].describe()

count    392.000000
mean     104.469388
std       38.491160
min       46.000000
25%       75.000000
50%       93.500000
75%      126.000000
max      230.000000
Name: hp, dtype: float64

In [70]:
x=pd.cut(df.hp,10)