# In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import *

# Build a synthetic data set: 2000 standard-normal scores, each tagged with
# a randomly chosen category label.
cats = list("abc")
yy = np.random.randn(2000)

g = np.random.choice(cats, 2000)

# Shift each category's scores by (position // 2), i.e. 0, 0, 1 for
# "a", "b", "c", so the categories do not all share the same distribution.
for position, label in enumerate(cats):
    members = g == label
    yy[members] = yy[members] + position // 2

# To plot your own data, put the values in `score` and the corresponding
# category labels in `group`.
df = pd.DataFrame({"score": yy, "group": g})


# Per-category quartiles, interquartile range and the 1.5 * IQR fences.
groups = df.groupby("group")
q1, q2, q3 = (groups.quantile(q=level) for level in (0.25, 0.5, 0.75))
iqr = q3 - q1
upper = q3 + 1.5 * iqr
lower = q1 - 1.5 * iqr

def outliers(group):
    """Return the scores of one category that lie outside its fences.

    Intended to be passed to ``groups.apply``: ``group`` is the sub-frame
    for a single category and ``group.name`` is that category's label.
    The fences are read from the module-level ``upper`` and ``lower``
    frames, which are indexed by category and hold a ``score`` column.

    Returns the ``score`` column restricted to rows strictly above the
    upper fence or strictly below the lower fence.
    """
    cat = group.name
    # Look the fences up by column label; positional ``[0]`` on a
    # label-indexed Series is deprecated in recent pandas releases.
    upper_fence = upper.loc[cat, "score"]
    lower_fence = lower.loc[cat, "score"]
    is_outlier = (group.score > upper_fence) | (group.score < lower_fence)
    return group[is_outlier]["score"]
out = groups.apply(outliers).dropna()


# Prepare outlier data for plotting: one (category, value) coordinate pair
# per outlier.
outx = []
outy = []
# Read the categories from the first index level rather than looking each
# one up with out.loc[cat]: a category that has no outliers is absent from
# the index, so a label lookup for it raises KeyError.
if not out.empty:
    outx = list(out.index.get_level_values(0))
    outy = out.tolist()


# Send Bokeh output to the notebook.
output_notebook()


# Categorical x-axis with one slot per category. "save" is the documented
# name of the save tool; the older "previewsave" spelling is not accepted by
# current Bokeh releases. The toolbar itself is hidden.
p = figure(tools="save", background_fill_color="#EFE8E2", title="title",
           x_range=cats, toolbar_location=None)



# Clamp each fence to the observed extreme of its category so that a stem
# never extends past the data (this matters on a side with no outliers).
qmin = groups.quantile(q=0.00)
qmax = groups.quantile(q=1.00)
upper["score"] = [min(pair) for pair in zip(qmax.iloc[:, 0], upper["score"])]
lower["score"] = [max(pair) for pair in zip(qmin.iloc[:, 0], lower["score"])]


# Stems: a vertical line from each fence to the adjacent quartile
# (upper fence -> Q3, then lower fence -> Q1).
for fence, quartile in ((upper.score, q3.score), (lower.score, q1.score)):
    p.segment(cats, fence, cats, quartile, line_width=1, line_color="black")


# Boxes: one rectangle for Q2..Q3 and one for Q1..Q2, so the shared edge
# marks the median.
for box_top, box_bottom in ((q3.score, q2.score), (q2.score, q1.score)):
    p.rect(cats, (box_top + box_bottom) / 2, 0.7, box_top - box_bottom,
           fill_color="lightsteelblue", line_width=1, line_color="black")

# Whiskers: nearly zero-height rectangles, which are simpler than segments.
for fence in (lower.score, upper.score):
    p.rect(cats, fence, 0.1, 0.01, line_color="black")


# Outliers
# NOTE(review): newer Bokeh releases appear to prefer p.scatter(...) for
# size-based markers -- confirm against the installed version.
p.circle(outx, outy, size=6, fill_alpha=0.6, color="red")

# Title colour, typeface, alignment and size
p.title.text_color = "black"
p.title.text_font = "default"
p.title.align = "center"        # other options: left/right
p.title.text_font_size = "80pt"


# Axis label text
p.xaxis.axis_label_text_font = "default"
p.xaxis.axis_label_text_font_size = "10pt"


# Tick label text (each property is set exactly once)
p.xaxis.major_label_text_font_size = "12pt"
p.yaxis.major_label_text_color = "black"
p.yaxis.major_label_text_font_size = "8pt"

# Grid lines: none along x, thick white lines along y
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = "white"
p.grid.grid_line_width = 2

show(p)