In [1]:
import numpy as np
from bokeh.plotting  import figure, show
from bokeh.layouts import gridplot
from collections import OrderedDict
from scipy.stats import probplot

In [2]:
def get_quantiles(data):
    """Compute box-plot statistics for every series in ``data``.

    data: a list of lists (one list of numbers per box)

    Returns: a list with one tuple per series, laid out as
        (lower, upper, q1, q2, q3, outliers)
    where
        q1, q2, q3  are the 25th / 50th / 75th percentiles,
        lower/upper are the whisker ends: the smallest and largest
                    observations lying within 1.5 * IQR of the box,
        outliers    is the list of observations outside that range,
                    in their original order.
    """
    final = []
    for series in data:
        # A single call computes all three percentiles at once.
        q1, q2, q3 = np.percentile(series, [25, 50, 75])
        iqr = q3 - q1
        # Fences are loop-invariant for the series, so compute them once.
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr

        inliers, outliers = [], []
        for x in series:
            if x < lower_fence or x > upper_fence:
                outliers.append(x)
            else:
                inliers.append(x)

        # Whiskers end at real observations, not at the fences themselves;
        # otherwise they can extend beyond every data point.  The default
        # only matters for degenerate input with no in-range values.
        lower = min(inliers, default=lower_fence)
        upper = max(inliers, default=upper_fence)
        final.append((lower, upper, q1, q2, q3, outliers))
    return final


def make_box_raw(p, cats, upper, lower, q1, q2, q3, out):
    """Draw box-and-whisker glyphs onto an existing figure.

    p:      the figure to draw on
    cats:   category labels, one per box (x positions)
    upper:  upper whisker position per category
    lower:  lower whisker position per category
    q1, q2, q3: first quartile, median and third quartile per category
    out:    mapping of category label -> list of outlier values

    Returns: p, the same figure that was passed in
    """
    # Whisker stem, drawn first so the boxes sit on top of it.
    p.segment(x0=cats, y0=lower, x1=cats, y1=upper)

    # Two stacked bars: q1..q2 (lower half) and q2..q3 (upper half).
    p.vbar(x=cats, width=0.7, bottom=q1, top=q2,
           fill_color="#E08E79", line_color="black")
    p.vbar(x=cats, width=0.7, bottom=q2, top=q3,
           fill_color="#3B8686", line_color="black")

    # Thin rectangles act as the whisker end caps.
    p.rect(x=cats, y=lower, width=0.2, height=0.01, line_color="black")
    p.rect(x=cats, y=upper, width=0.2, height=0.01, line_color="black")

    # Flatten all outliers so they are drawn by ONE glyph renderer rather
    # than one renderer per point.  scatter() draws circle markers by default.
    xs, ys = [], []
    for cat, points in out.items():
        for point in points:
            xs.append(cat)
            ys.append(point)
    if xs:
        p.scatter(x=xs, y=ys, size=6, color="#F38630", fill_alpha=0.6)
    return p

def zip_data(cats, data):
    """Transpose per-series statistics and collect the non-empty outlier lists.

    cats: a list of labels, parallel to ``data``
    data: a list of per-series tuples whose element 5 is a list of outliers

    Returns: (columns, outs)
        columns: ``data`` transposed, i.e. one tuple per statistic
        outs:    OrderedDict of label -> outlier list, only for labels that
                 actually have outliers
    """
    columns = list(zip(*data))
    outs = OrderedDict(
        (label, columns[5][position])
        for position, label in enumerate(cats)
        if len(columns[5][position]) != 0
    )
    return columns, outs

def box_plot(cats, data, p=None):
    """
    Draw one box-and-whisker glyph per category.

    cats: a list of labels
    data: a list of lists
    p: optional figure to draw on; a new figure() with ``cats`` as its
       x_range is created when p is None
    Example:
    cats = ['a', 'b']
    data = [[1, 2, 3], [4, 5, 6]]

    Returns: p, the figure()
    Raises: AssertionError if cats/data are not lists or differ in length
    """
    # Type checks run first so an unsized argument fails with the intended
    # AssertionError instead of a TypeError from len().
    assert isinstance(cats, list), "cats must be a list of labels"
    assert isinstance(data, list), "data must be a list of lists"
    assert len(cats) == len(data), "cats and data must have the same length"

    # Identity test: only a missing figure triggers creation, regardless of
    # how the supplied object evaluates in a boolean context.
    if p is None:
        p = figure(tools="save", background_fill_color="#EFE8E2", x_range=cats)

    # Nothing to draw; the transposed statistics would be empty and
    # indexing them below would raise IndexError.
    if not cats:
        return p

    f, outs = zip_data(cats, get_quantiles(data))
    # get_quantiles yields (lower, upper, q1, q2, q3, outliers) per series.
    # Passing by keyword keeps lower/upper from being transposed.
    return make_box_raw(
        p,
        cats,
        upper=f[1],
        lower=f[0],
        q1=f[2],
        q2=f[3],
        q3=f[4],
        out=outs,
    )

In [3]:
 def make_qq_plot(nums):
     """Build a normal QQ-plot of ``nums`` together with probplot's fit line.

     nums: the sample values, passed straight to scipy.stats.probplot

     Returns: p, a new figure() holding the quantile scatter and the line
     """
     result = probplot(nums, dist="norm")
     quantile_pairs, fit = result[0], result[1]
     theoretical, ordered = quantile_pairs[0], quantile_pairs[1]
     slope, intercept = fit[0], fit[1]

     # Evaluate the fitted line at every theoretical quantile.
     fitted = []
     for z in theoretical:
         fitted.append(z * slope + intercept)

     p = figure(title="Normal QQ-Plot", background_fill_color="#E8DDCB")
     p.scatter(theoretical, ordered, fill_color="red")
     p.line(theoretical, fitted, line_width=2)
     return p


In [4]:
# Sample of 48 integer observations shared by the histogram, box-plot and
# QQ-plot cells that follow.
data = [
    62, 50, 53, 57, 41, 53, 55, 61, 59, 64, 50, 53, 64, 62, 50, 68,
    54, 55, 57, 50, 55, 50, 56, 55, 46, 55, 53, 54, 52, 47, 47, 55,
    57, 48, 63, 57, 57, 55, 53, 59, 53, 52, 50, 55, 60, 50, 56, 58,
]

In [5]:
# Density-normalised histogram of the sample, drawn as one quad per bin.
hist, edges = np.histogram(data, density=True)
p1 = figure()
p1.quad(
    top=hist,
    bottom=0,
    left=edges[:-1],
    right=edges[1:],
    alpha=0.4,
)

In [6]:
# box_plot takes parallel lists: one label per series of values.
cats = ["d"]
data_ = [data]
p2 = box_plot(cats, data_)

In [7]:
# Normal QQ-plot of the same sample, built by make_qq_plot.
p3 = make_qq_plot(data)

In [8]:
# Arrange the three figures in a two-column grid and render it.
# NOTE(review): no output_notebook()/output_file() call is visible in this
# view -- confirm show() renders where intended.
grid = gridplot([p1, p2, p3], ncols=2)
show(grid)