In [1]:
import numpy as np
import pandas as pd

import bokeh_catplot

import bokeh.plotting
import bokeh.io

# Configure Bokeh so that figures passed to bokeh.io.show() are displayed
# inline in the notebook's output cells.
bokeh.io.output_notebook()

In [2]:
# Load the data set. Text following a '#' character in the CSV file is
# treated as a comment and ignored by the parser.
df = pd.read_csv(
    'data/frog_tongue_adhesion.csv',
    comment='#',
)

# Show the first few rows as a quick check of how the columns were parsed.
df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975


# Box plot

In [3]:
# Box plot of the impact force, grouped by the 'ID' column and drawn with
# the categories on the vertical axis (horizontal boxes).
p = bokeh_catplot.box(
    horizontal=True,
    val='impact force (mN)',
    cats='ID',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

# Strip plot/jitter plot

In [4]:
# Strip plot of the same quantity, grouped by 'ID'. Jitter is switched on
# so that points with similar values do not sit on top of one another.
p = bokeh_catplot.strip(
    jitter=True,
    horizontal=True,
    val='impact force (mN)',
    cats='ID',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

A strip plot shows every data point.

In [5]:
# Strip plot grouped by two categorical columns ('ID', then 'trial number').
# Markers are colored using the 'trial number' column, and hovering over a
# point reports its adhesive force.
hover_fields = [('adh force', '@{adhesive force (mN)}')]

p = bokeh_catplot.strip(
    data=df,
    val='impact force (mN)',
    cats=['ID', 'trial number'],
    color_column='trial number',
    tooltips=hover_fields,
    frame_width=550,
)

# Render the figure.
bokeh.io.show(p)

## Overlaying jitter and boxplot

In [6]:
# First layer: jittered strip plot of impact force, grouped by 'ID'.
p = bokeh_catplot.strip(
    jitter=True,
    horizontal=True,
    val='impact force (mN)',
    cats='ID',
    data=df,
)

# Second layer: box plot drawn onto the existing figure by passing it as
# `p`. The boxes are unfilled and every box element is gray so that the
# underlying points stay visible.
p = bokeh_catplot.box(
    p=p,
    horizontal=True,
    val='impact force (mN)',
    cats='ID',
    data=df,
    box_kwargs={'fill_color': None, 'line_color': 'gray'},
    median_kwargs={'line_color': 'gray'},
    whisker_kwargs={'line_color': 'gray'},
)

# Render the combined figure.
bokeh.io.show(p)

# Histogram

In [7]:
# Histogram of the impact force. No `cats` argument is given here, so the
# measurements are not split by category; pass cats='ID' to group them by
# the 'ID' column instead.
p = bokeh_catplot.histogram(
    val='impact force (mN)',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

Beware binning bias: the apparent shape of a histogram depends on the choice of bin width and bin edges.

Also, because of binning, the histogram does not plot all of the data.

# ECDF

The CDF (cumulative distribution function) is the integral of the PDF (probability density function). CDF(x) = probability of a measurement being less than or equal to x.

The CDF contains all the information of the PDF (e.g., an inflection point of the CDF corresponds to a local extremum, such as a peak, of the PDF).

A histogram approximates the PDF; the ECDF (empirical cumulative distribution function) approximates the CDF. 

ECDF(x) = fraction of measurements <= x.

In [8]:
# ECDF of the impact force. No `cats` argument is given, so the
# measurements are not split by category.
p = bokeh_catplot.ecdf(
    val='impact force (mN)',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

The ECDF shows every data point, with no binning bias.

In [9]:
# ECDFs of the impact force grouped by the 'ID' column, requesting the
# 'staircase' drawing style.
p = bokeh_catplot.ecdf(
    style='staircase',
    val='impact force (mN)',
    cats='ID',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

In [10]:
# ECDF of the impact force with cats='ID' and kind='colored'.
# NOTE(review): presumably this draws one pooled ECDF whose markers are
# colored by 'ID' -- confirm against the bokeh_catplot documentation.
p = bokeh_catplot.ecdf(
    kind='colored',
    val='impact force (mN)',
    cats='ID',
    data=df,
)

# Render the figure.
bokeh.io.show(p)

In [11]:
# Record the computing environment for reproducibility: load the watermark
# IPython extension, then report the Python/IPython versions (-v) and the
# versions of the listed packages (-p).
%load_ext watermark
%watermark -v -p numpy,pandas,bokeh,bokeh_catplot,jupyterlab

CPython 3.7.7
IPython 7.13.0

numpy 1.18.1
pandas 0.24.2
bokeh 2.0.2
bokeh_catplot 0.1.7
jupyterlab 1.2.6
