In [15]:
import pandas as pd

# main plotting package (must explicitly import submodules)
import bokeh.io
import bokeh.models
import bokeh.plotting
import bokeh.resources

# Enable viewing Bokeh plots in the notebook: after this call,
# `bokeh.io.show()` renders plots inline in the cell output.
bokeh.io.output_notebook()

In [16]:
# Load the data set ('*' entries in the CSV are read as missing values) and
# add a Boolean `insomnia` column that is True where the `sci` value is
# 16 or below.
df = (
    pd.read_csv('data/gfmt_sleep.csv', na_values='*')
    .assign(insomnia=lambda frame: frame['sci'] <= 16)
)

# Median percent correct within each insomnia group
df.groupby('insomnia')['percent correct'].median()

insomnia
False    85.0
True     75.0
Name: percent correct, dtype: float64

# Bokeh's grammar and our first plot

Constructing a plot has four main steps:
1. Create a figure on which to populate glyphs.
2. Define a data source that is the reference used to place the glyphs.
3. Choose the kind of glyph you would like.
4. Annotate the columns of the data source to determine how they are used to place the glyphs.

In [17]:
# Step 1: an empty figure (stored in `p`) with a 400-wide by 300-high frame.
# The axis labels name the data columns that will be plotted on each axis.
p = bokeh.plotting.figure(
    x_axis_label='confidence when correct',
    y_axis_label='confidence when incorrect',
    frame_height=300,
    frame_width=400,
)


In [18]:
# Step 2: wrap the data frame in a ColumnDataSource so glyphs can refer to
# its columns by name.
source = bokeh.models.ColumnDataSource(data=df)

In [19]:
# Steps 3 and 4: use circles as the glyph and map data-source columns to
# glyph coordinates:
#   x <- 'confidence when correct'
#   y <- 'confidence when incorrect'
p.circle(
    x='confidence when correct',
    y='confidence when incorrect',
    source=source,
)

In [20]:
# Render the figure built in the previous cells
bokeh.io.show(p)

Building a plot like this requires that the data frame be tidy.

# Coloring with other dimensions

Let's make the same plot, but with orange circles for insomniacs and blue circles for normal sleepers.


In [24]:
# Split the data frame by insomnia status and add one set of circle glyphs
# per group, so that each group gets its own legend entry.

# For convenience
x = 'confidence when correct'
y = 'confidence when incorrect'

# Two sources, one per group (a later cell reuses both names)
source_insomnia = df.loc[df['insomnia'], :]
source_normal = df.loc[~df['insomnia'], :]

# Make figure
p = bokeh.plotting.figure(
    frame_width=400,
    frame_height=300,
    x_axis_label=x,
    y_axis_label=y,
)

# Add glyphs, reusing the sources defined above instead of re-slicing `df`.
# Normal sleepers keep the default glyph color.
p.circle(
    source=source_normal,
    x=x,
    y=y,
    legend_label='normal sleepers',
)

# Insomniacs are drawn in orange
p.circle(
    source=source_insomnia,
    x=x,
    y=y,
    color='orange',
    legend_label='insomniacs',
)

bokeh.io.show(p)

In [25]:
# Legend behavior: 'hide' click policy on the legend entries, and the legend
# itself placed in the top-left corner of the plot.
p.legend.click_policy = 'hide'
p.legend.location = 'top_left'

# Re-render the figure with the updated legend settings
bokeh.io.show(p)

# Adding tooltips

We might want access to other information in our tidy data source when we hover over a glyph. Say we want to know the participant number, gender, and age. We can add tooltips when we instantiate the figure.

The syntax for tooltips is a list of 2-tuples, one tuple per tooltip. The first entry is the label and the second is the column from the data source, prefixed with an `@` symbol. A column name that contains spaces must be wrapped in braces, e.g. `@{participant number}`.


In [26]:
# Column names, used for both the glyph coordinates and the axis labels
x = 'confidence when correct'
y = 'confidence when incorrect'

# Hover tooltips as (label, '@column') pairs; braces wrap a column name
# that contains a space
hover_tooltips = [
    ('p-number', '@{participant number}'),
    ('gender', '@gender'),
    ('age', '@age'),
]

# Figure with the tooltips attached at construction time
p = bokeh.plotting.figure(
    x_axis_label=x,
    y_axis_label=y,
    frame_height=300,
    frame_width=400,
    tooltips=hover_tooltips,
)

# One set of circles per group; `source_normal` and `source_insomnia` are
# the per-group data frames defined in an earlier cell
p.circle(x=x, y=y, source=source_normal, legend_label='normal sleepers')
p.circle(
    x=x,
    y=y,
    source=source_insomnia,
    legend_label='insomniacs',
    color='orange',
)

# Legend placement and click behavior
p.legend.click_policy = 'hide'
p.legend.location = 'top_left'

bokeh.io.show(p)

# Saving Bokeh Plots

Bokeh plots can be saved in a variety of formats:
* PNG for presentations
* SVG for publications in the paper of the past
* HTML for the paper of the future or sharing with colleagues

* To save as PNG, click the disk icon.
* To save as SVG, change the output backend to 'svg', then click the disk icon.

In [28]:
# Switch the figure to the SVG output backend, then re-render it
p.output_backend = 'svg'

bokeh.io.show(p)

In [30]:
# Switch the figure back from 'svg' to the 'canvas' output backend
p.output_backend = 'canvas'

To save to HTML, use `bokeh.io.save()`.

In [31]:
# Write the figure to an HTML file, using the CDN resources setting.
# The trailing semicolon suppresses the cell's output.
bokeh.io.save(
    p,
    resources=bokeh.resources.CDN,
    title='Bokeh plot',
    filename='insomniac_confidence_correct.html',
);

Resulting HTML page has all the interactivity of the plot

In [35]:
# Shell escape: run pytest on the test file from within the notebook
!pytest seq_features_and_tests.py

platform darwin -- Python 3.10.11, pytest-7.4.0, pluggy-1.0.0
rootdir: /Users/sethkohno/git/bootcamp
plugins: hypothesis-6.29.3, anyio-3.7.0
collected 1 item

seq_features_and_tests.py .                                              [100%]



In [36]:
# Same pytest run with -v (verbose), which reports each test by name
!pytest -v seq_features_and_tests.py

platform darwin -- Python 3.10.11, pytest-7.4.0, pluggy-1.0.0 -- /Users/sethkohno/anaconda3/bin/python
cachedir: .pytest_cache
hypothesis profile 'default' -> database=DirectoryBasedExampleDatabase('/Users/sethkohno/git/bootcamp/.hypothesis/examples')
rootdir: /Users/sethkohno/git/bootcamp
plugins: hypothesis-6.29.3, anyio-3.7.0
collected 1 item

seq_features_and_tests.py::test_number_negatives PASSED                  [100%]



## Separating tests in functional units