In [1]:
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, Circle, HoverTool, BoxSelectTool,\
    SaveTool, CustomJS, DatetimeAxis, LinearAxis, NumeralTickFormatter, FactorRange, CategoricalColorMapper
from bokeh.models.widgets.tables import (
    DataTable, TableColumn, IntEditor
)
from bokeh.models.widgets import DataTable, TableColumn, StringFormatter,\
        NumberFormatter, StringEditor, IntEditor, NumberEditor, SelectEditor,\
        CheckboxButtonGroup, CheckboxGroup, MultiSelect, RadioButtonGroup,\
        Select, Slider, Panel, Tabs, TextInput, Paragraph, Div, Button, Dropdown,\
        DateRangeSlider


from bokeh.palettes import Magma9, Category20b, Category20c
from bokeh.transform import factor_cmap

from bokeh.models.glyphs import Circle
from bokeh.io import curdoc, output_notebook, show as showio, output_file
from bokeh.plotting import show, figure
from bokeh.document import Document
from bokeh.models.layouts import Column, Row
from bokeh.embed import file_html
from bokeh.resources import INLINE
from bokeh.util.browser import view

import copy, json
import pandas as pd
import numpy as np
from datetime import date, datetime as dt

In [2]:
# Configure Bokeh to display its output inline in the notebook's output cells.
output_notebook()

In [None]:
def create_palettes(data_df, col, palette_type):
    """
    Build a colour specification that maps every distinct value of ``col`` to a colour.

    :param data_df: DataFrame holding the column to colour by
    :param col: name of the column whose distinct non-null values become the factors
    :param palette_type: dict of ``palette size -> colour sequence`` (e.g. ``Category20b``)
    :return: ``{'field': col, 'transform': CategoricalColorMapper(...)}``, suitable
        for passing as a glyph ``color``
    :raises KeyError: if ``palette_type`` contains no palettes at all
    """
    # Series.sort_values orders categorical data by category order and plain
    # data by value, so this works for both kinds of column.
    uniques = data_df[col].dropna().drop_duplicates().sort_values().tolist()
    count = len(uniques)

    if count in palette_type:
        # An exact-size palette exists: use it untouched.
        palette = palette_type[count]
    else:
        sizes = sorted(palette_type)
        if not sizes:
            raise KeyError(count)
        big_enough = [size for size in sizes if size >= count]
        if big_enough:
            # Fewer factors than the smallest palette: trim the closest one.
            palette = list(palette_type[big_enough[0]])[:count]
        else:
            # More factors than the largest palette: repeat its colours.
            widest = list(palette_type[sizes[-1]])
            palette = [widest[i % len(widest)] for i in range(count)]

    return {
        'field': col,
        'transform': CategoricalColorMapper(palette=palette, factors=uniques),
    }

In [23]:
#def create_country_pie(df, f_orf_type, filter_rows):
def _filter_by_categories(frame, column, wanted):
    """Keep rows whose categorical ``column`` is in ``wanted`` (empty keeps all) and drop unused categories."""
    if wanted is not None and len(wanted) > 0:
        known = frame[column].cat.categories
        # get_loc raises KeyError for a value that is not a category, which
        # doubles as validation of the requested filter values.
        codes = [known.get_loc(value) for value in wanted]
        frame = frame[frame[column].cat.codes.isin(codes)]
    # assign() returns a new frame, so neither the caller's frame nor a
    # filtered slice is written to.
    return frame.assign(**{column: frame[column].cat.remove_unused_categories()})


def create_bar(df, countries=[], genotypes=['Caliciviridae Norovirus GI'], orf_type="ORF1_sub", orf_subtypes=[], date_filter=[]):
    """
    Build a grid of bar charts, one per genogroup, of sample counts by ORF subtype.

    Rows are narrowed by genogroup, sample date, ORF subtype and country (in
    that order), counted per ('Genus-Genogroup', ``orf_type``) pair, and the
    resulting charts are laid out two per row.

    :param df: DataFrame with categorical 'Genus-Genogroup', 'Country' and
        ``orf_type`` columns plus 'Sample Date', 'User', 'ORF1' and 'ORF2';
        it is not modified
    :param countries: 'Country' values to keep; empty keeps every country
    :param genotypes: 'Genus-Genogroup' values to keep; empty keeps all of them
    :param orf_type: column to group and colour by, e.g. "ORF1_sub"
    :param orf_subtypes: values of ``orf_type`` to keep; empty keeps all of them
    :param date_filter: ``[start, end]`` passed to ``Series.between`` on
        'Sample Date'; empty disables the date filter
    :return: a bokeh ``Column`` wrapping the grid of charts
    :raises KeyError: if a requested filter value is not a category of its column
    """
    # Filter for genotype
    tdf = _filter_by_categories(df, 'Genus-Genogroup', genotypes)
    genotypes = list(tdf['Genus-Genogroup'].cat.categories)

    # Create the colour maps before the date / subtype / country filters so
    # they are derived from the unfiltered subtype values.
    # Each map is built from the whole genotype-filtered frame, not from the
    # rows of the single genotype it is stored under.
    color_maps = {}
    for genotype in genotypes:
        palette_type = Category20b if genotype == 'Caliciviridae Norovirus GI' else Category20c
        color_maps[genotype] = create_palettes(tdf, orf_type, palette_type)

    # Filter date
    if date_filter:
        tdf = tdf[tdf['Sample Date'].between(date_filter[0], date_filter[1])]

    # Filter ORF subtype, then countries
    tdf = _filter_by_categories(tdf, orf_type, orf_subtypes)
    tdf = _filter_by_categories(tdf, 'Country', countries)

    # observed=False spells out the behaviour the grouping relied on
    # implicitly: unobserved category pairs are kept with a count of 0.
    counts = (
        tdf.groupby(['Genus-Genogroup', orf_type], observed=False)
        .count()
        .dropna()
        .astype('int')
    )
    counts = counts[['Sample Date', 'User', 'ORF1', 'ORF2']]

    # Lay the charts out num_cols per row.
    num_cols = 2
    rows = []
    for start in range(0, len(genotypes), num_cols):
        charts = [
            create_genus_bar(counts, genotype, orf_type, color_maps[genotype])
            for genotype in genotypes[start:start + num_cols]
        ]
        rows.append(Row(*charts))
    lay = Column(*rows)

    return Column(Row(lay))

In [24]:
def create_genus_bar(data_df, genotype, orf_type, color_map):
    """
    Draw a vertical-bar chart of per-subtype counts for a single genogroup.

    :param data_df: count table indexed so that ``data_df.loc[genotype]`` yields
        that genogroup's rows; must provide a 'User' column (bar heights) and an
        ``orf_type`` entry once loaded into a ColumnDataSource
    :param genotype: genogroup to plot; also used as the chart title
    :param orf_type: name of the subtype field plotted along the x axis
    :param color_map: colour specification applied to the bars
    :return: the figure holding the bars
    """
    # Rows belonging to the requested genogroup only
    genus_counts = data_df.loc[genotype].copy()
    bar_source = ColumnDataSource(genus_counts)

    # The x axis is categorical: one factor per subtype, in source order
    subtype_factors = bar_source.data[orf_type].tolist()
    hover_text = "Count: @User; @%s" % (orf_type)

    chart = figure(
        x_range=subtype_factors,
        tools='hover',
        tooltips=hover_text,
        title=genotype,
    )
    chart.vbar(
        x=orf_type,
        top='User',
        source=bar_source,
        width=0.70,
        color=color_map,
    )
    return chart

## def create_orf1_country_all_pie(data_df, color_map):
#     total_df = data_df.reset_index(level=0).reset_index(level=0).groupby('ORF1_sub').count()
#     total_df['angle'] = total_df['ORF1']/total_df['ORF1'].sum() * 2*np.pi
#     source = ColumnDataSource(total_df)
#     total = figure( plot_width=260, title="All Countries", toolbar_location=None,
#            tools="hover", tooltips="@ORF1_sub: @ORF1", x_range=(-0.5, 1.0))
#     total.wedge(x=0, y=0, radius=CIRCLE_RADIUS,
#         start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
#         line_color="white", fill_color=color_map, legend_field='ORF1_sub', source=source)

#     total.axis.axis_label=None;
#     total.axis.visible=False;
#     total.grid.grid_line_color = None
    
#     return total



In [25]:
# df = pd.read_feather('noronet_clean.fr')

# # categories
# dict_categories = {}

# with open("categories.json", 'r') as f:
#     dict_categories = json.loads(f.read())

# gi_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GI']
# gii_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GII']

### Simple bar chart for all data

In [6]:
# gi_tdf = gi_df[gi_df['ORF1_sub']!='Unknown'][['Sample Date', 'ORF1', 'ORF1_sub', 'ORF2', 'ORF2_sub']].groupby('ORF1_sub').count() #
# gii_tdf = gii_df[gii_df['ORF1_sub']!='Unknown'][['Sample Date', 'ORF1', 'ORF1_sub', 'ORF2', 'ORF2_sub']].groupby('ORF1_sub').count() #

# GI_source = ColumnDataSource(gi_tdf)
# GI_types = GI_source.data['ORF1_sub'].tolist()
# GI_color_map = factor_cmap(field_name='ORF1_sub', palette=Category20b[len(GI_types)], factors=GI_types)

# GII_source = ColumnDataSource(gii_tdf)
# GII_types = GII_source.data['ORF1_sub'].tolist()
# GII_color_map = factor_cmap(field_name='ORF1_sub', palette=Category20c[len(GII_types)], factors=GII_types)

# p1 = figure(x_range=GI_types, tools='hover', tooltips="Count: @ORF1; @ORF1_sub", title ='Caliciviridae Norovirus GI')
# p1.vbar(x='ORF1_sub', top='ORF1', source=GI_source, width=0.70, color=GI_color_map)

# p2 = figure(x_range=GII_types, tooltips="Count: @ORF1", title ='Caliciviridae Norovirus GII')
# p2.vbar(x='ORF1_sub', top='ORF1', source=GII_source, width=0.70, color=GII_color_map)

# layout = Column(Row(p1,p2))
# #p
# #p.xaxis.axis_label = 'Country'
# show(layout)

### Bar plot years stacked

### Bar plot with FactorRange

## Categorical Scatterplots

So far we have seen categorical data used together with various bar glyphs, but Bokeh can use categorical coordinates with almost any glyph. Let's create a scatter plot with categorical coordinates on one axis. The `commits` data set is simply a series of datetimes of GitHub commits. Additional columns expressing the day and hour of day for each commit have already been added.