In [87]:
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, Circle, HoverTool, BoxSelectTool,\
    SaveTool, CustomJS, DatetimeAxis, LinearAxis, NumeralTickFormatter, FactorRange, CategoricalColorMapper
from bokeh.models.widgets.tables import (
    DataTable, TableColumn, IntEditor
)
from bokeh.models.widgets import DataTable, TableColumn, StringFormatter,\
        NumberFormatter, StringEditor, IntEditor, NumberEditor, SelectEditor,\
        CheckboxButtonGroup, CheckboxGroup, MultiSelect, RadioButtonGroup,\
        Select, Slider, Panel, Tabs, TextInput, Paragraph, Div, Button, Dropdown,\
        DateRangeSlider

from bokeh.models.annotations import Label, LabelSet

from bokeh.palettes import Magma9, Category20b, Category20c
from bokeh.transform import factor_cmap

from bokeh.models.glyphs import Circle
from bokeh.io import curdoc, output_notebook, show as showio, output_file
from bokeh.plotting import show, figure
from bokeh.document import Document
from bokeh.models.layouts import Column, Row
from bokeh.embed import file_html
from bokeh.resources import INLINE
from bokeh.util.browser import view
from bokeh.transform import cumsum

import copy, json
import pandas as pd
import numpy as np
from datetime import date, datetime as dt

In [2]:
# Display Bokeh output inline in the notebook (needed by the show() call further down).
output_notebook()

In [70]:
# Create an absolute color map first for genotypes
def _pick_palette(palette_type, n):
    """Return ``n`` colors from a palette family, even for sizes it does not define.

    :param palette_type: dict mapping a palette size to a sequence of colors
    :param n: number of colors needed
    :return: sequence of exactly ``n`` colors
    """
    if n in palette_type:
        return palette_type[n]
    sizes = sorted(palette_type)
    # Too few factors (or a gap in the family): truncate the next bigger palette.
    bigger = [size for size in sizes if size >= n]
    if bigger:
        return palette_type[bigger[0]][:n]
    # More factors than the biggest palette offers: cycle through its colors.
    largest = palette_type[sizes[-1]]
    return [largest[i % len(largest)] for i in range(n)]


def create_palette(tdf, col, palette_type):
    """Build a color specification that gives every value of ``col`` a fixed color.

    :param tdf: DataFrame holding the column to color by
    :param col: name of the column whose distinct non-null values become the factors
    :param palette_type: palette family, i.e. a dict mapping a size to a sequence
        of colors (``Category20b`` / ``Category20c`` in this notebook)
    :return: dict with the keys ``field`` (the column name) and ``transform``
        (a ``CategoricalColorMapper``), usable as a glyph color
    """
    # Series-level operations keep a categorical column in category order (as the
    # previous Categorical.sort_values did) and also work for plain object columns.
    uniques = tdf[col].dropna().drop_duplicates().sort_values().tolist()
    return {'field': col,
            'transform': CategoricalColorMapper(palette=_pick_palette(palette_type, len(uniques)),
                                                factors=uniques)}
    

class chart():
    """Base class for the charts: stores the data and the current filter selection.

    Usage as in this notebook: construct with a DataFrame, adjust the
    ``selected_*`` / ``date_filter`` attributes, call :meth:`filter_df` and
    then the subclass' ``create_plot``.
    """

    # The attributes below are class-level defaults only.  ``__init__`` gives
    # every instance its own ``color_maps`` dict and its own copies of the list
    # defaults, so one chart can never leak state into another.

    # dataframe
    df = pd.DataFrame()
    filtered_df = pd.DataFrame()

    # to ensure the same color for subtypes
    color_maps = {}

    selected_genotype = 'Caliciviridae Norovirus GI'
    selected_countries = []
    selected_orf_type = "ORF1_sub"
    selected_orf_subtypes = []
    date_filter = []

    def __init__(self, df, orf_type="ORF1_sub"):
        """Store the data and build one color map per genotype.

        :param df: source DataFrame; ``Genus-Genogroup`` must be categorical
            because its ``.cat`` accessor is used
        :param orf_type: name of the ORF subtype column; the color maps are
            built for this column and are not rebuilt when
            ``selected_orf_type`` is changed afterwards
        """
        self.df = df
        self.filtered_df = pd.DataFrame()
        self.selected_orf_type = orf_type

        # Per-instance state (previously the class-level objects were mutated,
        # so all charts shared and overwrote the same color maps).
        self.color_maps = {}
        self.selected_countries = list(self.selected_countries)
        self.selected_orf_subtypes = list(self.selected_orf_subtypes)
        self.date_filter = list(self.date_filter)

        # This works only for one genotype at once
        genotype_col = self.df['Genus-Genogroup']
        for genotype in genotype_col.cat.categories:
            tdf = self.df[genotype_col == genotype]
            if tdf.empty:
                continue
            palette_type = Category20b if genotype == 'Caliciviridae Norovirus GI' else Category20c
            self.color_maps[genotype] = create_palette(tdf, orf_type, palette_type)

    def filter_df(self):
        """Apply the current selection to ``self.df`` and store it in ``self.filtered_df``.

        The method takes no arguments; it reads these attributes:

        * ``selected_genotype``: only rows of this ``Genus-Genogroup`` are kept
        * ``date_filter``: empty, or ``[start, end]`` compared inclusively
          against ``Sample Date``
        * ``selected_orf_type``: name of the (categorical) ORF subtype column
        * ``selected_orf_subtypes``: subtypes to keep, empty keeps all
        * ``selected_countries``: countries to keep, empty keeps all

        Names in the two selection lists that do not occur in the data simply
        match no rows.

        Side effects: ``self.df`` is left untouched; ``self.selected_countries``
        is replaced by the list of countries that remain after filtering, and
        unused categories are removed from the subtype column (before the
        country filter) and from ``Country``.
        """
        orf_col = self.selected_orf_type
        tdf = self.df

        # Filter for Genotype
        keep = tdf['Genus-Genogroup'] == self.selected_genotype

        # Filter date
        if self.date_filter:
            start, end = self.date_filter[0], self.date_filter[1]
            keep = keep & (tdf['Sample Date'] >= start) & (tdf['Sample Date'] <= end)

        # ORF subtype
        if self.selected_orf_subtypes:
            keep = keep & tdf[orf_col].isin(self.selected_orf_subtypes)

        # Explicit copy so the column assignments below never write into a
        # view of self.df.
        tdf = tdf[keep].copy()
        tdf[orf_col] = tdf[orf_col].cat.remove_unused_categories()

        # Filter for countries
        if self.selected_countries:
            tdf = tdf[tdf['Country'].isin(self.selected_countries)].copy()

        tdf['Country'] = tdf['Country'].cat.remove_unused_categories()
        self.selected_countries = list(tdf['Country'].cat.categories)

        self.filtered_df = tdf

In [71]:
class orf_subtype_bar_chart(chart):
    """Bar chart with one bar per ORF subtype of the selected genotype."""

    def create_plot(self):
        """Create the bar chart from ``self.filtered_df``.

        Reads ``self.selected_genotype`` (title, color map, row selection) and
        ``self.selected_orf_type`` (column whose values become the x factors).
        The bar height is the number of non-null ``ORF1`` entries per subtype.

        :return: the Bokeh figure
        """
        orf_col = self.selected_orf_type
        c_tdf = self.filtered_df[['Genus-Genogroup', 'Sample Date', 'ORF1', orf_col]]

        c_tdf = (c_tdf.groupby(['Genus-Genogroup', orf_col], observed=False)
                      .count().dropna().astype('int').reset_index(0))

        # 'Genus-Genogroup' still carries the categories of every genotype, so
        # the grouping above emits zero-count rows for the genotypes that were
        # filtered out.  Drop them: otherwise each subtype shows up once per
        # genotype and the x range receives duplicate factors.
        c_tdf = c_tdf[c_tdf['Genus-Genogroup'] == self.selected_genotype]

        # create DataSource
        source = ColumnDataSource(c_tdf)
        orf_subtypes = source.data[orf_col].tolist()

        # create figure, with hover tool
        p = figure(x_range=orf_subtypes, tools='hover',
                   tooltips="Count: @ORF1; @%s" % (orf_col),
                   title=self.selected_genotype)

        # create bars
        p.vbar(x=orf_col, top='ORF1', source=source, width=0.70,
               color=self.color_maps[self.selected_genotype])

        return p

In [72]:
class factor_bar_chart(chart):
    """Grouped bar chart: number of samples per ORF subtype, split by sample year."""

    def create_plot(self):
        """Create the grouped bar chart from ``self.filtered_df``.

        Every bar is identified by the factor ``(subtype, year label)`` where the
        year label is an apostrophe followed by the two-digit year of
        ``Sample Date`` (e.g. ``'13``).  The bar height is the number of non-null
        ``ORF1`` entries in that group.  Rows without a sample date get no label
        and therefore fall out of the grouping.

        :return: the Bokeh figure
        """
        orf_col = self.selected_orf_type
        c_tdf = self.filtered_df[['Genus-Genogroup', 'Sample Date', 'ORF1', orf_col]]

        # Build the year labels on a new frame (assign) instead of writing
        # strings into a slice of filtered_df through .loc.
        year_labels = pd.to_datetime(c_tdf['Sample Date']).dt.strftime("'%y")
        c_tdf = c_tdf.assign(**{'Sample Date': year_labels})

        c_tdf = c_tdf.groupby(['Sample Date', orf_col], observed=False).count().reset_index()

        # create DataSource
        source = ColumnDataSource(c_tdf)

        factors = list(zip(c_tdf[orf_col], c_tdf['Sample Date']))
        source.add(factors, 'factors')

        # create figure, with hover tool
        p = figure(x_range=FactorRange(*factors),
                   plot_width=1200,
                   tools="hover",
                   tooltips=[("Count:", " @ORF1"), ("Year: ", " @{Sample Date}")],
                   title=self.selected_genotype)

        # create bars
        p.vbar(x='factors', top='ORF1', width=0.9, alpha=0.5, source=source,
               color=self.color_maps[self.selected_genotype])
        p.xaxis.axis_label = "Types"
        p.yaxis.axis_label = "Nr of outbreaks"

        return p

In [88]:
# D = Dash_bar('noronet_clean.fr', "categories.json")

# BC = factor_bar_chart(D.df)
# #BC = orf_subtype_bar_chart(D.df)
# BC.selected_genotype =  'Caliciviridae Norovirus GI'

#     # ORF type radio button
# BC.selected_orf_type = 'ORF1_sub'

#     # ORF subtype checkbox
# orf_subtypes = []

# BC.filter_df()#groupby=['Genus-Genogroup'])
# pie_plot = BC.create_genus_bar()


# show(pie_plot)

In [131]:
class country_pie(chart):
    """Pie charts of the ORF subtype distribution: one per country plus a total."""

    CIRCLE_RADIUS = 0.48

    def _count_by_country(self):
        """Count the samples per (country, subtype) without touching ``filtered_df``.

        :return: DataFrame indexed by (``Country``, selected ORF column) with the
            single integer column ``ORF1`` = number of non-null ``ORF1`` entries
        """
        keys = ['Country', self.selected_orf_type]
        return self.filtered_df.groupby(keys, observed=False)[['ORF1']].count().astype('int')

    @staticmethod
    def _with_angles(counts):
        """Return a copy of ``counts`` with an ``angle`` column in radians.

        The angles add up to a full circle; when the total count is zero every
        angle is 0 instead of NaN.
        """
        out = counts.copy()
        total = out['ORF1'].sum()
        out['angle'] = (out['ORF1'] / total * 2 * np.pi) if total else 0.0
        return out

    def create_bar(self, counts=None, num_cols=3):
        """Lay out one pie per entry of ``self.selected_countries`` in a grid.

        :param counts: result of ``_count_by_country``; computed when omitted
        :param num_cols: number of pies per row
        :return: Bokeh layout containing the pies
        """
        if counts is None:
            counts = self._count_by_country()
        lay = Column()
        for ic, c in enumerate(self.selected_countries):
            if ic % num_cols == 0:
                lay.children.append(Row())
            p = self.create_orf1_country_pie(country=c, counts=counts)
            lay.children[-1].children.append(p)

        return Column(Row(lay))

    def create_orf1_country_pie(self, country, counts=None):
        """Create the pie chart for a single country.

        :param country: one selected country (a value of the ``Country`` index level)
        :param counts: result of ``_count_by_country``; computed when omitted
        :return: the Bokeh figure
        """
        if counts is None:
            counts = self._count_by_country()
        orf_col = self.selected_orf_type

        # Get data for that country and add the angles of the slices
        c_tdf = self._with_angles(counts.loc[country])

        # add labels for the slices
        c_tdf['wedge_label_ORF1'] = c_tdf['ORF1'].astype('str').str.pad(18, side="left")

        # create DataSource
        source = ColumnDataSource(c_tdf)

        # create figure, with hover tool
        p = figure(plot_height=270, plot_width=300, title="%s" % country, toolbar_location=None,
                   tools="hover", tooltips="@{%s}: @ORF1" % orf_col, x_range=(-0.5, 1.0))

        # create slices
        p.wedge(x=0, y=0, radius=self.CIRCLE_RADIUS,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", fill_color=self.color_maps[self.selected_genotype], source=source)

        # create slice labels
        labels = LabelSet(x=0, y=0, text='wedge_label_ORF1',
                          angle=cumsum('angle', include_zero=True), source=source,
                          render_mode='canvas')
        p.add_layout(labels)

        # create sum label
        label = Label(x=self.CIRCLE_RADIUS, y=self.CIRCLE_RADIUS, x_offset=0,
                      text="Sum: %d" % c_tdf['ORF1'].sum())
        p.add_layout(label)

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

        return p

    def create_orf1_country_all_pie(self, counts=None):
        """Create the pie chart over all countries together.

        :param counts: result of ``_count_by_country``; computed when omitted
        :return: the Bokeh figure
        """
        if counts is None:
            counts = self._count_by_country()
        orf_col = self.selected_orf_type

        # Sum the per-country counts.  (Counting the already aggregated rows
        # would give the number of countries per subtype, not the number of
        # samples.)
        total_df = counts.reset_index().groupby(orf_col, observed=False)[['ORF1']].sum()
        total_df = self._with_angles(total_df)

        source = ColumnDataSource(total_df)
        total = figure(plot_width=260, title="All Countries", toolbar_location=None,
                       tools="hover", tooltips="@{%s}: @ORF1" % orf_col, x_range=(-0.5, 1.0))
        total.wedge(x=0, y=0, radius=self.CIRCLE_RADIUS,
                    start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                    line_color="white", fill_color=self.color_maps[self.selected_genotype],
                    legend_field=orf_col, source=source)

        total.axis.axis_label = None
        total.axis.visible = False
        total.grid.grid_line_color = None

        return total

    def create_plot(self):
        """Create the complete layout: per-country pies next to the overall pie.

        ``self.filtered_df`` is left unchanged, so the method can be called
        repeatedly after a single ``filter_df()``.

        :return: Bokeh layout
        """
        counts = self._count_by_country()
        lay = self.create_bar(counts)
        total = self.create_orf1_country_all_pie(counts)

        return Column(Row(lay, total))

In [130]:
# NOTE(review): Dash_bar is not defined in the visible part of this notebook;
# it presumably comes from an earlier cell or module - confirm before Run All.
D = Dash_bar('noronet_clean.fr', "categories.json")

# Alternative charts: factor_bar_chart(D.df), orf_subtype_bar_chart(D.df)
BC = country_pie(D.df)
BC.selected_genotype = 'Caliciviridae Norovirus GI'

# ORF type radio button
BC.selected_orf_type = 'ORF1_sub'

# ORF subtype checkbox (an empty list keeps every subtype); the selection was
# previously assigned to a variable but never handed to the chart.
orf_subtypes = []
BC.selected_orf_subtypes = orf_subtypes

BC.filter_df()
pie_plot = BC.create_plot()

show(pie_plot)


In [58]:
class outbreak_settincg_bar_chart():
    """Bar chart built from the ``User`` column of the rows of one genotype.

    NOTE(review): the class defines none of the attributes it reads
    (``filtered_df``, ``selected_genotype``, ``selected_orf_type``,
    ``color_maps``); presumably they are expected to be set by the caller or
    inherited from ``chart`` - confirm.
    """

    def create_genus_bar(self):
        """Create the bar chart for ``self.selected_genotype``.

        The rows are looked up with ``self.filtered_df.loc[self.selected_genotype]``;
        the values of the ``self.selected_orf_type`` column become the x factors
        and the ``User`` column the bar heights.

        :return: the figure
        """
        # Work on a copy of the rows of the selected genotype
        genotype_rows = self.filtered_df.loc[self.selected_genotype].copy()
        source = ColumnDataSource(genotype_rows)

        subtype_col = self.selected_orf_type
        x_factors = source.data[subtype_col].tolist()

        # figure with hover tool
        hover_text = "Count: @User; @%s" % (subtype_col)
        genotype = self.selected_genotype
        plot = figure(x_range=x_factors, tools='hover', tooltips=hover_text, title=genotype)

        # bars
        plot.vbar(x=subtype_col, top='User', source=source, width=0.70,
                  color=self.color_maps[genotype])

        return plot

In [24]:


## def create_orf1_country_all_pie(data_df, color_map):
#     total_df = data_df.reset_index(level=0).reset_index(level=0).groupby('ORF1_sub').count()
#     total_df['angle'] = total_df['ORF1']/total_df['ORF1'].sum() * 2*np.pi
#     source = ColumnDataSource(total_df)
#     total = figure( plot_width=260, title="All Countries", toolbar_location=None,
#            tools="hover", tooltips="@ORF1_sub: @ORF1", x_range=(-0.5, 1.0))
#     total.wedge(x=0, y=0, radius=CIRCLE_RADIUS,
#         start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
#         line_color="white", fill_color=color_map, legend_field='ORF1_sub', source=source)

#     total.axis.axis_label=None;
#     total.axis.visible=False;
#     total.grid.grid_line_color = None
    
#     return total



In [25]:
# df = pd.read_feather('noronet_clean.fr')

# # categories
# dict_categories = {}

# with open("categories.json", 'r') as f:
#     dict_categories = json.loads(f.read())

# gi_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GI']
# gii_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GII']

### Simple bar chart for all data

In [6]:
# gi_tdf = gi_df[gi_df['ORF1_sub']!='Unknown'][['Sample Date', 'ORF1', 'ORF1_sub', 'ORF2', 'ORF2_sub']].groupby('ORF1_sub').count() #
# gii_tdf = gii_df[gii_df['ORF1_sub']!='Unknown'][['Sample Date', 'ORF1', 'ORF1_sub', 'ORF2', 'ORF2_sub']].groupby('ORF1_sub').count() #

# GI_source = ColumnDataSource(gi_tdf)
# GI_types = GI_source.data['ORF1_sub'].tolist()
# GI_color_map = factor_cmap(field_name='ORF1_sub', palette=Category20b[len(GI_types)], factors=GI_types)

# GII_source = ColumnDataSource(gii_tdf)
# GII_types = GII_source.data['ORF1_sub'].tolist()
# GII_color_map = factor_cmap(field_name='ORF1_sub', palette=Category20c[len(GII_types)], factors=GII_types)

# p1 = figure(x_range=GI_types, tools='hover', tooltips="Count: @ORF1; @ORF1_sub", title ='Caliciviridae Norovirus GI')
# p1.vbar(x='ORF1_sub', top='ORF1', source=GI_source, width=0.70, color=GI_color_map)

# p2 = figure(x_range=GII_types, tooltips="Count: @ORF1", title ='Caliciviridae Norovirus GII')
# p2.vbar(x='ORF1_sub', top='ORF1', source=GII_source, width=0.70, color=GII_color_map)

# layout = Column(Row(p1,p2))
# #p
# #p.xaxis.axis_label = 'Country'
# show(layout)

### Bar plot years stacked

### Bar plot with FactorRange

## Categorical Scatterplots

So far we have seen categorical data used together with various bar glyphs, but Bokeh can use categorical coordinates with almost any glyph. Let's create a scatter plot with categorical coordinates on one axis. The `commits` data set is simply a series of datetimes of GitHub commits. Additional columns expressing the day and the hour of day of each commit have already been added.