In [1]:
import pandas as pd
import numpy as np
from datetime import date, datetime as dt

from bokeh.io import curdoc, output_notebook 
from bokeh.plotting import show, figure
#from bokeh.document import Document

from bokeh.palettes import Magma9, Category20b, Category20c, Viridis256
from bokeh.models.annotations import Label, LabelSet

from bokeh.transform import cumsum, factor_cmap
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, Plot, \
    CategoricalColorMapper, Legend, Circle, Div, Column, Row, Panel, Tabs    
    #Grid, BoxSelectTool,   SaveTool, CustomJS, DatetimeAxis, LinearAxis, NumeralTickFormatter,
    
from bokeh.models.widgets.tables import (
    DataTable, TableColumn, IntEditor
)


### This is a base class for all plots

In [2]:
# Create an absolute color map first for genotypes
def create_palette(tdf, col, palette_type):
    """
    Build a Bokeh colour specification for the distinct values of one column.

    :param tdf: DataFrame that contains ``col``
    :param col: name of the column whose non-missing distinct values become
                the colour factors; they are ordered with ``.sort_values()``
                on the result of ``Series.unique()``, so the column is
                expected to be categorical
    :param palette_type: mapping from palette size to a list of colours
                         (e.g. ``Category20b``); the size used is the number
                         of factors clamped to the range 5..20
    :return: dict with the keys 'field' (the column name) and 'transform'
             (a ``CategoricalColorMapper`` over the sorted factors)
    """
    present_values = tdf[col].dropna()
    factors = list(present_values.unique().sort_values())

    # Clamp the palette size to the 5..20 window used for lookups.
    palette_size = min(max(len(factors), 5), 20)

    mapper = CategoricalColorMapper(palette=palette_type[palette_size],
                                    factors=factors)
    return {'field': col, 'transform': mapper}
 

class chart():
    """
    Base class for plots.

    Keeps the full DataFrame, the current filter selection and one colour
    mapping per genotype. ``filter_df`` applies the selection and stores the
    result in ``filtered_df``.
    """

    # Class-level defaults. The mutable ones are copied per instance in
    # __init__ so that instances never share (and overwrite) each other's state.

    # dataframe
    df = pd.DataFrame()
    filtered_df = pd.DataFrame()

    # to ensure the same color for subtypes
    color_maps = {}

    selected_genotype = 'Caliciviridae Norovirus GI'
    selected_countries = []
    selected_orf_type = "ORF1_sub"
    selected_orf_subtypes = []
    date_filter = []

    def __init__(self, df, orf_type="ORF1_sub"):
        """
        :param df: DataFrame with a categorical 'Genus-Genogroup' column
        :param orf_type: name of the column that is coloured and filtered by
                         subtype (stored as ``selected_orf_type``)
        """
        self.df = df
        self.filtered_df = pd.DataFrame()
        self.selected_orf_type = orf_type

        # Fresh containers per instance: mutating the class-level dict/lists
        # would leak colour maps and selections into every other chart.
        self.color_maps = dict(self.color_maps)
        self.selected_countries = list(self.selected_countries)
        self.selected_orf_subtypes = list(self.selected_orf_subtypes)
        self.date_filter = list(self.date_filter)

        # One colour map per genotype, keyed by the genotype's category code.
        genotype_column = self.df['Genus-Genogroup']
        genotype_categories = genotype_column.cat.categories
        for genotype in genotype_categories:
            genotype_code = genotype_categories.get_loc(genotype)
            tdf = self.df[genotype_column.cat.codes == genotype_code]
            if tdf.shape[0] > 0:
                palette_type = Category20b if genotype == 'Caliciviridae Norovirus GI' else Category20c
                self.color_maps[genotype_code] = create_palette(tdf, orf_type, palette_type)

    def filter_df(self):
        """
        Apply the current selection to ``self.df`` and store the result in
        ``self.filtered_df``. ``self.df`` itself is not modified.

        Attributes read:
          * ``selected_genotype``: rows of exactly this genotype are kept
          * ``date_filter``: ``[start_date, end_date]`` (inclusive); empty means no date filter
          * ``selected_orf_type``: categorical column used for the subtype filter
          * ``selected_orf_subtypes``: subtypes to keep; empty means all
          * ``selected_countries``: countries to keep; empty means all

        Side effect: ``selected_countries`` is replaced by the list of
        countries that remain after filtering.

        :raises KeyError: if a selected subtype or country is not one of the
                          column's categories
        """
        orf_col = self.selected_orf_type

        # Filter for Genotype
        tdf = self.df[self.df['Genus-Genogroup'] == self.selected_genotype]

        # Filter date
        if self.date_filter:
            start_date, end_date = self.date_filter[0], self.date_filter[1]
            tdf = tdf[(tdf['Sample Date'] >= start_date) & (tdf['Sample Date'] <= end_date)]

        # ORF subtype (get_loc validates the selection against the categories)
        if self.selected_orf_subtypes:
            orf_type_categories = tdf[orf_col].cat.categories
            orf_type_codes = [orf_type_categories.get_loc(c) for c in self.selected_orf_subtypes]
            tdf = tdf[tdf[orf_col].cat.codes.isin(orf_type_codes)]

        # assign() returns a new frame, so no column is ever set on a slice.
        tdf = tdf.assign(**{orf_col: tdf[orf_col].cat.remove_unused_categories()})

        # Filter for countries (get_loc validates the selection)
        if self.selected_countries:
            country_categories = tdf['Country'].cat.categories
            country_codes = [country_categories.get_loc(c) for c in self.selected_countries]
            tdf = tdf[tdf['Country'].cat.codes.isin(country_codes)]

        tdf = tdf.assign(Country=tdf['Country'].cat.remove_unused_categories())
        self.selected_countries = list(tdf['Country'].cat.categories)

        self.filtered_df = tdf
        

In [3]:
class orf_subtype_bar_chart(chart):
    """
    This plot shows a bar chart with one bar per ORF subtype of the
    selected genotype.
    """
    def create_plot(self):
        """
        Creates a bar chart of row counts per ORF subtype.

        Reads ``self.filtered_df``, ``self.selected_orf_type``,
        ``self.selected_genotype`` and the colour map stored for that genotype.

        :return: a bokeh ``Column`` wrapping the figure
        """
        orf_col = self.selected_orf_type

        # Both grouping keys are categorical. observed=True keeps only the
        # combinations that really occur; without it pandas emits a row for
        # every genotype category, which repeats each subtype in x_range.
        c_tdf = (
            self.filtered_df[['Genus-Genogroup', 'Sample Date', 'ORF1', orf_col]]
            .groupby(['Genus-Genogroup', orf_col], observed=True)
            .count()
            .dropna()
            .astype('int')
            .reset_index(0)
        )

        # create DataSource (the subtype index becomes a column of the source)
        source = ColumnDataSource(c_tdf)
        orf_subtypes = source.data[orf_col].tolist()

        # create figure, with hover tool
        p = figure(x_range=orf_subtypes, tools='hover',
                   tooltips="Count: @ORF1; @%s" % (orf_col),
                   title=self.selected_genotype)

        # create bars, coloured with the map built for the selected genotype
        genotype_code = self.df['Genus-Genogroup'].cat.categories.get_loc(self.selected_genotype)
        cm = self.color_maps[genotype_code]
        p.vbar(x=orf_col, top='ORF1', source=source, width=0.70, color=cm)

        return Column(p)

In [4]:
class factor_bar_chart(chart):
    """
    This plot shows a factored bar chart for the selected genotype:
    one bar per (ORF subtype, year) pair.
    """
    def create_plot(self):
        """
        Creates a bar chart of row counts per ORF subtype and two-digit year.

        Reads ``self.filtered_df``, ``self.selected_orf_type``,
        ``self.selected_genotype`` and the colour map stored for that genotype.
        ``self.filtered_df`` is not modified.

        :return: a bokeh ``Column`` wrapping the figure
        """
        orf_col = self.selected_orf_type

        # Work on a private copy: writing the year labels through .loc on a
        # slice of filtered_df triggers SettingWithCopyWarning and puts
        # strings into a datetime column.
        c_tdf = self.filtered_df[['Genus-Genogroup', 'Sample Date', 'ORF1', orf_col]].copy()

        # Year label such as '19 (apostrophe + two-digit year).
        c_tdf['Sample Date'] = c_tdf['Sample Date'].apply(lambda d: d.strftime("'%y"))

        # observed=False is spelled out to keep every subtype category for
        # every year, independent of the pandas default.
        c_tdf = c_tdf.groupby(['Sample Date', orf_col], observed=False).count().reset_index()

        # create DataSource
        source = ColumnDataSource(c_tdf)

        # nested factors: subtype on the outer level, year on the inner level
        factors = list(zip(c_tdf[orf_col], c_tdf['Sample Date']))
        source.add(factors, 'factors')

        # create figure, with hover tool
        p = figure(x_range=FactorRange(*factors),
                   plot_width=1200,
                   tools="hover",
                   tooltips=[("Count:"," @ORF1"), ("Year: "," @{Sample Date}")],
                   title=self.selected_genotype)

        # create bars, coloured with the map built for the selected genotype
        genotype_code = self.df['Genus-Genogroup'].cat.categories.get_loc(self.selected_genotype)
        cm = self.color_maps[genotype_code]
        p.vbar(x='factors', top='ORF1', width=0.9, alpha=0.5, source=source, color=cm)
        p.xaxis.axis_label = "Types"
        p.yaxis.axis_label = "Nr of outbreaks"

        return Column(p)
    
class factor_bar_rev_chart(chart):
    """
    This plot shows a factored bar chart for the selected genotype:
    one bar per (year, ORF subtype) pair, i.e. with the year as the
    outer factor level.
    """

    def create_plot(self):
        """
        Creates a bar chart of row counts per two-digit year and ORF subtype.

        Reads ``self.filtered_df``, ``self.selected_orf_type``,
        ``self.selected_genotype`` and the colour map stored for that genotype.
        ``self.filtered_df`` is not modified.

        :return: a bokeh ``Column`` wrapping the figure
        """
        orf_col = self.selected_orf_type

        # Work on a private copy: writing the year labels through .loc on a
        # slice of filtered_df triggers SettingWithCopyWarning and puts
        # strings into a datetime column.
        c_tdf = self.filtered_df[['Genus-Genogroup', 'ORF1', 'Sample Date', orf_col]].copy()

        # Year label such as '19 (apostrophe + two-digit year).
        c_tdf['Sample Date'] = c_tdf['Sample Date'].apply(lambda d: d.strftime("'%y"))

        # observed=False is spelled out to keep every subtype category for
        # every year, independent of the pandas default.
        c_tdf = c_tdf.groupby([orf_col, 'Sample Date'], observed=False).count().reset_index()

        # create DataSource
        source = ColumnDataSource(c_tdf)

        # nested factors: year on the outer level, subtype on the inner level
        factors = list(zip(c_tdf['Sample Date'], c_tdf[orf_col]))
        source.add(factors, 'factors')

        # create figure, with hover tool
        p = figure(x_range=FactorRange(*factors),
                   plot_width=1200,
                   tools="hover",
                   tooltips=[("Count:"," @ORF1"), ("Year: "," @{Sample Date}")],
                   title=self.selected_genotype)

        # create bars, coloured with the map built for the selected genotype
        genotype_code = self.df['Genus-Genogroup'].cat.categories.get_loc(self.selected_genotype)
        cm = self.color_maps[genotype_code]
        p.vbar(x='factors', top='ORF1', width=0.9, alpha=0.5, source=source, color=cm)
        p.xaxis.axis_label = "Types"
        p.yaxis.axis_label = "Nr of outbreaks"

        return Column(p)

In [5]:
class country_pie(chart):
    """
    This plot shows one pie chart per selected country plus one pie chart
    over all countries, each sliced by the selected ORF subtype column.
    """

    CIRCLE_RADIUS = 0.48

    def create_bar(self):
        """
        Lay the per-country pies out in a grid with three pies per row.

        Expects ``self.filtered_df`` to hold the counts indexed by
        (Country, subtype) as prepared by ``create_plot``. Countries from
        ``self.selected_countries`` without any counted row are skipped
        instead of raising ``KeyError``.

        :return: a bokeh ``Column`` containing the grid
        """
        num_cols = 3
        countries_with_data = set(self.filtered_df.index.get_level_values(0))
        countries = [c for c in self.selected_countries if c in countries_with_data]

        lay = Column()
        for ic, c in enumerate(countries):
            if ic % num_cols == 0:
                lay.children.append(Row())
            p = self.create_orf1_country_pie(country=c)
            lay.children[-1].children.append(p)

        return Column(Row(lay))

    def create_orf1_country_pie(self, country):
        """
        Creates a pie chart for one country.

        Expects ``self.filtered_df`` to hold the counts indexed by
        (Country, subtype) with the counts in column 'ORF1'.

        :param country: one selected country
        :return: the bokeh figure
        """
        orf_col = self.selected_orf_type

        # Copy the country's rows so the helper columns are not written
        # into a slice of self.filtered_df.
        c_tdf = self.filtered_df.loc[country].copy()
        country_total = c_tdf['ORF1'].sum()

        # add angles for the slices
        c_tdf['angle'] = c_tdf['ORF1'] / country_total * 2 * np.pi

        # add labels for the slices
        c_tdf['wedge_label_ORF1'] = c_tdf['ORF1'].astype('str').str.pad(18, side="left")

        # create DataSource (the subtype index becomes a column named orf_col)
        source = ColumnDataSource(c_tdf)

        # create figure, with hover tool; the tooltip follows the selected
        # subtype column instead of a hard-coded 'ORF1_sub'
        p = figure(plot_height=270, plot_width=300, title="%s" % country, toolbar_location=None,
                   tools="hover", tooltips="@{%s}: @ORF1" % orf_col, x_range=(-0.5, 1.0))

        # create slices
        cm = self.color_maps[self.df['Genus-Genogroup'].cat.categories.get_loc(self.selected_genotype)]
        p.wedge(x=0, y=0, radius=self.CIRCLE_RADIUS,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", fill_color=cm, source=source)

        # create slice labels
        labels = LabelSet(x=0, y=0, text='wedge_label_ORF1',
                          angle=cumsum('angle', include_zero=True), source=source,
                          render_mode='canvas')
        p.add_layout(labels)

        # create sum label
        label = Label(x=self.CIRCLE_RADIUS, y=self.CIRCLE_RADIUS, x_offset=0,
                      text="Sum: %d" % country_total)
        p.add_layout(label)

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

        return p

    def create_orf1_country_all_pie(self):
        """
        Creates the pie chart over all countries.

        Expects ``self.filtered_df`` to hold the counts indexed by
        (Country, subtype) with the counts in column 'ORF1'.

        :return: the bokeh figure
        """
        orf_col = self.selected_orf_type

        # The frame already holds counts per (country, subtype), so the total
        # per subtype is their sum; count() would only report in how many
        # country rows a subtype appears.
        total_df = self.filtered_df[['ORF1']].groupby(level=orf_col, observed=True).sum()
        total_df['angle'] = total_df['ORF1'] / total_df['ORF1'].sum() * 2 * np.pi
        source = ColumnDataSource(total_df)
        total = figure(plot_width=260, title="All Countries", toolbar_location=None,
                       tools="hover", tooltips="@{%s}: @ORF1" % orf_col, x_range=(-0.5, 1.0))

        cm = self.color_maps[self.df['Genus-Genogroup'].cat.categories.get_loc(self.selected_genotype)]
        total.wedge(x=0, y=0, radius=self.CIRCLE_RADIUS,
                    start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                    line_color="white", fill_color=cm, legend_field=orf_col, source=source)

        total.axis.axis_label = None
        total.axis.visible = False
        total.grid.grid_line_color = None

        return total

    def create_plot(self):
        """
        Creates the per-country pies and the all-countries pie.

        NOTE: ``self.filtered_df`` is replaced by the counts per
        (Country, subtype); call ``filter_df()`` again before the next
        ``create_plot()``.

        :return: a bokeh ``Column`` with the grid of country pies and the total pie
        """
        # observed=True keeps only the (country, subtype) pairs that occur,
        # so no zero-count slices or all-zero countries are produced.
        counts = (
            self.filtered_df
            .groupby(['Country', self.selected_orf_type], observed=True)
            .count()
            .dropna()
            .astype('int')
        )
        self.filtered_df = counts[['ORF1']]

        lay = self.create_bar()
        total = self.create_orf1_country_all_pie()

        return Column(Row(lay, total))

In [6]:
class outbreak_setting_pie_chart(chart):
    """
    This plot shows a pie chart for the selected genotype, sliced by the
    setting (environment) in which the outbreaks appeared.
    """

    CIRCLE_RADIUS = 0.48

    def create_plot(self):
        """
        Creates a pie chart of row counts per 'Setting of the outbreak'.

        Reads ``self.filtered_df`` and ``self.selected_genotype``; the slice
        size is the count in column 'ORF1'.

        :return: a bokeh ``Column`` wrapping the figure
        """
        # Count rows per (setting, genotype). observed=True keeps only the
        # combinations that occur, so no zero-sized slices are produced.
        c_tdf = (
            self.filtered_df
            .groupby(['Setting of the outbreak', 'Genus-Genogroup'], observed=True)
            .count()
            .reset_index(0)
        )

        # The list selector always yields a DataFrame, even when the genotype
        # has a single setting (a scalar label would return a Series then).
        c_tdf = c_tdf.loc[[self.selected_genotype]].copy()
        c_tdf['Setting of the outbreak'] = c_tdf['Setting of the outbreak'].astype('category')

        color_map = create_palette(c_tdf, 'Setting of the outbreak', Category20b)
        c_tdf['angle'] = c_tdf['ORF1'] / c_tdf['ORF1'].sum() * 2 * np.pi

        # create DataSource
        source = ColumnDataSource(c_tdf)

        # create figure, with hover tool
        p = figure(plot_height=520, plot_width=620,
                   title="Setting for the outbreak for all countries and all types",
                   toolbar_location=None, tools="hover",
                   tooltips="@{Setting of the outbreak}: @ORF1", x_range=(-0.5, 1.0))

        # wedge plot
        p.wedge(x=0, y=1, radius=self.CIRCLE_RADIUS,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", legend_field='Setting of the outbreak',
                fill_color=color_map, source=source)

        # NOTE(review): this Legend is added after the wedge call and gets no
        # items here; presumably the legend was meant to sit below the plot -
        # confirm the intended placement.
        legend = Legend(location=(0, -60))
        p.add_layout(legend, 'below')

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None

        return Column(p)


In [7]:
# output_notebook()

# df = pd.read_feather('noronet_all_clean.fr')
# df = df[df['ORF1_sub']!='Unknown']
# BC = factor_bar_chart(df)

# #BC = orf_subtype_bar_chart(df)
# #BC = country_pie(df)
# #BC = outbreak_setting_pie_chart(df)
# BC.selected_genotype =  'Caliciviridae Norovirus GI'

#     # ORF type radio button
# BC.selected_orf_type = 'ORF1_sub'

#     # ORF subtype checkbox
# orf_subtypes = []

# BC.filter_df()#groupby=['Genus-Genogroup'])
# pie_plot = BC.create_plot()
# show(pie_plot)

In [8]:
# D = Dash_bar('noronet_clean.fr', "categories.json")

# BC = factor_bar_chart(D.df)
# #BC = orf_subtype_bar_chart(D.df)
# BC.selected_genotype =  'Caliciviridae Norovirus GI'

#     # ORF type radio button
# BC.selected_orf_type = 'ORF1_sub'

#     # ORF subtype checkbox
# orf_subtypes = []

# BC.filter_df()#groupby=['Genus-Genogroup'])
# pie_plot = BC.create_genus_bar()


# show(pie_plot)

In [9]:
# D = Dash_bar('noronet_clean.fr', "categories.json")

# #BC = factor_bar_chart(D.df)
# #BC = orf_subtype_bar_chart(D.df)
# #BC = country_pie(D.df)
# BC = outbreak_setting_pie_chart(D.df)

# BC.selected_genotype =  'Caliciviridae Norovirus GI'

#     # ORF type radio button
# BC.selected_orf_type = 'ORF1_sub'

#     # ORF subtype checkbox
# orf_subtypes = []
# BC.filter_df()#groupby=['Genus-Genogroup'])
# pie_plot = BC.create_plot()


# show(pie_plot)


### Bar plot years stacked

### Bar plot with FactorRange

## Categorical Scatterplots

So far we have seen categorical data used together with various bar glyphs. But Bokeh can use categorical coordinates for almost any glyph. Let's create a scatter plot with categorical coordinates on one axis. The `commits` data set simply contains a series of datetimes of GitHub commits. Additional columns that express the day and the hour of day of each commit have already been added.