In [1]:
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, Circle, HoverTool, BoxSelectTool,\
    SaveTool, CustomJS, DatetimeAxis, LinearAxis, NumeralTickFormatter, FactorRange, CategoricalColorMapper,\
    Legend
from bokeh.models.widgets.tables import (
    DataTable, TableColumn, IntEditor
)
from bokeh.models.widgets import DataTable, TableColumn, StringFormatter,\
        NumberFormatter, StringEditor, IntEditor, NumberEditor, SelectEditor,\
        CheckboxButtonGroup, CheckboxGroup, MultiSelect, RadioButtonGroup,\
        Select, Slider, Panel, Tabs, TextInput, Paragraph, Div, Button, Dropdown,\
        DateRangeSlider

from bokeh.models.annotations import Label, LabelSet


from bokeh.palettes import Magma9, Category20b, Category20c
from bokeh.transform import factor_cmap

from bokeh.models.glyphs import Circle
from bokeh.io import curdoc, output_notebook, show as showio
from bokeh.plotting import show, figure
from bokeh.document import Document
from bokeh.models.layouts import Column, Row
from bokeh.layouts import gridplot
from bokeh.embed import file_html
from bokeh.resources import INLINE
from bokeh.util.browser import view
from bokeh.transform import cumsum

import copy, json
import pandas as pd
import numpy as np
from datetime import date, datetime as dt

In [2]:
#dates = dict_categories['dates']

Global values

In [3]:
# Radius passed to every pie wedge drawn in this notebook; it is also reused as
# the x/y position of the per-country "Sum" label.
CIRCLE_RADIUS = 0.48

### Create color palette globally for GI and GII subtypes

In [16]:
def create_palettes(data_df, col, palette_type):
    """
    Build a bokeh colour specification mapping every value of ``col`` to a colour.

    :param data_df: DataFrame that contains the column ``col``
    :param col: name of the column whose distinct non-null values become the factors
    :param palette_type: palette group indexed by palette size
                         (e.g. ``Category20b``), i.e. ``palette_type[n]`` is a
                         sequence of ``n`` colours
    :return: dict with the keys ``'field'`` and ``'transform'`` that can be passed
             as ``fill_color`` to a glyph
    """
    # Sort as a Series (not on the result of ``unique()``) so that this works for
    # categorical *and* plain object/numeric columns alike. Categorical columns
    # are still ordered by their category order.
    factors = list(data_df[col].dropna().drop_duplicates().sort_values())
    n_factors = len(factors)

    try:
        palette = palette_type[n_factors]
    except KeyError:
        # No palette of exactly this size (too few or too many factors).
        sizes = sorted(palette_type)
        big_enough = [size for size in sizes if size >= n_factors]
        if big_enough:
            # Take the first colours of the smallest palette that is large enough.
            palette = list(palette_type[big_enough[0]])[:n_factors]
        else:
            # More factors than colours available: cycle through the largest palette.
            largest = list(palette_type[sizes[-1]])
            palette = [largest[i % len(largest)] for i in range(n_factors)]

    return {
        'field': col,
        'transform': CategoricalColorMapper(palette=palette, factors=factors),
    }

### Select data with a slider

In [17]:
# min_date = df[dates[0]].min()
# max_date = df[dates[0]].max()

# date_range_slider = DateRangeSlider(title="Date Range: ", start=min_date, end=max_date, value=(min_date, max_date), step=1)
# show(date_range_slider)

# date_range_slider.value 

### Simple per-country pie charts for GI

#### Figure 5. GI polymerase genotypes per country (>10 submitted sequences)

In [130]:
#def create_country_pie(df, f_orf_type, filter_rows):
def create_country_pie(df, countries=[], genotype='Caliciviridae Norovirus GI', orf_type="ORF1_sub", orf_subtypes=[], date_filter=[]):
    """
    Build one pie chart per country plus a summary pie over all selected countries.

    The columns 'Genus-Genogroup', 'Country' and ``orf_type`` are accessed through
    the ``.cat`` accessor and therefore have to be categorical. The columns
    'Sample Date', 'ORF1' and 'ORF2' have to be present as well.

    :param df: input DataFrame (it is copied, the caller's frame is not modified)
    :param countries: countries to keep; empty means all countries
    :param genotype: value of 'Genus-Genogroup' to select
    :param orf_type: name of the subtype column to group by (e.g. "ORF1_sub")
    :param orf_subtypes: values of ``orf_type`` to keep; empty means all
    :param date_filter: ``[start_date, end_date]`` applied to 'Sample Date';
                        empty means no date filtering
    :return: bokeh ``Column`` holding a ``Row`` with the grid of country pies
             and the summary pie
    :raises KeyError: if ``genotype``, a country or a subtype is not one of the
                      categories of its column
    """
    # NOTE: the list defaults are only read, never mutated, so sharing them
    # between calls is harmless.
    tdf = df.copy()

    # Filter for genotype (get_loc raises KeyError for an unknown genotype)
    genotype_code = tdf['Genus-Genogroup'].cat.categories.get_loc(genotype)
    tdf = tdf[tdf['Genus-Genogroup'].cat.codes == genotype_code]

    # Create the colour map for this genotype first, before any further
    # filtering, so it is built from all subtypes of the genotype
    palette_type = Category20b if genotype == 'Caliciviridae Norovirus GI' else Category20c
    color_map = create_palettes(tdf, orf_type, palette_type)

    # Filter date
    if date_filter:
        tdf = tdf[tdf['Sample Date'].between(date_filter[0], date_filter[1])]

    # Filter ORF subtypes: compare the category codes of the rows with the codes
    # of the requested subtypes (get_loc validates every requested subtype)
    if orf_subtypes:
        os_categories = tdf[orf_type].cat.categories
        os_codes = [os_categories.get_loc(c) for c in orf_subtypes]
        tdf = tdf[tdf[orf_type].cat.codes.isin(os_codes)]

    # Work on an explicit copy so the column assignments below do not write
    # into a slice of the original frame
    tdf = tdf.copy()
    tdf[orf_type] = tdf[orf_type].cat.remove_unused_categories()

    # Filter for countries (get_loc validates every requested country)
    if countries:
        country_categories = tdf['Country'].cat.categories
        country_codes = [country_categories.get_loc(c) for c in countries]
        tdf = tdf[tdf['Country'].cat.codes.isin(country_codes)].copy()

    tdf['Country'] = tdf['Country'].cat.remove_unused_categories()
    countries = list(tdf['Country'].cat.categories)

    # Count the non-null entries per (country, subtype)
    counts = tdf.groupby(['Country', orf_type]).count().dropna().astype('int')
    counts = counts[['Sample Date', 'ORF1', 'ORF2']]

    # Arrange the country pies in a grid with num_cols charts per row
    num_cols = 3
    lay = Column()
    for ic, c in enumerate(countries):
        if ic % num_cols == 0:
            lay.children.append(Row())
        p = create_orf1_country_pie(counts, c, color_map)
        lay.children[-1].children.append(p)

    total = create_orf1_country_all_pie(counts, color_map)

    return Column(Row(lay, total))

In [131]:
def create_orf1_country_pie(data_df, country, color_map):
    """
    Draw the pie chart of the 'ORF1' counts for a single country.

    :param data_df: count table whose first index level is the country
    :param country: the country whose rows are selected with ``.loc``
    :param color_map: colour specification used as ``fill_color`` of the wedges
    :return: the bokeh figure
    """
    # Rows of the requested country only
    country_counts = data_df.loc[country].copy()
    orf1_total = country_counts['ORF1'].sum()

    # Slice angle: share of each row in the full circle
    country_counts['angle'] = country_counts['ORF1'] / orf1_total * 2 * np.pi

    # Slice label: the count as text, left-padded to 18 characters
    count_text = country_counts['ORF1'].astype('str')
    country_counts['wedge_label_ORF1'] = count_text.str.pad(18, side="left")

    source = ColumnDataSource(country_counts)

    # Figure with a hover tool only and no toolbar
    p = figure(
        plot_height=270,
        plot_width=300,
        title="%s" % country,
        toolbar_location=None,
        tools="hover",
        tooltips="@ORF1_sub: @ORF1",
        x_range=(-0.5, 1.0),
    )

    # Slices
    p.wedge(
        x=0,
        y=0,
        radius=CIRCLE_RADIUS,
        start_angle=cumsum('angle', include_zero=True),
        end_angle=cumsum('angle'),
        line_color="white",
        fill_color=color_map,
        source=source,
    )

    # Slice labels, rotated to the start angle of their slice
    slice_labels = LabelSet(
        x=0,
        y=0,
        text='wedge_label_ORF1',
        angle=cumsum('angle', include_zero=True),
        source=source,
        render_mode='canvas',
    )
    p.add_layout(slice_labels)

    # Label showing the total count
    sum_label = Label(
        x=CIRCLE_RADIUS,
        y=CIRCLE_RADIUS,
        x_offset=0,
        text="Sum: %d" % orf1_total,
    )
    p.add_layout(sum_label)

    # Hide axes and grid
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    return p

def create_orf1_country_all_pie(data_df, color_map):
    """
    Draw the summary pie chart ("All Countries") of the 'ORF1' counts.

    :param data_df: count table indexed by (country, subtype), as built by
                    ``create_country_pie``
    :param color_map: colour specification used as ``fill_color`` of the wedges
    :return: the bokeh figure
    """
    # data_df already holds counts per (country, subtype), so the totals are the
    # SUM over the countries. Counting again would only yield the number of
    # country rows per subtype. Grouping on the last index level also avoids
    # hard-coding the subtype column name.
    total_df = data_df.groupby(level=-1).sum()

    # Column name under which the subtype ends up in the ColumnDataSource
    subtype_field = total_df.index.name or 'index'

    total_df['angle'] = total_df['ORF1'] / total_df['ORF1'].sum() * 2 * np.pi
    source = ColumnDataSource(total_df)

    total = figure(plot_width=260, title="All Countries", toolbar_location=None,
                   tools="hover", tooltips="@{%s}: @ORF1" % subtype_field,
                   x_range=(-0.5, 1.0))
    total.wedge(x=0, y=0, radius=CIRCLE_RADIUS,
                start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                line_color="white", fill_color=color_map, legend_field=subtype_field,
                source=source)

    # Hide axes and grid
    total.axis.axis_label = None
    total.axis.visible = False
    total.grid.grid_line_color = None

    return total



In [132]:
# df = pd.read_feather('noronet_clean.fr')
# df['Genus-Genogroup'] = df['Genus-Genogroup'].astype('category')
# df['Country'] = df['Country'].astype('category')
# df['ORF1_sub'] = df['ORF1_sub'].astype('category')
# # g_cat = df['Genus-Genogroup'].cat.categories
# # genotype = 'Caliciviridae Norovirus GI'
# # df[df['Genus-Genogroup'].cat.codes==g_cat.get_loc(genotype)]

# rr = create_country_pie(df=df)




['AUSTRIA', 'BELGIUM', 'FINLAND', 'FRANCE', 'GERMANY', 'HUNGARY', 'ITALY', 'NETHERLANDS', 'NEW ZEALAND', 'RUSSIAN FEDERATION', 'SOUTH AFRICA', 'SWEDEN']


In [None]:
# output_notebook()
# # dataframe
# #df = pd.read_json('noronet_clean.json')
# df = pd.read_feather('noronet_clean.fr')

# # categories
# dict_categories = {}

# with open("categories.json", 'r') as f:
#     dict_categories = json.loads(f.read())

# # del unknown rows
# df = df[df['ORF1_sub']!='Unknown']

# ### Split DataFrame to GI and GII

# gi_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GI']
# gii_df = df[df['Genus-Genogroup']=='Caliciviridae Norovirus GII']

# cc = ['AUSTRIA', 'BELGIUM']
# lay = create_country_pie(gi_df, [], genotype="gii", orf_type="ORF1_sub")

# #show(create_country_pie(gi_df, g_type="gii"))
# show(lay)

### Hierarchic pie chart
https://stackoverflow.com/questions/33019879/hierarchic-pie-donut-chart-from-pandas-dataframe-using-bokeh-or-matplotlib

<img src="http://i.stack.imgur.com/YKbzU.png" width="300" />

### Figure 8. Outbreak settings for GI, GII.2, GII.4, GII.6 and GII.17.

## How to create it easily
* Choose an ORF1_sub type

In [None]:
def create_outbreaksetting_pie(data_df):
    """
    Draw one pie chart of the outbreak settings over all rows of ``data_df``.

    :param data_df: DataFrame with the columns 'Setting of the outbreak' and 'ORF1'
    :return: the bokeh figure
    """
    setting_counts = data_df.groupby(['Setting of the outbreak']).count()

    # Build the palette from the frame that was passed in, not from a
    # notebook-global ``df``, which may be undefined or hold different data.
    color_map = create_palettes(data_df, 'Setting of the outbreak', Category20b)

    setting_counts['angle'] = setting_counts['ORF1'] / setting_counts['ORF1'].sum() * 2 * np.pi
    source = ColumnDataSource(setting_counts)

    # figure for the plot
    p = figure(plot_height=520, plot_width=620, title="Setting for the outbreak for all countries and all types", toolbar_location=None,
               tools="hover", tooltips="@{Setting of the outbreak}: @ORF1", x_range=(-0.5, 1.0))

    # wedge plot
    p.wedge(x=0, y=1, radius=CIRCLE_RADIUS,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", legend_field='Setting of the outbreak', fill_color=color_map, source=source)

    # NOTE(review): this Legend is created without items while the wedge above
    # defines its own legend through ``legend_field`` - confirm whether the
    # intent was to place that legend below the plot.
    legend = Legend(location=(0, -60))
    p.add_layout(legend, 'below')

    # Hide axes and grid
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    p.legend.click_policy = "hide"
    return p

In [None]:
#p = create_outbreaksetting_pie(df)
#show(p)

### Multiple charts
* Use a MultiChoice or MultiSelect widget to pick the types
* Generate one chart for each selected type

In [None]:
def create_outbreaksetting_g_type_pie(data_df, g_type, g_sub_type, color_map):
    """
    Draw the outbreak-setting pie chart for one genogroup / ORF1 subtype pair.

    :param data_df: DataFrame with the columns 'Genus-Genogroup', 'ORF1_sub',
                    'Setting of the outbreak' and 'ORF1'
    :param g_type: value of 'Genus-Genogroup' to select
    :param g_sub_type: value of 'ORF1_sub' to select
    :param color_map: colour specification used as ``fill_color`` of the wedges
    :return: the bokeh figure
    """
    # Rows that match both the genogroup and the subtype
    is_g_type = data_df['Genus-Genogroup'] == g_type
    is_sub_type = data_df['ORF1_sub'] == g_sub_type
    selected = data_df[is_g_type & is_sub_type]

    # Count per outbreak setting and turn the counts into slice angles
    setting_counts = selected.groupby(['Setting of the outbreak']).count()
    orf1_total = setting_counts['ORF1'].sum()
    setting_counts['angle'] = setting_counts['ORF1'] / orf1_total * 2 * np.pi

    source = ColumnDataSource(setting_counts)

    p = figure(
        plot_width=220,
        plot_height=220,
        title="{0} {1}".format(g_type, g_sub_type),
        toolbar_location=None,
        tools="hover",
        tooltips="@{Setting of the outbreak}: @ORF1",
        x_range=(-0.5, 1.0),
    )
    p.wedge(
        x=0,
        y=0,
        radius=CIRCLE_RADIUS,
        start_angle=cumsum('angle', include_zero=True),
        end_angle=cumsum('angle'),
        line_color="white",
        fill_color=color_map,
        source=source,
    )

    # Hide axes and grid
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    return p

In [None]:
def create_outbreaksetting_g_types_pie(data_df, choices):
    """
    Lay out one outbreak-setting pie chart per entry of ``choices`` in a grid.

    :param data_df: DataFrame handed on to ``create_outbreaksetting_g_type_pie``
    :param choices: sequence of (genogroup, ORF1 subtype) pairs
    :return: bokeh ``Row`` wrapping the ``Column`` of chart rows
    """
    charts_per_row = 4

    # One colour map shared by all charts
    color_map = create_palettes(data_df, 'Setting of the outbreak', Category20b)

    grid = Column()
    current_row = None
    for position, choice in enumerate(choices):
        # Open a new row whenever the previous one is full
        if position % charts_per_row == 0:
            current_row = Row()
            grid.children.append(current_row)

        chart = create_outbreaksetting_g_type_pie(data_df, choice[0], choice[1], color_map)
        current_row.children.append(chart)

    return Row(grid)

# chosen = [('Caliciviridae Norovirus GI','P3'), ('Caliciviridae Norovirus GII','P4'), ('Caliciviridae Norovirus GI','P3'), ('Caliciviridae Norovirus GII','P17')]
# ll = create_outbreaksetting_g_types_pie(df, chosen)
# show(ll)