# Pie Chart 

This notebook automates the process of making a pie chart in Bokeh, using the ESDB long format as the input data format. The original code comes from https://bokeh.pydata.org/en/latest/docs/gallery/pie_chart.html


In [1]:
import pandas as pd

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
import pandas as pd
import matplotlib
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models import ColumnDataSource, Div, HoverTool, LabelSet
from bokeh.models.glyphs import Ellipse

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)


## Generate practice dataset 

In [3]:


# Category labels: one entry per row of the practice dataset.
text = [
    'Health', 'Education', 'Gender',
    'Macro', 'Finance', 'PSD',
    'Conflict', 'Taxation', 'Democracy',
]

# Three example factors, each holding one value per category above.
f1 = np.asarray([100, 140, 350, 400, 203, 504, 605, 304, 405])
f2 = np.asarray([60, 530, 540, 330, 200, 406, 709, 403, 509])
f3 = np.asarray([700, 400, 400, 600, 320, 670, 302, 609, 304])

# Assemble the wide-format frame: the label column followed by the factors.
df = pd.DataFrame(dict(text=text, f1=f1, f2=f2, f3=f3))

# Preview the first rows.
df.head()

Unnamed: 0,text,f1,f2,f3
0,Health,100,60,700
1,Education,140,530,400
2,Gender,350,540,400
3,Macro,400,330,600
4,Finance,203,200,320


In [4]:
def long(df, cat_var, value_vars):
    '''Reshape a wide dataframe into the ESDB long format.

    df         -- wide dataframe containing cat_var and every column in value_vars
    cat_var    -- name of the identifier column (kept as-is)
    value_vars -- list of column names to stack into a single value column

    Returns a dataframe with the identifier column, a 'var' column holding the
    0-based position of the original value column within value_vars, and an
    'f' column holding the stacked values.
    '''
    # Keep only the needed columns, identifier first, so the positional
    # renaming below lines up with value_vars.
    wide = df[[cat_var] + value_vars]

    # pd.wide_to_long needs a shared stub on every column it stacks, so the
    # value columns are relabelled f0, f1, ... in order.
    n_values = len(wide.columns[1:])
    wide.columns = [cat_var] + ['f' + str(pos) for pos in range(n_values)]

    # Stack the f* columns; the numeric suffix ends up in the 'var' column.
    stacked = pd.wide_to_long(wide, stubnames='f', i=[cat_var], j='var')
    return stacked.reset_index()

# Stack the three factor columns of the practice data into long format.
df_plot = long(df, cat_var='text', value_vars=['f1', 'f2', 'f3'])

# Preview the first rows of the reshaped frame.
df_plot.head()

Unnamed: 0,text,var,f
0,Health,0,100
1,Education,0,140
2,Gender,0,350
3,Macro,0,400
4,Finance,0,203


In [5]:
# Rename by column name rather than by position: a positional assignment
# raises a length-mismatch error if this cell is re-run after 'year' has been
# added, and would silently mislabel columns if their order ever changed.
# Renaming by name is a no-op on columns that are already renamed.
df_plot = df_plot.rename(columns={
    'text': 'series_name',
    'var': 'country_id',
    'f': 'value_start',
})

# Every practice row gets the same placeholder year (stored as a string).
df_plot['year'] = '2017'

In [6]:
# Preview the first rows of df_plot.
df_plot.head()

Unnamed: 0,series_name,country_id,value_start,year
0,Health,0,100,2017
1,Education,0,140,2017
2,Gender,0,350,2017
3,Macro,0,400,2017
4,Finance,0,203,2017


## Generate Bokeh Visual - Pie Chart

In [75]:
## add new imports 
from collections import Counter
from math import pi
from bokeh.transform import cumsum



def gen_counter(data, key_var, value_var, agg_option='sum'):
    '''Aggregate value_var per key_var and return the result as a dictionary.

    data       -- dataframe containing at least the key_var and value_var columns
    key_var    -- column whose distinct values become the dictionary keys
    value_var  -- numeric column that is aggregated into the dictionary values
    agg_option -- how to combine rows that share a key: 'sum' adds the values,
                  'mean' averages them and rounds the result to one decimal

    Returns a dict mapping each distinct key to its aggregated value, with the
    keys in the sorted order produced by groupby.

    Raises ValueError if agg_option is neither 'sum' nor 'mean'.
    '''
    # Fail loudly on a bad option instead of printing and then crashing on an
    # undefined variable further down.
    if agg_option not in ('sum', 'mean'):
        raise ValueError(
            "Invalid agg_option value %r; expected 'sum' or 'mean'." % (agg_option,)
        )

    # Aggregate only the value column: other columns (e.g. string-typed ones)
    # are irrelevant here and can make a whole-frame mean() fail.
    grouped = data.groupby(key_var)[value_var]
    if agg_option == 'sum':
        totals = grouped.sum()
    else:
        totals = grouped.mean().round(1)

    # Pair each group key with its aggregated value.
    return dict(zip(totals.index.values, totals.values))


def gen_pie(data, cat_sel=None, obs_sel=None, category_var_name='series_name',
            obs_var_name='country_id', value_var_name='value_start',
            agg_option='sum', title_text='Pie Chart'):
    '''Generate a Bokeh pie chart from a selection of long-format data.

    data              -- long-format dataframe holding the category,
                         observation and value columns named below
    cat_sel           -- categories to include (one pie slice per category);
                         None keeps every category
    obs_sel           -- observations (e.g. countries or regions) to include;
                         None keeps every observation
    category_var_name -- column that defines the slices
    obs_var_name      -- column that identifies the observations
    value_var_name    -- column whose aggregated value sets each slice's size
    agg_option        -- 'sum' or 'mean'; how the selected observations are
                         combined per category (passed on to gen_counter)
    title_text        -- title shown above the chart

    Returns the Bokeh figure containing the pie chart.

    Raises ValueError if the selection leaves no rows to plot.
    '''
    # Function-scope import so this cell needs no extra top-level import.
    from itertools import cycle, islice

    ##################################
    ##### generate the underlying data
    ##################################

    # Apply each selection only when one was given.
    selected = data
    if cat_sel is not None:
        selected = selected[selected[category_var_name].isin(cat_sel)]
    if obs_sel is not None:
        selected = selected[selected[obs_var_name].isin(obs_sel)]

    # Only the category and value columns take part in the aggregation, so
    # unrelated columns cannot interfere with it.
    selected = selected[[category_var_name, value_var_name]]

    # Aggregated value per category, as a {category: value} dictionary.
    totals = gen_counter(selected, category_var_name, value_var_name, agg_option)
    if not totals:
        raise ValueError('No rows match the requested cat_sel / obs_sel selection.')

    # One row per slice.
    pie_data = pd.DataFrame({'category': list(totals.keys()),
                             'value': list(totals.values())})

    # Each slice's angle is its share of the total, scaled to a full circle.
    pie_data['angle'] = pie_data['value'] / sum(totals.values()) * 2 * pi

    # USAID color scheme, in the order the colors are handed out.
    palette = {'USAID Blue': '#002F6C', 'USAID Red': '#BA0C2F', 'Rich Black': '#212721',
               'Medium Blue': '#0067B9', 'Light Blue': '#A7C6ED', 'Dark Red': '#651D32',
               'Dark Gray': '#6C6463', 'Medium Gray': '#8C8985', 'Light Gray': '#CFCDC9'}

    # One color per slice actually present in the data (not per requested
    # category), so categories missing from the data cannot cause a length
    # mismatch. The palette repeats when there are more slices than colors.
    pie_data['color'] = list(islice(cycle(palette.values()), len(pie_data)))

    ####################################
    ###### Generate the plot
    ####################################

    # NOTE(review): plot_height= and legend= are the older Bokeh spellings
    # (newer releases use height= and legend_field=); kept as in the original
    # so the installed Bokeh version keeps working. Confirm before upgrading.
    p = figure(plot_height=350, title=title_text, toolbar_location=None,
               tools="hover", tooltips="@category: @value{0.0}")

    # Draw the wedges: each one starts where the running sum of the previous
    # angles ends.
    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", line_width=3,
            fill_color='color', fill_alpha=0.9, legend='category', source=pie_data)

    # A pie chart has no meaningful axes or grid, so hide them.
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    return p


In [29]:
# Categories to draw as pie slices (every category in the practice data).
category_selection = [
    'Conflict',
    'Democracy',
    'Education',
    'Finance',
    'Gender',
    'Health',
    'Macro',
    'PSD',
    'Taxation',
]

# Observation ids to aggregate over.
country_selection = list(range(3))

In [77]:
# Build the pie chart for the selected categories, summing over the selected
# observations.
plot = gen_pie(
    data=df_plot,
    cat_sel=category_selection,
    obs_sel=country_selection,
    agg_option='sum',
    title_text='Type of Funding: Countries 1,2,3',
)

# Render the figure.
show(plot)