#### Prep Data

In [1]:
# set wd
# The project root can be overridden with the ANLY503_PROJECT_ROOT environment
# variable so the notebook also runs on other machines; the original hard-coded
# path is kept as the default, so behavior is unchanged when the variable is unset.
import os
project_root = os.environ.get('ANLY503_PROJECT_ROOT',
                              '/Users/kgedney/Documents/georgetown/anly503/project/')
os.chdir(project_root)  # all 'data/...' paths below are relative to this directory

In [2]:
# prep
import pandas as pd
from pandas.api.types import CategoricalDtype

import bokeh

from bokeh.io import output_notebook
from bokeh.models.callbacks import CustomJS
from bokeh.models import ColumnDataSource, HoverTool, PrintfTickFormatter

from bokeh.layouts import widgetbox, column
from bokeh.models.widgets import RadioButtonGroup, Slider

from bokeh.plotting import figure, output_file, show

output_notebook()

In [3]:
# load data: read each CSV (paths are relative to the working directory set above)
gdp, log_gdp, gdp_growth = (
    pd.read_csv(csv_path)
    for csv_path in ('data/gdp.csv', 'data/log_gdp.csv', 'data/gdp_growth.csv')
)
df_electric, df_internet, df_cellphones = (
    pd.read_csv(csv_path)
    for csv_path in ('data/electric.csv', 'data/internet.csv', 'data/cellphones.csv')
)

In [4]:
# rename gdp growth columns: '1990'..'2017' become 'gdp_growth_1990'..'gdp_growth_2017'
years         = [str(year) for year in range(1990, 2018)]
renamed_years = ['gdp_growth_' + year for year in years]
zipped_dict   = {old_name: new_name for old_name, new_name in zip(years, renamed_years)}
gdp_growth    = gdp_growth.rename(columns=zipped_dict)

In [5]:
# add the renamed gdp growth columns (every year in renamed_years) to each indicator df
# 'country' is the merge key; guard the append so re-running this cell does not
# add it to renamed_years a second time (which would select the column twice)
if 'country' not in renamed_years:
    renamed_years.append('country')

drop       = ['Eritrea', 'Somalia', 'South Sudan'] # drop countries with missing data
gdp_growth = gdp_growth[~gdp_growth.country.isin(drop)]

# inner merge: only countries present in both frames are kept
# NOTE(review): the merges overwrite df_electric / df_internet / df_cellphones, so
# re-running this cell without reloading the CSVs merges the growth columns again
gdp_growth_cols = gdp_growth[renamed_years]

df_electric   = pd.merge(df_electric,   gdp_growth_cols, on='country', how='inner')
df_internet   = pd.merge(df_internet,   gdp_growth_cols, on='country', how='inner')
df_cellphones = pd.merge(df_cellphones, gdp_growth_cols, on='country', how='inner')

In [12]:
# replace NaNs for Bokeh (ref: https://stackoverflow.com/questions/38821132/bokeh-valueerror-out-of-range-float-values-are-not-json-compliant)
# NaNs are replaced with empty strings so the data can be serialized for the plot.
# NOTE(review): only df_electric is filled here; df_internet and df_cellphones are left as-is.
# NOTE(review): presumably this turns numeric columns that contained NaNs into object dtype -- confirm.
df_electric = df_electric.fillna('')

In [6]:
# # add panes for the other metrics
# radio_button_group = RadioButtonGroup(
#         labels=["Option 1", "Option 2", "Option 3"], active=0)

In [31]:
# helper function
def safe_cds(x):
    """Wrap ``x`` in a ColumnDataSource whose columns are plain Python lists.

    Parameters
    ----------
    x : object accepted by ``ColumnDataSource`` (called with a DataFrame in this notebook)

    Returns
    -------
    ColumnDataSource
        The new source, with every entry of ``.data`` replaced by ``list(...)`` of itself.
    """
    cds = ColumnDataSource(x)
    # snapshot the column names first, then rewrite each column in place as a list
    for column_name in list(cds.data):
        cds.data[column_name] = list(cds.data[column_name])
    return cds

def make_dataset(df, year_selected, gdp_year_selected=None):
    """Build the data source for the scatter plot with its display columns filled in.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a column named ``str(year_selected)`` and a column named
        ``'gdp_growth_' + str(gdp_year_selected)``.
    year_selected : int or str
        Year whose column is copied into ``x_axis_to_plot``.
    gdp_year_selected : int or str, optional
        Year whose ``gdp_growth_<year>`` column is copied into ``y_axis_to_plot``.
        Defaults to ``year_selected``. (Previously this name was read from an
        undefined variable, which raised NameError on a fresh kernel.)

    Returns
    -------
    The result of ``safe_cds`` applied to a copy of ``df`` with the two display
    columns added; the caller's ``df`` is left unmodified.

    Raises
    ------
    KeyError
        If either requested column is missing from ``df``.
    """
    if gdp_year_selected is None:
        gdp_year_selected = year_selected

    # assign() returns a new frame, so re-running the cell never alters the input
    plot_df = df.assign(
        x_axis_to_plot=df[str(year_selected)],                      # copy year_selected into special display column
        y_axis_to_plot=df['gdp_growth_' + str(gdp_year_selected)],  # same for the GDP growth year
    )
    return safe_cds(plot_df)

In [51]:
def make_widgets(source):
    """Create the two year sliders for the scatter plot and wire their callbacks.

    Parameters
    ----------
    source : ColumnDataSource
        Data source of the plot. Its ``data`` must hold one column per year
        (named by the year) and one ``gdp_growth_<year>`` column per GDP year,
        plus the ``x_axis_to_plot`` / ``y_axis_to_plot`` display columns.

    Returns
    -------
    tuple
        ``(year_select, gdp_select)`` -- the electricity-year and GDP-year sliders.
    """
    #- create the interactive widgets
    year_select = Slider(start=1990, end=2016, value=1990, step=1, title="Choose a Year for Electricity")
    gdp_select  = Slider(start=1990, end=2017, value=1990, step=1, title="Choose a Year for GDP")

    #- define the update callback as JavaScript that runs in the browser.
    #- CustomJS.from_py_func and the widgets' `callback` property were deprecated
    #- and later removed from Bokeh, so the callback is written in JS directly
    #- and attached with js_on_change.
    #- remember: every model used in the code must be passed through `args`
    update_code = """
        // copy the selected electricity year into the displayed x column
        var data = source.data;
        data['x_axis_to_plot'] = data[String(year_select.value)].slice();

        // copy the selected GDP growth year into the displayed y column
        data['y_axis_to_plot'] = data['gdp_growth_' + String(gdp_select.value)].slice();

        source.change.emit();  // refresh plot
    """
    update_year = CustomJS(
        args=dict(year_select=year_select, gdp_select=gdp_select, source=source),
        code=update_code,
    )

    #- set callbacks: both sliders share one callback that re-reads both values
    year_select.js_on_change('value', update_year)
    gdp_select.js_on_change('value', update_year)

    return year_select, gdp_select

In [56]:
def make_plot(source):
    """Render the GDP-growth vs. electricity-access scatter plot with its sliders.

    Builds the figure, draws one circle per row of ``source`` using the
    ``x_axis_to_plot`` / ``y_axis_to_plot`` columns, adds a hover tool, stacks the
    two sliders from ``make_widgets`` below the figure and displays the layout
    with ``show``.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide ``x_axis_to_plot`` and ``y_axis_to_plot``; the tooltips also
        reference ``country`` and ``income_level`` columns.

    Returns
    -------
    None
        The plot is displayed as a side effect.
    """
    # set figure
    p = figure(title='GDP Growth and Electricity Access',
               plot_width=750, plot_height=500,
               x_axis_label = 'Electricity Access (% of Pop)',
               y_axis_label ='GDP Growth',
               toolbar_location="above",
               tools = "pan, box_zoom, reset")

    # format axis: show both axes as whole-number percentages
    percent_format = '%0.0f %%'
    p.yaxis.formatter = PrintfTickFormatter(format=percent_format)
    p.xaxis.formatter = PrintfTickFormatter(format=percent_format)

    # add widgets
    year_select, gdp_select = make_widgets(source)

    # add glyphs: the display columns are the ones the slider callbacks rewrite
    p.circle(x      = 'x_axis_to_plot',
             y      = 'y_axis_to_plot',
             source = source,
             size   = 12,
             alpha = 0.5)

    # add hover (tooltip label typo "Electricty" corrected)
    TOOLTIPS = [
        ('Country', '@country'),
        ('Income Level', '@income_level'),
        ('GDP Growth Rate', '@y_axis_to_plot'),
        ('% Pop with Electricity Access', '@x_axis_to_plot')]

    p.add_tools(HoverTool(tooltips=TOOLTIPS, toggleable=False))

    # figure on top, sliders underneath at the same width as the figure
    layout = column(p, widgetbox(year_select, gdp_select, width=750))

    show(layout)

In [57]:
# run plot
# build the data source from df_electric with the 1990 column selected for display,
# then render the scatter plot and its sliders
source = make_dataset(df_electric, year_selected=1990)
make_plot(source)

#### Plot 2

In [None]:
# specific to Ethiopia?

# group by income level and plot various characteristics? could do tabs for levels ?
# https://bokeh.pydata.org/en/latest/docs/gallery/bar_pandas_groupby_nested.html

# ref: https://bokeh.pydata.org/en/latest/docs/user_guide/categorical.html

In [93]:
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.palettes import Spectral5, Spectral4
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

In [74]:
# load data: re-read the three indicator CSVs, replacing the frames used for the first plot
df_electric, df_internet, df_cellphones = map(
    pd.read_csv,
    ['data/electric.csv', 'data/internet.csv', 'data/cellphones.csv'],
)

In [None]:
# fix Swaziland
# use a single .loc assignment instead of chained indexing (df[col][mask] = ...),
# which can write to a temporary copy and leave df_electric unchanged
is_swaziland = df_electric.country == 'Swaziland'
df_electric.loc[is_swaziland, 'income_level'] = 'Lower middle income'

In [81]:
# add category info back to files
# income_level becomes an ordered categorical, from 'High income' down to 'Low income'
income_levels = ['High income', 'Upper middle income',
                 'Lower middle income', 'Low income']
cat_type = CategoricalDtype(categories=income_levels, ordered=True)

for df in (df_electric, df_internet, df_cellphones):
    df['income_level'] = df['income_level'].astype(cat_type)

In [199]:
# sort datasets by income_level
# income_level was cast to an ordered categorical above, so rows follow the category
# order ('High income' first, 'Low income' last) rather than alphabetical order
# NOTE(review): only df_electric is sorted here; df_internet and df_cellphones are not
df_electric = df_electric.sort_values('income_level')

In [200]:
def make_dataset_2(df, year_selected):
    """Build the data source for the bar chart with its display column filled in.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a column named ``str(year_selected)``.
    year_selected : int or str
        Year whose column is copied into ``right_to_plot``.

    Returns
    -------
    The result of ``safe_cds`` applied to a copy of ``df`` with ``right_to_plot``
    added; the caller's ``df`` is left unmodified.

    Raises
    ------
    KeyError
        If the requested year column is missing from ``df``.
    """
    # assign() returns a new frame, so the input DataFrame is not altered
    plot_df = df.assign(right_to_plot=df[str(year_selected)])
    return safe_cds(plot_df)

In [201]:
def make_widgets_2(source):
    """Create the year slider for the bar chart and wire its callback.

    Parameters
    ----------
    source : ColumnDataSource
        Data source of the chart. Its ``data`` must hold one column per year
        (named by the year) plus the ``right_to_plot`` display column.

    Returns
    -------
    Slider
        The year slider, with a JavaScript callback attached to its value.
    """
    #- create the interactive widgets
    year_select = Slider(start=1990, end=2016, value=1990, step=1, title="Choose a Year")

    #- define update callback as JavaScript that runs in the browser.
    #- CustomJS.from_py_func and the widget `callback` property were deprecated
    #- and later removed from Bokeh, so the callback is written in JS directly
    #- and attached with js_on_change.
    update_code = """
        // copy the selected year's column into the displayed bar length column
        var data = source.data;
        data['right_to_plot'] = data[String(year_select.value)].slice();

        source.change.emit();  // refresh plot
    """
    update_year = CustomJS(
        args=dict(year_select=year_select, source=source),
        code=update_code,
    )

    #- set callbacks
    year_select.js_on_change('value', update_year)

    return year_select

In [202]:
def make_plot_2(source):
    """Render the horizontal bar chart of electricity access with its year slider.

    Builds a figure whose y axis lists the values of ``source.data['country']``,
    draws one horizontal bar per row with length ``right_to_plot`` colored by
    ``income_level``, adds a hover tool, places the slider from ``make_widgets_2``
    above the figure and displays the layout with ``show``.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide ``country``, ``income_level`` and ``right_to_plot`` columns.

    Returns
    -------
    None
        The plot is displayed as a side effect.
    """
    # set figure: categorical y axis (one slot per country), x axis fixed to 0-100
    p2 = figure(title   = 'Electricity Access',
                y_range = source.data['country'],
                x_range = (0,100),
                plot_width=750, plot_height=500,
                x_axis_label = '% of Population with Access to Electricity',
                toolbar_location = "above",
                tools = "pan, box_zoom, reset"
               )

    # format axis
    p2.xaxis.formatter = PrintfTickFormatter(format='%0.0f %%')

    # add widgets
    year_select = make_widgets_2(source)

    # set colors: one Spectral4 color per income level, in the order listed
    cmap = factor_cmap('income_level', palette=Spectral4, factors=['High income', 'Upper middle income',
                                        'Lower middle income', 'Low income'])

    # add glyphs: 'right_to_plot' is the column the slider callback rewrites
    p2.hbar(y      = 'country',
            right  = 'right_to_plot',
            source = source,
            height = 1,
            fill_color = cmap,
            line_color='white')

    # add hover (tooltip label typo "Electricty" corrected)
    TOOLTIPS = [
        ('Country', '@country'),
        ('Income Level', '@income_level'),
        ('% Pop with Electricity Access', '@right_to_plot')]

    p2.add_tools(HoverTool(tooltips=TOOLTIPS, toggleable=False))

    # slider on top, figure underneath
    layout = column(widgetbox(year_select, width=750), p2)

    show(layout)

In [203]:
# add colors and groups?
# add legend
# add tabs for other indicators

In [204]:
# run plot
# build the data source from the sorted df_electric with the 1990 column selected
# for display, then render the bar chart and its slider
source = make_dataset_2(df_electric, year_selected=1990)
make_plot_2(source)