Daniel Rocha Ruiz, MSc in Data Science and Business Analytics

# Summary

## Intro

In this tutorial ("TDS - Choropleth map") we will analyse the share of adults who are obese between the years 1975 and 2016.

## Sources
- Tutorial:
    - https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0
- Geoshapes:
    - https://www.naturalearthdata.com/downloads/110m-cultural-vectors/
- Obesity data:
    - https://ourworldindata.org/obesity


# Set-up

## Loading packages
The package geopandas can be a bit tricky to install, and it requires Microsoft C++ Build Tools to be installed on your machine.
- https://visualstudio.microsoft.com/visual-cpp-build-tools/

In [1]:
# general
import json
import pandas as pd

# geospatial
import geopandas as gpd

# simple map
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

# + interactive map
from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column

ModuleNotFoundError: No module named 'geopandas'

## Loading geo shapes

In [None]:
# ------------------------------------------------
# load shapes data
# keep only the three columns used below and give them shorter names
gdf = gpd.read_file('data/ne_110m_admin_0_countries.shp')[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['country', 'country_code', 'geometry']

# drop Antarctica
# Filter by name instead of by row position (it was row 159), so that the
# right row is removed even if the shapefile's row order changes.
gdf = gdf[gdf['country'] != 'Antarctica']

# print columns
print(gdf.columns)

# ------------------------------------------------
# load obesity data
# skiprows = 1 discards the file's own header line; `names` supplies ours
df = pd.read_csv('data/share-of-adults-defined-as-obese.csv',
                 names = ['entity', 'code', 'year', 'per_cent_obesity'],
                 skiprows = 1)

# identifying missing values
# DataFrame.info() prints its report itself and returns None, so it is
# called directly rather than wrapped in print() (which would add "None").
df.info()
# rows without a country code cannot be matched to a shape in the merge
print(df[df['code'].isnull()])

# Map representation
## Simple (no data = blank)

In [None]:
# get data for one year
df_2016 = df[df['year'] == 2016]

# merge: a left join keeps every shape, even those without obesity data
merged = gdf.merge(df_2016,
                   left_on = 'country_code',
                   right_on = 'code',
                   how = 'left')

# convert data to json
# Assign the filled column back explicitly: calling fillna(inplace = True)
# on `merged.per_cent_obesity` acts on an intermediate object and, under
# pandas copy-on-write, leaves `merged` itself unchanged.
merged['per_cent_obesity'] = merged['per_cent_obesity'].fillna('No data')
merged_json = json.loads(merged.to_json())
json_data = json.dumps(merged_json)
geosource = GeoJSONDataSource(geojson = json_data)

# define a sequential multi-hue color palette
palette = brewer['YlGnBu'][8]
# reverse color order: blue = highest = most obese
palette = palette[::-1]

# map numbers to color linearly; no Data -> 'grey'
color_mapper = LinearColorMapper(palette = palette,
                                 low = 0,
                                 high = 40,
                                 nan_color = '#d9d9d9')

# define custom tick labels for color bar
# the last label reads '>40%' because 40 is the upper end of the mapper
tick_labels = {'0': '0%',
               '5': '5%',
               '10':'10%',
               '15':'15%',
               '20':'20%',
               '25':'25%',
               '30':'30%',
               '35':'35%',
               '40': '>40%'}

# create color bar
color_bar = ColorBar(color_mapper = color_mapper,
                     label_standoff = 8,
                     width = 500,
                     height = 20,
                     border_line_color = None,
                     location = (0,0),
                     orientation = 'horizontal',
                     major_label_overrides = tick_labels)

# create figure object (no toolbar, no grid lines)
p = figure(title = 'Share of adults who are obese, 2016',
           plot_height = 600 ,
           plot_width = 950,
           toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# add patch renderer: one polygon per shape, filled according to the
# 'per_cent_obesity' field passed through the color mapper
p.patches('xs',
          'ys',
          source = geosource,
          fill_color = {'field' :'per_cent_obesity', 'transform' : color_mapper},
          line_color = 'black',
          line_width = 0.25,
          fill_alpha = 1)
p.add_layout(color_bar, 'below')

# display figure inline in Jupyter Notebook
output_notebook()

# show
show(p)

## Map with interactivity
- The interactivity will not work inside the Jupyter Notebook!
- Open the Anaconda Prompt and type:
- (1) cd [myfolder]
- (2) bokeh serve --show [filename].ipynb

In [None]:
# add interactivity
def json_data(df, selectedYear):
    """Return the map data for one year as a GeoJSON string.

    Parameters
    ----------
    df : DataFrame
        Obesity data with (at least) the columns 'year', 'code' and
        'per_cent_obesity'.
    selectedYear : int
        Year whose rows are joined onto the country shapes.

    Returns
    -------
    str
        GeoJSON text of the module-level ``gdf`` left-joined with the
        selected year's rows; shapes without a match carry 'No data' in
        'per_cent_obesity'.
    """
    df_yr = df[df['year'] == selectedYear]
    # left join: keep every shape, even those without data for this year
    merged = gdf.merge(df_yr, left_on = 'country_code', right_on = 'code', how = 'left')
    # Assign the result back explicitly: fillna(inplace = True) on
    # `merged.per_cent_obesity` acts on an intermediate object and, under
    # pandas copy-on-write, leaves `merged` itself unchanged.
    merged['per_cent_obesity'] = merged['per_cent_obesity'].fillna('No data')
    # to_json() already yields a JSON string, so no loads/dumps round-trip
    return merged.to_json()

# initial data source: the 2016 snapshot
geosource = GeoJSONDataSource(geojson = json_data(df, 2016))

# sequential multi-hue palette, reversed so that blue marks the
# highest (= most obese) values
palette = brewer['YlGnBu'][8][::-1]

# linear value -> color mapping over the 0..40 range
color_mapper = LinearColorMapper(
    palette = palette,
    low = 0,
    high = 40,
    nan_color = '#d9d9d9',
)

# custom tick labels for the color bar
tick_labels = {
    '0': '0%', '5': '5%', '10': '10%',
    '15': '15%', '20': '20%', '25': '25%',
    '30': '30%', '35': '35%', '40': '>40%',
}

# tooltip showing the country name and its obesity value
hover = HoverTool(
    tooltips = [
        ('Country/region', '@country'),
        ('% obesity', '@per_cent_obesity'),
    ]
)

# horizontal color bar driven by the same mapper as the patches
color_bar = ColorBar(
    color_mapper = color_mapper,
    label_standoff = 8,
    width = 500,
    height = 20,
    border_line_color = None,
    location = (0, 0),
    orientation = 'horizontal',
    major_label_overrides = tick_labels,
)

# figure without toolbar; the hover tool is its only tool
p = figure(
    title = 'Share of adults who are obese, 2016',
    plot_height = 600,
    plot_width = 950,
    toolbar_location = None,
    tools = [hover],
)
# hide the grid lines on both axes
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# patch renderer: fill each shape according to its 'per_cent_obesity' value
fill_spec = {'field': 'per_cent_obesity', 'transform': color_mapper}
p.patches(
    'xs',
    'ys',
    source = geosource,
    fill_color = fill_spec,
    line_color = 'black',
    line_width = 0.25,
    fill_alpha = 1,
)

# place the color bar underneath the map
p.add_layout(color_bar, 'below')

# Define the callback function:

def update_plot(attr, old, new):
    """Slider callback: redraw the map for the currently selected year.

    The three parameters follow the signature Bokeh uses for ``on_change``
    callbacks; they are not used here because the year is read from the
    slider directly.

    Relies on the module-level ``slider``, ``df``, ``geosource`` and ``p``.
    """
    yr = slider.value
    # swap in the GeoJSON for the chosen year
    geosource.geojson = json_data(df, yr)
    # the placeholder is a year, not a percentage, so no '%' is appended
    # (keeps the text consistent with the figure's initial title)
    p.title.text = 'Share of adults who are obese, {}'.format(yr)
    
# year selector, starting at 2016
slider = Slider(title = 'Year', start = 1975, end = 2016, step = 1, value = 2016)

# re-render the map whenever the slider value changes
slider.on_change('value', update_plot)

# stack the plot above the slider and register the layout with the
# current document
layout = column(p, widgetbox(slider))
curdoc().add_root(layout)

# render inline in Jupyter Notebook
output_notebook()
show(layout)