In [1]:
# if you don't have these libraries installed uncomment them and install them 
# pip install bokeh
# pip install geopandas

#It would be bad form not to cite sources: https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0

In [2]:
# Import the necessary packages
import pandas as pd
import numpy as np
import re 
import geopandas as gpd
import pandas as pd
import json

### Utilities

In [3]:
def clean_text(string):
    regex = '[^A-Za-z0-9]+'
    return re.sub(regex, '', string)

def json_data(selectedYear):
    yr = selectedYear
    df_yr = df[df['year'] == yr]
    merged = gdf.merge(df_yr, left_on = 'country_code', right_on =     'code', how = 'left')
    merged.fillna('No data', inplace = True)
    merged_json = json.loads(merged.to_json())
    json_data = json.dumps(merged_json)
    return json_data

def update_plot(attr, old, new):
    yr = slider.value
    new_data = json_data(yr)
    geosource.geojson = new_data
    p.title.text = 'Share of adults who are obese, %d' %yr

### Notes

To render a world map, you need a file with world coordinates. Natural Earth is a public domain map dataset that provides geospatial data at various resolutions. It provides map data at 1:10m. 1:50m, and 1:110m scales. 

What even is raster data?
- Rasters are digital aerial photographs imagery from satellites, digital pictures, or even scanned maps. 
    - In its simplest form, it consists of a matrix of cells (or pixels) organized into rows and columns where each cell contains a value representing information such as temperature.
- Detailed description [here](https://desktop.arcgis.com/en/arcmap/10.3/manage-data/raster-and-images/what-is-raster-data.htm)

### Geopandas 
Geopandas can read almost any vector-based spatial data format. The function ```read_file``` returns a GeoDataframe object.

In [4]:
# The shapefile format is a geospatial vector data format for geographic information system (GIS)

shape_file = 'map_data/110m_cultural/ne_110m_admin_0_countries.shp'

# Read the shapefile using Geopandas, accessing fields is similar to accessing them in pandas
gdf = gpd.read_file(shape_file)[['ADMIN','ADM0_A3','geometry']]


#Rename columns.
gdf.columns = ['country', 'country_code', 'geometry']
gdf.head()

Unnamed: 0,country,country_code,geometry
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [5]:
# We can drop the row for 'Antarctica' as it uneccassarily occupies a large space in our map and is not required 
# in our current analysis

print(gdf[gdf['country'] == 'Antarctica'])

#Drop row corresponding to 'Antarctica'
gdf = gdf.drop(gdf.index[159])

        country country_code  \
159  Antarctica          ATA   

                                              geometry  
159  MULTIPOLYGON (((-48.66062 -78.04702, -48.15140...  


In [6]:
datafile = 'map_data/share-of-adults-defined-as-obese.csv'
df = pd.read_csv(datafile, names = ['entity', 'code', 'year', 'per_cent_obesity'], skiprows = 1)
df.head()

Unnamed: 0,entity,code,year,per_cent_obesity
0,Afghanistan,AFG,1975,0.5
1,Afghanistan,AFG,1976,0.5
2,Afghanistan,AFG,1977,0.6
3,Afghanistan,AFG,1978,0.6
4,Afghanistan,AFG,1979,0.6


### Let's Take a Look at the Data

In [7]:
df.info()
df[df['code'].isnull()]['entity'].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8316 entries, 0 to 8315
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   entity            8316 non-null   object 
 1   code              7980 non-null   object 
 2   year              8316 non-null   int64  
 3   per_cent_obesity  8316 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 260.0+ KB


array(['Africa', 'Americas', 'Eastern Mediterranean', 'Europe', 'Global',
       'South-East Asia', 'Sudan (former)', 'Western Pacific'],
      dtype=object)

This an example of when Domain Knowledge would come into practice: Investigating the pandas dataframe shows missing values for Sudan. Our data spans the period from 1975–2016; while Sudan split into two countries in July 2011. This resulted in a change of 3-letter ISO code for the newly split country. To keep it simple, let us ignore the missing data for Sudan.

### Static Map for 2016

In [8]:
#Filter data for year 2016. # In this case we drop nulls. Though it's not always good to get rid of missing data rows
df_2016 = df[df['year'] == 2016].dropna()
df_2016.head()

Unnamed: 0,entity,code,year,per_cent_obesity
41,Afghanistan,AFG,2016,5.5
125,Albania,ALB,2016,21.7
167,Algeria,DZA,2016,27.4
251,Andorra,AND,2016,25.6
293,Angola,AGO,2016,8.2


### Let's Merge the DataFiles

In [9]:
#Merge dataframes gdf and df_2016.
merged = gdf.merge(df_2016, left_on = 'country_code', right_on = 'code')
merged.head()

Unnamed: 0,country,country_code,geometry,entity,code,year,per_cent_obesity
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000...",Fiji,FJI,2016,30.2
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...",Tanzania,TZA,2016,8.4
2,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742...",Canada,CAN,2016,29.4
3,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",United States,USA,2016,36.2
4,Kazakhstan,KAZ,"POLYGON ((87.35997 49.21498, 86.59878 48.54918...",Kazakhstan,KAZ,2016,21.0


This merged file can be rendered using geopandas but we want interactivity, so we will use the Bokeh Library. Bokeh uses the GeoJSON format. This format represents geographical features with JSON, and describes points, lines, and polygons (these are called patches in Bokeh). What we can do is convert the table above in the GeoJSON format.

In [10]:
# convert data to json
merged_json = json.loads(merged.to_json())

#Convert to String like object.
json_data = json.dumps(merged_json)

Let's start with a definition.

**Choropleth map:** A choropleth map is a type of thematic map in which a set of pre-defined areas is colored or patterned in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic within each area, such as population density or per capita income. 

## Using Bokeh

In [11]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

In [12]:
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

In [13]:
#Define a sequential multi-hue color palette.
#This a set of hexadecimals that correspond to different colors
palette = brewer['YlGnBu'][8]
#Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]

In [14]:
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 40)

In [15]:
#Define custom tick labels for color bar.
tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%', 
               '20':'20%', '25':'25%', '30':'30%','35':'35%', '40': '>40%'}

In [16]:
#Create color bar. 
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

In [17]:
#Create figure object.
p = figure(title = 'Share of adults who are obese, 2016', plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

In [18]:
#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'per_cent_obesity', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)
#Specify figure layout.
p.add_layout(color_bar, 'below')
#Display figure inline in Jupyter Notebook.
output_notebook()
#Display figure.
show(p)

This is super cool. We can see that ther are missing pieces in the map. Sudan is missing, and some other as well. The reason that they have not been drawn is because they are missing the json we created earlier with the merging of the two datasets. 

In [19]:
# instead of doing an inner merge which is the default merge we do a left merge 
# which get's all the keys from the left dataframe which contains all the countries
# This means we will have null values
merged = gdf.merge(df_2016, left_on = 'country_code', right_on = 'code',how='left')
merged.isnull().any()

country             False
country_code        False
geometry            False
entity               True
code                 True
year                 True
per_cent_obesity     True
dtype: bool

In [20]:
merged_json = json.loads(merged.to_json())

#Convert to String like object.
json_data = json.dumps(merged_json)
geosource = GeoJSONDataSource(geojson = json_data)

#Create figure object.
p = figure(title = 'Share of adults who are obese, 2016', plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'per_cent_obesity', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

#Specify figure layout.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
output_notebook()

#Display figure.
show(p)

So now we have the previously missing countries on our map, but there is still a problem. The newly added countries are color coded. A left merge results in addition of NaN values in the merged dataframe for corresponding missing values in the right dataframe (df_2016). The problem arises when we convert this merged dataframe into GeoJSON format, as NaN is not a valid JSON object. To circumvent this, we will replace all NaN values in merged dataframe into a string ‘No data’.

In [21]:
merged = gdf.merge(df_2016, left_on = 'country_code', right_on = 'code',how='left')
#Replace NaN values to string 'No data'.
merged.fillna('No data', inplace = True)

We must now aslo input a hex code to color code countries with no data as an argument for the color mapper. 

In [22]:
#Instantiate LinearColorMapper that maps numbers in a range linearly into a sequence of colors. 
#Input nan_color.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 40, nan_color = '#d9d9d9')

In [23]:
merged_json = json.loads(merged.to_json())

#Convert to String like object.
json_data = json.dumps(merged_json)
geosource = GeoJSONDataSource(geojson = json_data)

#Create figure object.
p = figure(title = 'Share of adults who are obese, 2016', plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = {'field' :'per_cent_obesity', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

#Specify figure layout.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
output_notebook()

#Display figure.
show(p)

### Interactivity with Bokeh

The interactivity that we are going to implement here is the ability for the user to select a year and display obesity information for it.  There will also be a hovering tool which allows the user to view details just by hovering the mouse over a specific country or region.

Bokeh is the library for you. It provides an extensive set of widgets and tools and makes it very simple to create rich, interactive visualizations.

In [24]:
from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column
from bokeh.models.callbacks import CustomJS

### More Resources
1. https://campus.datacamp.com/courses/interactive-data-visualization-with-bokeh/putting-it-all-together-a-case-study-4?ex=4
2. https://docs.bokeh.org/en/latest/docs/user_guide/interaction.html#customjs-with-a-python-function

# Interactivity is a beast of its own 
That's for later tonight or tomorrow: 
1. https://rebeccabilbro.github.io/interactive-viz-bokeh/

More on Bokeh. Bokeh is an interactive Python Library for visualizations that targets modern web browsers for presentation. 

Widgets are interactive controls that can be added to Bokeh applications to provide a front end user interface to visualization. They can drive new computations, update plots, and connect to other programmatic functionality. When used with the bokeh server they can run arbitrary sets of Python code. 

### Let's import the neccessary packages

In [25]:
from bokeh.layouts import layout
from bokeh.embed import file_html

from bokeh.io import show
from bokeh.io import output_notebook

from bokeh.models import Text
from bokeh.models import Plot
from bokeh.models import Slider
from bokeh.models import Circle
from bokeh.models import Range1d
from bokeh.models import CustomJS
from bokeh.models import HoverTool
from bokeh.models import LinearAxis
from bokeh.models import ColumnDataSource
from bokeh.models import SingleIntervalTicker

from bokeh.palettes import Spectral6
output_notebook()

- output_notebook() function from bokeh.io to display Bokeh plots inline.
- show() is called, the plot will be displayed inline in the next notebook output cell. 
- To save your Bokeh plots, you can use the output_file()

### Get some more data. 
These are the examples from the bokeh visualizations. Some of Bokeh examples rely on sample data that is not included in the Bokeh GitHub repository or released packages, due to their size.

In [108]:
import bokeh.sampledata
bokeh.sampledata.download()

Using data directory: /Users/Musau/.bokeh/data
Skipping 'CGM.csv' (checksum match)
Skipping 'US_Counties.zip' (checksum match)
Skipping 'us_cities.json' (checksum match)
Skipping 'unemployment09.csv' (checksum match)
Skipping 'AAPL.csv' (checksum match)
Skipping 'FB.csv' (checksum match)
Skipping 'GOOG.csv' (checksum match)
Skipping 'IBM.csv' (checksum match)
Skipping 'MSFT.csv' (checksum match)
Skipping 'WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip' (checksum match)
Skipping 'gapminder_fertility.csv' (checksum match)
Skipping 'gapminder_population.csv' (checksum match)
Skipping 'gapminder_life_expectancy.csv' (checksum match)
Skipping 'gapminder_regions.csv' (checksum match)
Skipping 'world_cities.zip' (checksum match)
Skipping 'airports.json' (checksum match)
Skipping 'movies.db.zip' (checksum match)
Skipping 'airports.csv' (checksum match)
Skipping 'routes.csv' (checksum match)
Skipping 'haarcascade_frontalface_default.xml' (checksum match)


### Import the Dataframes that we will be working with 

In [109]:
def process_data():
    from bokeh.sampledata.gapminder import regions
    from bokeh.sampledata.gapminder import fertility
    from bokeh.sampledata.gapminder import population
    from bokeh.sampledata.gapminder import life_expectancy

    # Make the column names ints not strings for handling
    columns     = list(fertility.columns)
    years       = list(range(int(columns[0]), int(columns[-1])))
    rename_dict = dict(zip(columns, years))

    fertility   = fertility.rename(columns=rename_dict)
    life_expectancy = life_expectancy.rename(columns=rename_dict)
    population  = population.rename(columns=rename_dict)
    regions     = regions.rename(columns=rename_dict)

    # Turn population into bubble sizes.
    # Use min_size and factor to tweak.
    scaling  = 200
    pop_size = np.sqrt(population / np.pi) / scaling
    min_size = 3
    pop_size = pop_size.where(
                  pop_size >= min_size
                  ).fillna(min_size)

    # Use pandas categories and categorize & color the regions
    regions.Group = regions.Group.astype('category')
    regions_list  = list(regions.Group.cat.categories)
    
    # didn't know this but you can have nested python functions
    def get_color(r):
        return Spectral6[regions_list.index(r.Group)]

    regions['region_color'] = regions.apply(get_color, axis=1)

    return (fertility, life_expectancy, pop_size,
        regions, years, regions_list)


# Define each dataframe

In [110]:
(fertility_df, life_expect_df,
pop_size_df, regions_df, years, regions) = process_data()

Next we will add each of our sources to the sources dictionary, where each key is the name of the year (prefaced with an underscore) and each value is a dataframe with the aggregated values for that year.

In [111]:
sources = {}

region_color      = regions_df['region_color']
region_color.name = 'region_color'

for year in years:
    fertility       = fertility_df[year]
    fertility.name  = 'fertility'
    life            = life_expect_df[year]
    life.name       = 'life'
    population      = pop_size_df[year]
    population.name = 'population'

    new_df = pd.concat(
                [fertility, life, population, region_color],
                axis=1
    )
    sources['_' + str(year)] = ColumnDataSource(new_df)

Later we will be able to pass this sources dictionary to the JavaScript Callback. In so doing, we will find that in our JavaScript we have objects named by year that refer to a corresponding ColumnDataSource

In [112]:
dict_of_sources = dict(zip(
                      [x for x in years],
                      ['_%s' % x for x in years])
                      )
dict_of_sources

{1964: '_1964',
 1965: '_1965',
 1966: '_1966',
 1967: '_1967',
 1968: '_1968',
 1969: '_1969',
 1970: '_1970',
 1971: '_1971',
 1972: '_1972',
 1973: '_1973',
 1974: '_1974',
 1975: '_1975',
 1976: '_1976',
 1977: '_1977',
 1978: '_1978',
 1979: '_1979',
 1980: '_1980',
 1981: '_1981',
 1982: '_1982',
 1983: '_1983',
 1984: '_1984',
 1985: '_1985',
 1986: '_1986',
 1987: '_1987',
 1988: '_1988',
 1989: '_1989',
 1990: '_1990',
 1991: '_1991',
 1992: '_1992',
 1993: '_1993',
 1994: '_1994',
 1995: '_1995',
 1996: '_1996',
 1997: '_1997',
 1998: '_1998',
 1999: '_1999',
 2000: '_2000',
 2001: '_2001',
 2002: '_2002',
 2003: '_2003',
 2004: '_2004',
 2005: '_2005',
 2006: '_2006',
 2007: '_2007',
 2008: '_2008',
 2009: '_2009',
 2010: '_2010',
 2011: '_2011',
 2012: '_2012'}

In [113]:
js_source_array = str(dict_of_sources).replace("'", "")
js_source_array

'{1964: _1964, 1965: _1965, 1966: _1966, 1967: _1967, 1968: _1968, 1969: _1969, 1970: _1970, 1971: _1971, 1972: _1972, 1973: _1973, 1974: _1974, 1975: _1975, 1976: _1976, 1977: _1977, 1978: _1978, 1979: _1979, 1980: _1980, 1981: _1981, 1982: _1982, 1983: _1983, 1984: _1984, 1985: _1985, 1986: _1986, 1987: _1987, 1988: _1988, 1989: _1989, 1990: _1990, 1991: _1991, 1992: _1992, 1993: _1993, 1994: _1994, 1995: _1995, 1996: _1996, 1997: _1997, 1998: _1998, 1999: _1999, 2000: _2000, 2001: _2001, 2002: _2002, 2003: _2003, 2004: _2004, 2005: _2005, 2006: _2006, 2007: _2007, 2008: _2008, 2009: _2009, 2010: _2010, 2011: _2011, 2012: _2012}'

In [114]:
xdr  = Range1d(1, 9)
ydr  = Range1d(20, 100)
plot = Plot(
    x_range=xdr,
    y_range=ydr,
    plot_width=800,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None,
    min_border=20,
)

 Bokeh’s API works in much the same way as Matplotlib’s, meaning that we can imagine our digital canvas in the same way we would imagine a traditional fabric canvas. As we add new elements to our plot object, we are adding new layers of information onto our canvas that will appear as overlays (unless they explicitly reset some earlier-set parameter). So far we have only created the plot object, so if we were to show() it at this phase, we would get… a blank canvas!

In [115]:
AXIS_FORMATS = dict(
    minor_tick_in=None,
    minor_tick_out=None,
    major_tick_in=None,
    major_label_text_font_size="10pt",
    major_label_text_font_style="normal",
    axis_label_text_font_size="10pt",

    axis_line_color='#AAAAAA',
    major_tick_line_color='#AAAAAA',
    major_label_text_color='#666666',

    major_tick_line_cap="round",
    axis_line_cap="round",
    axis_line_width=1,
    major_tick_line_width=1,
)

xaxis = LinearAxis(
    ticker     = SingleIntervalTicker(interval=1),
    axis_label = "Children per woman (total fertility)",
    **AXIS_FORMATS
)
yaxis = LinearAxis(
    ticker     = SingleIntervalTicker(interval=20),
    axis_label = "Life expectancy at birth (years)",
    **AXIS_FORMATS
)

plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

In [116]:
show(plot)



In [117]:
text_source = ColumnDataSource({'year': ['%s' % years[0]]})
text        = Text(
                  x=2, y=35, text='year',
                  text_font_size='150pt',
                  text_color='#EEEEEE'
                  )
plot.add_glyph(text_source, text)

In [118]:
show(plot)

In [119]:
renderer_source = sources['_%s' % years[0]]
circle_glyph    = Circle(
                    x='fertility', y='life',
                    size='population', fill_alpha=0.8,
                    fill_color='region_color',
                    line_color='#7c7e71',
                    line_width=0.5, line_alpha=0.5
                    )

circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

In [120]:
show(plot)

In [121]:
# Add hover for the circle (not other plot elements)
tooltips = "@index"
plot.add_tools(HoverTool(
                  tooltips=tooltips,
                  renderers=[circle_renderer]
                  )
              )

In [122]:
show(plot)

In [123]:
text_x = 7
text_y = 95
for i, region in enumerate(regions):
    plot.add_glyph(Text(
                      x=text_x, y=text_y,
                      text=[region],
                      text_font_size='10pt',
                      text_color='#666666'
                      )
                  )
    plot.add_glyph(Circle(
                      x=text_x - 0.1,
                      y=text_y + 2,
                      fill_color=Spectral6[i],
                      line_color=None,
                      fill_alpha=0.8,
                      size=10,
                      )
                  )
    text_y = text_y - 5

In [124]:
show(plot)

In [125]:
# Add the slider
code = """
    var year = slider.get('value'),
        sources = %s,
        new_source_data = sources[year].get('data');
    renderer_source.set('data', new_source_data);
    text_source.set('data', {'year': [String(year)]});
""" % js_source_array

callback = CustomJS(args=sources, code=code)
slider = Slider(
    start=years[0],
    end=years[-1],
    value=years[0],
    step=1,
    title="Year"
)
callback.args["renderer_source"] = renderer_source
callback.args["text_source"] = text_source
callback.args["slider"] = slider
slider.js_on_change("value", callback)

In [126]:
show(slider)

In [92]:
layout = column(slider, plot)
show(layout)

In [26]:
import yaml

from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Slider
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.io import show, output_notebook

from bokeh.sampledata.sea_surface_temperature import sea_surface_temperature

output_notebook()

In [27]:
def bkapp(doc):
    df = sea_surface_temperature.copy()
    source = ColumnDataSource(data=df)

    plot = figure(x_axis_type='datetime', y_range=(0, 25),
                  y_axis_label='Temperature (Celsius)',
                  title="Sea Surface Temperature at 43.18, -70.43")
    plot.line('time', 'temperature', source=source)

    def callback(attr, old, new):
        if new == 0:
            data = df
        else:
            data = df.rolling('{0}D'.format(new)).mean()
        source.data = ColumnDataSource.from_df(data)

    slider = Slider(start=0, end=30, value=0, step=1, title="Smoothing by N Days")
    slider.on_change('value', callback)

    doc.add_root(column(slider, plot))

    doc.theme = Theme(json=yaml.load("""
        attrs:
            Figure:
                background_fill_color: "#DDDDDD"
                outline_line_color: white
                toolbar_location: above
                height: 500
                width: 800
            Grid:
                grid_line_dash: [6, 4]
                grid_line_color: white
    """, Loader=yaml.FullLoader))

In [28]:
show(bkapp)

This is dope: https://github.com/bokeh/bokeh/tree/branch-2.3/examples/howto/server_embed

Next Tutorial: https://realpython.com/python-data-visualization-bokeh/

Building a visualization with Bokeh involves the following key steps:
1. Prepare the data
2. Determine where the visualization will be rendered
3. Set up the figure(s)
4. Connect to and draw your data
5. Organize the layout
6. Preview and save your beautiful data creation. 

Generating Figures: 
- In Bokeh there are two main ways to output your visualizations. 
    - The first is output_file('filename.html') this will write the visualization to a static html file.
    - The second is output_notebook() will render your visualization directly in the notebook
- Neither of these will actually show the visualization. That doesn't happen until show() is called. They just dictate where the visualization appears when you do do show


Sometimes when rendering multiple visualizations sequentially, you'll see that past renderers are not being cleared with each execution. If you experience this, import and run the following betwen executions

from bokeh.plotting import reset_output()

reset_output()

The figure object:
- The figure object is not only the foundation of your data visualization but also the object that unlocks all of Bokeh's available tools for visualizing data. The Bokeh figure is a subclass of the Bokeh Plot object whihc provides many of the parameters
- Examples below

In [3]:
# Data handling
import pandas as pd
import numpy as np

# Bokeh libraries
from bokeh.io import output_file, output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel

# The figure will be rendered inline in my Jupyter Notebook
output_notebook()

# Example figure
fig = figure(background_fill_color='gray',
             background_fill_alpha=0.5,
             border_fill_color='blue',
             border_fill_alpha=0.25,
             plot_height=300,
             plot_width=500,
             x_axis_label='X Label',
             x_axis_type='datetime',
             x_axis_location='above',
             x_range=('2018-01-01', '2018-06-30'),
             y_axis_label='Y Label',
             y_axis_type='linear',
             y_axis_location='left',
             y_range=(0, 100),
             title='Example Figure',
             title_location='right',
             toolbar_location='below',
             tools='save')

# See what it looks like
show(fig)




Once the figure() object is instantiated, you can still configure it after the fact. Let's say you want to get rid of the gridlines:

In [4]:
# Remove the gridlines from the figure() object
fig.grid.grid_line_color = None

# See what it looks like 
show(fig)




The Bokeh [Plot Class](https://bokeh.pydata.org/en/latest/docs/reference/models/plots.html#bokeh.models.plots.Plot) is the superclass of the figure() object, from which figures inherit a lot of their attributes.

The [Figure Class](https://bokeh.pydata.org/en/latest/docs/reference/plotting.html#bokeh.plotting.figure.Figure) documentation is a good place to find more detail about the arguments of the figure() object.

Some specific customization options that are worth mentioning: 
- [Text Properties](https://bokeh.pydata.org/en/latest/docs/user_guide/styling.html#text-properties) covers all the attributes related to changing font styles, sizes, colors, and so forth.
- [TickFormatters](https://bokeh.pydata.org/en/latest/docs/user_guide/styling.html#tick-label-formats) are built-in objects specifically for formatting your axes using Python-like string formatting syntax.

Drawing Data:

An empty figure isn't all that exciting, so let's look at glyphs: the building blocks of Bokeh visualizations. A glyph is a vecotrized graphical shape or marker that is used to represent your data, like a circle or square. 

In [10]:
from bokeh.plotting import reset_output
reset_output()

#My x-y coordinate data
x = [1, 2, 1]
y = [1, 1, 2]

output_notebook()

# Create a figure with no toolbar and axis ranges of [0,3]
fig = figure(title='My Coordinates',
             plot_height=300, plot_width=300,
             x_range=(0, 3), y_range=(0, 3),
             toolbar_location=None)

# Draw the coordinates as circles
fig.circle(x=x, y=y,
           color='green', size=10, alpha=0.5)

# Show plot
show(fig)

The set of all glyphs can be found [here](https://docs.bokeh.org/en/latest/docs/reference/plotting.html)

Here are a few categories of glyphs:

- Marker includes shapes like circles, diamonds, squares, and triangles and is effective for creating visualizations like scatter and bubble charts.
- Line covers things like single, step, and multi-line shapes that can be used to build line charts.
- Bar/Rectangle shapes can be used to create traditional or stacked bar (hbar) and column (vbar) charts as well as waterfall or gantt charts.


In [11]:
# My word count data
day_num = np.linspace(1, 10, 10)
daily_words = [450, 628, 488, 210, 287, 791, 508, 639, 397, 943]
cumulative_words = np.cumsum(daily_words)

# Output the visualization directly in the notebook
output_notebook()

# Create a figure with a datetime type x-axis
fig = figure(title='My Tutorial Progress',
             plot_height=400, plot_width=700,
             x_axis_label='Day Number', y_axis_label='Words Written',
             x_minor_ticks=2, y_range=(0, 6000),
             toolbar_location=None)

# The daily words will be represented as vertical bars (columns)
fig.vbar(x=day_num, bottom=0, top=daily_words, 
         color='blue', width=0.75, 
         legend='Daily')

# The cumulative sum will be a trend line
fig.line(x=day_num, y=cumulative_words, 
         color='gray', line_width=1,
         legend='Cumulative')

# Put the legend in the upper left corner
fig.legend.location = 'top_left'

# Let's check it out
show(fig)

The beauty of Bokeh is that nearly any idea you have should be possible. It’s just a matter of how you want to leverage the available tools to do so. 

### Using the ColumnDataSource Object

While you can use python lists and numpy arrays to represent data, Bokeh is also equipped to use more complex data structures such as python dictionaries and Pandas Data Frames. The built in functionality in handling those data structures is known as a **ColumnDataSource**. This is what Bokeh uses behind the scenes so it's best to use it in this format. 

The ColumnDataSource can interpret three types of data objects:
- Python dict: The keys are names associated with the respective value sequences (lists, arrays, and so forth).
- Pandas DataFrame: The columns of the DataFrame become the reference names for the ColumnDataSource.
- Pandas groupby: The columns of the ColumnDataSource reference the columns as seen by calling groupby.describe().
