# Heat Maps in Bokeh

In [1]:
# Note: the below code requires a restart of the notebook the first time it is run
import sys
import os
from traitlets.config.manager import BaseJSONConfigManager
path = os.path.join(sys.prefix, 'etc', 'jupyter', 'nbconfig')
cm = BaseJSONConfigManager(config_dir=path)
cm.update("livereveal", {"scroll": True});

In [58]:
import pandas as pd
import numpy as np
import re
import os
from math import pi
from bokeh.io import show
from bokeh.models import LinearColorMapper, LogColorMapper, BasicTicker, PrintfTickFormatter, ColorBar
from bokeh import plotting
from bokeh.palettes import Viridis256, magma, RdYlGn, RdYlBu, Plasma256, Viridis256
plotting.output_notebook()

In [None]:
%%time
row_len = 800_000
h5_dir = r'c:\adsb'
h5_file = os.path.join(h5_dir, '2018-06-24.h5')
pickle_name = f'{os.path.basename(h5_file)}-{row_len}.p'
pickle_path = os.path.join(os.getcwd(), 'data', pickle_name)
if os.path.exists(pickle_path):
    df = pd.read_pickle(pickle_path)
else:
    with pd.HDFStore(h5_file) as store:
        columns_to_keep = ['Year', 'Cou', 'Icao', 'Op', 'Type']
        df = store.select('data', stop = row_len, columns=columns_to_keep)
        
    main_ops = ['Southwest', 'American', 'Delta', 'SkyWest', 'Air Canada', 
            'Virgin', 'United','JetBlue', 'Spirit', 'Frontier']
    for o in main_ops:
        df.loc[df.Op.fillna('Other').str.lower().str.contains(o.lower()), 'Op'] = o
    df.loc[~df.Op.isin(main_ops), 'Op'] = 'Other'
    df = df.astype({'Op':'category'})

    df = df.dropna(subset=['Cou', 'Icao'], how='any')
    df.to_pickle(pickle_path)

In [4]:
df.loc[df.Year.isin(['Year','','2104','2105']),'Year'] = np.nan
cou_vs_year = df.groupby(['Cou', 'Year']).count()['Icao'].unstack().fillna(0)
cou_vs_year.index.name = 'Country'
cou_vs_year.columns.name = 'Year'
cou_vs_year['Total'] = cou_vs_year.sum(axis=1)
cou_vs_year = cou_vs_year[cou_vs_year.columns[35:]]
cou_vs_year = cou_vs_year.sort_values('Total', ascending=False)
del cou_vs_year['Total']
cou_vs_year = cou_vs_year[:20]
cou_vs_year_percent = cou_vs_year.div(cou_vs_year.sum(axis=1),axis=0) * 100
year_vs_cou_percent = cou_vs_year_percent.T
year_vs_cou = cou_vs_year.T

In [5]:
year_vs_cou_percent.index = year_vs_cou_percent.index.astype(str)

years = list(year_vs_cou_percent.index)
months = list(year_vs_cou_percent.columns)

arr_p = pd.DataFrame(year_vs_cou_percent.stack(), columns=['Percentage']).reset_index()
arr_c = pd.DataFrame(year_vs_cou.stack(), columns=['Count']).reset_index()
total_by_cou = year_vs_cou.sum(axis=0)
arr_c['Total'] = arr_c.Country.map(total_by_cou)
arr = pd.merge(arr_p, arr_c, how='outer', left_on=['Year', 'Country'], right_on=['Year', 'Country'])

colors = Viridis256
mapper = LinearColorMapper(palette=colors, low=0, high=year_vs_cou_percent.max().max())

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

p = plotting.figure(title="Aircraft Registered to Country by Year ({0} - {1})".format(years[0], years[-1]),
           x_range=years, y_range=list(reversed(months)),
           x_axis_location="above", plot_width=1000, plot_height=1200,
           tools=TOOLS, toolbar_location='above',
           tooltips=[('Country', '@Country'), 
                     ('Year', '@Year'), 
                     ('Aircraft Count', '@Count'),
                     ('Percent of Fleet', '@Percentage{0.0}% of fleet'),
                     ("Total Aircraft", '@Total{,}')])

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

p.rect(x="Year", y="Country", width=1, height=1,
       source=arr,
       fill_color={'field': 'Percentage', 'transform': mapper},
       line_color=None)

color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="10pt",
                     ticker=BasicTicker(desired_num_ticks=10),
                     label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)

In [None]:
# models = list(set(df.Mdl.dropna())) #.str.strip().str.title()
# models2 = {}
# for m in models:
#     m_before = m
#     m = m.strip().title()
#     m = m.strip()
#     if re.match(r'[0-9]{4} *',m[:5]):
#         m = m[5:]
#     m = m.replace('Erj','ERJ').replace('Md-','MD-').replace('Dc-','DC-').replace('Pa-','PA-')
#     m = m.replace('Emb-','EMB-').replace('Emb ','EMB ')
#     m = m.replace('Raytheon Aircraft Company', 'Raytheon').replace('Airbus Industrie','Airbus')
#     m = m.replace('Dehavilland Dhc-2', 'Dehavilland')
#     m = m.replace('Dassault-Breguet Mystere','Dassault')
#     m = m.replace('Falcon 20-', 'Falcon 20')
#     m = m.replace('Mcdonnell', 'McDonnell')
#     m = m.replace('McDonnell Douglas Aircraft Co', 'McDonnell Douglas')
#     m = m.replace('McDonnell Douglas Corporation', 'McDonnell Douglas')
#     m = m.replace('McDonnell Douglas MD', 'MD').replace('McDonnell Douglas DC', 'DC')
#     m = m.replace('Embraer Executive Aircraft Inc', 'Embraer')
#     m = m.replace('Embraer E', 'E')
#     m = m.replace('American General Acft Corp', 'American General')
#     m = m.replace('Saab-Scania Saab', 'Saab')
#     m = m.replace('Eurocopter Deutschland Gmbh','Eurocopter')
#     m = m.replace('Piper Aircraft Inc', 'Piper')
#     m = m.replace('Diamond Aircraft Ind Inc', 'Diamond')
#     m = m.replace('Embraer-Empresa Brasileira De', 'Embraer')
#     m = m.replace('Gulfstream Aerospace', 'Gulfstream')
#     m = m.replace('Hawker Beechcraft Corp Hawker', 'Hawker')
#     m = m.replace('Hawker Beechcraft Corp King Air', 'King Air')
#     m = m.replace('Beech King Air', 'King Air')
#     m = m.replace('Textron Aviation King Air', 'King Air')
#     m = m.replace('Raytheon King Air', 'King Air')
#     m = m.replace('Bombardier Learjet', 'Learjet')
#     m = m.replace('Bombardier Global', 'Global')
#     m = m.replace('Bombardier Challenger', 'Challenger')
#     m = m.replace('Textron Aviation Citation', 'Citation')
#     m = m.replace('Cessna Citation', 'Citation')
#     m = m.replace('ERJ 170', 'ERJ-170').replace('ERJ 145', 'ERJ-145').replace('ERJ 190', 'ERJ-190')
#     m = m.replace('ERJ 1', 'ERJ-1')
#     m = m.replace('Kc-135R', 'KC-135R')
#     m = m.replace('Pa 46', 'PA-46')
#     m = m.replace('Crj', 'CRJ')
#     m = m.replace('CRJ ', 'CRJ-')
#     m = m.replace('Boeing ', '')
#     mdl_list = ['717', '737', '747', '757', '767', '777', '787', 
#                 'A300', 'A310', 'A318', 'A319', 'A320', 'A321', 'A330', 'A340', 
#                 'A350', 'A380', 'A400', 'ACJ319', 'ERJ-170', 'ERJ-145', 'ERJ-190','EMB-175',
#                 'Cessna 172', 'Cessna 180', 'C-130', 'MD-80', 'MD-90', 'G-IV', 'G-V', 'G200', 
#                 'G200', 'G450','G550', 'G650', 'C-27J', 'PA-67', 'PA-28', 'PA-32', 'PA-18',
#                'PA-20', 'PA-23', 'PA-24', 'PA-25', 'PA-28', 'PA-30', 'PA-31', 'PA-32', 'CL-600',
#                'PA-34', 'PA-44', 'PA-46', 'C-40', 'Cessna 182', 'Cessna 150','Superjet 100']
#     for ml in mdl_list:
#         if ml.lower() in m.lower():
#             m = ml
#             break
#     models2[m_before] =  m
# # df['Mdl'] = df['Mdl'].map(models2)

In [7]:
model_max, country_max = 15, 15

top_models = list(df.Type.value_counts()[:model_max].index)
cou_vs_mdl = df.groupby(['Cou', 'Type']).nunique()['Icao'].unstack().fillna(0)
cou_vs_mdl.index.name = 'Country'
cou_vs_mdl.columns.name = 'Mdl'
cou_vs_mdl = cou_vs_mdl[top_models]
cou_vs_mdl['Total'] = cou_vs_mdl.sum(axis=1)
cou_vs_mdl = cou_vs_mdl.sort_values('Total', ascending=False)
del cou_vs_mdl['Total']
cou_vs_mdl = cou_vs_mdl[:country_max]
cou_vs_mdl = cou_vs_mdl.reindex(sorted(cou_vs_mdl.columns), axis=1)
cou_vs_mdl.columns.name = 'Mdl'
cou_vs_mdl_percent = cou_vs_mdl.div(cou_vs_mdl.sum(axis=1),axis=0) * 100
mdl_vs_cou_percent = cou_vs_mdl_percent.T
mdl_vs_cou = cou_vs_mdl.T

In [57]:
mdl_vs_cou_percent.index = mdl_vs_cou_percent.index.astype(str)

mdl = list(mdl_vs_cou_percent.index)
country = list(mdl_vs_cou_percent.columns)

arr_p = pd.DataFrame(mdl_vs_cou_percent.stack(), columns=['Percentage']).reset_index()
arr_c = pd.DataFrame(mdl_vs_cou.stack(), columns=['Count']).reset_index()
total_by_cou = mdl_vs_cou.sum(axis=0)
arr_c['Total'] = arr_c.Country.map(total_by_cou)
arr = pd.merge(arr_p, arr_c, how='outer', left_on=['Mdl', 'Country'], right_on=['Mdl', 'Country'])

colors = Plasma256magma(30)
mapper = LinearColorMapper(palette=colors, low=1, high=mdl_vs_cou_percent.max().max())

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

p = plotting.figure(title="Aircraft Registered to Country by Model",
           x_range=mdl, y_range=list(reversed(country)),
           x_axis_location="above", plot_width=950, plot_height=900,
           tools=TOOLS, toolbar_locatio#n='above',
           tooltips=[('Country', '@Country'), 
                     ('Model', '@Mdl'), 
                     ('Aircraft Count', '@Count'),
                     ('Percent of Fleet', '@Percentage{0.0}% of fleet'),
                     ("Total Aircraft", '@Total{,}')])

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "12pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

p.rect(x="Mdl", y="Country", width=1, height=1,
       source=arr,
       fill_color={'field': 'Percentage', 'transform': mapper},
       line_color='white')

color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="12pt",
                     ticker=BasicTicker(desired_num_ticks=10),
                     label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)

AttributeError: 'DataFrame' object has no attribute 'Country'

In [54]:
Plasma256

['#0C0786',
 '#100787',
 '#130689',
 '#15068A',
 '#18068B',
 '#1B068C',
 '#1D068D',
 '#1F058E',
 '#21058F',
 '#230590',
 '#250591',
 '#270592',
 '#290593',
 '#2B0594',
 '#2D0494',
 '#2F0495',
 '#310496',
 '#330497',
 '#340498',
 '#360498',
 '#380499',
 '#3A049A',
 '#3B039A',
 '#3D039B',
 '#3F039C',
 '#40039C',
 '#42039D',
 '#44039E',
 '#45039E',
 '#47029F',
 '#49029F',
 '#4A02A0',
 '#4C02A1',
 '#4E02A1',
 '#4F02A2',
 '#5101A2',
 '#5201A3',
 '#5401A3',
 '#5601A3',
 '#5701A4',
 '#5901A4',
 '#5A00A5',
 '#5C00A5',
 '#5E00A5',
 '#5F00A6',
 '#6100A6',
 '#6200A6',
 '#6400A7',
 '#6500A7',
 '#6700A7',
 '#6800A7',
 '#6A00A7',
 '#6C00A8',
 '#6D00A8',
 '#6F00A8',
 '#7000A8',
 '#7200A8',
 '#7300A8',
 '#7500A8',
 '#7601A8',
 '#7801A8',
 '#7901A8',
 '#7B02A8',
 '#7C02A7',
 '#7E03A7',
 '#7F03A7',
 '#8104A7',
 '#8204A7',
 '#8405A6',
 '#8506A6',
 '#8607A6',
 '#8807A5',
 '#8908A5',
 '#8B09A4',
 '#8C0AA4',
 '#8E0CA4',
 '#8F0DA3',
 '#900EA3',
 '#920FA2',
 '#9310A1',
 '#9511A1',
 '#9612A0',
 '#9713A0',
 '#9

In [63]:
model_max, country_max = 15, 15

top_models = list(df.Op.value_counts()[:model_max].index)
cou_vs_mdl = df.groupby(['Op', 'Type']).nunique()['Icao'].unstack().fillna(0)
cou_vs_mdl.index.name = 'Op'
cou_vs_mdl.columns.name = 'Type'
# print(cou_vs_mdl.columns)
cou_vs_mdl = cou_vs_mdl[cou_vs_mdl.index.isin(top_models)]
cou_vs_mdl['Total'] = cou_vs_mdl.sum(axis=1)
print(cou_vs_mdl.index)
cou_vs_mdl = cou_vs_mdl.sort_values('Total', ascending=False)
del cou_vs_mdl['Total']
top_countries = list(cou_vs_mdl.sum(axis=0).sort_values(ascending=False).index[:country_max])
cou_vs_mdl = cou_vs_mdl[top_countries]
cou_vs_mdl = cou_vs_mdl.reindex(sorted(cou_vs_mdl.columns), axis=1)
cou_vs_mdl = cou_vs_mdl.reindex(sorted(cou_vs_mdl.index), axis=0)
cou_vs_mdl.columns.name = 'Type'
cou_vs_mdl_percent = cou_vs_mdl.div(cou_vs_mdl.sum(axis=1),axis=0) * 100
mdl_vs_cou_percent = cou_vs_mdl_percent.T
mdl_vs_cou = cou_vs_mdl.T

CategoricalIndex(['Air Canada', 'Alaska', 'American', 'Delta', 'Frontier',
                  'JetBlue', 'Other', 'SkyWest', 'Southwest', 'Spirit',
                  'United', 'Virgin', 'Wells Fargo'],
                 categories=['Air Canada', 'Alaska', 'American', 'Delta', 'Frontier', 'JetBlue', 'Other', 'SkyWest', ...], ordered=False, name='Op', dtype='category')


In [22]:
# ops_max, country_max = 15, 15

# top_ops = list(df.Op.value_counts()[:ops_max].index)
# cou_vs_op = df.groupby(['Cou', 'Op']).nunique()['Icao'].unstack().fillna(0)
# cou_vs_op.index.name = 'Country'
# cou_vs_op.columns.name = 'Ops'
# cou_vs_op = cou_vs_op[top_ops]
# print(cou_vs_op.info())
# # cou_vs_op = cou_vs_op.assign(Total =  cou_vs_op.sum(axis=1))
# s=cou_vs_op.sum(axis=1)
# cou_vs_op = cou_vs_op.sort_values('Total', ascending=False)
# del cou_vs_op['Total']
# cou_vs_op = cou_vs_op[:country_max]
# cou_vs_op = cou_vs_op.reindex(sorted(cou_vs_op.columns), axis=1)
# cou_vs_op.columns.name = 'Ops'
# cou_vs_op_percent = cou_vs_op.div(cou_vs_op.sum(axis=1),axis=0) * 100
# op_vs_cou_percent = cou_vs_op_percent.T
# op_vs_cou = cou_vs_op.T

In [64]:
mdl_vs_cou_percent.index = mdl_vs_cou_percent.index.astype(str)

mdl = list(mdl_vs_cou_percent.index)
country = list(mdl_vs_cou_percent.columns)

arr_p = pd.DataFrame(mdl_vs_cou_percent.stack(), columns=['Percentage']).reset_index()
arr_c = pd.DataFrame(mdl_vs_cou.stack(), columns=['Count']).reset_index()
total_by_cou = mdl_vs_cou.sum(axis=0)
arr_c['Total'] = arr_c.Op.map(total_by_cou)
arr = pd.merge(arr_p, arr_c, how='outer', left_on=['Op', 'Type'], right_on=['Op', 'Type'])

colors = Plasma256#RdYlBu[11]#, RdYlBu
mapper = LogColorMapper(palette=colors, low=1, high=mdl_vs_cou_percent.max().max())

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

p = plotting.figure(title="Aircraft Registered to Country by Model",
           x_range=mdl, y_range=list(reversed(country)),
           x_axis_location="above", plot_width=950, plot_height=900,
           tools=TOOLS, toolbar_location='above',
           tooltips=[('Operator', '@Op'), 
                     ('Model', '@Type'), 
                     ('Aircraft Count', '@Count'),
                     ('Percent of Fleet', '@Percentage{0.0}% of fleet'),
                     ("Total Aircraft", '@Total{,}')])

p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "12pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = pi / 3

p.rect(x="Type", y="Op", width=1, height=1,
       source=arr,
       fill_color={'field': 'Percentage', 'transform': mapper},
       line_color='white')

color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="12pt",
                     ticker=BasicTicker(desired_num_ticks=10),
                     label_standoff=6, border_line_color=None, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)