*Note: All output_file() calls have been replaced with output_notebook() so that plots will display inline.*

# The Basics of Bokeh

## Your First Graph

In [18]:
from bokeh.plotting import figure, output_notebook, show

# Route Bokeh output to the notebook so the plot renders inline.
output_notebook()

# Four sample points, given as parallel lists of x and y coordinates.
x, y = [1, 3, 5, 7], [2, 4, 6, 8]

# An empty figure with one circle glyph per (x, y) pair.
p = figure()
p.circle(x=x, y=y, color='red', size=10)

show(p)

In [19]:
from bokeh.plotting import figure, output_notebook, show

# Route Bokeh output to the notebook so the plot renders inline.
output_notebook()

# Shared sample coordinates reused by every glyph below.
x, y = [1, 3, 5, 7], [2, 4, 6, 8]
x_reversed = list(reversed(x))

p = figure()

# Glyphs are added in the same order as before; only the line and the
# triangles contribute legend entries.
p.circle(x=x, y=y, color='red', size=10)
p.line(x=x, y=y, color='blue', legend='line')
p.triangle(x=x_reversed, y=y, color='green', size=10, legend='triangle')
# The squares deliberately swap the coordinate lists (y values on the x axis).
p.square(x=y, y=x, color='#FF0000', size=20)

p.legend.location = 'top_center'

show(p)

# Bokeh and Pandas: Exploring the WWII THOR Dataset

## Loading Data in Pandas

In [4]:
import pandas as pd

# Load the THOR WWII dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv('thor_wwii.csv')

In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(178281, 19)

In [10]:
# Column names as a plain Python list, for easy reading.
df.columns.tolist()

['MSNDATE',
 'THEATER',
 'COUNTRY_FLYING_MISSION',
 'NAF',
 'UNIT_ID',
 'AIRCRAFT_NAME',
 'AC_ATTACKING',
 'TAKEOFF_BASE',
 'TAKEOFF_COUNTRY',
 'TAKEOFF_LATITUDE',
 'TAKEOFF_LONGITUDE',
 'TGT_COUNTRY',
 'TGT_LOCATION',
 'TGT_LATITUDE',
 'TGT_LONGITUDE',
 'TONS_HE',
 'TONS_IC',
 'TONS_FRAG',
 'TOTAL_TONS']

In [23]:
# Preview the first five rows to see what the records look like.
df.head()

Unnamed: 0,MSNDATE,THEATER,COUNTRY_FLYING_MISSION,NAF,UNIT_ID,AIRCRAFT_NAME,AC_ATTACKING,TAKEOFF_BASE,TAKEOFF_COUNTRY,TAKEOFF_LATITUDE,TAKEOFF_LONGITUDE,TGT_COUNTRY,TGT_LOCATION,TGT_LATITUDE,TGT_LONGITUDE,TONS_HE,TONS_IC,TONS_FRAG,TOTAL_TONS
0,03/30/1941,ETO,GREAT BRITAIN,RAF,84 SQDN,BLENHEIM,10.0,,,,,ALBANIA,ELBASAN,41.1,20.07,0.0,0.0,0.0,0.0
1,11/24/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,DURAZZO,41.32,19.45,0.0,0.0,0.0,0.0
2,12/04/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,TEPELENE,40.3,20.02,0.0,0.0,0.0,0.0
3,12/31/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,VALONA,40.47,19.49,0.0,0.0,0.0,0.0
4,01/06/1941,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,VALONA,40.47,19.49,0.0,0.0,0.0,0.0


## The Bokeh ColumnDataSource

In [12]:
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource

# Route Bokeh output to the notebook so the plot renders inline.
output_notebook()

# Load the missions and order them by total tonnage, heaviest first.
df = pd.read_csv('thor_wwii.csv').sort_values(by=['TOTAL_TONS'], ascending=False)

# Only the 100 heaviest missions are handed to Bokeh.
source = ColumnDataSource(df.iloc[:100])

p = figure(plot_width=900, plot_height=750)
p.circle(x='TGT_LONGITUDE', y='TGT_LATITUDE', source=source,
         size=25, line_color='green', fill_color='white')

# Title styling.
p.title.text = 'Target Locations'
p.title.align = 'center'
p.title.text_font_size = '18pt'

# Plot-area styling.
p.background_fill_color = '#edfcff'
p.outline_line_color = 'black'
p.outline_line_width = 1

# Both axes share a label size; only the label text differs.
p.xaxis.axis_label = 'Longitude'
p.yaxis.axis_label = 'Latitude'
p.axis.axis_label_text_font_size = '12pt'

# Faint minor grid lines in both directions.
p.grid.minor_grid_line_color = 'black'
p.grid.minor_grid_line_alpha = 0.1

show(p)

# Categorical Data and Bar Charts: Munitions Dropped by Country

In [19]:
import pandas as pd
# output_notebook is the function this cell calls; it must be imported here so
# the cell runs on a fresh kernel instead of relying on an earlier cell.
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource

from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

df = pd.read_csv('thor_wwii.csv')

# Total tonnage per flying country, scaled from tons to kilotons.
grouped = df.groupby('COUNTRY_FLYING_MISSION')['TOTAL_TONS'].agg(['sum'])
grouped['sum'] = grouped['sum']/1000

# The group index is read back from the source as a column named
# COUNTRY_FLYING_MISSION; its values become the categorical x range.
source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
p = figure(x_range=countries)

# One palette colour per country.
# NOTE(review): Spectral5 presumably supplies five colours - confirm the data
# never contains more than five countries.
color_map = factor_cmap(field_name='COUNTRY_FLYING_MISSION',
                        palette=Spectral5, factors=countries)

p.vbar(x='COUNTRY_FLYING_MISSION', top='sum', source=source, width=0.70, color=color_map)

p.title.text ='Munitions Dropped by Allied Country'
p.xaxis.axis_label = 'Country'
p.yaxis.axis_label = 'Kilotons of Munitions'

output_notebook()
show(p)

# Stacked Bar Charts and Sub-sampling Data: Types of Munitions Dropped by Country

In [27]:
import pandas as pd
# output_notebook is the function this cell calls; it must be imported here so
# the cell runs on a fresh kernel instead of relying on an earlier cell.
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral3

df = pd.read_csv('thor_wwii.csv')

# Keep only the missions flown by the USA and Great Britain.
mask = df['COUNTRY_FLYING_MISSION'].isin(('USA','GREAT BRITAIN'))
df = df[mask]

# Sum each munition type per country.
cols_to_aggregate = ['TONS_IC', 'TONS_FRAG', 'TONS_HE']
grouped = df.groupby('COUNTRY_FLYING_MISSION')[cols_to_aggregate].agg(['sum'])

#reduce to kilotons again
grouped[cols_to_aggregate] = grouped[cols_to_aggregate]/1000

# The aggregated columns are read back from the source under flattened
# '<column>_sum' names, which is what the stackers below refer to.
source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
p = figure(x_range=countries)

# Stackers, legend entries and palette colours are matched by position.
p.vbar_stack(stackers=['TONS_HE_sum', 'TONS_FRAG_sum', 'TONS_IC_sum'],
             x='COUNTRY_FLYING_MISSION', source=source,
             legend = ['High Explosive', 'Fragmentation', 'Incendiary'],
             width=0.5, color=Spectral3)

p.title.text ='Types of Munitions Dropped by Allied Country'
p.legend.location = 'top_left'

p.xaxis.axis_label = 'Country'
p.xgrid.grid_line_color = None	#remove the x grid lines

p.yaxis.axis_label = 'Kilotons of Munitions'

output_notebook()
show(p)

# Time-Series, Annotations, and Multiple Plots: Bombing Operations over Time

In [29]:
import pandas as pd
# output_notebook is the function this cell calls; it must be imported here so
# the cell runs on a fresh kernel instead of relying on an earlier cell.
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource

df = pd.read_csv('thor_wwii.csv')
# Parse the mission date strings (month/day/year) into real datetimes.
df['MSNDATE'] = pd.to_datetime(df['MSNDATE'], format='%m/%d/%Y')

# Total tonnage for each distinct mission date.
grouped = df.groupby('MSNDATE')['TOTAL_TONS'].agg(['sum'])

# Replace any missing totals with zero, then scale tons to kilotons.
grouped['sum'] = grouped['sum'].fillna(0)
grouped['sum'] = grouped['sum']/1000

source = ColumnDataSource(grouped)

# A datetime x axis so the dates are formatted as dates.
p = figure(x_axis_type='datetime')
p.line(x='MSNDATE', y='sum', line_width=2, source=source)
p.yaxis.axis_label = 'Kilotons of Munitions Dropped'

output_notebook()
show(p)

## Resampling Time-Series Data

In [30]:
import pandas as pd
# output_notebook is the function this cell calls; it must be imported here so
# the cell runs on a fresh kernel instead of relying on an earlier cell.
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource

df = pd.read_csv('thor_wwii.csv')
# Parse the mission date strings (month/day/year) into real datetimes.
df['MSNDATE'] = pd.to_datetime(df['MSNDATE'], format='%m/%d/%Y')

# Resample: total tonnage per monthly bin rather than per individual date.
grouped = df.groupby(pd.Grouper(key='MSNDATE', freq='M'))['TOTAL_TONS'].agg(['sum'])

# Replace any missing totals with zero, then scale tons to kilotons.
grouped['sum'] = grouped['sum'].fillna(0)
grouped['sum'] = grouped['sum']/1000

source = ColumnDataSource(grouped)

# A datetime x axis so the dates are formatted as dates.
p = figure(x_axis_type='datetime')
p.line(x='MSNDATE', y='sum', line_width=2, source=source)
p.yaxis.axis_label = 'Kilotons of Munitions Dropped'

output_notebook()
show(p)

## Annotating Trends in Plots


In [33]:
import math

import pandas as pd
# output_notebook is the function this cell calls; it must be imported here so
# the cell runs on a fresh kernel instead of relying on an earlier cell.
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource, BoxAnnotation, Label
from datetime import datetime

df = pd.read_csv('thor_wwii.csv')

#mask for the European Theater of Operations
mask = df['THEATER']=='ETO'
# Take an explicit copy so the column assignment below writes to an independent
# frame and does not raise SettingWithCopyWarning.
df = df[mask].copy()

# Parse the dates, then total the tonnage in two-week bins (in kilotons).
df['MSNDATE'] = pd.to_datetime(df['MSNDATE'], format='%m/%d/%Y')
grouped = df.groupby(pd.Grouper(key='MSNDATE', freq='2W'))['TOTAL_TONS'].agg(['sum'])
grouped['sum'] = grouped['sum'].fillna(0)
grouped['sum'] = grouped['sum']/1000

source = ColumnDataSource(grouped)

p = figure(x_axis_type="datetime", width=900, height=750)

p.line(x='MSNDATE', y='sum', line_width=2, source=source)

p.title.text = 'European Theater of Operations'
p.title.align = 'center'
p.title.text_font_size = '14pt'

# Bokeh reads a numeric orientation as radians, so the intended 45-degree tilt
# has to be converted; a bare -45 would be taken as -45 radians.
p.xaxis.major_label_orientation = math.radians(-45)
p.xaxis.major_label_text_font_style = 'bold'
p.xaxis.major_label_text_font_size = '10pt'

p.yaxis.axis_label = 'Kilotons of Munitions Dropped'
p.yaxis.axis_label_text_font_size = '12pt'
p.yaxis.axis_label_text_font_style = 'normal'
p.yaxis.major_label_text_font_size = '10pt'
p.ygrid.minor_grid_line_color = '#f7f7f7'

# The shaded box runs between the two dates labelled below and from zero to
# five kilotons above the tallest point of the line.
box_left = pd.to_datetime('6-6-1944')
box_right = pd.to_datetime('5-8-1945')
box_top = source.data['sum'].max() + 5
box_bottom = 0

box = BoxAnnotation(left=box_left, right=box_right, bottom=box_bottom, top=box_top,
                    line_width=1, line_color='black', line_dash='dashed',
                    fill_alpha=0.2, fill_color='orange')

p.add_layout(box)

# Text labels centred on the top corners of the box.
d_day = Label(x=box_left, y=box_top, text='D-Day',
              text_align='center', text_font_style='italic', text_font_size='10pt')
p.add_layout(d_day)

ve_day = Label(x=box_right, y=box_top, text='V-E Day',
               text_align='center', text_font_style='italic', text_font_size='10pt')
p.add_layout(ve_day)

output_notebook()
show(p)

# Spatial Data: Mapping Target Locations

In [32]:
import pandas as pd
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.models import ColumnDataSource, Range1d
from bokeh.layouts import layout
from bokeh.tile_providers import CARTODBPOSITRON
from pyproj import Proj, transform

def LongLat_to_EN(long, lat):
    """Project a longitude/latitude pair to Web Mercator easting/northing.

    Args:
        long: Longitude in the EPSG:4326 coordinate system.
        lat: Latitude in the EPSG:4326 coordinate system.

    Returns:
        An ``(easting, northing)`` tuple in EPSG:3857, or ``(None, None)``
        when the projection fails for this pair of coordinates.
    """
    try:
        easting, northing = transform(
            Proj(init='epsg:4326'), Proj(init='epsg:3857'), long, lat)
        return easting, northing
    except Exception:
        # Best effort: a coordinate that cannot be projected becomes a missing
        # value. Catching Exception rather than everything lets Ctrl-C and
        # interpreter shutdown still interrupt a long row-by-row run.
        return None, None

# Route Bokeh output to the notebook so the map renders inline.
output_notebook()

df = pd.read_csv('thor_wwii.csv')

# Project every target's longitude/latitude to Web Mercator, one row at a
# time, then split the resulting pairs into separate E and N columns.
projected = df.apply(
    lambda row: LongLat_to_EN(row['TGT_LONGITUDE'], row['TGT_LATITUDE']),
    axis=1)
df['E'], df['N'] = zip(*projected)

# Total tonnage per distinct projected location.
grouped = df.groupby(['E', 'N'])['TOTAL_TONS'].agg(['sum']).reset_index()
source = ColumnDataSource(grouped)

# Initial map extent, in Web Mercator units.
left, right = -2150000, 18000000
bottom, top = -5300000, 11000000
x_extent = Range1d(left, right)
y_extent = Range1d(bottom, top)

p = figure(x_range=x_extent, y_range=y_extent)
p.add_tile(CARTODBPOSITRON)

# Hollow grey circles, one per target location.
p.circle(x='E', y='N', source=source, line_color='grey', fill_color=None)

# Hide the axes on the map.
p.axis.visible = False

show(p)