### [Data Visualization with Bokeh in Python, Part I: Getting Started](https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-one-getting-started-a11655a467d4)

In [1]:
import numpy as np
import pandas as pd
from bokeh.io import show, output_notebook
from bokeh.plotting import figure

In [2]:
# Send Bokeh output to the notebook so that later show() calls render inline.
output_notebook()

In [3]:
# Load the NYC flights table. With index_col=0 the first CSV column (`year`)
# becomes the index, so it does not appear among the regular columns below.
flights = pd.read_csv(
    "data/nyc_flights.csv",
    index_col=0,
)
flights.head()

Unnamed: 0_level_0,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2013,1,1,517.0,515,2.0,830.0,819,11.0,UA,1545,N14228,EWR,IAH,227.0,1400,5,15,2013-01-01T10:00:00Z
2013,1,1,533.0,529,4.0,850.0,830,20.0,UA,1714,N24211,LGA,IAH,227.0,1416,5,29,2013-01-01T10:00:00Z
2013,1,1,542.0,540,2.0,923.0,850,33.0,AA,1141,N619AA,JFK,MIA,160.0,1089,5,40,2013-01-01T10:00:00Z
2013,1,1,544.0,545,-1.0,1004.0,1022,-18.0,B6,725,N804JB,JFK,BQN,183.0,1576,5,45,2013-01-01T10:00:00Z
2013,1,1,554.0,600,-6.0,812.0,837,-25.0,DL,461,N668DN,LGA,ATL,116.0,762,6,0,2013-01-01T11:00:00Z


In [4]:
# (rows, columns) of the loaded flights frame.
flights.shape

(336776, 18)

In [5]:
# Per-column dtypes as inferred by pandas when reading the CSV.
flights.dtypes

month               int64
day                 int64
dep_time          float64
sched_dep_time      int64
dep_delay         float64
arr_time          float64
sched_arr_time      int64
arr_delay         float64
carrier            object
flight              int64
tailnum            object
origin             object
dest               object
air_time          float64
distance            int64
hour                int64
minute              int64
time_hour          object
dtype: object

In [6]:
# Number of missing values in each column.
flights.isnull().sum()

month                0
day                  0
dep_time          8255
sched_dep_time       0
dep_delay         8255
arr_time          8713
sched_arr_time       0
arr_delay         9430
carrier              0
flight               0
tailnum           2512
origin               0
dest                 0
air_time          9430
distance             0
hour                 0
minute               0
time_hour            0
dtype: int64

In [7]:
# Summary statistics of the arrival-delay column that the histograms below use.
flights["arr_delay"].describe()

count    327346.000000
mean          6.895377
std          44.633292
min         -86.000000
25%         -17.000000
50%          -5.000000
75%          14.000000
max        1272.000000
Name: arr_delay, dtype: float64

In [8]:
# Bin arrival delays into 36 equal-width bins over [-60, 120] minutes
# (180 minutes of range / 5 minutes per bin).
arr_hist, bin_edges = np.histogram(
    flights["arr_delay"],
    bins=180 // 5,
    range=[-60, 120],
)

# One row per bin: the flight count plus the bin's left and right edges.
delays = pd.DataFrame({
    "arr_delay": arr_hist,
    "left": bin_edges[:-1],
    "right": bin_edges[1:],
})
delays.head()

Unnamed: 0,arr_delay,left,right
0,276,-60.0,-55.0
1,636,-55.0,-50.0
2,1394,-50.0,-45.0
3,2820,-45.0,-40.0
4,5339,-40.0,-35.0


In [9]:
# Blank figure for the histogram. `width`/`height` are used instead of the
# `plot_width`/`plot_height` aliases, which newer Bokeh releases no longer accept.
p = figure(width=600, height=350)

# One rectangle per histogram bin: the bin edges give the horizontal extent,
# the flight count gives the height.
p.quad(bottom=0,
       top=delays["arr_delay"],
       left=delays["left"],
       right=delays["right"],
       fill_color="red",
       line_color="black")

p.xaxis.axis_label = "Delay (min)"
p.yaxis.axis_label = "Number of flights"
# Update the text of the figure's existing Title object (same form as used
# for the per-carrier plot later in this notebook).
p.title.text = 'Histogram of Arrival Delays'
show(p)

In [10]:
from bokeh.models import ColumnDataSource
# Wrap the histogram frame so glyphs can refer to its columns by name.
src=ColumnDataSource(delays)
# List the column names held by the source.
src.data.keys()

dict_keys(['index', 'arr_delay', 'left', 'right'])

In [11]:
from bokeh.models import HoverTool

# Tooltip rows: `@name` reads a column of the data source, `$x`/`$y` are the
# cursor position in data coordinates.
hover = HoverTool(tooltips=[('Delay Interval Left ', '@left'),
                            ('(x,y)', '($x, $y)')])

# `width`/`height` are used instead of the `plot_width`/`plot_height`
# aliases, which newer Bokeh releases no longer accept.
p = figure(width=600, height=350, tools=[hover, "reset"])

# Same histogram as before, but the glyph fields are column names looked up
# in `src` rather than Series passed directly.
p.quad(source=src,
       bottom=0,
       top="arr_delay",
       left="left",
       right="right",
       fill_color="DarkBlue",
       fill_alpha=0.6,
       line_color="black")

show(p)

In [12]:
# Human-readable tooltip text for each bin: its delay interval and its flight count.
delays['f_interval'] = [
    '%d to %d minutes' % edges
    for edges in zip(delays['left'], delays['right'])
]
delays['f_count'] = [
    '%d flights' % n_flights
    for n_flights in delays['arr_delay']
]

In [13]:
# Check that the two formatted tooltip columns were added.
delays.head()

Unnamed: 0,arr_delay,left,right,f_interval,f_count
0,276,-60.0,-55.0,-60 to -55 minutes,276 flights
1,636,-55.0,-50.0,-55 to -50 minutes,636 flights
2,1394,-50.0,-45.0,-50 to -45 minutes,1394 flights
3,2820,-45.0,-40.0,-45 to -40 minutes,2820 flights
4,5339,-40.0,-35.0,-40 to -35 minutes,5339 flights


In [14]:
# Rebuild the source so it includes the formatted tooltip columns.
source = ColumnDataSource(delays)

# Show the pre-formatted interval and count strings instead of raw values.
hover = HoverTool(tooltips=[('Delay', '@f_interval'),
                            ('Num of Flights', '@f_count')])

# `width`/`height` are used instead of the `plot_width`/`plot_height`
# aliases, which newer Bokeh releases no longer accept.
p = figure(width=600, height=350, tools=[hover, "reset"])

# The hover_* properties restyle whichever bar is currently under the cursor.
p.quad(source=source,
       bottom=0,
       top="arr_delay",
       left="left",
       right="right",
       fill_color="red",
       fill_alpha=0.5,
       line_color="black",
       hover_fill_color="navy",
       hover_fill_alpha=0.5)
show(p)

### [Data Visualization with Bokeh in Python, Part II: Interactions](https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-ii-interactions-a4cf994e2512)

In [15]:
# Part II only needs the arrival delay plus the carrier code and airline name.
flights_df = pd.read_csv(
    "data/complete_flights.csv",
    usecols=["arr_delay", "name", "carrier"],
)
flights_df.head()

Unnamed: 0,arr_delay,carrier,name
0,11.0,UA,United Air Lines Inc.
1,20.0,UA,United Air Lines Inc.
2,33.0,AA,American Airlines Inc.
3,-18.0,B6,JetBlue Airways
4,-25.0,DL,Delta Air Lines Inc.


In [16]:
# (rows, columns) of the three-column frame.
flights_df.shape

(336776, 3)

In [17]:
# Summary statistics for the numeric column(s) of the frame.
flights_df.describe()

Unnamed: 0,arr_delay
count,327346.0
mean,6.895377
std,44.633292
min,-86.0
25%,-17.0
50%,-5.0
75%,14.0
max,1272.0


In [18]:
# Distinct airline names in sorted order; reused below as the carrier list.
available_carriers = np.sort(flights_df["name"].unique())
available_carriers.tolist()

['AirTran Airways Corporation',
 'Alaska Airlines Inc.',
 'American Airlines Inc.',
 'Delta Air Lines Inc.',
 'Endeavor Air Inc.',
 'Envoy Air',
 'ExpressJet Airlines Inc.',
 'Frontier Airlines Inc.',
 'Hawaiian Airlines Inc.',
 'JetBlue Airways',
 'Mesa Airlines Inc.',
 'SkyWest Airlines Inc.',
 'Southwest Airlines Co.',
 'US Airways Inc.',
 'United Air Lines Inc.',
 'Virgin America']

### Dataset for plot

In [19]:
from bokeh.palettes import Category20

def make_dataset(carrier_list, range_start=-60, range_end=120, bins_width=5):
    """Build per-carrier arrival-delay histograms as one long-format DataFrame.

    Reads the module-level ``flights_df`` frame (columns ``name`` and
    ``arr_delay``) and, for each carrier, bins its arrival delays into
    equal-width bins over ``[range_start, range_end]``.

    Parameters
    ----------
    carrier_list : iterable of str
        Airline names to include, matched against ``flights_df["name"]``.
    range_start, range_end : number
        Lower and upper delay bounds (minutes) of the histogram.
    bins_width : number
        Width of each bin in minutes; the bin count is
        ``int((range_end - range_start) / bins_width)``.

    Returns
    -------
    pandas.DataFrame
        One row per (carrier, bin) with columns ``proportion``, ``left``,
        ``right``, ``f_proportion``, ``f_interval``, ``name`` and ``color``.
        An empty ``carrier_list`` yields an empty frame with those columns.

    Raises
    ------
    AssertionError
        If ``range_start`` is not strictly less than ``range_end``.
    """
    assert range_start<range_end, "Start must be less than end"

    columns = ['proportion', 'left', 'right',
               'f_proportion', 'f_interval',
               'name', 'color']
    n_bins = int((range_end - range_start) / bins_width)
    palette = Category20[16]

    # Collect one frame per carrier and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every iteration.
    frames = []
    for i, carrier_name in enumerate(carrier_list):
        subset = flights_df[flights_df["name"] == carrier_name]

        # Missing delays fall outside every bin anyway; dropping them up
        # front just makes that explicit.
        arr_hist, edges = np.histogram(subset["arr_delay"].dropna(),
                                       bins=n_bins,
                                       range=[range_start, range_end])

        # Normalise counts to proportions so carriers of different sizes are
        # comparable. A carrier with no flights in range gets all-zero bars
        # instead of NaN from a 0/0 division.
        total = arr_hist.sum()
        proportion = arr_hist / total if total else np.zeros(len(arr_hist))

        arr_df = pd.DataFrame({"proportion": proportion,
                               "left": edges[:-1],
                               "right": edges[1:]})

        # Pre-formatted strings for the hover tooltips.
        arr_df["f_proportion"] = ["%0.5f" % value for value in arr_df["proportion"]]
        arr_df['f_interval'] = ['%d to %d minutes' % bounds
                                for bounds in zip(arr_df['left'], arr_df['right'])]

        # Assign the carrier for labels
        arr_df['name'] = carrier_name

        # Color each carrier differently; wrap around the palette so more
        # than len(palette) carriers reuse colors rather than raise IndexError.
        arr_df['color'] = palette[i % len(palette)]

        frames.append(arr_df)

    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames, ignore_index=True)
    

In [20]:
# Histogram data for every carrier, using make_dataset's default range and bin width.
final_df=make_dataset(available_carriers)
final_df.head()

Unnamed: 0,proportion,left,right,f_proportion,f_interval,name,color
0,0.0,-60.0,-55.0,0.0,-60 to -55 minutes,AirTran Airways Corporation,#1f77b4
1,0.0,-55.0,-50.0,0.0,-55 to -50 minutes,AirTran Airways Corporation,#1f77b4
2,0.0,-50.0,-45.0,0.0,-50 to -45 minutes,AirTran Airways Corporation,#1f77b4
3,0.001655,-45.0,-40.0,0.00166,-45 to -40 minutes,AirTran Airways Corporation,#1f77b4
4,0.001655,-40.0,-35.0,0.00166,-40 to -35 minutes,AirTran Airways Corporation,#1f77b4


In [21]:
def style(p):
    """Apply the notebook's title, axis-label and tick-label fonts to ``p``.

    The figure is modified in place and also returned, so the call can be
    used as ``p = style(p)``.
    """
    # Title: centred, large serif text.
    title = p.title
    title.align = 'center'
    title.text_font_size = '20pt'
    title.text_font = 'serif'

    # Both axes share the same label and tick-label typography.
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '14pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '12pt'

    return p

In [22]:
def make_plot(src):
    """Draw the per-carrier arrival-delay histogram from a column data source.

    Parameters
    ----------
    src : ColumnDataSource
        Source providing the columns referenced below: ``proportion``,
        ``left``, ``right``, ``color``, ``name``, ``f_interval`` and
        ``f_proportion``.

    Returns
    -------
    The styled Bokeh figure, ready to pass to ``show``.
    """
    # Hover tool with vline mode
    hover = HoverTool(tooltips=[('Carrier', '@name'),
                                ('Delay', '@f_interval'),
                                ('Proportion', '@f_proportion')],
                      mode='vline')

    # Blank plot with correct labels. `width`/`height` are used instead of
    # the `plot_width`/`plot_height` aliases, which newer Bokeh releases no
    # longer accept.
    p = figure(width=600, height=600,
               tools=[hover, "reset"])
    p.title.text = 'Histogram of Arrival Delays by Carrier'
    p.xaxis.axis_label = "Delay (min)"
    p.yaxis.axis_label = "Proportion"

    # Quad glyphs to create a histogram; each bar takes its color from the
    # source's 'color' column and is grouped in the legend by carrier name.
    p.quad(source=src,
           bottom=0,
           top='proportion',
           left='left',
           right='right',
           color='color',
           fill_alpha=0.7,
           hover_fill_color='color',
           legend_group='name',
           hover_fill_alpha=1.0,
           line_color='black')

    # Styling
    p = style(p)

    return p

In [23]:
# Wrap the per-carrier frame in a data source, build the figure and display it.
p = make_plot(ColumnDataSource(final_df))
show(p)


In [24]:

from bokeh.models import CategoricalColorMapper, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs

from bokeh.layouts import column, row

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

In [25]:
import numpy as np

from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Slider, TextInput
from bokeh.plotting import figure

In [28]:
def modify_doc(doc):
    """Populate ``doc`` with an interactive sine-wave demo.

    Builds a line plot of ``a*sin(k*x + w) + b`` together with a text box for
    the plot title and four sliders (offset ``b``, amplitude ``a``, phase
    ``w``, frequency ``k``), wires the widgets to Python callbacks, and adds
    the resulting layout as a root of ``doc``.

    Parameters
    ----------
    doc : bokeh Document
        The document to which the layout is added.
    """
    # Set up data
    N = 200
    x = np.linspace(0, 4*np.pi, N)
    source = ColumnDataSource(data=dict(x=x, y=np.sin(x)))

    # Set up plot
    plot = figure(height=400, width=400, title="my sine wave",
                  tools="crosshair,pan,reset,save,wheel_zoom",
                  x_range=[0, 4*np.pi], y_range=[-2.5, 2.5])

    plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

    # Set up widgets
    text = TextInput(title="title", value='my sine wave')
    offset = Slider(title="offset", value=0.0, start=-5.0, end=5.0, step=0.1)
    amplitude = Slider(title="amplitude", value=1.0, start=-5.0, end=5.0, step=0.1)
    phase = Slider(title="phase", value=0.0, start=0.0, end=2*np.pi)
    freq = Slider(title="frequency", value=1.0, start=0.1, end=5.1, step=0.1)

    # Set up callbacks
    def update_title(attrname, old, new):
        # Mirror the text box contents into the plot title.
        plot.title.text = text.value

    text.on_change('value', update_title)

    def update_data(attrname, old, new):
        # Read all four sliders, whichever one fired, and regenerate the curve.
        # The x grid never changes, so it is reused from the enclosing scope
        # instead of being rebuilt on every callback.
        a = amplitude.value
        b = offset.value
        w = phase.value
        k = freq.value

        source.data = dict(x=x, y=a*np.sin(k*x + w) + b)

    for slider in (offset, amplitude, phase, freq):
        slider.on_change('value', update_data)

    # Set up layouts and add to document
    inputs = column(text, offset, amplitude, phase, freq)
    layout = row(inputs, plot, width=800)

    doc.add_root(layout)

In [29]:
# Wrap modify_doc in a Bokeh Application so it can be shown as a live app.
handler = FunctionHandler(modify_doc)
app = Application(handler)
# NOTE(review): notebook_url is hard-coded; presumably it must match the address this notebook is served from — confirm.
show(app, notebook_url="http://localhost:8888")