In [1]:
## Histogram
# pandas and numpy for data manipulation
import pandas as pd
import numpy as np

from bokeh.plotting import figure
from bokeh.models import HoverTool
from bokeh.models import (CategoricalColorMapper, HoverTool, 
						  ColumnDataSource,TabPanel, # retiré Panel 
						  CustomJSTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
								   CheckboxButtonGroup,  # retiré Tabs
								  TableColumn, DataTable, Select)
from bokeh.layouts import column, row
from bokeh.models import Column
from bokeh.palettes import Category20_16

# Make plot with histogram and return tab
def histogram_tab(flights):
	"""Build the 'Histogram' tab showing arrival-delay histograms per airline.

	Parameters
	----------
	flights : pandas.DataFrame
		Flight records. The 'name' (carrier) and 'arr_delay' columns are read.

	Returns
	-------
	TabPanel
		Tab titled 'Histogram' containing the carrier checkboxes, the bin-width
		and delay-range sliders, and the histogram figure.
	"""

	# Columns of the plotted data; also used to build an empty frame when
	# no carrier is selected
	dataset_columns = ['proportion', 'left', 'right',
		'f_proportion', 'f_interval',
		'name', 'color']

	def make_dataset(carrier_list, range_start = -60, range_end = 120, bin_width = 5):
		"""Return a ColumnDataSource with one row per (carrier, histogram bin)."""
		range_extent = range_end - range_start

		# np.histogram rejects zero bins, which happens when the selected range
		# is narrower than a single bin; always keep at least one bin
		n_bins = max(1, int(range_extent / bin_width))

		# Collect one frame per carrier and concatenate once at the end
		frames = []
		for i, carrier_name in enumerate(carrier_list):

			# Subset to the carrier
			subset = flights[flights['name'] == carrier_name]

			# Histogram of arrival delays over the selected range
			arr_hist, edges = np.histogram(subset['arr_delay'],
				bins = n_bins,
				range = [range_start, range_end])

			# Divide the counts by the total to get a proportion; with no
			# flights in range the proportions are all zero instead of NaN
			total = arr_hist.sum()
			if total > 0:
				proportion = arr_hist / total
			else:
				proportion = np.zeros(len(arr_hist))

			arr_df = pd.DataFrame({'proportion': proportion, 'left': edges[:-1], 'right': edges[1:] })

			# Pre-formatted strings for the hover tooltips
			arr_df['f_proportion'] = ['%0.5f' % value for value in arr_df['proportion']]
			arr_df['f_interval'] = ['%d to %d minutes' % (left, right) for left, right in zip(arr_df['left'], arr_df['right'])]

			# Assign the carrier for labels
			arr_df['name'] = carrier_name

			# Color each carrier differently; wrap around if there are more
			# carriers than palette entries
			arr_df['color'] = Category20_16[i % len(Category20_16)]

			frames.append(arr_df)

		if frames:
			by_carrier = pd.concat(frames)
		else:
			by_carrier = pd.DataFrame(columns=dataset_columns)

		# Overall dataframe
		by_carrier = by_carrier.sort_values(['name', 'left'])

		return ColumnDataSource(by_carrier)

	def style(p):
		"""Apply the shared title, axis-label and tick-label fonts to p and return it."""
		# Title 
		p.title.align = 'center'
		p.title.text_font_size = '20pt'
		p.title.text_font = 'serif'

		# Axis titles
		p.xaxis.axis_label_text_font_size = '14pt'
		p.xaxis.axis_label_text_font_style = 'bold'
		p.yaxis.axis_label_text_font_size = '14pt'
		p.yaxis.axis_label_text_font_style = 'bold'

		# Tick labels
		p.xaxis.major_label_text_font_size = '12pt'
		p.yaxis.major_label_text_font_size = '12pt'

		return p

	def make_plot(src):
		"""Create the histogram figure drawing its quads from src."""
		# Blank plot with correct labels
		p = figure(width = 700, height = 700,
			title = 'Histogram of Arrival Delays by Airline',
			x_axis_label = 'Delay (min)', y_axis_label = 'Proportion')

		# Quad glyphs to create a histogram. legend_field groups the legend by
		# the values of the 'name' column; legend_label would show the literal
		# text 'name' instead
		p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
			color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend_field = 'name',
			hover_fill_alpha = 1.0, line_color = 'black')

		# Hover tool with vline mode
		hover = HoverTool(tooltips=[('Carrier', '@name'),
				('Delay', '@f_interval'),
				('Proportion', '@f_proportion')],
			mode='vline')

		p.add_tools(hover)

		# Styling
		p = style(p)

		return p

	def update(attr, old, new):
		"""Widget callback: rebuild the dataset from the current control values."""
		carriers_to_plot = [carrier_selection.labels[i] for i in carrier_selection.active]

		new_src = make_dataset(carriers_to_plot,
			range_start = range_select.value[0],
			range_end = range_select.value[1],
			bin_width = binwidth_select.value)

		src.data.update(new_src.data)

	# Carriers available for selection, in alphabetical order
	available_carriers = sorted(set(flights['name']))

	carrier_selection = CheckboxGroup(labels=available_carriers,
		active = [0, 1])
	carrier_selection.on_change('active', update)

	binwidth_select = Slider(start = 1, end = 30,
		step = 1, value = 5,
		title = 'Bin Width (min)')
	binwidth_select.on_change('value', update)

	range_select = RangeSlider(start = -60, end = 180, value = (-60, 120),
		step = 5, title = 'Range of Delays (min)')
	range_select.on_change('value', update)

	# Initial carriers and data source
	initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active]

	src = make_dataset(initial_carriers,
		range_start = range_select.value[0],
		range_end = range_select.value[1],
		bin_width = binwidth_select.value)
	p = make_plot(src)

	# Put controls in a single element
	controls = Column(carrier_selection, binwidth_select, range_select)

	# Create a row layout
	layout = row(controls, p)

	# Make a tab with the layout 
	tab = TabPanel(child=layout, title = 'Histogram')

	return tab

In [2]:
# Density plot tab

# pandas and numpy for data manipulation
import pandas as pd
import numpy as np

from scipy.stats import gaussian_kde

from bokeh.plotting import figure
from bokeh.models import (CategoricalColorMapper, HoverTool, 
						  ColumnDataSource, # retiré Panel, 
						  CustomJSTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
								  CheckboxButtonGroup, # retiré Tabs, creo que era una versión vieja
								  TableColumn, DataTable, Select)
from bokeh.layouts import column, row
from bokeh.palettes import Category20_16

def density_tab(flights):
	"""Build the 'Density Plot' tab showing a KDE of arrival delays per airline.

	Parameters
	----------
	flights : pandas.DataFrame
		Flight records. The 'name' (carrier) and 'arr_delay' columns are read.

	Returns
	-------
	TabPanel
		Tab titled 'Density Plot' containing the carrier checkboxes, the
		delay-range slider, the bandwidth controls and the density figure.
	"""

	def make_dataset(carrier_list, range_start, range_end, bandwidth):
		"""Return a ColumnDataSource with one density curve per carrier.

		bandwidth is passed to gaussian_kde as bw_method; None selects
		scipy's default bandwidth estimate.
		"""
		xs = []
		ys = []
		colors = []
		labels = []

		# Evenly spaced x values, shared by every curve
		x = np.linspace(range_start, range_end, 100)

		for i, carrier in enumerate(carrier_list):
			delays = flights.loc[flights['name'] == carrier, 'arr_delay']
			delays = delays[delays.between(range_start, range_end)]

			# gaussian_kde cannot be fitted to fewer than two distinct values
			# (e.g. a very narrow delay range), so skip such carriers
			if delays.nunique() < 2:
				continue

			kde = gaussian_kde(delays, bw_method=bandwidth)

			# Evaluate pdf at every value of x
			y = kde.pdf(x)

			# Append the values to plot
			xs.append(list(x))
			ys.append(list(y))

			# Append the colors and label; the color index follows the position
			# in carrier_list even when an earlier carrier was skipped
			colors.append(airline_colors[i])
			labels.append(carrier)

		new_src = ColumnDataSource(data={'x': xs, 'y': ys,
			'color': colors, 'label': labels})

		return new_src

	def make_plot(src):
		"""Create the density figure drawing its lines from src."""
		p = figure(width = 700, height = 700,
			title = 'Density Plot of Arrival Delays by Airline',
			x_axis_label = 'Delay (min)', y_axis_label = 'Density')

		# legend_field groups the legend by the values of the 'label' column;
		# legend_label would show the literal text 'label' instead
		p.multi_line('x', 'y', color = 'color', legend_field = 'label',
			line_width = 3,
			source = src)

		# Hover tool with next line policy
		hover = HoverTool(tooltips=[('Carrier', '@label'),
				('Delay', '$x'),
				('Density', '$y')],
			line_policy = 'next')

		# Add the hover tool and styling
		p.add_tools(hover)

		p = style(p)

		return p

	def selected_bandwidth():
		"""Return the slider bandwidth if the toggle is active, else None (auto)."""
		if bandwidth_choose.active:
			return bandwidth_select.value
		return None

	def update(attr, old, new):
		"""Widget callback: rebuild the dataset from the current control values."""
		# List of carriers to plot
		carriers_to_plot = [carrier_selection.labels[i] for i in
			carrier_selection.active]

		new_src = make_dataset(carriers_to_plot,
			range_start = range_select.value[0],
			range_end = range_select.value[1],
			bandwidth = selected_bandwidth())

		src.data.update(new_src.data)

	def style(p):
		"""Apply the shared title, axis-label and tick-label fonts to p and return it."""
		# Title 
		p.title.align = 'center'
		p.title.text_font_size = '20pt'
		p.title.text_font = 'serif'

		# Axis titles
		p.xaxis.axis_label_text_font_size = '14pt'
		p.xaxis.axis_label_text_font_style = 'bold'
		p.yaxis.axis_label_text_font_size = '14pt'
		p.yaxis.axis_label_text_font_style = 'bold'

		# Tick labels
		p.xaxis.major_label_text_font_size = '12pt'
		p.yaxis.major_label_text_font_size = '12pt'

		return p

	# Carriers and colors
	available_carriers = sorted(set(flights['name']))
	airline_colors = sorted(Category20_16)

	# Carriers to plot
	carrier_selection = CheckboxGroup(labels=available_carriers,
		active = [0, 1])
	carrier_selection.on_change('active', update)

	range_select = RangeSlider(start = -60, end = 180, value = (-60, 120),
		step = 5, title = 'Range of Delays (min)')
	range_select.on_change('value', update)

	# Initial carriers and data source
	initial_carriers = [carrier_selection.labels[i] for
		i in carrier_selection.active]

	# Bandwidth of kernel
	bandwidth_select = Slider(start = 0.1, end = 5,
		step = 0.1, value = 0.5,
		title = 'Bandwidth for Density Plot')
	bandwidth_select.on_change('value', update)

	# Whether to set the bandwidth or have it done automatically
	bandwidth_choose = CheckboxButtonGroup(
		labels=['Choose Bandwidth (Else Auto)'], active = [])
	bandwidth_choose.on_change('active', update)

	# Make the density data source. The bandwidth comes from the same rule as
	# in update(), so the first render matches the (initially inactive) toggle
	src = make_dataset(initial_carriers,
		range_start = range_select.value[0],
		range_end = range_select.value[1],
		bandwidth = selected_bandwidth())

	# Make the density plot (make_plot already applies the styling)
	p = make_plot(src)

	# Put controls in a single element
	controls = Column(carrier_selection, range_select,
		bandwidth_select, bandwidth_choose)

	# Create a row layout
	layout = row(controls, p)

	# Make a tab with the layout 
	tab = TabPanel(child=layout, title = 'Density Plot')

	return tab

In [3]:
# Flight map tab

# pandas and numpy for data manipulation
import pandas as pd
import numpy as np

from bokeh.plotting import figure
from bokeh.models import (CategoricalColorMapper, HoverTool, 
						  ColumnDataSource, # Panel, 
						  CustomJSTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
								  CheckboxButtonGroup, 
								  TableColumn, DataTable, Select)
from bokeh.layouts import column, row #,WidgetBox
from bokeh.palettes import Category20_16

def map_tab(map_data, states):
	"""Build the 'Flight Map' tab drawing each carrier's routes over US states.

	Parameters
	----------
	map_data : pandas.DataFrame
		Route data with two-level column labels, e.g.
		('carrier', 'Unnamed: 3_level_1') and ('arr_delay', 'mean').
	states : dict
		Maps a state code to a dict with 'lons' and 'lats' entries. The
		'HI' and 'AK' entries are left out of the map; the dict itself is
		not modified.

	Returns
	-------
	TabPanel
		Tab titled 'Flight Map' containing the carrier checkboxes and the map.
	"""

	def make_dataset(carrier_list):
		"""Return a ColumnDataSource with one row per route of the given carriers."""
		# Subset to the carriers in the specified list
		subset = map_data[map_data['carrier']['Unnamed: 3_level_1'].isin(
			carrier_list)]

		# Dictionary mapping carriers to colors
		color_dict = dict(zip(available_carriers, airline_colors))

		# Lists of data for plotting, keyed by data source column name
		data = {key: [] for key in (
			'carrier', 'flight_x', 'flight_y',
			'origin_x_loc', 'origin_y_loc',
			'dest_x_loc', 'dest_y_loc',
			'color', 'count', 'mean_delay',
			'origin', 'dest', 'distance',
			'min_delay', 'max_delay')}

		# Iterate through each carrier
		for carrier in carrier_list:

			# Subset to the carrier
			sub_carrier = subset[subset['carrier']['Unnamed: 3_level_1'] == carrier]

			# Iterate through each route (origin to destination) for the carrier.
			# The loop variable is not called `row`, to avoid shadowing the
			# bokeh.layouts.row function used for the page layout
			for _, route in sub_carrier.iterrows():

				# Origin and destination x (longitude) and y (latitude)
				start_x = route['start_long']['Unnamed: 20_level_1']
				start_y = route['start_lati']['Unnamed: 21_level_1']
				end_x = route['end_long']['Unnamed: 22_level_1']
				end_y = route['end_lati']['Unnamed: 23_level_1']

				data['color'].append(color_dict[carrier])
				data['carrier'].append(carrier)
				data['origin'].append(route['origin']['Unnamed: 1_level_1'])
				data['dest'].append(route['dest']['Unnamed: 2_level_1'])

				data['origin_x_loc'].append(start_x)
				data['origin_y_loc'].append(start_y)
				data['dest_x_loc'].append(end_x)
				data['dest_y_loc'].append(end_y)

				# Each flight line runs from the origin to the destination
				data['flight_x'].append([start_x, end_x])
				data['flight_y'].append([start_y, end_y])

				# Stats about the particular route
				data['count'].append(route['arr_delay']['count'])
				data['mean_delay'].append(route['arr_delay']['mean'])
				data['min_delay'].append(route['arr_delay']['min'])
				data['max_delay'].append(route['arr_delay']['max'])
				data['distance'].append(route['distance']['mean'])

		# Create a column data source from the lists
		return ColumnDataSource(data = data)

	def make_plot(src, xs, ys):
		"""Create the map figure: state outlines from xs/ys, routes from src."""
		# Create the plot with no axes or grid
		p = figure(width = 1100, height = 700, title = 'Map of 2013 Flight Delays Departing NYC')
		p.xaxis.visible = False
		p.yaxis.visible = False
		p.grid.visible = False

		# Each glyph method below already adds its renderer to the plot, so the
		# renderers are not appended to p.renderers a second time

		# States are drawn as patches
		p.patches(xs, ys, fill_alpha=0.2, fill_color = 'lightgray',
			line_color="#884444", line_width=2, line_alpha=0.8)

		# Airline flights are drawn as lines. legend_field groups the legend
		# by the values of the 'carrier' column; legend_label would show the
		# literal text 'carrier' instead
		lines_glyph = p.multi_line('flight_x', 'flight_y', color = 'color', line_width = 2,
			line_alpha = 0.8, hover_line_alpha = 1.0, hover_line_color = 'color',
			legend_field = 'carrier', source = src)

		# Origins are drawn as squares (all in NYC)
		p.scatter('origin_x_loc', 'origin_y_loc', marker = 'square', color = 'color', size = 10,
			source = src, legend_field = 'carrier')

		# Destinations are drawn as circles
		circles_glyph = p.scatter('dest_x_loc', 'dest_y_loc', marker = 'circle', color = 'color', size = 10,
			source = src, legend_field = 'carrier')

		# Hover tooltip for flight lines, assign only the line renderer
		hover_line = HoverTool(tooltips=[('Airline', '@carrier'),
				('Number of Flights', '@count'),
				('Average Delay', '@mean_delay{0.0}'),
				('Max Delay', '@max_delay{0.0}'),
				('Min Delay', '@min_delay{0.0}')],
			line_policy = 'next',
			renderers = [lines_glyph])

		# Hover tooltip for destinations, assign only the circle renderer
		hover_circle = HoverTool(tooltips=[('Origin', '@origin'),
				('Dest', '@dest'),
				('Distance (miles)', '@distance')],
			renderers = [circles_glyph])

		# Position the legend so it does not overlap plot
		p.legend.location = (10, 50)

		# Add the hovertools to the figure
		p.add_tools(hover_line)
		p.add_tools(hover_circle)

		p = style(p)

		return p

	# Styling 
	def style(p):
		"""Apply the shared title, axis-label and tick-label fonts to p and return it."""
		# Title 
		p.title.align = 'center'
		p.title.text_font_size = '20pt'
		p.title.text_font = 'serif'

		# Axis titles
		p.xaxis.axis_label_text_font_size = '14pt'
		p.xaxis.axis_label_text_font_style = 'bold'
		p.yaxis.axis_label_text_font_size = '14pt'
		p.yaxis.axis_label_text_font_style = 'bold'

		# Tick labels
		p.xaxis.major_label_text_font_size = '12pt'
		p.yaxis.major_label_text_font_size = '12pt'

		return p

	# Show selected carriers on map
	def update(attr, old, new):
		"""Widget callback: rebuild the dataset for the checked carriers."""
		# Find list of carriers and make a new data set
		carrier_list = [carrier_selection.labels[i] for i in carrier_selection.active]
		new_src = make_dataset(carrier_list)

		src.data.update(new_src.data)

	available_carriers = sorted(set(map_data['carrier']['Unnamed: 3_level_1']))
	airline_colors = sorted(Category20_16)

	# Leave Alaska and Hawaii out of the map without modifying the caller's dict
	mainland = [shape for code, shape in states.items() if code not in ('HI', 'AK')]

	# Put longitudes and latitudes in lists
	xs = [shape['lons'] for shape in mainland]
	ys = [shape['lats'] for shape in mainland]

	# CheckboxGroup to select carriers for plotting
	carrier_selection = CheckboxGroup(labels=available_carriers, active = [0, 1])
	carrier_selection.on_change('active', update)

	# Initial carriers to plot
	initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active]

	# Initial source and plot
	src = make_dataset(initial_carriers)

	p = make_plot(src, xs, ys)

	# Layout setup
	layout = row(carrier_selection, p)
	tab = TabPanel(child = layout, title = 'Flight Map')

	return tab

In [4]:
# Route details tab

# pandas and numpy for data manipulation
import pandas as pd
import numpy as np

from bokeh.plotting import figure

from bokeh.models import (CategoricalColorMapper, HoverTool, 
						  ColumnDataSource,#  Panel, 
						  CustomJSTickFormatter, SingleIntervalTicker, LinearAxis)

from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
								  CheckboxButtonGroup, 
								  TableColumn, DataTable, Select)

from bokeh.layouts import column, row#, WidgetBox
from bokeh.palettes import Category20_16

# List of lists to single list
from itertools import chain

def route_tab(flights):
	"""Build the 'Route Details' tab: arrival delays per carrier on one route.

	Parameters
	----------
	flights : pandas.DataFrame
		Flight records. The 'origin', 'dest', 'name' (carrier) and
		'arr_delay' columns are read.

	Returns
	-------
	TabPanel
		Tab titled 'Route Details' containing the origin and destination
		selectors and the delay plot.
	"""
	# Local import: used to embed the tick labels in the JavaScript formatter
	import json

	def make_dataset(origin, destination):
		"""Return (ColumnDataSource, label_dict) for the selected route.

		x holds the arrival delays and y the integer index of the carrier;
		label_dict maps each index back to the carrier name.
		"""
		# Subset to the selected route
		subset = flights[(flights['dest'] == destination) & (flights['origin'] == origin)]

		# Carriers who cover the route, sorted so every carrier gets the same
		# y position on each run (set iteration order is not stable)
		carriers = sorted(set(subset['name']))

		# x is the delay, y is the airline
		xs = []
		ys = []
		label_dict = {}

		# Iterate through the unique carriers
		for i, carrier in enumerate(carriers):

			# Delays for the carrier, with its index repeated once per flight
			delays = list(subset.loc[subset['name'] == carrier, 'arr_delay'])
			xs.extend(delays)
			ys.extend([i] * len(delays))

			# Map the index to the carrier
			label_dict[i] = carrier

		new_src = ColumnDataSource(data = {'x': xs, 'y': ys})

		return new_src, label_dict

	def label_y_axis(p, label_dict):
		"""Make the y axis of p show carrier names instead of carrier indices."""
		p.yaxis[0].ticker.desired_num_ticks = len(label_dict)

		# json.dumps yields a valid JavaScript object literal whatever
		# characters the carrier names contain; ticks that do not correspond
		# to a carrier get an empty label
		p.yaxis.formatter = CustomJSTickFormatter(code = """
			var labels = %s;
			return labels[tick] || "";
			""" % json.dumps(label_dict))

	def make_plot(src, origin, destination, label_dict):
		"""Create the delay figure for the route, drawing its points from src."""
		p = figure(width = 800, height = 400, x_axis_label = 'Delay (min)', y_axis_label = '',
			title = 'Arrival Delays for Flights from %s to %s' % (origin, destination))

		p.scatter('x', 'y', source = src, alpha = 0.4,
			color = 'navy', size = 15)

		label_y_axis(p, label_dict)

		return p

	def style(p):
		"""Apply the shared title, axis-label and tick-label fonts to p and return it."""
		# Title 
		p.title.align = 'center'
		p.title.text_font_size = '20pt'
		p.title.text_font = 'serif'

		# Axis titles
		p.xaxis.axis_label_text_font_size = '14pt'
		p.xaxis.axis_label_text_font_style = 'bold'
		p.yaxis.axis_label_text_font_size = '14pt'
		p.yaxis.axis_label_text_font_style = 'bold'

		# Tick labels
		p.xaxis.major_label_text_font_size = '12pt'
		p.yaxis.major_label_text_font_size = '12pt'

		return p

	def update(attr, old, new):
		"""Widget callback: show the route chosen in the two selectors."""
		# Origin and destination determine values displayed
		origin = origin_select.value
		destination = dest_select.value

		# Get the new dataset
		new_src, label_dict = make_dataset(origin, destination)

		if len(label_dict) == 0:
			p.title.text = 'No Flights on Record from %s to %s' % (origin, destination)

		else:
			label_y_axis(p, label_dict)
			p.title.text = 'Arrival Delays for Flights from %s to %s' % (origin, destination)

		src.data.update(new_src.data)

	# Sorted so the dropdown options appear in a stable, alphabetical order
	origins = sorted(set(flights['origin']))
	dests = sorted(set(flights['dest']))

	origin_select = Select(title = 'Origin', value = 'JFK', options = origins)
	origin_select.on_change('value', update)

	dest_select = Select(title = 'Destination', value = 'MIA', options = dests)
	dest_select.on_change('value', update)

	initial_origin = origin_select.value
	initial_dest = dest_select.value

	src, label_dict = make_dataset(initial_origin, initial_dest)

	p = make_plot(src, initial_origin, initial_dest, label_dict)
	p = style(p)

	controls = Column(origin_select, dest_select)
	layout = row(controls, p)

	tab = TabPanel(child = layout, title = 'Route Details')

	return tab

In [5]:
## Summary table tab

# pandas and numpy for data manipulation
import pandas as pd
import numpy as np

from bokeh.models import ColumnDataSource# , Panel
from bokeh.models.widgets import TableColumn, DataTable

def table_tab(flights):
	"""Build the 'Summary Table' tab of arrival-delay statistics per airline.

	flights is a DataFrame whose 'name' and 'arr_delay' columns are read.
	Returns a TabPanel titled 'Summary Table' wrapping a DataTable.
	"""

	# Summary stats per carrier, with display-friendly column names and the
	# mean rounded to two decimals
	renamed = {'name': 'airline', 'count': 'flights', '50%':'median'}
	carrier_stats = (
		flights.groupby('name')['arr_delay']
		.describe()
		.reset_index()
		.rename(columns=renamed)
		.assign(mean=lambda stats: stats['mean'].round(2))
	)

	# (field, title) for each column shown in the table, in display order
	column_specs = [
		('airline', 'Airline'),
		('flights', 'Number of Flights'),
		('min', 'Min Delay'),
		('mean', 'Mean Delay'),
		('median', 'Median Delay'),
		('max', 'Max Delay'),
	]
	table_columns = [TableColumn(field=field, title=title)
		for field, title in column_specs]

	carrier_table = DataTable(source=ColumnDataSource(carrier_stats),
		columns=table_columns, width=1000)

	return TabPanel(child = carrier_table, title = 'Summary Table')

In [6]:
## Main application: assemble the tabs

# Pandas for data management
import pandas as pd

from bokeh.models import Tabs
# os methods for manipulating paths
from os.path import dirname, join

# Bokeh basics 
from bokeh.io import curdoc
#from bokeh.plotting import figure, curdoc
#from bokeh.models.widgets import Tabs


# Each tab is drawn by one script
# from scripts.histogram import histogram_tab
# from scripts.density import density_tab
# from scripts.table import table_tab
# from scripts.draw_map import map_tab
# from scripts.routes import route_tab

# Using included state data from Bokeh for map
from bokeh.sampledata.us_states import data as states

# Locations of the two input CSV files
FLIGHTS_CSV_URL = "https://raw.githubusercontent.com/WillKoehrsen/Bokeh-Python-Visualization/master/bokeh_app/data/flights.csv"
FLIGHTS_MAP_CSV_URL = "https://raw.githubusercontent.com/WillKoehrsen/Bokeh-Python-Visualization/master/bokeh_app/data/flights_map.csv"

# Flight records, without the rows that have missing values
flights = pd.read_csv(FLIGHTS_CSV_URL).dropna()

# Formatted flight delay data for the map: two header rows, first column as index
map_data = pd.read_csv(FLIGHTS_MAP_CSV_URL, header=[0,1], index_col=0)

# Build one tab per view
tab1 = histogram_tab(flights)
tab2 = density_tab(flights)
tab3 = table_tab(flights)
tab4 = map_tab(map_data, states)
tab5 = route_tab(flights)

# Combine the tabs into one application, in display order
all_tabs = [tab1, tab2, tab3, tab4, tab5]
tabs = Tabs(tabs = all_tabs)

# Attach the application to the current document so the server displays it
curdoc().add_root(tabs)


In [14]:
# Serve mainfr.ipynb as a Bokeh application on port 5007 and open it in a browser (--show).
!bokeh serve --show --port 5007 mainfr.ipynb

^C
