In [357]:
import bokeh as bk
import numpy as np
from bokeh.plotting import figure, output_file, show, output_notebook
import pandas as pd
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.models.tools import HoverTool

# Log-scaled colour mapper over the Viridis6 palette (imported above as `palette`).
# NOTE(review): no later cell visible in this notebook references it.
color_mapper = LogColorMapper(palette=palette)

In [273]:
# Column -> dtype map. Its keys select which CSV columns are loaded and its
# values keep the multi-million-row frame compact in memory.
#
# DepTime is an hhmm clock reading (values run up to 2400). float16 has an
# 11-bit significand, so integers above 2048 are rounded to the nearest even
# value (2359 would be stored as 2360), which corrupts the hour/minute derived
# from it later. float32 represents every hhmm value exactly.
#
# NOTE(review): the delay columns stay float16; delays above 2048 minutes are
# likewise rounded to even values -- widen them if exact minutes matter.
dtype = {'DayOfWeek': np.uint8, 'DayofMonth': np.uint8, 'Month': np.uint8,
         'Cancelled': np.uint8, 'Year': np.uint16, 'FlightNum': np.uint16,
         'Distance': np.uint16, 'UniqueCarrier': str, 'CancellationCode': str,
         'Origin': str, 'Dest': str, 'ArrDelay': np.float16,
         'DepDelay': np.float16, 'CarrierDelay': np.float16,
         'WeatherDelay': np.float16, 'NASDelay': np.float16,
         'SecurityDelay': np.float16, 'LateAircraftDelay': np.float16,
         'DepTime': np.float32}

In [298]:
# Read the bz2-compressed 2008 flights file, loading only the columns named in
# `dtype` and parsing each one with its declared dtype.
path = 'data/2008.csv.bz2'
flights_df = pd.read_csv(
    path,
    usecols=list(dtype),
    dtype=dtype,
)

In [300]:
# Keep only rows whose departure time is a finite number.
# The explicit .copy() makes the result an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
flights_df = flights_df[np.isfinite(flights_df['DepTime'])].copy()

In [301]:
# Hour part of the hhmm departure time; a reading of 24 is folded to hour 0.
# The replacement is assigned back instead of being applied with inplace=True
# on the selected column: that form is chained assignment, which pandas warns
# about and which does not update the frame under copy-on-write.
flights_df['DepHour'] = (flights_df['DepTime'] // 100).replace(to_replace=24, value=0)

In [302]:
#flights_df = flights_df.replace(np.nan, '', regex=True)

In [303]:
# Minute part of the hhmm departure time.
# Taken directly from DepTime with a modulo rather than as
# DepTime - DepHour*100: DepHour has already had 24 mapped to 0, so the
# subtraction turned a 2400 departure into 2400 "minutes". The modulo also
# keeps this cell independent of DepHour's current contents.
flights_df['DepMin'] = flights_df['DepTime'] % 100

In [304]:
# Render each hour as text with no decimal part (e.g. 20.0 -> '20').
flights_df['DepHour'] = flights_df['DepHour'].map('{:.0f}'.format)

In [305]:
# Render each minute value as text with no decimal part (e.g. 3.0 -> '3').
flights_df['DepMin'] = flights_df['DepMin'].map('{:.0f}'.format)

In [306]:
# Display the frame's (rows, columns) after the DepTime filter and the two
# derived columns (DepHour, DepMin) added above.
flights_df.shape

(6873482, 21)

In [317]:
# Calendar date of each flight, assembled from its year / month / day columns.
# The day column is renamed to 'Day' for pd.to_datetime's column-based assembly.
flights_df['Date'] = pd.to_datetime(
    flights_df[['Year', 'Month', 'DayofMonth']].rename(columns={'DayofMonth': 'Day'})
)

# Full departure timestamp: the same date parts plus the hour and minute
# columns derived from DepTime.
flights_df['DateTime'] = pd.to_datetime(
    flights_df[['Year', 'Month', 'DayofMonth', 'DepHour', 'DepMin']].rename(
        columns={'DayofMonth': 'Day', 'DepHour': 'Hour', 'DepMin': 'Minute'}
    )
)

In [318]:
# Preview the first rows, including the derived DepHour, DepMin, Date and
# DateTime columns.
flights_df.head()

Unnamed: 0,Year,Month,DayofMonth,DayOfWeek,DepTime,UniqueCarrier,FlightNum,ArrDelay,DepDelay,Origin,...,CancellationCode,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,DepHour,DepMin,Date,DateTime
0,2008,1,3,4,2003.0,WN,335,-14.0,8.0,IAD,...,,,,,,,20,3,2008-01-03,2008-01-03 20:03:00
1,2008,1,3,4,754.0,WN,3231,2.0,19.0,IAD,...,,,,,,,7,54,2008-01-03,2008-01-03 07:54:00
2,2008,1,3,4,628.0,WN,448,14.0,8.0,IND,...,,,,,,,6,28,2008-01-03,2008-01-03 06:28:00
3,2008,1,3,4,926.0,WN,1746,-6.0,-4.0,IND,...,,,,,,,9,26,2008-01-03,2008-01-03 09:26:00
4,2008,1,3,4,1829.0,WN,3920,34.0,34.0,IND,...,,2.0,0.0,0.0,0.0,32.0,18,29,2008-01-03,2008-01-03 18:29:00


In [326]:
# Count the rows for each calendar date; the result has one row per date with
# columns 'Date' and 'Count'.
num_flights_by_date = (
    flights_df
    .groupby('Date')
    .size()
    .reset_index(name='Count')
)

In [343]:
# Wrap the per-day counts in a Bokeh data source so that glyphs and tooltips
# can refer to its 'Date' and 'Count' columns by name.
source = ColumnDataSource(data=num_flights_by_date)

In [368]:
# Line chart of the daily flight counts held in `source`, rendered inline in
# the notebook with a hover tooltip and no toolbar.
p = figure(title="Number of flights per day in 2008", x_axis_type='datetime',
           toolbar_location=None, tools='')

p.line(x='Date', y='Count', source=source)
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Number of flights'
p.xgrid.grid_line_color = None
# No legend configuration: the single line glyph carries no legend label, so
# the figure has no legend and setting p.legend.* only produced warnings.

hover = HoverTool(
    tooltips=[
        ('Date', '@Date{%F}'),
        ('Count', '@Count'),
    ],
    # The formatter key must match the tooltip field spec, including the '@'
    # prefix; without it the {%F} date format is not applied to the Date column.
    formatters={'@Date': 'datetime'},
)
p.add_tools(hover)

output_notebook()
show(p)