# Generic Timeseries Dashboard Example 

### Data: GitHub Repo Issues
### Repos
1. cobra
2. ipfs
3. tf
4. pandas
5. d3
6. angular
7. jsipfs

In [1]:
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.io import push_notebook, show, output_notebook
from bokeh.layouts import row, column
from bokeh.layouts import row, column, widgetbox
from bokeh.models import ColumnDataSource, CustomJS, Panel, HoverTool, CategoricalColorMapper, WheelZoomTool, Range1d
from bokeh.models.widgets import PreText, Select, RangeSlider, Slider, DateRangeSlider, DateSlider, Tabs
from bokeh.plotting import figure

import bokeh
import bokeh.palettes as colors
import cPickle
import datetime
import gzip
import json
import locale
import numpy as np
import operator
import os
import pandas as pd
# Use US number formatting so that locale.atoi() can parse the grouped
# sample-size options (e.g. "1,000") offered by the dashboard's Select widget.
locale.setlocale( locale.LC_ALL, 'en_US.UTF-8' ) 
# Configure Bokeh to render its output inline in the notebook.
output_notebook()

## prepare data

In [2]:
# Data path: gzip-compressed pickle holding the issues DataFrame.
# The path is relative, so it is resolved against the current working directory.
data_path = "7reposdf.pkl.gz"

In [3]:
# load DataFrame
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with gzip.open(data_path, "rb") as fp:
  # the gzip stream is decompressed transparently while cPickle reads from it
  df = cPickle.load(fp)

In [4]:
# Columns retained for the dashboard; the commented-out entries are
# available in the raw data but currently unused.
cols = [
  'repo',
  'id',
  'created_at',
  'closed_at',
  'comments',
  'labels',
  'author_association',
  'state',
  # 'milestone',
  # 'updated_at',
  # 'title',
  # 'body',
]

In [5]:
# select cols
df_t = df[cols]
# limit to closed issues only; .copy() gives an independent frame so the
# derived columns below are not written into a view of df
df_c = df_t[df_t["state"] == "closed"].copy()
# calculate time open: vectorized column subtraction instead of a row-wise
# apply (much faster, and also well-defined when the frame is empty)
# NOTE(review): assumes closed_at/created_at are datetime64 columns -- confirm
df_c["time_open"] = df_c["closed_at"] - df_c["created_at"]
# convert time open to number/fraction of days
# (seconds -> minutes -> hours -> days, same division order as before)
df_c["time_open_d"] = df_c["time_open"].dt.total_seconds() / 60. / 60. / 24.0

## classify data based on type and value distribution

In [6]:
# every column of the closed-issue frame, in sorted order
columns = sorted(df_c.columns)

# unit codes used to build the dtype names below
_TIME_UNITS = ['Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as']

# datetime types
dt_types = ['datetime64[%s]' % unit for unit in _TIME_UNITS]
# time span types
ts_types = ['timedelta64[%s]' % unit for unit in _TIME_UNITS]

In [7]:
# look up each column's dtype once
col_dtypes = {name: df_c[name].dtype for name in columns}

# columns whose dtype matches one of the datetime / timedelta dtype names
datetimes = [name for name in columns if col_dtypes[name] in dt_types]
timespans = [name for name in columns if col_dtypes[name] in ts_types]

# object/string-typed columns are treated as discrete (categorical)
discrete = [name for name in columns if col_dtypes[name] in [object, str, unicode]]
# everything that is not discrete (still includes datetimes and timespans)
continuous_all = [name for name in columns if name not in discrete]
# columns that are neither discrete nor time-like
excluded = discrete + datetimes + timespans
continuous = [name for name in columns if name not in excluded]
# continuous columns with more than 20 distinct values
quantileable = [name for name in continuous if len(df_c[name].unique()) > 20]

In [8]:
# set some additional variables for plotting
# Date bounds for the range slider, taken from the last (alphabetically)
# datetime column. Series.min()/max() are vectorized and skip NaT, whereas the
# builtin min()/max() iterate in Python and give order-dependent results when
# NaT is present (every comparison against NaT is False).
date_col = df_c[datetimes[-1]]
min_date = date_col.min()
max_date = date_col.max()
# marker sizes used when binning the "size" column
SIZES = list(range(6, 22, 3))
# palette used for the "color" column (11 colors)
COLORS = bokeh.palettes.Spectral11

## define bokeh plot configuration

In [9]:
def modify_doc(doc):
  """Build the interactive dashboard and attach it to the Bokeh document `doc`.

  Creates the control widgets (sample size, x/y axis, size, color, date range),
  draws the initial scatter plot and registers callbacks so that any widget
  change rebuilds the figure in place.

  Relies on notebook-level names defined in earlier cells: `df_c`, `columns`,
  `datetimes`, `discrete`, `quantileable`, `min_date`, `max_date`, `SIZES`
  and `COLORS`.

  Args:
    doc: the Bokeh document the layout is added to as a root.
  """
  # TODO: a lot of low-hanging re-org would make this much more legible, less fragile
  def create_figure():
    """Return a new scatter figure reflecting the current widget values."""
    d_min, d_max = range_select.value
    # TODO: update to downsample *after* filters have been applied
    #df_f = df_c[(df_c[x.value] > d_min) & (df_c[x.value] < d_max)]
    df_f = df_c
    # ---------------------------------------------------------
    # sample
    # ---------------------------------------------------------
    # Compare the widget's *value* (not the widget itself) against "All";
    # otherwise "All" falls through to locale.atoi() and raises.
    if n.value == "All":
      # copy so the helper columns added below never leak into the shared df_c
      df_sample = df_f.copy()
    else:
      samplesize = n.value if isinstance(n.value, int) else locale.atoi(n.value)
      # DataFrame.sample raises when asked for more rows than exist
      samplesize = min(samplesize, len(df_f))
      df_sample = df_f.sample(samplesize).copy()
    # ---------------------------------------------------------
    # X and Y axes
    # ---------------------------------------------------------
    ys = df_sample[y.value].values
    x_title = x.value.title()
    y_title = y.value.title()

    # kwargs to pass to the plot
    kw = dict()
    if y.value in discrete:
      # categorical axis: the range is the sorted list of distinct values
      kw['y_range'] = sorted(set(ys))
    else:
      # only build a numeric range when the y values are not categorical
      max_y = max(ys)
      kw["y_range"] = Range1d(-5, max_y, bounds=(-5, max_y))
    kw['title'] = "%s vs %s" % (x_title, y_title)

    # set default tools
    tools_main = ",".join(["pan", "crosshair", "box_zoom", "wheel_zoom", "reset", "save", ])
    # ---------------------------------------------------------
    # Draw figure
    # ---------------------------------------------------------
    p = figure(plot_height=600,
               plot_width=600,
               tools=tools_main,
               active_scroll="wheel_zoom",
               x_axis_type="datetime",
               output_backend="webgl",
               **kw)
    # make a few tweaks to plot config
    p.x_range = Range1d(d_min, d_max)
    p.xaxis.axis_label = x_title
    p.yaxis.axis_label = y_title
    if x.value in discrete:
      # np.pi directly: the pd.np alias is deprecated
      p.xaxis.major_label_orientation = np.pi / 4

    # ---------------------------------------------------------
    # size axis
    # ---------------------------------------------------------
    df_sample["size"] = 9
    # TODO: normalize size around median
    if size.value != 'None':
      size_values = df_sample[size.value].values
      try:
        groups = pd.qcut(size_values, len(SIZES))
      except ValueError:
        # TODO: make more robust
        # qcut raises ValueError when bin edges are not unique; retry with
        # half as many bins (integer division keeps the bin count an int)
        groups = pd.qcut(size_values, len(SIZES) // 2)
      df_sample["size"] = [SIZES[code] for code in groups.codes]
    # ---------------------------------------------------------
    # color axis
    # ---------------------------------------------------------
    df_sample["color"] = "#31AADE"
    if color.value != 'None':
      color_values = df_sample[color.value].values
      if color.value in discrete:
        categories = sorted(df_sample[color.value].unique())
        # cycle through the palette so that more categories than colors
        # cannot raise an IndexError
        color_groups = {category: COLORS[i % len(COLORS)]
                        for i, category in enumerate(categories)}
        df_sample["color"] = [color_groups[value] for value in color_values]
      else:
        groups = pd.qcut(color_values, len(COLORS))
        df_sample["color"] = [COLORS[code] for code in groups.codes]

    # ---------------------------------------------------------
    # Plot Data/Draw Glyphs
    # ---------------------------------------------------------
    source = ColumnDataSource(df_sample)
    p.circle(x=x.value,
             y=y.value,
             source=source,
             color='color',
             size='size',
             line_color="white",
             alpha=0.6,
             hover_color='orange',
             selection_color="orange",
             hover_alpha=0.5)

    # Modify plot tools
    # TODO: make hover tool parameterized
    hover = HoverTool(tooltips=[('repo', '@repo'),
                                ('author','@author_association')])
    # extra wheel-zoom tool restricted to the horizontal (width) dimension
    vzoom = WheelZoomTool(dimensions='width', maintain_focus=False)
    p.add_tools(hover)
    p.add_tools(vzoom)

    #TODO: add in row filter checkbox functionality
    # checkboxes = None
    # return p, checkboxes
    return p

  def update(attr, old, new):
    """ function called by any widget's `on_change` event; rebuilds figure"""
    # the figure is the second child of the first row of the layout
    layout.children[0].children[1] = create_figure()

  # ---------------------------------------------------------
  # controls
  # ---------------------------------------------------------

  # sample size widget
  n = Select(title="Sample Size (n)", value="100", options=["All"] + ["100", "1,000", "10,000"])
  n.on_change('value', update)

  # row filter widget
  # TODO: reactivate
  row_filter = Select(title="row filter", value="None", options=["None"]+discrete, disabled=True)
  row_filter.on_change('value', update)

  # x-axis select widget
  x = Select(title='x axis', value='created_at', options=datetimes)
  x.on_change('value', update)

  # y-axis select widget
  y = Select(title='y axis', value='time_open_d', options=columns)
  y.on_change('value', update)

  # size-axis select widget
  size = Select(title='Size', value='time_open_d', options=['None'] + quantileable)
  size.on_change('value', update)

  # color-axis select widget
  color = Select(title='Color', value='None', options=['None'] + discrete + quantileable)
  color.on_change('value', update)

  # timeseries x-axis range filter widget
  range_select = DateRangeSlider(start = min_date, end = max_date, value = (min_date, max_date), step = 1, title = 'Date Range')
  range_select.on_change('value', update)

  # TODO: reactivate rowfilter, checkboxes
  # p, checkboxes = create_figure()
  # if checkboxes != None:
  #   controls = widgetbox([n, x, y, color, size, row_filter, checkboxes], width=200)
  # else:
  #   controls = widgetbox([n, x, y, color, size, row_filter], width=200)

  #plot
  p = create_figure()

  # left control panel
  controls = widgetbox([n, x, y, color, size, row_filter], width=200)

  # bottom control panel
  controls2 = widgetbox([range_select], width=800)

  # controls and plot side by side, date-range slider underneath
  layout = column(row(controls, p), controls2)
  doc.add_root(layout)

# Render Dashboard

In [10]:
# Wrap the document-building function in a handler and build a Bokeh
# application from it.
handler = FunctionHandler(modify_doc)
app = Application(handler)
# NOTE(review): `doc` is not used again in this cell; show() below is given
# `app`, not `doc`. Presumably kept for inspection/debugging -- confirm.
doc = app.create_document()
# Display the app in the notebook. notebook_url is hard-coded here;
# presumably it has to match the address the notebook is served from -- verify.
show(app, notebook_url="localhost:8888")