In [22]:
import os
import sys
import logging
import pandas
from collections import defaultdict
from bokeh.charts import Bar, Scatter, Line
from bokeh.layouts import widgetbox, row, column
from bokeh.models import Button, Select, RangeSlider, DataTable, ColumnDataSource, CustomJS, TableColumn, Div, CheckboxGroup
from bokeh.io import curdoc, output_notebook, push_notebook, show
output_notebook()

In [2]:
# Load source data
csv_filepath = 'big_data_benchmarking_20170125.csv'
if not os.path.isfile(csv_filepath):
    logging.error("File: " + csv_filepath + " does not exist!")
    sys.exit(1)
dataframe = pandas.read_csv(csv_filepath, low_memory=False)
source = ColumnDataSource(data=dataframe)

In [3]:
# DataTable
columns = [TableColumn(field=c, title=c) for c in dataframe.columns]
data_table = DataTable(source=source, columns=columns, editable=False, width=1400, height=500)

In [15]:
# Plots
def generate_bar():
    all_active = []
    #all_active.extend(checkbox_database.active)
    #all_active.extend([checkbox for checkbox in checkbox_database.active])
    source_dataframe = dataframe[dataframe['database'].isin(["Oracle Database"])]
    bar_concurrency_by_database = Bar(source_dataframe, values='time', label='concurrency_factor', group='database',
                                      legend='top_right', xlabel='Concurrency factor', ylabel='Time in seconds',
                                      title="Average query execution time by concurrency factor", agg='mean', width=1800)
    bar_concurrency_by_database.title.text_font_size = '12pt'
    return bar_concurrency_by_database

In [5]:
plots = defaultdict(list)
for i in dataframe['concurrency_factor'].sort_values().unique():
    scatter_plot = Scatter(dataframe[dataframe['concurrency_factor'] == i], x='rows', y='time', color='database',
                           title="Concurrency " + str(i) + "  |  Individual query execution time", legend='top_left',
                           legend_sort_field='color', legend_sort_direction='ascending', xlabel='Number of rows',
                           ylabel='Time in seconds')
    scatter_plot.title.text_font_size = '12pt'
    plots['scatter_plot'].append(scatter_plot)

In [24]:
def update(attrname, old, new):
    current = dataframe[(dataframe['concurrency_factor'] == int(select_concurrency.value))
                        & (dataframe['database'].isin(checkbox_database.active))
                        #& (dataframe['database'] == select_database.value)
                        & (dataframe['table_size_category'] == select_table_size.value)
                        & (dataframe['category'] == select_query_category.value)
                        #& (dataframe['query_id'] == select_query_id.value)
                        #& (dataframe['query_id'].isin([int(i) for i in list(eval(select_query_id.value))]))
                        & (dataframe['rows'] >= slider_rows.range[0])
                        & (dataframe['rows'] <= slider_rows.range[1])
                        & (dataframe['time'] >= slider_time.range[0])
                        & (dataframe['time'] <= slider_time.range[1])]
    source.data = ColumnDataSource(data=current).data
    push_notebook()

In [7]:
#################################################
# Widgets
options = list(dataframe['concurrency_factor'].sort_values().apply(str).unique())
select_concurrency = Select(title="Concurrency Factor:", value=options[0], options=options)
select_concurrency.on_change('value', update)

labels = list(dataframe['database'].sort_values().apply(str).unique())
checkbox_database = CheckboxGroup(labels=labels, active=[])
checkbox_database.on_change('active', update)

options = list(dataframe['database'].sort_values().apply(str).unique())
select_database = Select(title="Database:", value=options[0], options=options)
select_database.on_change('value', update)

options = list(dataframe['table_size_category'].sort_values().apply(str).unique())
select_table_size = Select(title="Table Size:", value=options[0], options=options)
select_table_size.on_change('value', update)

options = list(dataframe['category'].sort_values().apply(str).unique())
select_query_category = Select(title="Query Category:", value=options[0], options=options)
select_query_category.on_change('value', update)

options = list(dataframe['query_id'].sort_values().apply(str).unique())
options = [(str(i), str(i)) for i in options]
options.insert(0, ("99", "All"))
select_query_id = Select(title="Query ID:", value=options[1][0], options=options)
select_query_id.on_change('value', update)

end = dataframe['rows'].max()
slider_rows = RangeSlider(start=0, end=end, range=(0, end), step=end//100, title="Rows")
slider_rows.on_change('range', update)

end = dataframe['time'].max()
slider_time = RangeSlider(start=0, end=end, range=(0, end), step=end//100, title="Time")
slider_time.on_change('range', update)

button_download = Button(label="Download", button_type='success')
button_download.callback = CustomJS(args=dict(source=source), code=open("download.js").read())

widgets = [select_concurrency, checkbox_database, select_database, select_table_size, select_query_category, select_query_id, slider_rows,
           slider_time, button_download]

In [30]:
r1 = row(widgetbox(widgets), widgetbox(data_table))
r3 = row([item for item in plots['scatter_plot']])

In [31]:
#################################################
# Layout
layout = column(r1,r3)
from ipywidgets import interact

In [32]:
interact(update)

ValueError: cannot find widget or abbreviation for argument: 'attrname'

In [29]:
from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider
from bokeh.plotting import figure

x = [x*0.005 for x in range(0, 200)]
y = x

source = ColumnDataSource(data=dict(x=x, y=y))

plot = figure(plot_width=400, plot_height=400)
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6)

callback = CustomJS(args=dict(source=source), code="""
    var data = source.get('data');
    var f = cb_obj.get('value')
    x = data['x']
    y = data['y']
    for (i = 0; i < x.length; i++) {
        y[i] = Math.pow(x[i], f)
    }
    source.trigger('change');
""")

slider = Slider(start=0.1, end=4, value=1, step=.1, title="power", callback=callback)

layout = column(slider, plot)

show(layout)

In [39]:
import bokeh.palettes
import random

KeyError: 16

In [40]:
key, item = random.choice(list(bokeh.palettes.all_palettes.items()))

In [42]:
print(key)
print(item)

Pastel1
{3: ['#fbb4ae', '#b3cde3', '#ccebc5'], 4: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4'], 5: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6'], 6: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6', '#ffffcc'], 7: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6', '#ffffcc', '#e5d8bd'], 8: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6', '#ffffcc', '#e5d8bd', '#fddaec'], 9: ['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6', '#ffffcc', '#e5d8bd', '#fddaec', '#f2f2f2']}


In [45]:
max(item)

9

In [47]:
item[max(item)]

['#fbb4ae',
 '#b3cde3',
 '#ccebc5',
 '#decbe4',
 '#fed9a6',
 '#ffffcc',
 '#e5d8bd',
 '#fddaec',
 '#f2f2f2']

In [70]:
palettes = bokeh.palettes.all_palettes
palettes_list = ['Category20', 'Accent', 'Paired', 'Pastel1', 'Spectral', 'Set3']
palette_name = random.choice(list(palettes_list))
palette = palettes[palette_name][max(palettes[palette_name])]

['#fbb4ae',
 '#b3cde3',
 '#ccebc5',
 '#decbe4',
 '#fed9a6',
 '#ffffcc',
 '#e5d8bd',
 '#fddaec',
 '#f2f2f2']

In [None]:
colors = 
colors