In [43]:
import pandas as pd
import numpy as np

In [44]:
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure

from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs


from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

output_notebook()

# Reading in Data

In [45]:
# load in params and websites
webs_paras = pd.read_csv('../data/normalized.csv')
webs = webs_paras.drop(['(Doc complete) Byets in','(Fully loaded) Bytes in','(Fully loaded) Requests','(Fully loaded) Requests','(Doc complete) Requests'], axis = 1)
webs_requests = webs_paras[['Site name','(Fully loaded) Requests','(Fully loaded) Requests','(Doc complete) Requests']]



 #webs['(Doc complete) Byets in'].describe()

list_of_params = list(webs.columns[1:].unique())

list_of_params.sort()

#len(list_of_params) 

#for i in range(49):
    #print i%10

# Function to Make Dataset for plot


In [46]:
def make_dataset(params_list, range_start = 0.0, range_end = 1, bin_width = 0.005):
    
    #check to make sure the start is less than the end
    assert range_start < range_end, "Start must be less than end!"
    
    #by_params = pd.DataFrame(columns=[ ,'Max', 'Avarage', 'Min','color'])
    by_params = pd.DataFrame(columns=[ 'left','right', 'proportion', 'p_proportion','p_interval', 'name', 'color']) 
   
    
    range_extent = range_end - range_start
    values = ['Min', "Avarage", 'Max']
    # Iterate through all the parameters 
    for i, para_name in enumerate(params_list):
        
        #print para_name
        # Subset to the parameter
        subset = webs[para_name]
        
        # note: subset have to be a list of values
        
        # [webs.columns[i%6]]
        
        # Create a histogram with specified bins and range
        arr_hist, edges = np.histogram(subset,  
                                      bins = int(range_extent / bin_width),
                                      range = [range_start, range_end])

        # Divide the counts by the total to get a proportion and create df
        arr_df= pd.DataFrame({'proportion': arr_hist ,
                              'left': edges[:-1], 'right': edges[1:]}) #/ np.sum(arr_hist)
      
        # Format the proportion
        arr_df['p_proportion'] = ['%0.00005f' % proportion for proportion in arr_df['proportion']]
        
        # Format the interval
        arr_df['p_interval'] = ['%d to %d scale' % (left, right) for left, 
                               right in zip(arr_df['left'], arr_df['right'])]
        
        # Assign the parameter for labels
        arr_df['name'] = para_name
        
        # Color each parametr differently
        arr_df['color'] = Category20_16[i%16]
        
        # Add to the overall dataframe
        by_params = by_params.append(arr_df)
        
    # Overall dataframe
    by_params = by_params.sort_values(['name','left'])
    
    
    
    return by_params
        
        
        
        

In [47]:
make_dataset(list_of_params).head()

Unnamed: 0,color,left,name,p_interval,p_proportion,proportion,right
0,#1f77b4,0.0,(Fully loaded) Time,0 to 0 scale,1.0,1,0.005
1,#1f77b4,0.005,(Fully loaded) Time,0 to 0 scale,0.0,0,0.01
2,#1f77b4,0.01,(Fully loaded) Time,0 to 0 scale,0.0,0,0.015
3,#1f77b4,0.015,(Fully loaded) Time,0 to 0 scale,1.0,1,0.02
4,#1f77b4,0.02,(Fully loaded) Time,0 to 0 scale,2.0,2,0.025


# Function to Make Plot from Data Source

In [48]:
def style(p):
    # Title
    p.title.align = 'center'
    p.title.text_font_size ='20pt'
    p.title.text_font = 'serif'
    
    # Axis titles
    p.xaxis.axis_label_text_font_size = '14pt'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_style = 'bold'
    
    # Tick labels
    p.xaxis.major_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_size = '12pt'
    
    return p

In [49]:
def make_plot(src):
    # Blank plot with correct labels
    p = figure(plot_width = 700, plot_height = 700,
              title = "Histogram of Parametes For The Websites",
              x_axis_label = 'Parameters', y_axis_label = "Values")
    
    # Quad glyphs to create a histogram
    p.quad(source=src, bottom =0,left = 'left', right = 'right', color ='color', top= 'proportion',fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
           hover_fill_alpha = 1.0, line_color = 'white') #top='proportion',
    
    # Hover tool with vline mode
    hover = HoverTool(tooltips=[('Parameter','@name'),
                               ('Difference','@p_interval'),
                                ('Proportion','p_proportion')
                               ],
                     mode='vline')
    
    p.add_tools(hover)
    # Stypling
    p = style(p)
    return p

In [50]:
p=make_plot(ColumnDataSource(make_dataset(list_of_params)))
show(p)

# Checkbox Group Element for Selecting parameter

In [51]:
para_selection = CheckboxGroup(labels=list_of_params, active = [0,1])
show(para_selection)

In [52]:
[para_selection.labels[i] for i in para_selection.active]

['(Fully loaded) Time', 'DOM elements']

# Update Function for Parameter Selected

In [53]:
# Update function takes three default parameters
def update(attr, old, new):
    
    # Get the list of parameter for the graph
    parameter_to_plot = [para_selection.labels[i] for i in para_selection.active]
    
    # Make a new dataset based on the selected parameter and the 
    # make_dataset function defined earlier
    new_src = make_dataset(parameter_to_plot) # note range are not specified
    
    
    # Convert dataframe to column data source
    new_src = ColumnDataSource(new_src)
    
    # Update the source used the quad glpyhs
    src.data.update(new_src.data)

In [82]:
def modify_doc(doc):
    def make_dataset(params_list, range_start = 0.0, range_end = 1, bin_width = 0.005):
        
        #check to make sure the start is less than the end
        assert range_start < range_end, "Start must be less than end!"

        #by_params = pd.DataFrame(columns=[ ,'Max', 'Avarage', 'Min','color'])
        by_params = pd.DataFrame(columns=[ 'left','right', 'proportion', 'p_proportion','p_interval', 'name', 'color']) 
            # 

        range_extent = range_end - range_start
        values = ['Min', "Avarage", 'Max']
        # Iterate through all the parameters 
        for i, para_name in enumerate(params_list):

            #print para_name
            # Subset to the parameter
            subset = webs[para_name]

            # note: subset have to be a list of values

            # [webs.columns[i%6]]

            # Create a histogram with specified bins and range
            arr_hist, edges = np.histogram(subset,  
                                          bins = int(range_extent / bin_width),
                                          range = [range_start, range_end])

            # Divide the counts by the total to get a proportion and create df
            arr_df= pd.DataFrame({'proportion': arr_hist ,
                                  'left': edges[:-1], 'right': edges[1:]}) #/ np.sum(arr_hist)

            # Format the proportion
            arr_df['p_proportion'] = ['%0.00005f' % proportion for proportion in arr_df['proportion']]

            # Format the interval
            arr_df['p_interval'] = ['%d to %d scale' % (left, right) for left, 
                                   right in zip(arr_df['left'], arr_df['right'])]

            # Assign the parameter for labels
            arr_df['name'] = para_name

            # Color each parametr differently
            arr_df['color'] = Category20_16[i%16]

            # Add to the overall dataframe
            by_params = by_params.append(arr_df)

        # Overall dataframe
        by_params = by_params.sort_values(['name','left'])
    
        return ColumnDataSource(by_params)


    def style(p):
        # Title
        p.title.align = 'center'
        p.title.text_font_size ='20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'

        return p


    def make_plot(src):
        # Blank plot with correct labels
        p = figure(plot_width = 700, plot_height = 700,
                  title = "Histogram of Parametes for the websites",
                  x_axis_label = 'x_label', y_axis_label = "y_labe")

        # Quad glyphs to create a histogram
        p.quad(source=src, bottom =0,left = 'left', right = 'right', color ='color', top= 'proportion',fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
               hover_fill_alpha = 1.0, line_color = 'white') #top='proportion',

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Parameter','@name'),
                                   ('Difference','@p_interval'),
                                    ('Proportion','p_proportion')
                                   ],
                         mode='vline')

        p.add_tools(hover)
        # Stypling
        p = style(p)
        return p
     
    # Update function takes three default parameters
    def update(attr, old, new):

        # Get the list of parameter for the graph
        parameter_to_plot = [para_selection.labels[i] for i in para_selection.active]

        # Make a new dataset based on the selected parameter and the 
        # make_dataset function defined earlier
        new_src = make_dataset(parameter_to_plot, range_start = 0, range_end = 1, bin_width = 0.005) # note range are not specified


        # Convert dataframe to column data source
        new_src = ColumnDataSource(new_src)

        # Update the source used the quad glpyhs
        src.data.update(new_src.data)

    list_of_params = list(webs.columns[1:].unique())
    list_of_params.sort()
    
    para_selection = CheckboxGroup(labels=list_of_params, active = [0,1])
    para_selection.on_change('active',update)
    
    binwidth_select = Slider(start =0, end = 1,
                            step = 0.00025, value = 0.0005,
                            title = 'Change in parameter')
    binwidth_select.on_change('value', update)
    
    range_select = RangeSlider(start=0, end=1, value =(0,1),
                             step=0.00025, title = 'Change in range')
    range_select.on_change('value', update)
    
    initial_params = [para_selection.labels[i] for i in para_selection.active]
    
    src = make_dataset(initial_params,
                      range_start = range_select.value[0],
                      range_end = range_select.value[1],
                      bin_width = binwidth_select.value)
    
    
    p = make_plot(src)
    #show(p)
    # Put controls in a single element
    controls = WidgetBox(para_selection, binwidth_select, range_select)
    
    # Create a row layout
    layout = row(controls, p)
    
    # Make a tab with the layout
    tab = Panel(child = layout, title = 'Histogram')
    tabs = Tabs(tabs=[tab])
    
    doc.add_root(tabs)
    
# Set up an application
handler = FunctionHandler(modify_doc)
app = Application(handler)

In [83]:
#modify_doc(webs)

In [85]:
show(app, 'localhost:9000')

ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='693ea0e8-21ab-4657-a9ee-5ad0abd4b24e', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='9950cbf9-5f7f-44c9-ac38-73f544ad5de4', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='ba876a6e-e57e-4eb9-a250-7e52340232b3', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='914cb765-1efc-45c9-8e49-9599805f93a1', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, go

ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='9b505edc-0383-4288-ba1c-d7466b020383', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='7cce8199-453a-4b94-9644-b1798d83ff90', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='ccf79ebf-30ec-438e-a5c4-eb2ac170fdb6', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='1f0ac524-8135-42a6-87b6-1218838a0494', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, go

ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='ca51a1c7-19a2-47b4-9e61-0d974921b5cf', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='a0c4a75a-1318-454f-b0fe-c398976d4c97', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='1d19ba32-421f-4fd7-87e6-e37157b33b9a', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, got ColumnDataSource(id='77977662-f8ed-411f-9b7e-ad495fbc7285', ...)",)
ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1): ValueError("expected a dict or pandas.DataFrame, go