In [17]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
#from bokeh.palettes import Viridis6 as palette
from bokeh.palettes import Category20_16
from bokeh.models import HoverTool
# Import the ColumnDataSource class
from bokeh.models import ColumnDataSource

In [7]:
# Load the comma-separated dataset. The file has no header row, so the
# column names are assigned after reading.
df = pd.read_csv('dataset.txt', sep=',', header=None)
column_names = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Labels']
df.columns = column_names

# Feature matrix: every column except the class label.
X = df.drop(['Labels'], axis=1)

# Encode the class names as integer ids, numbered in order of first
# appearance. The codes are computed from the untouched original column, so
# a label that already looks like one of the ids (e.g. numeric labels 2, 1, 0)
# cannot be re-matched and merged with another class, which is what
# relabelling the column in place while still comparing against it allows.
# Missing labels, if any, receive the code -1.
codes, class_names = pd.factorize(df['Labels'])
Y = pd.DataFrame({'Labels': codes}, index=df.index)

# Class ids and the [name, id] lookup table for mapping ids back to names.
cl_num = range(len(class_names))
map_Y = [[name, cl_id] for cl_id, name in enumerate(class_names)]
X.head()

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
# Two empty 600x600 figures. p1 is labelled with the first two feature
# columns, p2 with the third and fourth. Each title is the first word of
# the x-axis column name followed by ' Glyphs'.
# NOTE(review): newer Bokeh releases spell the size arguments
# width/height instead of plot_width/plot_height - confirm against the
# installed version.
sepal_x, sepal_y, petal_x, petal_y = column_names[:4]

p1 = figure(
    title=sepal_x.partition(' ')[0] + ' Glyphs',
    x_axis_label=sepal_x,
    y_axis_label=sepal_y,
    plot_width=600,
    plot_height=600,
)

p2 = figure(
    title=petal_x.partition(' ')[0] + ' Glyphs',
    x_axis_label=petal_x,
    y_axis_label=petal_y,
    plot_width=600,
    plot_height=600,
)

NameError: name 'figure' is not defined

In [130]:
# One colour per class id; the list is indexed by the integer label.
colors = ['navy', 'red', 'green']

# Draw every class as its own circle glyph on the sepal figure.
for cl_id in cl_num:
    in_class = Y['Labels'] == cl_id
    p1.circle(
        X.loc[in_class, 'Sepal Length'],
        X.loc[in_class, 'Sepal Width'],
        color=colors[cl_id],
    )

# Direct Bokeh output to the notebook, then render the figure.
output_notebook()
show(p1)

In [131]:
# Same per-class scatter as for the sepal figure, this time on the petal
# measurements; colours come from the shared `colors` list.
for cl_id in cl_num:
    members = X.loc[Y['Labels'] == cl_id]
    p2.circle(members['Petal Length'], members['Petal Width'],
              color=colors[cl_id])

# Direct Bokeh output to the notebook, then render the figure.
output_notebook()
show(p2)

In [9]:
# Basic per-feature statistics: one row per statistic, one column per
# feature. Each name in stat_names is also the DataFrame method that
# computes it.
stat_names = ['max', 'min', 'mean', 'median', 'std']
summary = pd.DataFrame([getattr(X, stat)() for stat in stat_names])
summary.index = pd.Index(stat_names, name='statistics')
summary

Unnamed: 0_level_0,Sepal Length,Sepal Width,Petal Length,Petal Width
statistics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
max,7.9,4.4,6.9,2.5
min,4.3,2.0,1.0,0.1
mean,5.843333,3.054,3.758667,1.198667
median,5.8,3.0,4.35,1.3
std,0.828066,0.433594,1.76442,0.763161


In [252]:
# Sepal length values of the samples whose label id is 0.
dataset = X['Sepal Length'].loc[Y['Labels'] == 0]

# Roughly five samples per bin, but never fewer than one bin:
# np.histogram rejects bins=0, which a plain division by five yields
# whenever the class holds fewer than five samples.
n_bins = max(1, dataset.shape[0] // 5)
arr_hist, edges = np.histogram(dataset,
                               bins=n_bins,
                               range=[dataset.min(), dataset.max()])

# One row per bin: its count plus its left and right edge
# (edges holds one more entry than there are bins).
feature = pd.DataFrame({'values': arr_hist,
                        'left': edges[:-1],
                        'right': edges[1:]})

In [253]:
# Empty 600x600 figure for the histogram. The y axis shows how many
# samples fall in each bin, so it is labelled as a count rather than as
# a measurement.
p3 = figure(plot_height=600, plot_width=600,
            title='Histogram of Features',
            x_axis_label='Length',
            y_axis_label='Count')

# One rectangle per bin: it spans the bin's left/right edges and rises
# from 0 to the bin's count. Columns are passed directly as data.
p3.quad(bottom=0, top=feature['values'],
        left=feature['left'], right=feature['right'],
        fill_color='red', line_color='black')

# Show the plot
show(p3)

In [254]:
# Wrap the histogram table in a ColumnDataSource so glyphs can refer to
# its columns by name, then list the columns the source exposes.
src = ColumnDataSource(data=feature)
src.data.keys()

dict_keys(['index', 'values', 'left', 'right'])

In [255]:
# Source-backed variant of the histogram bars: the glyph properties are
# column names looked up in `src` rather than the data itself.
p3.quad(
    left='left',
    right='right',
    bottom=0,
    top='values',
    source=src,
    fill_color='red',
    line_color='black',
)

In [256]:
# Tooltip rows are (label, value) pairs. A value starting with '@' reads
# a column of the glyph's data source; one starting with '$' reads a
# position on the graph.
basic_tooltips = [
    ('Value Interval ', '@left'),
    ('(x,y)', '($x, $y)'),
]
h = HoverTool(tooltips=basic_tooltips)

In [12]:
# Feature rows of the samples whose label id is 0.
X[Y['Labels'].eq(0)]

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
8,4.4,2.9,1.4,0.2
9,4.9,3.1,1.5,0.1


In [257]:
# Human-readable description of each bin, e.g. for use in a tooltip:
# both edges are formatted with two decimals.
interval_template = '%.2f to %.2f values'
feature['f_interval'] = [
    interval_template % bounds
    for bounds in zip(feature['left'], feature['right'])
]

In [258]:
# Rebuild the data source from the histogram table so that it reflects
# the table's current columns, then list the columns it exposes.
src = ColumnDataSource(data=feature)
src.data.keys()

dict_keys(['index', 'values', 'left', 'right', 'f_interval'])

In [259]:
# Start from a fresh 600x600 figure. Bar height is the number of samples
# in each bin, so the y axis is labelled accordingly.
p3 = figure(plot_height=600, plot_width=600,
            title='Histogram of Values',
            x_axis_label='Length',
            y_axis_label='Number of Samples')

# Histogram bars backed by `src`: the string arguments are column names.
# The hover_* properties restyle a bar while the pointer is over it.
p3.quad(bottom=0, top='values', left='left', right='right', source=src,
        fill_color='red', line_color='black', fill_alpha=0.75,
        hover_fill_alpha=1.0, hover_fill_color='navy')

# Tooltip showing the formatted bin interval and the bin's count, both
# read from columns of `src`.
hover = HoverTool(tooltips=[('Values', '@f_interval'),
                            ('Num of Values', '@values')])

# Add the hover tool to the graph
p3.add_tools(hover)

# Show the plot
show(p3)