 # Table of Contents
<div class="toc" style="margin-top: 1em;"><ul class="toc-item" id="toc-level0"><li><span><a href="http://localhost:8888/notebooks/OneDrive/Documents/data_science_stuff/Personal_projects/CSG_stuff/Final_look.ipynb?dashboard#Data-visualization---Iris-dataset" data-toc-modified-id="Data-visualization---Iris-dataset-1">Data visualization - Iris dataset</a></span></li></ul></div>

# Data visualization - Iris dataset

This notebook compares each pair of features of the iris dataset in a scatter plot. The plots are interactive: hover over a point to see its x and y location, its row number in the dataset, and its original label.  

Here I have used the K-means algorithm to cluster the dataset, and in each plot I have colored the dots according to the predicted category; you can see the original category by hovering over the point. 

The whole goal here is a cute exercise in data visualization using bqplot and Python widgets.  

bqplot is a rather interesting plotting library. While platforms like bokeh and plotly have more mature plotting options compared to bqplot, there are a few interesting features that bqplot offers that the others don't. One is that your plots are JavaScript objects that you can interact with in the notebook. Another is that you can actually move the data points around in the plot. I find this feature the most fascinating: it means you don't have to go back to some array and keep changing values or repeatedly draw another plot; you can just select and move your points around, and you can write a function to, for example, recalculate the standard deviation, the mean, or some fitted curve. 



In [3]:
import numpy as np
import pandas as pd
import sklearn.metrics as sm
from bqplot import *
from bqplot import pyplot as plt
from bqplot.interacts import (
    FastIntervalSelector, IndexSelector, BrushIntervalSelector,
    BrushSelector, MultiSelector, LassoSelector, PanZoom, HandDraw
)
from ipywidgets import ToggleButtons, VBox, HTML
from sklearn import datasets
from sklearn.cluster import KMeans


class plotting_bq:
    '''Interactive bqplot scatter figure for two features of the iris dataset.

    The points are coloured by a 3-cluster k-means fit on the two selected
    features, and one draggable text label per species is placed on the
    figure.

    Attributes:
        field1, field2: iris feature column names (e.g. 'petal width (cm)')
            plotted on the x and y axis respectively.
        switch_list: three values; entry ``i`` is the colour-scale value
            assigned to the points k-means puts in cluster ``i``.
        label_flag: stored as given; no method of this class reads it.
        label1, label2, label3: the species names used as label text.
        xposS/yposS, xposV/yposV, xposVR/yposVR: positions of the setosa,
            versicolor and virginica labels.
        chart: the Scatter mark of the most recent ``plotting`` call (or
            whatever was passed to the constructor before that).
    '''

    def __init__(self,
                 field1=None,
                 field2=None,
                 switch_list=None,
                 label_flag=None,
                 chart=None):
        '''Store the plot settings; ``switch_list`` defaults to [0, 1, 2].'''
        self.field1 = field1
        self.field2 = field2
        # Build the default per instance: a list literal in the signature
        # would be one object shared (and mutated) by every instance.
        self.switch_list = [0, 1, 2] if switch_list is None else switch_list
        self.label_flag = label_flag

        self.label1 = 'setosa'
        self.label2 = 'versicolor'
        self.label3 = 'virginica'

        self.xposS = 0.2
        self.xposV = 0.2
        self.xposVR = 0.3

        self.yposS = 0.2
        self.yposV = 0.2
        self.yposVR = 0.3
        # Honour the constructor argument; ``plotting`` overwrites this with
        # the Scatter mark it creates.
        self.chart = chart

    def set_scales(self):
        '''Return a (color scale, x scale, y scale) tuple for the chart.'''
        col_sec = ColorScale(colors=['Red', 'Green', 'Blue'])
        sc_x = LinearScale()
        sc_y = LinearScale()
        return col_sec, sc_x, sc_y

    def axis_options(self, sc_x, sc_y):
        '''Return the (x axis, y axis) pair, labelled with the field names.'''
        ax_x = Axis(label=self.field1, scale=sc_x)
        ax_y = Axis(
            label=self.field2,
            scale=sc_y,
            orientation='vertical',
            tick_format='0.2f')

        return ax_x, ax_y

    def plotting(self, all_dataframe):
        '''Cluster ``all_dataframe`` and return the finished Figure.

        ``all_dataframe`` must contain the ``field1`` and ``field2`` columns
        plus an 'fname' column holding the original species name of each row
        (the layout produced by ``get_data``).
        '''
        # Cluster the very frame that is plotted instead of reloading the
        # dataset a second time.
        kmeans = self.kmeans(all_dataframe)
        # Translate raw cluster indices into colour-scale values.
        ypred = np.choose(kmeans.labels_, self.switch_list)

        # set color scales and axis scales
        col_sec, sc_x, sc_y = self.set_scales()

        scatter_chart = Scatter(
            x=all_dataframe[self.field1],
            y=all_dataframe[self.field2],
            scales={'x': sc_x,
                    'y': sc_y,
                    'color': col_sec},
            colors=['blue'],
            color=ypred,
            names=all_dataframe['fname'],
            names_unique=False,
            display_names=False,
            enable_move=True,
            enable_hover=True,
            tooltip=Tooltip(
                fields=['name', 'index', 'x', 'y'],
                labels=['Original_name', 'Row', 'x', 'y']),
            interactions={'click': 'select',
                          'hover': 'tooltip'},
            unselected_style={'opacity': 0.6},
            selected_style={'opacity': 1.0})

        # axis options
        ax_x, ax_y = self.axis_options(sc_x, sc_y)

        self.list_selected_points(scatter_chart)
        self.chart = scatter_chart

        # Label colours follow the same Red/Green/Blue order as the
        # colour scale built in set_scales.
        colors = ['Red', 'Green', 'Blue']
        label1 = self.place_label(self.xposS, self.yposS, self.label1,
                                  colors[0])
        label2 = self.place_label(self.xposV, self.yposV, self.label2,
                                  colors[1])
        label3 = self.place_label(self.xposVR, self.yposVR, self.label3,
                                  colors[2])

        return Figure(
            axes=[ax_x, ax_y],
            marks=[scatter_chart, label2, label3, label1],
            title=self.field1 + ' vs ' + self.field2)

    def list_selected_points(self, chart):
        '''Print the selected row numbers whenever ``chart``'s selection changes.

        Returns the result of ``chart.observe``.
        '''
        def observe_selected(change):
            # Read from the chart this observer was registered on, so the
            # callback does not depend on what self.chart points to later.
            print('Row number {}'.format(chart.selected))

        return chart.observe(observe_selected, 'selected')

    def get_data(self):
        '''Load iris and return a frame of field1, field2 and 'fname'.

        'fname' holds the species name of each row, looked up from the
        dataset's integer targets.
        '''
        iris_dataset = datasets.load_iris()
        iris = pd.DataFrame(data=iris_dataset.data)
        iris.columns = iris_dataset.feature_names

        data_subset = iris[[self.field1, self.field2]]
        name_list = [iris_dataset.target_names[label]
                     for label in iris_dataset.target]

        name_list1 = pd.DataFrame({'fname': name_list})
        all_data = pd.concat([data_subset, name_list1], axis=1)

        return all_data

    def kmeans(self, dataset=None):
        '''Fit 3-cluster k-means on the two selected feature columns.

        ``dataset`` is any frame containing the ``field1`` and ``field2``
        columns; when omitted, the iris data is loaded via ``get_data``.
        Returns the fitted KMeans object.
        '''
        if dataset is None:
            dataset = self.get_data()
        # n_init is pinned because its default differs between scikit-learn
        # releases; 10 is the long-standing default. NOTE(review): the
        # switch_list mappings used with this class were presumably tuned
        # against that default - confirm on the installed version.
        kmeans_object = KMeans(
            n_clusters=3,
            n_init=10,
            random_state=1).fit(dataset[[self.field1, self.field2]])
        return kmeans_object

    def place_label(self, xpos, ypos, text, color):
        '''Return a bold, draggable Label showing ``text`` at (xpos, ypos).'''
        return Label(
            x=[xpos],
            y=[ypos],
            default_size=16,
            font_weight='bolder',
            colors=[color],
            text=[text],
            enable_move=True)
    
def plots(arg):
    '''Return the clustered scatter figure for the comparison named ``arg``.

    ``arg`` must be one of the six menu strings used as keys below, such as
    'petal width vs petal length'; any other value raises ``KeyError``.
    '''
    petal_width = 'petal width (cm)'
    petal_length = 'petal length (cm)'
    sepal_width = 'sepal width (cm)'
    sepal_length = 'sepal length (cm)'

    # One entry per menu string: the plotting_bq attributes to override.
    # The first entry leaves the setosa label position (xposS/yposS)
    # untouched, so that plot keeps the value set by the constructor.
    presets = {
        'petal width vs petal length': dict(
            field1=petal_width, field2=petal_length,
            xposV=0.2, yposV=0.5,
            xposVR=0.8, yposVR=0.5,
            switch_list=[0, 2, 1]),
        'petal width vs sepal length': dict(
            field1=petal_width, field2=sepal_length,
            xposS=0.2, yposS=0.08,
            xposV=0.2, yposV=0.5,
            xposVR=0.6, yposVR=0.9,
            switch_list=[2, 0, 1]),
        'petal width vs sepal width': dict(
            field1=petal_width, field2=sepal_width,
            xposS=0.05, yposS=0.3,
            xposV=0.3, yposV=0.5,
            xposVR=0.6, yposVR=0.7,
            switch_list=[0, 2, 1]),
        'petal length vs sepal width': dict(
            field1=petal_length, field2=sepal_width,
            xposS=0.05, yposS=0.3,
            xposV=0.4, yposV=0.6,
            xposVR=0.7, yposVR=0.7,
            switch_list=[0, 2, 1]),
        'petal length vs sepal length': dict(
            field1=petal_length, field2=sepal_length,
            xposS=0.05, yposS=0.6,
            xposV=0.4, yposV=0.6,
            xposVR=0.7, yposVR=0.9,
            switch_list=[0, 2, 1]),
        'sepal length vs sepal width': dict(
            field1=sepal_length, field2=sepal_width,
            xposS=0.05, yposS=0.8,
            xposV=0.4, yposV=0.6,
            xposVR=0.7, yposVR=0.7,
            switch_list=[2, 0, 1]),
    }

    plotit = plotting_bq()
    overrides = presets[arg]

    # Every comparison sets the same flag value.
    plotit.label_flag = 1
    for attribute, value in overrides.items():
        setattr(plotit, attribute, value)

    return plotit.plotting(plotit.get_data())
        


In [4]:
    
from IPython.display import display
import ipywidgets as widgets 
from ipywidgets import HBox, VBox
# Menu entries for the selector; each string is passed to ``plots`` as-is,
# so it has to match one of the comparison names ``plots`` looks up.
comparison_choices = [
    'petal width vs petal length',
    'petal width vs sepal length',
    'petal width vs sepal width',
    'petal length vs sepal width',
    'petal length vs sepal length',
    'sepal length vs sepal width',
]

w = widgets.Select(
    options=comparison_choices,
    description='COMPARE :',
    disabled=False,
)

# Hand the selector to interact so ``plots`` receives the chosen entry
# as its ``arg`` argument.
widgets.interact(plots, arg=w)

<function __main__.plots>