# Simple Interactive K-means Visualization
## Using Plotly, ipywidgets, and scikit-learn's KMeans algorithm


<table><tr><td><img src='../images/2d_demo.gif'></td><td><img src='../images/3d_demo.gif'></td></tr></table>

### [2D Widget](#2DWidget)
### [3D Widget](#3DWidget)
#### Next steps:
* Create a toggle button to toggle between 2D and 3D in one interactive plot
* Add more widgets for other KMeans algorithm parameters so predictions can be tuned interactively

### Imports

In [1]:
import pandas as pd
import numpy as np

import plotly as py
import plotly.graph_objs as go

import ipywidgets as widgets
from IPython.display import display, HTML

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

py.offline.init_notebook_mode(connected=True)

### Widgets and layouts

In [2]:
# Figure layout reused by every plot in this notebook: one title and a
# fixed 750x500 canvas with autosizing switched off.
layout = go.Layout(
    title='Kmeans Interactive Visualization', autosize=False, width=750, height=500
)

# Style mapping handed to every widget below through ``style=``.
style = dict(description_width='initial')

# widgets
# The 2D and 3D demos each get their own trio of controls, configured
# identically, so the keyword sets are written once and reused.

# Options common to both slider kinds. continuous_update=False: per the
# ipywidgets docs the value is then only reported once the handle is released.
_slider_common = dict(style=style, continuous_update=False)

_sample_kwargs = dict(
    min=2, max=10000, value=1000, description='N_samples: ', **_slider_common
)
_center_kwargs = dict(
    min=1, max=40, value=1, description='Centers: ', **_slider_common
)
_cluster_kwargs = dict(
    min=2, max=100, value=8, description='N_clusters: ', style=style
)

# Controls for the 2D plot.
sample_input = widgets.IntSlider(**_sample_kwargs)
center_input = widgets.IntSlider(**_center_kwargs)
cluster_input = widgets.BoundedIntText(**_cluster_kwargs)

# Controls for the 3D plot.
sample_input3D = widgets.IntSlider(**_sample_kwargs)
center_input3D = widgets.IntSlider(**_center_kwargs)
cluster_input3D = widgets.BoundedIntText(**_cluster_kwargs)


# scatter_controls = widgets.HBox(children=[sample_input, center_input])
# algo_controls = widgets.HBox(children=[cluster_input])

In [3]:
class Blobs:
    '''
    Hold the generated samples and KMeans labels for the 2D and 3D demos.

    No parameters are passed in: every method reads its settings from the
    module-level widgets (sample_input, center_input, cluster_input and
    their ...3D counterparts) at the moment it is called.

    Attributes (all None until the matching method has run):
        X            samples produced by generate_data()
        y_predict    KMeans labels for X, produced by predict()
        X3D          samples produced by generate_3D_data()
        y_predict3D  KMeans labels for X3D, produced by predict3D()
    '''

    def __init__(self):
        # Declare every attribute up front so the object has a stable shape
        # and "not generated yet" can be detected explicitly.
        self.X = None
        self.y_predict = None
        self.X3D = None
        self.y_predict3D = None

    def generate_data(self):
        '''
        Draw a new blob dataset, store it on self.X and return it.

        The sample and center counts come from sample_input and
        center_input. n_features is left at the make_blobs default.
        '''
        # make_blobs also returns the generating labels; they are not used.
        self.X, _ = make_blobs(
            n_samples=sample_input.value,
            centers=center_input.value,
            random_state=None
        )
        return self.X

    def predict(self):
        '''
        Cluster self.X with KMeans, store the labels on self.y_predict
        and return them.

        The number of clusters comes from cluster_input.

        Raises:
            RuntimeError: if no data has been generated yet.
        '''
        if self.X is None:
            raise RuntimeError('No 2D data available: call generate_data() first.')
        self.y_predict = KMeans(n_clusters=cluster_input.value).fit_predict(self.X)
        return self.y_predict

    def generate_3D_data(self):
        '''
        Draw a new three-feature blob dataset, store it on self.X3D and
        return it.

        The sample and center counts come from sample_input3D and
        center_input3D.
        '''
        # make_blobs also returns the generating labels; they are not used.
        self.X3D, _ = make_blobs(
            n_samples=sample_input3D.value,
            centers=center_input3D.value,
            n_features=3,
            random_state=None
        )
        return self.X3D

    def predict3D(self):
        '''
        Cluster self.X3D with KMeans, store the labels on self.y_predict3D
        and return them.

        The number of clusters comes from cluster_input3D.

        Raises:
            RuntimeError: if no 3D data has been generated yet.
        '''
        if self.X3D is None:
            raise RuntimeError('No 3D data available: call generate_3D_data() first.')
        self.y_predict3D = KMeans(n_clusters=cluster_input3D.value).fit_predict(self.X3D)
        return self.y_predict3D

### <a name="2DWidget"></a>2D Widget

In [4]:
# Shared state object used by the plotting callbacks. Seed it with an
# initial 2D dataset and its cluster labels; both methods store their
# result on the instance (state.X / state.y_predict) themselves.
state = Blobs()
state.generate_data()
state.predict()

In [5]:
def scatter_responses(*args):
    '''
    Regenerate the 2D dataset from the current slider values.

    First caps center_input.max at the current sample_input value, then
    replaces state.X with a fresh make_blobs draw.

    *args is accepted and ignored.
    '''
    sample_count = sample_input.value
    center_input.max = sample_count

    # generate_data() reads both sliders itself and stores the result on state.X.
    state.generate_data()
    
    
    
def algo_responses(*args):
    '''
    Cap cluster_input at the current number of samples.

    Sets cluster_input.max to sample_input.value. *args is accepted and
    ignored.
    '''
    sample_count = sample_input.value
    cluster_input.max = sample_count
    

def update_plot(n_samples, centers, n_clusters):
    '''
    Re-cluster the current 2D data and draw it as a scatter plot.

    None of the three parameters is read in the body: the cluster count is
    taken from cluster_input and the data from state.X.
    '''
    # Refreshes state.y_predict using cluster_input.value and state.X.
    state.predict()

    outline = {'width': 1, 'color': 'black'}
    marker_style = {
        'symbol': 'hexagon',
        'size': 12,
        'color': state.y_predict,  # one colour per predicted cluster label
        'colorscale': 'Jet',
        'line': outline,
    }

    # First column on the x axis, last column on the y axis.
    points = go.Scatter(
        x=state.X[:, 0],
        y=state.X[:, -1],
        mode='markers',
        marker=marker_style,
    )

    py.offline.iplot(go.Figure(data=[points], layout=layout))

In [6]:
# Changing either slider regenerates the 2D dataset.
sample_input.observe(scatter_responses, 'value')
center_input.observe(scatter_responses, 'value')

# Keep N_clusters <= N_samples. The cap must be refreshed when the sample
# count changes, not only when the cluster count does: otherwise lowering
# N_samples below the current N_clusters leaves the old cap in place and
# KMeans is asked for more clusters than there are samples, which
# scikit-learn rejects with a ValueError.
# NOTE(review): this relies on handlers firing in registration order, so the
# cap is applied before the plot callback registered later - confirm.
sample_input.observe(algo_responses, 'value')
cluster_input.observe(algo_responses, 'value')

In [7]:
# Tie the three 2D controls to update_plot, keyed by its parameter names.
controls_2d = {
    'n_samples': sample_input,
    'centers': center_input,
    'n_clusters': cluster_input,
}
plot_2d = widgets.interactive(update_plot, **controls_2d)

# Trailing expression so the notebook renders the widget as the cell output.
plot_2d

interactive(children=(IntSlider(value=1000, continuous_update=False, description='N_samples: ', max=10000, min…

### <a name="3DWidget"></a>3D Widget

In [8]:
# Seed the shared state with an initial 3D dataset and its cluster labels;
# both methods store their result on the instance (state.X3D /
# state.y_predict3D) themselves.
state.generate_3D_data()
state.predict3D()

In [9]:
def scatter_responses3D(*args):
    '''
    Regenerate the 3D dataset from the current slider values.

    First caps center_input3D.max at the current sample_input3D value, then
    replaces state.X3D with a fresh three-feature make_blobs draw.

    *args is accepted and ignored.
    '''
    sample_count = sample_input3D.value
    center_input3D.max = sample_count

    # generate_3D_data() reads both sliders itself and stores the result on
    # state.X3D.
    state.generate_3D_data()
    
    
    
def algo_responses3D(*args):
    '''
    Cap cluster_input3D at the current number of samples.

    Sets cluster_input3D.max to sample_input3D.value. *args is accepted and
    ignored.
    '''
    sample_count = sample_input3D.value
    cluster_input3D.max = sample_count
    

def update_plot_3D(n_samples, centers, n_clusters):
    '''
    Re-cluster the current 3D data and draw it as a 3D scatter plot.

    None of the three parameters is read in the body: the cluster count is
    taken from cluster_input3D and the data from state.X3D.
    '''
    # Refreshes state.y_predict3D using cluster_input3D.value and state.X3D.
    state.predict3D()

    xs, ys, zs = state.X3D[:, 0], state.X3D[:, 1], state.X3D[:, 2]

    outline = {'width': 1, 'color': 'black'}
    marker_style = {
        'size': 6,
        'color': state.y_predict3D,  # one colour per predicted cluster label
        'colorscale': 'Jet',
        'line': outline,
    }

    cloud = go.Scatter3d(x=xs, y=ys, z=zs, mode='markers', marker=marker_style)

    py.offline.iplot(go.Figure(data=[cloud], layout=layout))

In [10]:
# Changing either slider regenerates the 3D dataset.
sample_input3D.observe(scatter_responses3D, 'value')
center_input3D.observe(scatter_responses3D, 'value')

# Keep N_clusters <= N_samples. The cap must be refreshed when the sample
# count changes, not only when the cluster count does: otherwise lowering
# N_samples below the current N_clusters leaves the old cap in place and
# KMeans is asked for more clusters than there are samples, which
# scikit-learn rejects with a ValueError.
# NOTE(review): this relies on handlers firing in registration order, so the
# cap is applied before the plot callback registered later - confirm.
sample_input3D.observe(algo_responses3D, 'value')
cluster_input3D.observe(algo_responses3D, 'value')

In [11]:
# Tie the three 3D controls to update_plot_3D, keyed by its parameter names.
controls_3d = {
    'n_samples': sample_input3D,
    'centers': center_input3D,
    'n_clusters': cluster_input3D,
}
plot_3d = widgets.interactive(update_plot_3D, **controls_3d)

# Trailing expression so the notebook renders the widget as the cell output.
plot_3d

interactive(children=(IntSlider(value=1000, continuous_update=False, description='N_samples: ', max=10000, min…