Hierarchical Clustering Demo
===

In [20]:
import numpy as np
import scipy.cluster.hierarchy as hac
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram
import time
from ipywidgets import interactive_output
import ipywidgets as widgets
from sklearn import datasets as skdatasets


## Make data sets

In [27]:
# Registry of demo data sets: display name -> array of 2-D points (one row per point).
datasets = {}

# One seed shared by every generator below so that a fresh kernel run
# reproduces exactly the same point clouds (and therefore the same figures).
SEED = 589
rng = np.random.default_rng(SEED)

# "Blobs": three Gaussian clouds of N points each --
#   std 2 around (-4, -4), std 2 around (4, 4), and std 1 around (-6, 4).
N = 50
X = np.vstack((
    2 * rng.standard_normal((N, 2)) - 4,
    2 * rng.standard_normal((N, 2)) + 4,
    rng.standard_normal((N, 2)) + np.array([[-6, 4]]),
))
# Shuffle the rows so the three clouds are not stored in contiguous runs.
X = X[rng.permutation(3 * N), :]
datasets["Blobs"] = X

# "Moons": two interleaving half circles, scaled by 5.
N = 100
X, _ = skdatasets.make_moons(n_samples=N, shuffle=True, noise=0.1, random_state=SEED)
datasets["Moons"] = 5 * X

# "Circles": a small circle inside a larger one (factor=0.4), scaled by 5.
N = 100
X, _ = skdatasets.make_circles(n_samples=N, shuffle=True, noise=0.08, random_state=SEED, factor=0.4)
datasets["Circles"] = 5 * X

Show Clustering UI
---

In [None]:
def plot_model(p=0, dataset=None, link='single'):
    """Run hierarchical clustering on one demo data set and draw the result.

    The left panel shows the points colored by flat cluster; the right panel
    shows the dendrogram, the cut height as a dashed line, and a row of
    markers under the leaves colored with the same per-cluster colors.

    Parameters
    ----------
    p : float
        Cut height as a fraction of the largest merge distance in the
        linkage; the actual threshold is ``t = p * max_merge_distance``.
    dataset : str or None
        Key into the module-level ``datasets`` dict. ``None`` selects the
        first registered data set instead of failing on ``datasets[None]``.
    link : str
        Linkage method forwarded to ``scipy.cluster.hierarchy.linkage``.
    """
    if dataset is None:
        # Mirror the dropdown, whose initial value is the first option.
        dataset = next(iter(datasets))

    plt.rcParams.update({'font.size': 12})

    # clear=True keeps a reused figure 1 from accumulating stale axes on
    # backends that do not close the figure after plt.show().
    fig = plt.figure(1, figsize=(16, 6), clear=True)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    X = datasets[dataset]
    z = hac.linkage(X, method=link)
    maxd = np.max(z[:, 2])          # column 2 of the linkage holds merge distances
    t = maxd * p
    clusters = hac.fcluster(z, t, 'distance')

    N = X.shape[0]
    K = np.max(clusters)
    # K+1 colors so that the cluster ids in `clusters` (largest is K) can
    # index the table directly.
    clr = plt.get_cmap('jet')(np.linspace(0, 1, K + 1))
    cluster_ids = np.unique(clusters)

    # Left panel: scatter plot, one color per flat cluster.
    for k in cluster_ids:
        ind = clusters == k
        ax1.plot(X[ind, 0], X[ind, 1], 'o', color=clr[k, :], markersize=10)
    ax1.set_title("HAC (%s, T=%.2f)"%(link,t))
    ax1.axis("equal")
    ax1.grid(True)

    # Right panel: dendrogram drawn explicitly into ax2 (not the implicit
    # "current" axes), with every link in black.
    dn = dendrogram(z, link_color_func=lambda k: "#000000", color_threshold=t, ax=ax2)
    ax2.plot([0, 10 * N], [t, t], '--b')
    ax2.set_title("Dendrogram (T=%.2f)"%(t,))
    ax2.set_ylim(-0.25, maxd * 1.02)

    # Marker strip under the leaves: the i-th leaf from the left is marked at
    # x = 10*i + 5 and dn['leaves'][i] is the observation shown there.
    # One plot call per cluster instead of one per leaf.
    leaf_x = 10 * np.arange(N) + 5
    leaf_cluster = clusters[np.asarray(dn['leaves'])]
    for k in cluster_ids:
        xs = leaf_x[leaf_cluster == k]
        ax2.plot(xs, np.full(xs.shape, -0.15), 'o', color=clr[k], markersize=10)

    plt.tight_layout()
    plt.show()


# Controls: data set selector, linkage selector, and the cut height p
# (a fraction between 0 and 1 that plot_model turns into a distance threshold).
wd = widgets.Dropdown(
    description="Dataset",
    options=datasets.keys(),
)
wl = widgets.Dropdown(
    description="Linkage",
    options=['single', 'complete', 'average'],
)
wp = widgets.FloatSlider(
    description="p",
    value=0,
    min=0,
    max=1,
    step=0.01,
    continuous_update=False,
    layout=widgets.Layout(width='1000px'),
)

# Bind each control to the matching plot_model argument; the plot is rendered
# into the returned output widget.
out = interactive_output(plot_model, {"dataset": wd, "p": wp, "link": wl})

# Layout: both dropdowns side by side, the slider on its own row underneath.
box1 = widgets.HBox([wd, wl])
ui = widgets.VBox([box1, wp])
display(ui, out)

VBox(children=(HBox(children=(Dropdown(description='Dataset', options=('Blobs', 'Moons', 'Circles'), value='Bl…

Output()