# Hierarchical Clustering Interactive Demo

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import (
    HBox, VBox, interactive_output, Dropdown,
    FloatSlider, IntSlider,
    Checkbox
)
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster

In [2]:
def update_plot(
    method, metric, color_threshold, criterion, cut_value, depth_value,
    display_true_labels
):
    """
    Draw a dendrogram and a labelled scatter plot for the global data ``X``.

    - method, metric: linkage method and distance metric passed to
      ``linkage``
    - color_threshold: passed to ``dendrogram`` as its color threshold
    - criterion, cut_value, depth_value: passed to ``fcluster`` as
      ``criterion``, ``t`` and ``depth``
    - display_true_labels: if true, color the scatter by the global ``y``
      instead of the ``fcluster`` labels and title it "True Labels"

    Reads the module-level ``X`` (plotted using its first two columns) and
    ``y``. Returns ``None``; the result is the matplotlib figure it creates.
    If ``linkage`` rejects the method/metric combination, a message is
    printed and no figure is created.
    """

    # Perform hierarchical clustering. scipy rejects some combinations that
    # the widgets allow (e.g. 'ward' with a non-Euclidean metric raises
    # ValueError), so report the problem instead of leaving a traceback in
    # the output area.
    try:
        Z = linkage(X, method=method, metric=metric)
    except ValueError as err:
        print(
            f"Cannot compute linkage (method={method!r}, "
            f"metric={metric!r}): {err}"
        )
        return

    # Create a 1-row, 2-column figure
    fig, axes = plt.subplots(1, 2, figsize=(12, 5), tight_layout=True)

    # Left subplot: Dendrogram
    dendrogram(
        Z,
        # Links below the first node under this distance share a color;
        # links at or above it use the default color. With a threshold
        # <= 0 everything is drawn in the default color.
        color_threshold=color_threshold,
        ax=axes[0]
    )
    axes[0].set_title(f"Dendrogram ({method} linkage, {metric} metric)")
    axes[0].set_xlabel("Sample Index")
    axes[0].set_ylabel("Distance")

    # Right subplot: Scatter
    # - color points by cluster membership from fcluster
    # For 'maxclust', t is a number of clusters, so pass an integer rather
    # than the raw float coming from the slider (truncates, e.g. 2.5 -> 2).
    t = int(cut_value) if criterion == "maxclust" else cut_value
    labels = fcluster(Z, t=t, criterion=criterion, depth=depth_value)
    nunique = len(np.unique(labels))

    # Quick scatter plot, color by cluster label (or by the true labels)
    if display_true_labels:
        c_ = y
        title_ = "True Labels"
    else:
        c_ = labels
        title_ = (
            f"{nunique} Cluster(s) (criterion={criterion}, "
            f"t={cut_value:.2f}, depth={depth_value})"
        )

    axes[1].scatter(X[:, 0], X[:, 1], c=c_, cmap='rainbow', s=25)
    # Annotate every point with its row index so it can be matched to the
    # dendrogram leaves.
    for idx, row in enumerate(X):
        axes[1].text(*row, f"{idx:3d}", va="bottom", ha="left")

    axes[1].set_title(title_)
    axes[1].set_xlabel("Feature 1")
    axes[1].set_ylabel("Feature 2")

In [3]:
# --- Left column: controls that feed linkage() and dendrogram() ---
method_widget = Dropdown(
    description='Method:',
    options=('single', 'complete', 'average', 'ward'),
    value='ward',
)

metric_widget = Dropdown(
    description='Metric:',
    options=('euclidean', 'cityblock', 'cosine'),
    value='euclidean',
)

# Slider range 0.0-15.0 in steps of 0.5, starting at 0.0.
color_threshold_widget = FloatSlider(
    description='Color thr:',
    min=0.0, max=15.0, step=0.5,
    value=0.0,
)

dendrogram_controls = VBox(
    [method_widget, metric_widget, color_threshold_widget]
)

# --- Right column: controls that feed fcluster() ---
criterion_widget = Dropdown(
    description='Criterion:',
    options=('inconsistent', 'distance', 'maxclust'),
    value='maxclust',
)

# Slider range 1.0-30.0 in steps of 0.5, starting at 2.0.
cut_value_widget = FloatSlider(
    description='Cut value:',
    min=1.0, max=30.0, step=0.5,
    value=2.0,
)

# Always enabled, whichever criterion is selected.
depth_widget = IntSlider(
    description='Depth value:',
    min=1, max=30,
    value=2,
)

display_true_widget = Checkbox(
    description="Display True Labels",
    value=False,
    indent=True,
)

fcluster_controls = VBox([
    display_true_widget,
    criterion_widget,
    cut_value_widget,
    depth_widget,
])

# Place the two columns side by side.
controls_box = HBox([dendrogram_controls, fcluster_controls])

In [4]:
# Synthetic two-group dataset; the fixed seed makes every run reproducible.
np.random.seed(17)
X0 = np.random.normal(1.0, 1.0, (15, 2))  # 15 points: mean 1.0, std 1.0
X1 = np.random.normal(5.0, 2.0, (10, 2))  # 10 points: mean 5.0, std 2.0
X = np.vstack((X0, X1))

# True labels: 0.0 for the rows that came from X0, 1.0 for those from X1.
y = np.zeros(X.shape[0])
y[X0.shape[0]:] = 1

In [5]:
# Map each update_plot keyword argument to the widget that supplies its value.
out = interactive_output(
    update_plot,
    dict(
        method=method_widget,
        metric=metric_widget,
        color_threshold=color_threshold_widget,
        criterion=criterion_widget,
        cut_value=cut_value_widget,
        depth_value=depth_widget,
        display_true_labels=display_true_widget,
    ),
)

In [6]:
# Display the widget controls followed by the plot output in the notebook.
# NOTE(review): `display` is not imported in this file; presumably it is the
# built-in provided by the IPython/Jupyter session -- confirm if this code is
# ever run outside a notebook.
display(controls_box, out)

HBox(children=(VBox(children=(Dropdown(description='Method:', index=3, options=('single', 'complete', 'average…

Output()