In [15]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

In [16]:
# Build a toy dataset: three 2-D blobs of 300 normally distributed points each
np.random.seed(42)
blob_specs = [
    # (blob centre, standard deviation)
    ([2, 2], 0.60),
    ([-2, -2], 0.50),
    ([2, -3], 0.30),
]
# The blobs are drawn in the listed order so the seeded random stream is
# consumed exactly once per blob, then stacked row-wise into one array.
X = np.vstack([
    np.random.randn(300, 2) * spread + np.array(centre)
    for centre, spread in blob_specs
])
n_clusters = len(blob_specs)

In [17]:
# Seed the centroids with n_clusters distinct rows of X chosen at random
# (replace=False guarantees no row is picked twice).
seed_rows = np.random.choice(len(X), n_clusters, replace=False)
centers = X[seed_rows]
# One plotting colour per cluster index
colors = ['purple', 'green', 'orange']

In [18]:
# Per-iteration snapshots consumed later by the animation; starts out empty
history = list()

In [19]:
def assign_clusters(X, centers):
    """Return, for every point in ``X``, the index of its nearest center.

    ``X`` is expected to be a 2-D array of points (one row per point) and
    ``centers`` a 2-D array of centroids (one row per centroid) with the
    same number of columns. Distances are Euclidean; when two centers are
    equally close, the lower index wins (``argmin`` semantics).
    """
    # Broadcasting (n, 1, d) against (k, d) yields every point-to-center offset.
    offsets = X[:, np.newaxis] - centers
    distances = np.linalg.norm(offsets, axis=2)
    return distances.argmin(axis=1)

def update_centers(X, labels, k, prev_centers=None):
    """Recompute each centroid as the mean of the points assigned to it.

    Parameters
    ----------
    X : ndarray
        2-D array of points, one row per point.
    labels : ndarray
        Cluster index of each row of ``X``.
    k : int
        Number of clusters; one centroid is returned per index ``0..k-1``.
    prev_centers : ndarray, optional
        Centroids from the previous iteration. When supplied, a cluster that
        received no points keeps its previous centroid.

    Returns
    -------
    ndarray
        Array with one row per cluster holding the new centroid.

    Notes
    -----
    Averaging an empty selection yields NaN (and a RuntimeWarning), and a NaN
    centroid corrupts every later distance computation. An empty cluster is
    therefore given ``prev_centers[i]`` when available, and otherwise the
    mean of all points in ``X``.
    """
    new_centers = []
    for i in range(k):
        members = X[labels == i]
        if len(members):
            new_centers.append(members.mean(axis=0))
        elif prev_centers is not None:
            # Empty cluster: leave the centroid where it was.
            new_centers.append(np.asarray(prev_centers[i], dtype=float))
        else:
            # Empty cluster and no history: fall back to the overall data mean.
            new_centers.append(X.mean(axis=0))
    return np.array(new_centers)

In [20]:
# Run K-means step-by-step and save each iteration
max_iters = 10  # upper bound on assignment/update rounds

# Start from an empty log so that re-running this cell does not append
# duplicate frames to a history left over from an earlier execution.
# Note: `centers` is advanced by this loop, so re-run the initialization
# cell first if the run should restart from fresh random centers.
history = []

for _ in range(max_iters):
    labels = assign_clusters(X, centers)
    # Snapshot before the update so each frame pairs the centers with the
    # labels that were derived from them.
    history.append((X.copy(), centers.copy(), labels.copy()))
    new_centers = update_centers(X, labels, n_clusters)
    if np.allclose(new_centers, centers):
        # Centers stopped moving: converged.
        break
    centers = new_centers

In [21]:
# Create the square figure and single axes that every animation frame reuses
figure_size = (6, 6)
fig, ax = plt.subplots(figsize=figure_size)

def animate(i):
    """Redraw the shared axes with the K-means state stored in ``history[i]``.

    Each history entry is unpacked as ``(points, centers, labels)``. The axes
    are cleared, then for every cluster index its member points are drawn in
    that cluster's colour followed by a red ``X`` marker at its center. The
    title shows the 1-based iteration number.
    """
    ax.clear()
    frame_points, frame_centers, frame_labels = history[i]
    for k in range(n_clusters):
        in_cluster = frame_labels == k
        members = frame_points[in_cluster]
        ax.scatter(
            members[:, 0],
            members[:, 1],
            s=30,
            color=colors[k],
            label=f'Cluster {k}',
        )
        # Drawn right after its members so the per-cluster draw order is kept.
        ax.scatter(*frame_centers[k], c='red', s=200, marker='X')
    ax.set_title(f'Iteration {i + 1}')
    ax.legend()

# One frame per recorded iteration, with a 1000 ms delay between frames
anim = FuncAnimation(fig, animate, frames=len(history), interval=1000)
# Close this specific figure rather than whichever one happens to be current.
# NOTE(review): presumably done so the notebook does not also render a static
# copy of the figure next to the animation; confirm.
plt.close(fig)

# Show animation
HTML(anim.to_jshtml())