In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from matplotlib.animation import FuncAnimation

In [None]:
# Synthetic dataset: settings first, then the points themselves
n_samples = 500    # total number of points to generate
n_features = 2     # each point lives in 2D, so it can be scatter-plotted directly
n_clusters = 4     # number of blobs to generate (and clusters to look for)
K = n_clusters     # K is the name the k-means cells use for the cluster count

# make_blobs also returns the true blob membership of each point; it is
# discarded here because the clustering is done without it.
data, _ = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    random_state=42,
)

In [None]:
def plot_clusters(data, centroids, labels, iteration):
    """Show a scatter plot of one k-means step.

    Args:
        data: Array whose first two columns are used as x/y coordinates.
        centroids: Array of centroid positions; first two columns are plotted.
        labels: Per-point values passed to matplotlib as the point colours.
        iteration: Number shown in the figure title.
    """
    # Data points, coloured by the cluster they are currently assigned to
    point_x = data[:, 0]
    point_y = data[:, 1]
    plt.scatter(point_x, point_y, c=labels, marker='o')

    # Current centroids, drawn on top as red crosses
    centre_x = centroids[:, 0]
    centre_y = centroids[:, 1]
    plt.scatter(centre_x, centre_y, color='red', marker='x')

    plt.title(f"Iteration {iteration}")
    plt.show()

In [None]:
def _nearest_centroid(data, centroids):
    """Return, for every row of ``data``, the index of its closest centroid (Euclidean)."""
    # Broadcasting yields an (n_points, n_centroids) table of distances at once;
    # argmin keeps the first index on ties, like a per-point Python loop would.
    distances = np.linalg.norm(data[:, None, :] - centroids[None, :, :], axis=2)
    return distances.argmin(axis=1)


def kmeans(data, K, max_iters=10, plot=True):
    """Cluster ``data`` into ``K`` groups with Lloyd's k-means algorithm.

    The initial centroids are ``K`` distinct rows of ``data`` picked with
    ``np.random.choice`` (so the result depends on NumPy's global random state).

    Args:
        data: 2-D array-like of shape (n_points, n_features).
        K: Number of clusters; must not exceed the number of points.
        max_iters: Maximum number of centroid updates to perform.
        plot: When True (the default), call ``plot_clusters`` once per
            iteration to show the current state. Pass False to run the
            algorithm without any plotting.

    Returns:
        Tuple ``(centroids, labels)``: a float array of shape (K, n_features)
        and an integer array of shape (n_points,) holding, for each point, the
        index of its nearest returned centroid.

    Raises:
        ValueError: From ``np.random.choice`` if ``K`` is larger than the
            number of points.
    """
    data = np.asarray(data)

    # Float copy so that means can be written into it even for integer data
    start_rows = np.random.choice(data.shape[0], K, replace=False)
    centroids = data[start_rows].astype(float)

    # Assign before the loop so ``labels`` exists even when max_iters == 0
    labels = _nearest_centroid(data, centroids)

    for i in range(max_iters):
        # Show the assignment that the upcoming centroid update is based on
        if plot:
            plot_clusters(data, centroids, labels, i + 1)

        # Move every centroid to the mean of its members. A cluster that lost
        # all of its points keeps its previous position: the mean of an empty
        # selection is NaN, which would poison every later distance.
        new_centroids = centroids.copy()
        for k in range(K):
            members = data[labels == k]
            if len(members):
                new_centroids[k] = members.mean(axis=0)

        # Unchanged centroids mean the assignment is a fixed point; exact
        # comparison is fine because identical labels give identical means.
        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids

        # Re-assign so the returned labels always match the returned centroids
        labels = _nearest_centroid(data, centroids)

    return centroids, labels

In [None]:
# Function to animate the K-means process
def animate_kmeans(data, centroids, K, max_iters=10):
    """Animate k-means, one frame per iteration, starting from ``centroids``.

    All iterations are computed up front and each frame only draws its stored
    state. ``FuncAnimation`` may invoke the frame callback more than once for
    the same frame (for example to draw the initial frame when no
    ``init_func`` is given), so a callback that advanced the algorithm on
    every call would show states that depend on how often it was called.

    Args:
        data: 2-D array-like of shape (n_points, n_features); the first two
            columns are plotted.
        centroids: Starting centroid positions, one row per cluster. The
            array is copied and never modified.
        K: Number of clusters.
        max_iters: Number of iterations, which is also the number of frames.

    Returns:
        The ``FuncAnimation`` object. The caller should keep a reference to
        it for as long as the animation is meant to run.
    """
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    data = np.asarray(data)

    # Private float copy: leaves the caller's array untouched and lets the
    # means be stored even if integer centroids were passed in.
    current = np.array(centroids, dtype=float)

    # history[f] = (centroids shown in frame f, labels assigned against them)
    history = []
    for _ in range(max_iters):
        distances = np.linalg.norm(data[:, None, :] - current[None, :, :], axis=2)
        labels = distances.argmin(axis=1)
        history.append((current, labels))

        # Move each centroid to the mean of its members; a cluster with no
        # members keeps its position (the mean of nothing would be NaN).
        moved = current.copy()
        for k in range(K):
            members = data[labels == k]
            if len(members):
                moved[k] = members.mean(axis=0)
        current = moved

    fig, ax = plt.subplots()

    def update(frame):
        ax.clear()  # Clear the plot for each frame
        frame_centroids, frame_labels = history[frame]
        for k in range(K):
            # Plot the points of cluster k, then its centroid on top
            cluster_points = data[frame_labels == k]
            ax.scatter(cluster_points[:, 0], cluster_points[:, 1], s=50, color=colors[k % len(colors)])
            ax.scatter(frame_centroids[k, 0], frame_centroids[k, 1], color='black', marker='x')
        ax.set_title(f"Iteration {frame + 1}")
        return ax

    # Assign the animation to a variable to keep it in memory
    anim = FuncAnimation(fig, update, frames=max_iters, repeat=False)
    plt.show()
    return anim  # Return the animation object if further usage is needed

In [None]:
# Running the animation
# Pick K distinct data points as the starting centroids. A seeded generator
# makes the starting positions (and so the whole animation) the same on every
# run, matching the fixed random_state used when the data was generated.
rng = np.random.default_rng(42)
centroids = data[rng.choice(data.shape[0], K, replace=False)]

# Bind the result to a name: matplotlib animations must stay referenced for as
# long as they are supposed to run, otherwise they can be garbage collected.
anim = animate_kmeans(data, centroids, K, max_iters=10)
