In [11]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import gradio as gr
import os
from datetime import datetime

In [14]:
def kmeans(X, K, max_iters):
    """Cluster the rows of ``X`` into ``K`` groups with Lloyd's algorithm.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data points, one per row.
    K : int
        Number of clusters. Integral floats (e.g. ``3.0``) are accepted.
    max_iters : int
        Upper bound on the number of assign/update iterations.

    Returns
    -------
    labels : ndarray of shape (n_samples,)
        Cluster index of each point from the last assignment step.
    centroids : ndarray of shape (K, n_features)
        Final centroid positions (always float).
    losses : list
        Sum of squared distances between each point and its assigned
        (updated) centroid, one entry per executed iteration.

    Raises
    ------
    ValueError
        If ``K`` is not between 1 and the number of samples.
    """
    X = np.asarray(X)
    K = int(K)
    max_iters = int(max_iters)

    n_samples = X.shape[0]
    if not 1 <= K <= n_samples:
        raise ValueError(
            f"K must be between 1 and the number of samples ({n_samples}), got {K}"
        )

    def nearest_centroid(current):
        # Squared distances have the same arg-min as Euclidean distances,
        # so the square root is skipped.
        sq_dist = ((X - current[:, np.newaxis]) ** 2).sum(axis=2)
        return np.argmin(sq_dist, axis=0)

    # Initialize centroids from K distinct data points. The float cast keeps
    # later mean updates from being truncated when X has an integer dtype.
    centroids = X[np.random.choice(n_samples, K, replace=False)].astype(float)

    losses = []
    labels = None

    for _ in range(max_iters):
        # Assign points to nearest centroid
        labels = nearest_centroid(centroids)

        # Update centroids. A cluster that received no points keeps its
        # previous centroid; taking the mean of an empty slice would yield
        # NaN and corrupt every subsequent iteration.
        new_centroids = centroids.copy()
        for k in range(K):
            members = X[labels == k]
            if len(members) > 0:
                new_centroids[k] = members.mean(axis=0)

        # Calculate loss
        loss = np.sum((X - new_centroids[labels]) ** 2)
        losses.append(loss)

        # Check for convergence
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids

    if labels is None:
        # No iteration ran (max_iters <= 0): still return a valid assignment
        # against the initial centroids instead of failing on an unbound name.
        labels = nearest_centroid(centroids)

    return labels, centroids, losses

In [19]:
def visualize_clusters(X, labels, centroids):
    """Draw a scatter plot of clustered points and their centroids.

    Drawing is done through explicit ``Figure``/``Axes`` methods rather than
    the pyplot "current figure", so the result does not depend on which
    figure happens to be active when this function is called.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data points; 1, 2 or 3 features are plotted.
    labels : array-like of shape (n_samples,)
        Cluster index per point, used to colour the points.
    centroids : ndarray of shape (K, n_features)
        Centroid positions, drawn as red crosses.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding the plot. For any other number of features the
        figure is returned without axes.
    """
    dim = X.shape[1]

    fig = plt.figure(figsize=(10, 6))
    centroid_style = dict(c='red', marker='x', s=200, linewidths=3)

    if dim == 1:
        ax = fig.add_subplot(111)
        # 1D data is laid out along y = 0.
        ax.scatter(X[:, 0], np.zeros(X.shape[0]), c=labels, cmap='viridis')
        ax.scatter(centroids[:, 0], np.zeros(centroids.shape[0]), **centroid_style)
        ax.set_title('1D K-means Clustering')
        ax.set_xlabel('X')
    elif dim == 2:
        ax = fig.add_subplot(111)
        ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
        ax.scatter(centroids[:, 0], centroids[:, 1], **centroid_style)
        ax.set_title('2D K-means Clustering')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
    elif dim == 3:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels, cmap='viridis')
        ax.scatter(centroids[:, 0], centroids[:, 1], centroids[:, 2], **centroid_style)
        ax.set_title('3D K-means Clustering')
        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')

    fig.tight_layout()
    return fig


In [20]:
def plot_loss(losses):
    """Plot the K-means loss against the (1-based) iteration number.

    Uses explicit ``Figure``/``Axes`` methods instead of the pyplot
    "current figure", so the plot always lands on the figure created here.

    Parameters
    ----------
    losses : sequence of float
        Loss value recorded at each iteration.

    Returns
    -------
    matplotlib.figure.Figure
        The figure containing the loss curve.
    """
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    ax.plot(range(1, len(losses) + 1), losses)
    ax.set_title('K-means Loss per Iteration')
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Loss')
    fig.tight_layout()
    return fig

In [21]:
def run_kmeans(dimensions, n_clusters, n_iterations, n_samples):
    """Generate uniform random data, cluster it, and save the two plots.

    Parameters
    ----------
    dimensions : int
        Number of features of the generated data.
    n_clusters : int
        Number of clusters passed to ``kmeans``.
    n_iterations : int
        Maximum number of K-means iterations.
    n_samples : int
        Number of random points to generate.

    All four values may be integral floats; they are converted to ``int``.

    Returns
    -------
    tuple of str
        ``(cluster_filename, loss_filename, cluster_filename, loss_filename)``:
        each path appears twice, in the order cluster plot then loss plot.
    """
    # UI controls may deliver numbers as floats; numpy shapes and range()
    # require real integers.
    dimensions = int(dimensions)
    n_clusters = int(n_clusters)
    n_iterations = int(n_iterations)
    n_samples = int(n_samples)

    # Generate random data
    X = np.random.rand(n_samples, dimensions)

    # Run K-means
    labels, centroids, losses = kmeans(X, n_clusters, n_iterations)

    # Create output directory if it doesn't exist
    output_dir = "kmeans_output"
    os.makedirs(output_dir, exist_ok=True)

    # Microsecond resolution keeps two runs started within the same second
    # from overwriting each other's files.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    cluster_filename = os.path.join(output_dir, f"cluster_plot_{timestamp}.png")
    loss_filename = os.path.join(output_dir, f"loss_plot_{timestamp}.png")

    # Visualize clusters; close the figure even if saving fails so that
    # figures do not accumulate in memory.
    cluster_fig = visualize_clusters(X, labels, centroids)
    try:
        cluster_fig.savefig(cluster_filename)
    finally:
        plt.close(cluster_fig)

    # Plot loss
    loss_fig = plot_loss(losses)
    try:
        loss_fig.savefig(loss_filename)
    finally:
        plt.close(loss_fig)

    return cluster_filename, loss_filename, cluster_filename, loss_filename

In [22]:
# Gradio front end: four sliders feed run_kmeans, which returns two image
# paths (rendered as images) followed by the same two paths shown as text.
iface = gr.Interface(
    title="K-means Clustering",
    description="Specify the number of dimensions, clusters, iterations, and samples to run K-means clustering. Images are saved locally.",
    fn=run_kmeans,
    inputs=[
        gr.Slider(minimum=1, maximum=3, step=1, label="Number of Dimensions"),
        gr.Slider(minimum=2, maximum=10, step=1, label="Number of Clusters (K)"),
        gr.Slider(minimum=1, maximum=100, step=1, label="Number of Iterations"),
        gr.Slider(minimum=100, maximum=1000, step=100, label="Number of Samples"),
    ],
    outputs=[
        gr.Image(label="Cluster Visualization"),
        gr.Image(label="Loss per Iteration"),
        gr.Text(label="Cluster Plot Filename"),
        gr.Text(label="Loss Plot Filename"),
    ],
)

# Start the local web server for the interface.
iface.launch()

Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


