In [None]:
import struct
import numpy as np
import umap
import plotly.graph_objects as go
import plotly as px
import ipywidgets as widgets
from IPython.display import display

In [None]:
def read_all_iterations(filename):
    """Read every complete iteration snapshot from a binary clustering dump.

    Layout as parsed by this function (native byte order, 4-byte fields):

    * header: ``n_clusters``, ``n_points``, ``n_dim`` as three unsigned ints
    * then, repeated once per iteration:
        - ``n_clusters * n_dim`` float32 centroid coordinates
        - ``n_clusters`` int32 centroid IDs
        - ``n_points * n_dim`` float32 point coordinates
        - ``n_points`` int32 cluster IDs (one per point)

    A trailing record shorter than one full iteration is ignored.

    Args:
        filename: Path of the binary file to read.

    Returns:
        Tuple ``(n_clusters, n_points, n_dim, all_iterations)`` where
        ``all_iterations`` is a list with one dict per complete iteration,
        keyed by ``'centroids'``, ``'centroid_ids'``, ``'points'`` and
        ``'point_cluster_ids'``. The arrays are built with ``np.frombuffer``
        over immutable bytes and are therefore read-only.

    Raises:
        struct.error: If the file is too short to contain the 12-byte header.
    """
    with open(filename, "rb") as f:
        n_clusters, n_points, n_dim = struct.unpack("III", f.read(12))

        # Byte sizes of the four sections that make up one iteration record.
        centroid_bytes = n_clusters * n_dim * 4
        centroid_id_bytes = n_clusters * 4
        point_bytes = n_points * n_dim * 4
        point_id_bytes = n_points * 4
        iteration_size = centroid_bytes + centroid_id_bytes + point_bytes + point_id_bytes

        all_iterations = []

        # With no clusters and no points a record is zero bytes long: f.read(0)
        # always "succeeds", so the loop below would never terminate.
        if iteration_size == 0:
            return n_clusters, n_points, n_dim, all_iterations

        while True:
            iter_data = f.read(iteration_size)
            if len(iter_data) < iteration_size:
                break  # EOF, or a truncated trailing record

            offset = 0
            centroids = np.frombuffer(
                iter_data[offset:offset + centroid_bytes], dtype=np.float32
            ).reshape((n_clusters, n_dim))
            offset += centroid_bytes

            centroid_ids = np.frombuffer(
                iter_data[offset:offset + centroid_id_bytes], dtype=np.int32
            )
            offset += centroid_id_bytes

            points = np.frombuffer(
                iter_data[offset:offset + point_bytes], dtype=np.float32
            ).reshape((n_points, n_dim))
            offset += point_bytes

            point_cluster_ids = np.frombuffer(
                iter_data[offset:offset + point_id_bytes], dtype=np.int32
            )

            all_iterations.append({
                'centroids': centroids,
                'centroid_ids': centroid_ids,
                'points': points,
                'point_cluster_ids': point_cluster_ids
            })

    return n_clusters, n_points, n_dim, all_iterations

In [6]:
filename = "clustered_data"

# Load with the reader defined in this notebook so the cell works on a fresh
# kernel (Restart & Run All).
# NOTE(review): assumes `clustered_data` uses the header + per-iteration layout
# that read_all_iterations parses — confirm against the producer.
n_clusters, n_points, n_dim, all_iterations = read_all_iterations(filename)
if not all_iterations:
    raise ValueError(f"{filename!r} contains no complete iteration")

# The cells below visualise a single snapshot; use the last one in the file.
final_iteration = all_iterations[-1]
# np.frombuffer arrays over bytes are read-only; copy so later cells own
# writable data.
point_data = final_iteration['points'].copy()
point_cluster_ids = final_iteration['point_cluster_ids'].copy()
centroids = final_iteration['centroids'].copy()
centroid_ids = final_iteration['centroid_ids'].copy()

print(f"n_clusters: {n_clusters}")
print(f"n_points: {n_points}")
print(f"n_dim: {n_dim}")
print(f"point_data shape: {point_data.shape}")
print(f"centroid_data shape: {centroids.shape}")

n_clusters: 4
n_points: 50
n_dim: 3
point_data shape: (50, 3)
centroid_data shape: (4, 3)


In [7]:
# Embed the points into 3 UMAP components, then map the centroids through the
# same fitted reducer so both live in one coordinate system.
# UMAP is stochastic: pin the seed so the embedding (and the figure built from
# it) is reproducible across kernel restarts.
reducer = umap.UMAP(n_components=3, random_state=42)
points_3d = reducer.fit_transform(point_data)
centroids_3d = reducer.transform(centroids)



In [8]:
# Draw the embedded points and centroids as one interactive 3D scatter,
# using one colour per cluster ID.
fig = go.Figure()

unique_clusters = centroid_ids.copy()
colors = px.colors.qualitative.Plotly  # `px` is the top-level plotly package in this notebook
palette_size = len(colors)
# Wrap around the palette when there are more clusters than colours.
cluster_color_map = dict(
    (cluster_id, colors[slot % palette_size])
    for slot, cluster_id in enumerate(unique_clusters)
)

for cluster_id in unique_clusters:
    cluster_color = cluster_color_map[cluster_id]

    # Points assigned to this cluster: small, translucent, kept out of the legend.
    mask = point_cluster_ids == cluster_id
    member_xyz = points_3d[mask]
    point_trace = go.Scatter3d(
        x=member_xyz[:, 0],
        y=member_xyz[:, 1],
        z=member_xyz[:, 2],
        mode='markers',
        marker={'size': 3, 'color': cluster_color, 'opacity': 0.5},
        name=f'p - C{cluster_id}',
        showlegend=False,
    )
    fig.add_trace(point_trace)

    # The centroid for this cluster: larger labelled diamond, shown in the legend.
    mask = centroid_ids == cluster_id
    centroid_xyz = centroids_3d[mask]
    centroid_trace = go.Scatter3d(
        x=centroid_xyz[:, 0],
        y=centroid_xyz[:, 1],
        z=centroid_xyz[:, 2],
        mode='markers+text',
        marker={
            'size': 6,
            'symbol': 'diamond',
            'color': cluster_color,
            'line': {'color': cluster_color, 'width': 5},
        },
        name=f'Cluster {cluster_id}',
        text=f"C{cluster_id}",
        textposition="top center",
    )
    fig.add_trace(centroid_trace)

fig.update_layout(
    title="UMAP 3D Projection of K-Means Clustered Data",
    margin={'l': 0, 'r': 0, 'b': 0, 't': 40},
    scene={
        'xaxis_title': 'UMAP-1',
        'yaxis_title': 'UMAP-2',
        'zaxis_title': 'UMAP-3',
        'bgcolor': 'white',
    },
    legend={'x': 0.02, 'y': 0.98},
    showlegend=True,
)

fig.show()