In [1]:
import numpy as np
import umap
import matplotlib.pyplot as plt

In [2]:
def visualize_numpy_umap_with_labels(features: np.ndarray, labels: np.ndarray, discarded_indices: np.ndarray = np.array([])) -> None:
    """Scatter-plot a 2-D UMAP projection of ``features`` coloured by class.

    Points whose positional index appears in ``discarded_indices`` are drawn
    larger, with a black outline, in the colour of their class; all other
    points are drawn as small markers. Only groups that actually contain
    points are plotted, so the legend never lists empty groups.

    Parameters
    ----------
    features
        Data handed to ``umap.UMAP(...).fit_transform``; one row per sample.
    labels
        1-D array-like of class labels, one per row of ``features``.
    discarded_indices
        Array-like of row positions to highlight as discarded. Values that
        do not match any position in ``range(len(labels))`` are ignored.
        ``None`` or an empty sequence means nothing is highlighted.

    Raises
    ------
    ValueError
        If ``labels`` is not 1-D or its length differs from the number of
        rows in ``features``.
    """
    labels = np.asarray(labels)
    if labels.ndim != 1:
        raise ValueError(
            f"labels must be 1-D, got an array with shape {labels.shape}"
        )
    n_points = labels.shape[0]

    # Validate before the (comparatively slow) UMAP fit so that a mismatch is
    # reported clearly instead of surfacing as an indexing error later on.
    n_rows = np.shape(features)[0]
    if n_rows != n_points:
        raise ValueError(
            f"features has {n_rows} rows but labels has {n_points} entries"
        )

    # Accept lists/tuples/None as well as arrays for the discarded positions.
    if discarded_indices is None:
        discarded_indices = np.array([])
    # Computed once: True at every position listed in discarded_indices.
    discarded_mask = np.isin(np.arange(n_points), np.asarray(discarded_indices))

    # Apply UMAP for dimensionality reduction (fixed seed for repeatability).
    embedding = umap.UMAP(n_components=2, random_state=42).fit_transform(features)

    # One colour per class, spread evenly over the "tab20" colormap.
    cmap = plt.get_cmap("tab20")
    unique_labels = np.unique(labels)
    colors = {label: cmap(i / len(unique_labels)) for i, label in enumerate(unique_labels)}

    _, ax = plt.subplots(figsize=(8, 6))

    # First pass: regular points, one scatter call (and legend entry) per class.
    for label in unique_labels:
        kept = (labels == label) & ~discarded_mask
        if not kept.any():
            continue  # every point of this class was discarded
        ax.scatter(
            embedding[kept, 0], embedding[kept, 1],
            color=[colors[label]], label=f"Class {label}", s=20, alpha=0.7
        )

    # Second pass: discarded points, drawn afterwards so they sit on top,
    # in the class colour but with a black outline.
    for label in unique_labels:
        dropped = (labels == label) & discarded_mask
        if not dropped.any():
            continue  # nothing discarded in this class
        ax.scatter(
            embedding[dropped, 0], embedding[dropped, 1],
            color=[colors[label]], edgecolors="black", linewidth=1.5,
            s=50, alpha=0.9, label=f"Discarded (Class {label})"
        )

    ax.set_title("UMAP Projection with Class Labels & Discarded Points")
    ax.set_xlabel("Component 1")
    ax.set_ylabel("Component 2")
    ax.legend()
    plt.show()

In [None]:
# Load the precomputed arrays from the current working directory.
features = np.load("features.npy")

# The stored label array is indexed with [:, 0]: only its first column is
# used as the class label for each sample.
label_columns = np.load("labels.npy")
labels = label_columns[:, 0]

discarded_indices = np.load("discarded_indices.npy")

# Render the UMAP scatter plot with discarded samples highlighted.
visualize_numpy_umap_with_labels(
    features=features,
    labels=labels,
    discarded_indices=discarded_indices,
)