First, import the necessary dependencies:

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import umap

# Path to repo root. It is prepended to sys.path so that the `src` package
# imported right after this resolves; adjust it if the repository lives
# somewhere other than ~/embedor.
repo_root = os.path.expanduser("~/embedor")
sys.path.insert(0, repo_root)

from src.embedor import EmbedOR
from src.utils.orcmanl import ORCManL
from src.plotting import plot_graph_2D

Load the sample chimp data and label files:

In [None]:
# Load data
np_file = "sample_data/chimp.data.npy"
labels_file = "sample_data/chimp.labels.npy"

data = np.load(np_file)

# Labels are optional: when the file is absent, `labels` stays None and the
# rest of the notebook is expected to run without label information.
labels = None
if os.path.exists(labels_file):
    labels = np.load(labels_file)

# Convert labels to numeric codes.
# The conversion is skipped entirely when there are no labels; previously a
# missing labels file crashed here with AttributeError on `labels.dtype`.
if labels is None:
    numeric_labels = None
elif labels.dtype.kind in {'U', 'S', 'O'}:
    # String/object labels: map each distinct value to its index in the
    # sorted array of unique values.
    unique_labels = np.unique(labels)
    label_to_int = {label: i for i, label in enumerate(unique_labels)}
    numeric_labels = np.array([label_to_int[lab] for lab in labels])
else:
    # Already numeric: use as-is.
    numeric_labels = labels

# Clean data
# Replace NaN and +/-inf entries with 0.0.
data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)
# Rows whose Euclidean norm is exactly zero get a small constant offset so
# that no sample sits exactly at the origin.
# NOTE(review): presumably this avoids degenerate distances/normalisation
# downstream -- confirm against the ORCManL / EmbedOR requirements.
zero_rows = np.where(np.linalg.norm(data, axis=1) == 0)[0]
if len(zero_rows) > 0:
    data[zero_rows] += 1e-6

print("Dataset shape:", data.shape)

Build the ORCManL graph and run EmbedOR on the chimp dataset:

In [None]:
# Single seed shared by the subsampling step and EmbedOR so that the whole
# cell is reproducible from run to run.
seed = 42

# Subsample data
# Cap the sample size at the number of available rows: sampling without
# replacement raises ValueError when asked for more points than exist.
n_points = min(5000, data.shape[0])
rng = np.random.default_rng(seed)
indices = rng.choice(data.shape[0], size=n_points, replace=False)
data_sub = data[indices]
# Keep labels aligned with the subsampled rows (None when no labels exist).
numeric_labels_sub = numeric_labels[indices] if numeric_labels is not None else None

# Build ORCManL graph
orcmanl = ORCManL(verbose=True)
orcmanl.fit(data_sub)
print("Graph nodes:", len(orcmanl.G_pruned.nodes()))
print("Graph edges:", len(orcmanl.G_pruned.edges()))

# EmbedOR parameters
exp_params = {
    'p': 3,
    'mode': 'nbrs',
    'n_neighbors': 15,
}

# Run EmbedOR
# NOTE(review): EmbedOR is fitted on `data_sub` directly and does not receive
# the ORCManL graph built above -- confirm this is the intended pipeline.
embedor = EmbedOR(exp_params, layout="torch", seed=seed)
embedding = embedor.fit_transform(data_sub)
print("Embedding shape:", embedding.shape)

Plot the 2-D embedding:

In [None]:
plt.figure(figsize=(10, 10))

# Colour each point by the numeric label of the *subsampled* rows. The
# embedding was computed on `data_sub`, so the colour array must have one
# entry per subsampled point; the full `labels` array has the wrong length
# and may hold strings, which scatter cannot colormap.
# When no labels exist, `numeric_labels_sub` is None and matplotlib falls back
# to its default colour; `cmap` is then omitted because it would be ignored.
plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=numeric_labels_sub,
    s=10,
    cmap="tab10" if numeric_labels_sub is not None else None,
)

plt.show()