In [None]:
import numpy as np
from baselines.scripts.variable_density_swiss_roll import non_uniform_swiss2
# Fixed RNG seeds for the notebook: seeds[0] drives the data generation and the
# first t-SNE run, seeds[1] the second t-SNE run.
seeds = [
    20251106,
    20251108,
]

In [None]:
# Number of points to sample, and the K forwarded to the generator.
N = 1500
K = 2

# Seed NumPy's global RNG before sampling
# (assumes non_uniform_swiss2 draws from np.random -- TODO confirm).
np.random.seed(seeds[0])

# Tuning note kept from exploration: pi0 = 0.3 is okay, sigma = 0.2
# (sigma is presumably the third positional argument -- confirm against the
# signature of non_uniform_swiss2).
X, t = non_uniform_swiss2(
    2,
    1,
    0.2,
    N,
    pi0=0.25,
    K=K,
    dim=3,
)

## Data Generation

Here's a plot of the Swiss roll data, with each point colored by its value of `t`.

In [None]:
import matplotlib.pyplot as plt
# 3D scatter of the sampled points, colored by t.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

sc = ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, s=20, alpha=0.7, cmap='viridis')

# Label the axes and title the figure so it can be read without the
# surrounding code.
ax.set_xlabel('X[:, 0]')
ax.set_ylabel('X[:, 1]')
ax.set_zlabel('X[:, 2]')
ax.set_title('Swiss roll sample colored by t')

plt.colorbar(sc, ax=ax, label='t')
plt.show()

In [None]:
import altair as alt
import pandas as pd

def plot_swiss_emb(X_emb, t):
    """Scatter a two-column embedding with Altair, coloring points by ``t``.

    Parameters
    ----------
    X_emb : array-like
        Embedding coordinates; loaded into a DataFrame with columns
        'x' and 'y', so it must have exactly two columns.
    t : array-like
        Per-point values stored in the "t" column and mapped to color.

    Returns
    -------
    alt.Chart
        Circle-mark chart sized 400 x 300.
    """
    points = pd.DataFrame(X_emb, columns=['x', 'y']).assign(t=t)
    scatter = alt.Chart(points).mark_circle(size=60)
    scatter = scatter.encode(x='x', y='y', color='t')
    return scatter.properties(width=400, height=300)

## $t$-SNE Distortions

In [None]:
from sklearn.manifold import TSNE
# Two t-SNE runs on the same data that differ only in their random seed.
# Settings shared by both runs are kept in one place.
tsne_kwargs = dict(n_components=2, perplexity=50, learning_rate='auto', init="random")

tsne = TSNE(random_state=seeds[0], **tsne_kwargs)
Z1 = tsne.fit_transform(X)

tsne = TSNE(random_state=seeds[1], **tsne_kwargs)
Z2 = tsne.fit_transform(X)

In [None]:
# Embedding scatter plots for the two t-SNE runs.
plots = [
    plot_swiss_emb(Z1, t),
    plot_swiss_emb(Z2, t)
]

# Render every chart with a single display call. A list comprehension used only
# for its side effects would also echo a meaningless [None, None] as the
# cell's result.
display(*plots)

In [None]:
from distortions.geometry import Geometry, bind_metric, local_distortions, neighborhoods
from distortions.visualization import dplot
from anndata import AnnData
from sklearn.neighbors import NearestNeighbors

def distortion_plot(Z, X, t, n_neighbors=40, geom_radius=1, threshold=0.1, outlier_factor=3):
    """Build an interactive distortion plot for embedding ``Z`` of data ``X``.

    Parameters
    ----------
    Z : array-like
        Embedding coordinates; stored under ``obsm["X_tsne"]`` and passed to
        ``local_distortions`` / ``bind_metric``.
    X : array-like
        Original data, used for the k-nearest-neighbor graph and passed to
        ``local_distortions``.
    t : array-like
        Per-point values stored in the "t" column and mapped to color.
    n_neighbors : int
        Neighbor count used both for the ``Geometry`` adjacency and for the
        k-nearest-neighbor distance graph on ``X``.
    geom_radius : float
        Radius forwarded to ``Geometry`` through ``affinity_kwds``.
    threshold, outlier_factor
        Forwarded unchanged to ``neighborhoods``.

    Returns
    -------
    tuple
        ``(plot, H)`` where ``plot`` is the configured ``dplot`` object and
        ``H`` is the first value returned by ``local_distortions``.
    """
    geometry = Geometry(
        affinity_kwds={"radius": geom_radius},
        adjacency_kwds={"n_neighbors": n_neighbors},
    )
    H, Hvv, Hs = local_distortions(Z, X, geometry)

    # Per-point table for plotting, with t attached for the color mapping.
    points = bind_metric(Z, Hvv, Hs)
    points["t"] = t

    # Package the original-space kNN distance graph and the embedding in an
    # AnnData object, which is what `neighborhoods` takes as input.
    annotated = AnnData(X=X)
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric="euclidean")
    knn.fit(X)
    annotated.obsp["distances"] = knn.kneighbors_graph(X, mode="distance")
    annotated.obsm["X_tsne"] = Z

    flagged = neighborhoods(
        annotated,
        threshold=threshold,
        outlier_factor=outlier_factor,
        embed_key="X_tsne",
    )

    chart = dplot(points, height=400, width=600)
    chart = chart.mapping(x="embedding_0", y="embedding_1", color="t")
    chart = chart.inter_edge_link(
        N=flagged,
        strokeWidth=.2,
        opacity=0.9,
        threshold=10,
        stroke="#F25E7A",
        highlightColor="#C83F58",
        backgroundOpacity=0.6,
    )
    chart = chart.geom_ellipse(radiusMin=1, radiusMax=20)
    return chart, H

In [None]:
# (plot, H) pairs for the two t-SNE embeddings.
distortion_data = [
    distortion_plot(Z1, X, t),
    distortion_plot(Z2, X, t)
]

# Show only the plot from each pair. One display call avoids the [None, None]
# echo that a side-effect list comprehension leaves as the cell's result.
display(*(plot for plot, _ in distortion_data))

## Neighbor Distance Preservation

In [None]:
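# Notation used in the next cell:
# D1 is the n x n matrix of pairwise Euclidean distances between rows of Z1
# D2 is the n x n matrix of pairwise Euclidean distances between rows of Z2
# R is the elementwise ratio D1 / D2
# M is a 0/1 mask: M[i, j] = 1 when j is among the 15 nearest neighbors of i in the original space, X
#   (the relation is directional, so M need not be symmetric)
# V is a length-n vector: V[i] is the variance of row i of R, restricted to entries where M[i, j] = 1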


In [None]:
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors

# Pairwise Euclidean distances within each embedding, then their elementwise ratio.
D1 = cdist(Z1, Z1)
D2 = cdist(Z2, Z2)
# Both diagonals are 0, so R[i, i] is 0/0 = NaN. Those entries are never
# selected below, so silence the expected warnings rather than let them
# clutter the cell output.
with np.errstate(divide="ignore", invalid="ignore"):
    R = D1 / D2

# Indices of each point's 15 nearest neighbors in the original space X.
# Calling kneighbors() with no query asks for the neighbors of the indexed
# points themselves, and scikit-learn then leaves each point out of its own
# neighbor list. This yields 15 real neighbors per point. Querying with X and
# dropping the first hit would yield only 14, and would drop the wrong entry
# whenever duplicate points make "self" not the first hit.
n_neighbors = 15
nn = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean').fit(X)
knn_indices = nn.kneighbors(return_distance=False)

# Build mask M: M[i, j] = 1 if j is among i's 15 nearest neighbors (excluding self).
n = X.shape[0]
M = np.zeros((n, n), dtype=int)
M[np.arange(n)[:, None], knn_indices] = 1

# V[i] = variance of R[i, j] over the j with M[i, j] == 1. Gathering the
# neighbor columns directly selects the same entries as the mask, without a
# Python-level loop over rows.
V = np.take_along_axis(R, knn_indices, axis=1).var(axis=1)