In [None]:
import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.manifold import TSNE

In [None]:
# Experiment configuration shared by the cells below.
random_state = 42   # seed handed to make_swiss_roll, the noise RandomState and TSNE
n_samples = 1_500   # number of points requested from make_swiss_roll
noise = 0.5         # multiplier on the Gaussian perturbation; also embedded in file names and titles

In [None]:
# Sample a noise-free swiss roll; `t` is the second value returned by
# make_swiss_roll and is used later to colour the embedding.
X, t = make_swiss_roll(
    n_samples=n_samples,
    noise=0.0,
    random_state=random_state,
)

# Perturb the clean points with Gaussian draws scaled by `noise`, using a
# dedicated RandomState instead of make_swiss_roll's own `noise` argument.
# NOTE(review): this generator is seeded with the same value that was passed to
# make_swiss_roll above -- confirm that sharing the seed is intended.
rng = np.random.RandomState(seed=random_state)
X_noisy = X + noise * rng.normal(size=X.shape)

In [None]:
# Two-component t-SNE of the noisy points, seeded with `random_state`.
tsne = TSNE(
    n_components=2,
    perplexity=100,
    learning_rate='auto',
    random_state=random_state,
)
X_emb = tsne.fit_transform(X_noisy)

In [None]:
import pandas as pd
import altair as alt

# Scatter plot of the two embedding columns, coloured by the swiss-roll
# coordinate `t`.
df = pd.DataFrame(dict(x=X_emb[:, 0], y=X_emb[:, 1], t=t))

(
    alt.Chart(df)
    .mark_circle(size=30)
    .encode(
        x=alt.X('x', title='t-SNE 1'),
        y=alt.Y('y', title='t-SNE 2'),
        color=alt.Color(
            't:Q',
            title='unrolled coordinate',
            scale=alt.Scale(scheme='spectral'),
        ),
    )
    .properties(title=f"t-SNE embedding of Swiss Roll (noise={noise})")
)

In [None]:
from distortions.geometry import Geometry, bind_metric, local_distortions, neighborhood_distances
from distortions.visualization import dplot
from anndata import AnnData
from sklearn.neighbors import NearestNeighbors

# Neighbourhood size shared by the Geometry adjacency settings and the kNN
# graph built further down.
n_neighbors = 15

# Run local_distortions on the embedding and the noisy input, then attach the
# last two results to the embedding with bind_metric. The first result is
# discarded. (The meaning of Hvv / Hs is defined by the distortions package.)
geom = Geometry(
    affinity_kwds={"radius": 2},
    adjacency_kwds={"n_neighbors": n_neighbors},
)
_, Hvv, Hs = local_distortions(X_emb, X_noisy, geom)
embedding = bind_metric(X_emb, Hvv, Hs)

# AnnData container holding the noisy points, their Euclidean kNN distance
# graph under obsp["distances"], and the t-SNE coordinates under obsm["X_tsne"].
adata = AnnData(X=X_noisy)
nn = NearestNeighbors(metric="euclidean", n_neighbors=n_neighbors).fit(X_noisy)
# NOTE(review): X_noisy is passed back in as the query set; per the scikit-learn
# docs each point is then treated as its own nearest neighbour -- confirm that
# n_neighbors is meant to include the point itself.
knn_graph = nn.kneighbors_graph(X_noisy, mode="distance")  # sparse CSR matrix
adata.obsp["distances"] = knn_graph
adata.obsm["X_tsne"] = X_emb

In [None]:
# Compute neighbourhood distances for the "X_tsne" layout and assemble the
# distortion plot from the metric-bound embedding; the plot object is only
# stored here and is displayed by a later cell.
distances = neighborhood_distances(adata, "X_tsne")
plot = (
    dplot(embedding)
    .mapping(x="embedding_0", y="embedding_1")
    .inter_boxplot(
        dists=distances,
        outlier_iqr=10,
        highlightColor="#F25E7A",
        strokeWidth=0.4,
    )
    .geom_ellipse()
)

In [None]:
# Bare expression as the cell's final line, so the notebook displays `plot`.
plot

## MDBD Result

In [None]:
# Export the noisy input and its t-SNE embedding as CSV files whose names carry
# the noise level (presumably the inputs for the external step that produces
# data/pscore_{noise}.csv -- confirm).
from pathlib import Path

# DataFrame.to_csv does not create missing parent directories; make sure
# "data/" exists so this cell also runs on a fresh checkout.
Path("data").mkdir(parents=True, exist_ok=True)

pd.DataFrame(X_noisy).to_csv(f"data/swiss_noise_{noise}.csv", index=False)
pd.DataFrame(X_emb).to_csv(f"data/swiss_noise_{noise}_embedding.csv", index=False)

In [None]:
# Load the scores from data/pscore_{noise}.csv and draw them on the t-SNE
# layout, encoding the "score" column with both colour and opacity.
# Note: this rebinds `df`, replacing the frame used for the earlier chart.
pscores = pd.read_csv(f"data/pscore_{noise}.csv")
df = pd.DataFrame(dict(x=X_emb[:, 0], y=X_emb[:, 1], p=pscores["score"].values))

(
    alt.Chart(df)
    .mark_circle(size=30)
    .encode(
        x=alt.X('x', title='t-SNE 1'),
        y=alt.Y('y', title='t-SNE 2'),
        color=alt.Color(
            'p:Q',
            title='pscore',
            scale=alt.Scale(scheme='teals'),
        ),
        opacity=alt.Opacity('p:Q', title='pscore'),
    )
    .properties(title=f"Perturbation Scores (noise={noise})")
)

In [None]:
# Inspect the raw values of the "score" column loaded from the pscore CSV.
pscores["score"].values