In [1]:
# not strictly necessary, but useful in the notebook
import numpy as np
import torch

In [1]:
# Restrict which CUDA devices this kernel can see.
# CUDA_DEVICE_ORDER=PCI_BUS_ID makes CUDA enumerate devices by PCI bus ID, so the
# indices used in CUDA_VISIBLE_DEVICES refer to that ordering.
# The first visible device will be set as default. Separate multiple devices by comma: ...=0,1,2
# NOTE(review): these variables are only read when CUDA is initialized; torch is imported
# in the cell above, so this relies on no CUDA call having happened yet -- confirm.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [3]:
# necessary imports to nicely render progress bars in the notebook
import tqdm
from tqdm.auto import tqdm as autotqdm
# Rebind tqdm.tqdm to the auto variant, so that the `tqdm.tqdm(...)` loops further down
# in this notebook use the notebook-aware progress bar.
tqdm.tqdm = autotqdm

In [4]:
import losses # should print "pyKeOps with torch bindings is working!", otherwise PyKeOps is not functioning properly

pyKeOps with torch bindings is working!


In [5]:
from losses import UMAP_loss, TSNE_loss
from autoneb import autoneb

## Dataset loading

In [8]:
# Any dataset can be used, it does not need to be from torchvision.datasets
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor

# FashionMNIST training split, stored under ./datasets (downloaded on first use)
fashionmnist = datasets.FashionMNIST(
    root="datasets",
    train=True,
    download=True,
    transform=ToTensor()
)

# Only the first 500 samples are used in this example.
# NOTE(review): `.data` / `.targets` are read directly from the dataset object, so the
# `transform` passed above is presumably not applied to x_data -- confirm this is intended.
x_data = fashionmnist.data[:500]    # load your preferred dataset here, x values
x_data = torch.flatten(x_data, start_dim=1) # x_data needs to be 2-dimensional, (N, num_features): one flat feature vector per sample

y_data = fashionmnist.targets[:500] # load your preferred dataset here, labels

## (Embedding generation)

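This step is optional when running this test notebook: example embeddings are already provided in the `embeddings/` directory and are loaded in the "AutoNEB connection" section below. Note that running the two cells in this section overwrites those files.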

In [None]:
# generate UMAP embeddings which can later be connected
import os

import umap

num_minima = 3  # number of independent UMAP runs, i.e. number of embeddings to generate

# np.save does not create missing directories, so make sure the target folder exists
os.makedirs("embeddings", exist_ok=True)

umap_embs = []
for i in tqdm.tqdm(range(num_minima), "Find UMAP minimum"):
    # a fresh reducer with random initialization for every run
    reducer = umap.UMAP(a=1.0, b=1.0, init="random")
    umap_embs.append(reducer.fit_transform(x_data))

# stack the runs into one array along a new leading axis and store it as
# embeddings/UMAP_Fashion.npy (np.save appends the ".npy" suffix)
np.save("embeddings/UMAP_Fashion", np.array(umap_embs))

In [None]:
# generate t-SNE embeddings which can later be connected
import os

from openTSNE import TSNE

num_minima = 3  # number of independent t-SNE runs, i.e. number of embeddings to generate
# keyword arguments forwarded unchanged to every TSNE(...) instance below
TSNE_params = {"initialization": "random",
               "n_jobs": 1,
               "verbose": True}

# np.save does not create missing directories, so make sure the target folder exists
os.makedirs("embeddings", exist_ok=True)

tsne_embs = []
for i in tqdm.tqdm(range(num_minima), "Find t-SNE minimum"):
    # a fresh, randomly initialized optimizer for every run
    tsne = TSNE(**TSNE_params)
    tsne_embs.append(tsne.fit(x_data))

# stack the runs into one array along a new leading axis and store it as
# embeddings/TSNE_Fashion.npy (np.save appends the ".npy" suffix)
np.save("embeddings/TSNE_Fashion", np.array(tsne_embs))

## Loss instantiation

The loss directly depends on the original high-dimensional data. Loss functions are thus implemented as objects, processing the dataset during initialization. They can only be used on the dataset they were initialized on, but several instances of the same loss class, initialized on different datasets, can exist simultaneously.

In [9]:
# several instances of the UMAP_loss class with different parameters can exist simultaneously
fashion_UMAP_loss = UMAP_loss(x_data, y_data) # takes some time when using the whole dataset, as
                                              # one example umap embedding needs to be optimized

# several instances of the TSNE_loss class with different parameters can exist simultaneously
fashion_TSNE_loss = TSNE_loss(x_data, y_data)

UMAP_loss initialized successfully
TSNE_loss initialized successfully


## AutoNEB connection

In [8]:
# load the stored t-SNE embeddings; the displayed shape is (number of embeddings, N, 2)
tsne_embs = np.load("embeddings/TSNE_Fashion.npy")
tsne_embs.shape

(3, 500, 2)

In [9]:
# load the stored UMAP embeddings; the displayed shape is (number of embeddings, N, 2)
umap_embs = np.load("embeddings/UMAP_Fashion.npy")
umap_embs.shape

(3, 500, 2)

In [10]:
# show the docstring of autoneb, which describes its parameters
print(autoneb.__doc__)

Connect two minima on the UMAP or TSNE loss surface, optionally with a predefined initial path.
        nodes : np.ndarray or torch.Tensor or list of np.ndarrays or torch.Tensors
            Embeddings; data points from x_data that are to be connected.
        loss_inst : Instance of one of the classes in "losses.py"
            Object containing the precalculated high-dimensional similarities of a specific
            dataset, which is passed as parameter during its initialization.
        config_path : str
            Path to the config file to be used for optimization during graph creation and autoneb.
        initialize : int or np.ndarray or torch.Tensor, default=3
            If int, path will be initialized by interpolating with this number of points.
            Alternatively, an arbitrary initial path can be passed, consisting of an
            arbitrary number of embeddings, excluding the node embeddings themselves.
            Path needs to be of shape (num_pivots, num_datap

In [None]:
# Run autoneb on the loaded UMAP embeddings, using the loss instance that was
# initialized on the same dataset.
# To start from a predefined path instead of an interpolated one, pass an np.ndarray
# or torch.Tensor as the "initialize" argument.
autoneb(
    umap_embs,
    loss_inst=fashion_UMAP_loss,
    config_path="configs/example_config.yaml",
    initialize=3,
    graph_name="umap_example",
    node_idxs=[1, 2, 3],
)

The warning ```Minima 1 and 2 are missing intermediate cycles set().``` always arises and can be ignored. It stems from the changed initialization procedure which makes torch_autoneb believe there should already exist a proper optimized connection instead of just the initial path.