NB! UMAP must be installed to run this notebook: `pip install umap-learn`.

This notebook visualises the Synthetic Gaussians, Swissroll and MNIST datasets.
The embedding into a pre-trained AE latent space is compared to standard dimensionality reduction techniques such as:

0) PCA https://pytorch.org/docs/stable/generated/torch.pca_lowrank.html
1) LLE https://cs.nyu.edu/~roweis/lle/papers/lleintroa4.pdf
2) t-SNE https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
3) UMAP https://umap-learn.readthedocs.io/en/latest/

In [None]:
# Minimal imports
import os

import torch
import matplotlib.pyplot as plt
import sklearn # for t-SNE and LLE
import sklearn.manifold # explicit submodule import: `import sklearn` alone is not guaranteed to expose sklearn.manifold
import umap
import yaml

import ricci_regularization

In [None]:
# --- Plot output settings ---
# Folder (relative to this notebook) where the generated figures are written.
Path_pictures = "../../plots/datasets_visualization"
# Forwarded as `save_plot` to the plotting helper in the plotting cell.
violent_saving = True
# Point opacity on plots.
opacity = 0.5

# Make sure the output folder is present before any figure is produced.
if os.path.exists(Path_pictures):
    print(f"Folder already exists: {Path_pictures}")
else:
    os.makedirs(Path_pictures)
    print(f"Created folder: {Path_pictures}")
print(f"Plots will be saved to: {Path_pictures}")

 I. Loading datasets and AE weights to produce AE latent encodings

In [None]:
# Maps each dataset name to the YAML experiment config that is used later
# to build the data loaders and to load the pretrained AE weights.
experiment_config = {
    "MNIST": '../../experiments/MNIST_Setting_1_config.yaml',
    "Synthetic": '../../experiments/Synthetic_Setting_1_config.yaml',
    "Swissroll": '../../experiments/Swissroll_Setting_1_config.yaml',
}

In [None]:
visualization = {} # dictionary to keep all visualizations
dataset_names = ["MNIST","Swissroll","Synthetic"]
# For each dataset: read its experiment config, build the data loaders, load the
# pretrained AE and store the helper's result under visualization[dataset_name].
# Later cells read the "input_dataset", "labels" and "AE_latent_encoding" entries of that result.
for dataset_name in dataset_names:
    # NOTE(review): yaml.FullLoader is kept because these are the repository's own config files;
    # prefer yaml.safe_load if configs could ever come from an untrusted source.
    with open(experiment_config[dataset_name], 'r') as yaml_file:
        yaml_config = yaml.load(yaml_file, Loader=yaml.FullLoader)

    # Load data loaders based on YAML configuration.
    # Bound to `dataloaders` (not `dict`) so the builtin `dict` is not shadowed
    # for every cell executed afterwards in the same kernel.
    dataloaders = ricci_regularization.DataLoaders.get_dataloaders(
        datasets_root='../../datasets/',
        dataset_config=yaml_config["dataset"],
        data_loader_config=yaml_config["data_loader_settings"],
        dtype=torch.float32
    )
    # Only the test split is used for the visualizations.
    test_loader = dataloaders["test_loader"]
    print("Data loaders created successfully.")

    # Load the pretrained AE described by the same config and move it to the CPU.
    torus_ae, Path_ae_weights = ricci_regularization.DataLoaders.get_tuned_nn(config=yaml_config, additional_path="../", verbose=False)
    print("AE weights loaded successfully from", Path_ae_weights)
    torus_ae.cpu()
    print(f"AE moved to {next(torus_ae.parameters()).device}")

    # add input_dataset (as a single tensor), AE_latent_encoding and labels to visualization
    visualization[dataset_name] = ricci_regularization.DataLoaders.get_dataset_and_encoding_from_dataloader(
        test_loader,
        torus_ae.encoder_to_lifting,
        input_dim=yaml_config["architecture"]["input_dim"])

II. Standard embeddings

In [None]:
# Seed shared by all stochastic embedding steps, so that Restart & Run All
# reproduces the same figures (previously only t-SNE was seeded).
embedding_seed = 0

# Compute the four standard 2D embeddings of every dataset and store them next to
# the AE encoding under visualization[dataset_name][<method name>].
for dataset_name in dataset_names:
    input_dataset = visualization[dataset_name]["input_dataset"]

    # PCA: torch.pca_lowrank is a randomized algorithm, so reseed torch right before the call.
    # Element 0 of the returned (U, S, V) triple is stored, exactly as before.
    # NOTE(review): U is not scaled by the singular values S; confirm that plotting U
    # (rather than U * S) is the intended PCA picture.
    torch.manual_seed(embedding_seed)
    visualization[dataset_name]["PCA"] = torch.pca_lowrank(input_dataset, q=2)[0]

    # LLE: the 'arpack' eigen solver starts from a random vector, hence random_state.
    visualization[dataset_name]["LLE"], _ = sklearn.manifold.locally_linear_embedding(
        input_dataset,
        n_neighbors=12,
        n_components=2,
        eigen_solver='arpack',
        random_state=embedding_seed,
    )

    # t-SNE
    visualization[dataset_name]["TSNE"] = sklearn.manifold.TSNE(
        n_components=2, perplexity=30, random_state=embedding_seed
    ).fit_transform(input_dataset)

    # UMAP: a fixed random_state makes the layout reproducible (UMAP then runs single-threaded).
    mapper = umap.UMAP(random_state=embedding_seed).fit(input_dataset)
    visualization[dataset_name]["UMAP"] = mapper.embedding_

III. Plotting

In [None]:
# Every 2D representation stored per dataset: the four standard embeddings plus the AE latent encoding.
visualization_methods = ("PCA", "LLE", "TSNE", "UMAP", "AE_latent_encoding")

# One figure per (dataset, method) pair, coloured by the dataset labels.
for dataset_name in dataset_names:
    dataset_visualization = visualization[dataset_name]
    for visualization_method in visualization_methods:
        # Target file: <Path_pictures>/<dataset>_<method>.pdf
        output_file = f'{Path_pictures}/{dataset_name}_{visualization_method}.pdf'
        ricci_regularization.PlottingTools.plot_2d_encoding(
            encoded_points=dataset_visualization[visualization_method],
            color_labels=dataset_visualization["labels"],
            cmap="jet",
            opacity=opacity,
            Saving_file_name=output_file,
            verbose=False,
            save_plot=violent_saving,
        )