NB: UMAP must be installed before running this notebook. Run: 'pip install umap-learn'.

This notebook visualises the Swiss roll dataset and compares its embedding in the latent space of a pre-trained autoencoder (AE) with the embeddings produced by standard dimensionality reduction techniques:

0) PCA https://pytorch.org/docs/stable/generated/torch.pca_lowrank.html
1) LLE https://cs.nyu.edu/~roweis/lle/papers/lleintroa4.pdf
2) t-SNE https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding
3) UMAP https://umap-learn.readthedocs.io/en/latest/

In [None]:
# Minimal imports
import math
import torch
import matplotlib.pyplot as plt

import ricci_regularization


import yaml
from sklearn import datasets
from tqdm.notebook import tqdm


import matplotlib.pyplot as plt
from sklearn import datasets, manifold
import torch
import math

In [None]:
# Directory referenced by the (currently commented-out) savefig calls.
Path_pictures = "../plots/"

# Shared plot / dataset settings.
alpha = 0.5  # point opacity
n_samples = 18000

# Full-size Swiss roll: sr_points holds the 3-D coordinates, sr_color is the
# second array returned by make_swiss_roll and is used to colour the plots.
sr_points, sr_color = datasets.make_swiss_roll(
    n_samples=n_samples,
    noise=0.05,
    random_state=1,
)

In [None]:
# Empirical mean vector and covariance matrix of the Swiss-roll points.
points_tensor = torch.tensor(sr_points)
mean = points_tensor.mean(dim=0)
# The points are stored one per row, so transpose before calling torch.cov.
cov_matrix = torch.cov(points_tensor.T)

print("Covariance matrix:\n", cov_matrix)
print("Mean vector:", mean)

In [None]:
import numpy as np

# Hand-rolled Swiss roll built directly from its parametrisation:
# (t*cos(t), h, t*sin(t)) plus Gaussian noise.
N = 1000      # number of sampled points
noise = 0.05  # standard deviation of the additive Gaussian noise

# Seeded generator so that re-running the cell reproduces the same sample,
# in line with the random_state=1 passed to make_swiss_roll in this notebook.
rng = np.random.default_rng(1)

t = 3*np.pi/2 * (1 + 2*rng.random((1, N)))  # roll parameter in [1.5*pi, 4.5*pi)
h = 21 * rng.random((1, N))                 # second coordinate in [0, 21)
# Rows of `data` are the three coordinates, columns are the N points.
data = np.concatenate((t*np.cos(t), h, t*np.sin(t))) + noise*rng.standard_normal((3, N))
# (points, colour) in row-per-point layout would be: np.transpose(data), np.squeeze(t)

In [None]:
norms = torch.tensor(sr_points).norm(dim = 1)

In [None]:
min = norms.min()

In [None]:
max= norms.max()

In [None]:
mean = norms.mean()
std = norms.std()

In [None]:
plt.hist(norms)
plt.title("Histogram of norms of all points of the swiss roll")
plt.show()
print(f"Norms vary between {min:0.2f} and {max:0.2f} \n with mean: {mean:0.2f} and std dev: {std:0.2f} ")

In [None]:
plt.title("Histogram of norms of normalized points of the swiss roll")
plt.hist((norms-mean)/std)
plt.show()

## resampling for plots

In [None]:
# Regenerate a smaller Swiss roll (20% of n_samples, same noise and seed);
# this overwrites sr_points / sr_color for all plots that follow.
n_plot_samples = round(n_samples * 0.2)
sr_points, sr_color = datasets.make_swiss_roll(
    n_samples=n_plot_samples,
    noise=0.05,
    random_state=1,
)

In [None]:
# 3-D scatter plot of the Swiss roll, coloured by sr_color.
fig = plt.figure(figsize=(9, 9), dpi=400)
plt.rcParams.update({'font.size': 20})  # font size 20 for everything drawn from here on

ax = fig.add_subplot(projection='3d')
xs, ys, zs = sr_points[:, 0], sr_points[:, 1], sr_points[:, 2]
p = ax.scatter(xs, ys, zs, c=sr_color, s=40, alpha=alpha, cmap='jet')
# Optional decorations:
#plt.title("Swiss roll dataset in 3d",fontsize=20)
#plt.colorbar(p, label="sr_color", shrink = 0.5,location="left")
ax.view_init(azim=-70, elev=15)

# Unfilled background panes with white ("invisible") edges on all three axes.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.pane.fill = False
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.pane.set_edgecolor('w')

ax.grid(True)

#plt.savefig(f'{Path_pictures}/swissroll3d_cbar.pdf',bbox_inches='tight',format='pdf')
#plt.savefig(f'{Path_pictures}/swissroll3d.pdf',bbox_inches='tight',format='pdf')
plt.show()

### The solution: unrolling the Swiss roll

In [None]:
# Reference "unrolled" view: the y-coordinate of each point (x-axis) plotted
# against its sr_color value (y-axis).
y_tensor = torch.from_numpy(sr_points[:, 1])
colors_tensor = torch.from_numpy(sr_color)

fig = plt.figure(figsize=(9, 9), dpi=400)
plt.rcParams.update({'font.size': 20})
plt.scatter(
    y_tensor,
    colors_tensor,
    c=sr_color,
    cmap='jet',
    s=40,
    alpha=alpha,
)
# Optional decorations:
#plt.title("The unrolled swiss roll: projection on the\n 2nd and the 4th coordinate (the color)")
#plt.xlabel("the y-coordinate of sr_points")
#plt.ylabel("sr_color")
#plt.savefig(f'{Path_pictures}/swissroll_solution.pdf',bbox_inches='tight',format='pdf')
plt.show()

# PCA

In [None]:
# Low-rank PCA with q=2; the first two columns of u are what gets plotted.
sr_tensor = torch.tensor(sr_points)
u, s, v = torch.pca_lowrank(sr_tensor, q=2)

In [None]:
# Scatter plot of the two PCA columns, coloured by sr_color.
plt.figure(figsize=(9, 9), dpi=400)
plt.rcParams.update({'font.size': 20})  # font size 20 for everything drawn from here on

pca_x, pca_y = u[:, 0], u[:, 1]
plt.scatter(
    pca_x,
    pca_y,
    c=sr_color,
    s=40,
    alpha=alpha,
    cmap='jet',
    marker='o',
    edgecolors=None,
)
# Optional decorations:
#plt.title( "PCA embedding of the swiss roll")
#plt.colorbar(orientation='vertical',shrink = 0.7)
#plt.savefig(f'{Path_pictures}/swissroll_pca.pdf',bbox_inches='tight',format='pdf')
plt.show()

# LLE 

In [None]:
# Locally Linear Embedding of the Swiss roll into 2 components, using
# 12 neighbours per point. sr_err is the second value returned by
# locally_linear_embedding and is not used by the plotting cell.
# (A triple-quoted block of dead plotting code used to end this cell; it was
# echoed as the cell output and referenced sr_tsne before it was computed,
# so it has been removed.)
sr_lle, sr_err = manifold.locally_linear_embedding(
    sr_points, n_neighbors=12, n_components=2
)


In [None]:
# Scatter plot of the LLE embedding, coloured by sr_color.
plt.rcParams.update({'font.size': 20})
fig = plt.figure(figsize=(9, 9), dpi=400)

lle_x, lle_y = sr_lle[:, 0], sr_lle[:, 1]
plt.scatter(lle_x, lle_y, c=sr_color, cmap='jet', s=40, alpha=alpha)
# Optional decorations:
#plt.title("LLE Embedding of the swiss roll")
#plt.savefig(f'{Path_pictures}/swissroll_lle.pdf',bbox_inches='tight',format='pdf')
plt.show()

# t-SNE

In [None]:
# t-SNE embedding into 2 components with perplexity 40 and a fixed seed.
tsne = manifold.TSNE(n_components=2, perplexity=40, random_state=0)
sr_tsne = tsne.fit_transform(sr_points)

In [None]:
# Scatter plot of the t-SNE embedding, coloured by sr_color.
fig = plt.figure(figsize=(9, 9), dpi=400)
plt.rcParams.update({'font.size': 20})

tsne_x, tsne_y = sr_tsne[:, 0], sr_tsne[:, 1]
plt.scatter(tsne_x, tsne_y, c=sr_color, cmap='jet', s=40, alpha=alpha)
# Optional decorations:
#plt.title("t-SNE embedding of the swiss roll")
#plt.savefig(f'{Path_pictures}/swissroll_tsne.pdf',bbox_inches='tight',format='pdf')
plt.show()

# UMAP

In [None]:
import umap

In [None]:
# Fit UMAP with its default hyper-parameters on the Swiss-roll points.
umap_model = umap.UMAP()
mapper = umap_model.fit(sr_points)

In [None]:
# Coordinates learned by the fitted UMAP mapper; columns 0 and 1 are plotted in the next cell.
# NOTE: the name encoded_points is reassigned later in the AE latent-space section.
encoded_points = mapper.embedding_

In [None]:
# Scatter plot of the UMAP embedding, coloured by sr_color.
fig = plt.figure(figsize=(9, 9), dpi=400)
plt.rcParams.update({'font.size': 20})  # font size 20 for everything drawn from here on

umap_x, umap_y = encoded_points[:, 0], encoded_points[:, 1]
plt.scatter(
    umap_x,
    umap_y,
    c=sr_color,
    s=40,
    alpha=alpha,
    cmap='jet',
    marker='o',
    edgecolors=None,
)
# Optional decorations:
#plt.title( "UMAP embedding of the swiss roll")
#plt.colorbar(orientation='vertical',shrink = 0.7)
#plt.savefig(f'{Path_pictures}/swissroll_umap.pdf',bbox_inches='tight',format='pdf')
plt.show()

# AE latent space

In [None]:
# Read the experiment configuration used by the AE section below.
# NOTE(review): yaml.FullLoader can build more than plain data types; if this
# path could ever point at an untrusted file, switch to yaml.safe_load.
config_path = '../../experiments/Swissroll_exp0_config.yaml'
with open(config_path, 'r') as yaml_file:
    yaml_config = yaml.load(yaml_file, Loader=yaml.FullLoader)

In [None]:

# Load data loaders based on YAML configuration.
# The result is bound to `dataloaders` rather than `dict`, so the builtin
# dict type stays usable in the rest of the notebook.
dataloaders = ricci_regularization.DataLoaders.get_dataloaders(
    dataset_config=yaml_config["dataset"],
    data_loader_config=yaml_config["data_loader_settings"]
)
train_loader = dataloaders["train_loader"]
test_loader = dataloaders["test_loader"]
# Assumes a dict-like return value: .get yields None when 'test_dataset' is absent.
test_dataset = dataloaders.get("test_dataset")

print("Data loaders created successfully.")

# Build the AE described by the config and load its tuned weights.
torus_ae = ricci_regularization.DataLoaders.get_tuned_nn(config=yaml_config, additional_path="../")

print("AE weights loaded successfully.")

In [None]:
# Pull the settings needed by the plotting cells out of the YAML config.
experiment_name = yaml_config["experiment"]["name"]
curv_w = yaml_config["loss_settings"]["lambda_curv"]

dataset_cfg = yaml_config["dataset"]
dataset_name = dataset_cfg["name"]
D = yaml_config["architecture"]["input_dim"]  # D is the dimension of the dataset

# k (the number of selected classes) is only defined for the labelled
# datasets listed here; for any other dataset name it is left unset.
if dataset_name in ("MNIST", "MNIST01", "Synthetic"):
    selected_labels = dataset_cfg["selected_labels"]
    k = len(selected_labels)

## AE latent space plotting

In [None]:
# Run every batch of the chosen loader through the AE (moved to CPU) and
# collect the inputs, the first output of the AE, the latent codes and labels.
loader = test_loader  # use train_loader here to embed the training set instead
torus_ae.cpu()

colorlist, enc_list = [], []
input_dataset_list, recon_dataset_list = [], []
for data, labels in tqdm(loader, position=0):
    input_dataset_list.append(data)
    # torus_ae(data)[0] is presumably the reconstruction — confirm against the model.
    recon_dataset_list.append(torus_ae(data)[0])
    # The encoder is fed batches flattened to D features per sample.
    flat_batch = data.view(-1, D)
    enc_list.append(torus_ae.encoder_to_lifting(flat_batch))
    colorlist.append(labels)

In [None]:
# Stack the per-batch lists into single tensors.
input_dataset = torch.cat(input_dataset_list)
recon_dataset = torch.cat(recon_dataset_list)
encoded_points = torch.cat(enc_list)
color_array = torch.cat(colorlist).detach()

# Detached copy of the latent codes, divided by pi for plotting.
# Assumes the latent coordinates lie in [-pi, pi], so the rescaled ones
# fall in [-1, 1] — TODO confirm against the encoder.
encoded_points_no_grad = encoded_points.detach() / math.pi

In [None]:
# Scatter plot of the rescaled AE latent codes on the square [-1, 1] x [-1, 1].
plt.rcParams.update({'font.size': 20})
plt.figure(figsize=(9, 9), dpi=400)

latent_x = encoded_points_no_grad[:, 0]
latent_y = encoded_points_no_grad[:, 1]
if dataset_name == "Swissroll":
    # Continuous colouring, same marker size / opacity as the other embeddings.
    scatter_kwargs = {'s': 40, 'alpha': alpha, 'cmap': 'jet'}
else:
    # One discrete colour per class for the labelled datasets.
    scatter_kwargs = {'cmap': ricci_regularization.discrete_cmap(k, 'jet')}
    #plt.colorbar(ticks=range(k))
plt.scatter(latent_x, latent_y, c=color_array, marker='o', edgecolor='none', **scatter_kwargs)

axis_ticks = [-1., -0.5, 0., 0.5, 1.]
plt.xticks(axis_ticks)
plt.yticks(axis_ticks)
plt.ylim(-1., 1.)
plt.xlim(-1., 1.)
plt.grid(True)
# Optional export:
#plt.savefig(f"{Path_pictures}/latent_space.pdf",format="pdf",bbox_inches='tight')
plt.show()