# I. Train and test datasets

In [None]:
# ---- Dataset hyperparameters ----

D = 28 * 28        # ambient dimension (784)
k = 3              # number of 2d planes embedded in dimension D
n = 6_000          # number of points sampled in each plane
# n = 1_000        # alternative setting: fewer points in each plane
shift_class = 0    # forwarded to generate_dataset as `shift_class`


# ---- Data-loader hyperparameters ----
batch_size = 16    # mini-batch size used by both data loaders
split_ratio = 0.2  # fraction of the dataset held out as the test split

# Set manual seed for reproducibility
# torch.manual_seed(0)

In [None]:
# Make the dataset-generating package importable from this notebook.
# The relative entry '../' is appended to the module search path; presumably
# `ricci_regularization` lives one directory above the notebook — confirm
# against the repository layout.
import sys
sys.path.append('../') # have to go 1 level up

import ricci_regularization

In [None]:
import torch
import ricci_regularization

# Generate the full synthetic dataset from the hyperparameters defined above.
train_dataset = ricci_regularization.generate_dataset(D, k, n, shift_class=shift_class)

# Split into train / test parts.
# The test size is truncated once and the train size is derived from it, so the
# two lengths always sum to len(train_dataset). Truncating both sides
# independently (int(m - m*r), int(m*r)) loses a sample whenever m*r is not an
# integer, and random_split then raises ValueError.
m = len(train_dataset)
n_test = int(m * split_ratio)
n_train = m - n_test
train_data, test_data = torch.utils.data.random_split(train_dataset, [n_train, n_test])

# Only the training loader is shuffled; the test loader keeps a fixed order.
test_loader  = torch.utils.data.DataLoader(test_data , batch_size=batch_size)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

# test_data[:][0] gives the data vectors (without labels) of the test split;
# test_data[:][1] gives the matching labels.

# II. Fit dimension reduction models

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

## II.1. TSNE check

In [None]:
# TSNE check on test set
# Flatten every test sample to a vector of length D (the configured data
# dimension) instead of a hard-coded 28*28, so the reshape follows the config.
synthetic_points = test_data[:][0].view(-1, D)

# 2-component embedding; random_state is fixed so the projection is repeatable.
tsne   = TSNE(n_components=2, verbose=1, random_state=123)
z_test = tsne.fit_transform(synthetic_points.numpy())

In [None]:
# Scatter the two TSNE components of the test split, coloured by label.
plt.scatter(
    x=z_test[:, 0],
    y=z_test[:, 1],
    c=test_data[:][1],  # labels of the test split
    alpha=0.5,
)
plt.title("TSNE projection of test data")
plt.show()

In [None]:
# TSNE check on train set
# Flatten every train sample to a vector of length D (the configured data
# dimension) instead of a hard-coded 28*28, so the reshape follows the config.
synthetic_points = train_data[:][0].view(-1, D)

# 2-component embedding; random_state is fixed so the projection is repeatable.
tsne   = TSNE(n_components=2, verbose=1, random_state=123)
z_train = tsne.fit_transform(synthetic_points.numpy())

In [None]:
# Scatter the two TSNE components of the train split, coloured by label.
plt.scatter(
    x=z_train[:, 0],
    y=z_train[:, 1],
    c=train_data[:][1],  # labels of the train split
    alpha=0.5,
)
plt.title("TSNE projection of train data")
plt.show()

## II.2. Plots using Seaborn

In [None]:
import pandas as pd

# Collect the test labels and their two TSNE coordinates in one table,
# built in a single constructor call (column order: y, comp-1, comp-2).
df = pd.DataFrame(
    {
        "y": test_data[:][1].numpy(),  # test_data[:][1] are labels
        "comp-1": z_test[:, 0],
        "comp-2": z_test[:, 1],
    }
)

In [None]:
import seaborn as sns
import numpy as np # this module is useful to work with numerical arrays


# Size the palette to the number of distinct labels actually present.
# A fixed 10-colour list does not match the hue levels of this dataset:
# depending on the seaborn version the mismatch either raises ValueError or
# triggers a warning and uses only the first few, closely spaced hues.
n_classes = df["y"].nunique()

sns.scatterplot(x="comp-1", y="comp-2", hue=df.y.tolist(),
                palette=sns.color_palette("hls", n_classes),
                data=df).set(title="Synthetic dataset data T-SNE projection")