load data
plot data

In [1]:
import numpy as np
import pickle
import pandas as pd
import time
from umap import UMAP

In [2]:
from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

In [3]:
from tfumap.paths import FIGURE_DIR, save_fig

In [4]:
# Directory holding one kNN-accuracy result pickle per dataset (read when the results are first loaded).
save_loc = DATA_DIR / 'knn_classifier' 

In [5]:
# Dataset identifiers; each one is also used as the file stem of its result pickle(s).
datasets = [
    'cassins_dtw',
    'cifar10',
    'fmnist',
    'macosko2015',
    'mnist'
]

In [6]:
# Stack the per-dataset result tables into a single frame and preview the first rows.
result_paths = [save_loc / (name + '.pickle') for name in datasets]
projection_speeds = pd.concat([pd.read_pickle(path) for path in result_paths])
projection_speeds.head(3)

Unnamed: 0,method_,dimensions,dataset,1NN_acc,5NN_acc
0,network,2,cassins_dtw,0.991,0.995
1,network,64,cassins_dtw,0.988,0.991
2,autoencoder,2,cassins_dtw,0.989,0.993


In [7]:
# Load the additional result pickles (parametric tsne, vae, ae, ivis, scvis, phate)
# and append them to `projection_speeds`.
#
# Each entry is (sub-directories under DATA_DIR / "knn_classifier", method_ override).
# The override, when not None, is written into the `method_` column of the loaded
# frame before it is appended; None keeps whatever label is stored in the pickle.
extra_sources = [
    ((), None),  # files directly under <n_components>/ (presumably parametric t-SNE -- confirm)
    (("vae", "train"), None),
    (("ae_only", "train"), None),
    (("ivis", "train"), "ivis"),
    (("scvis", "train"), "scvis"),
    (("phate", "train"), "phate"),
]

# Collect the frames first and concatenate once at the end instead of growing
# `projection_speeds` inside the loop.
extra_frames = []
for dataset in datasets:
    for n_components in ['2', '64']:
        for subdirs, method_label in extra_sources:
            # A loop-local name is used on purpose: rebinding the notebook-level
            # `save_loc` directory here would break a re-run of the earlier cell
            # that reads from it.
            result_path = (DATA_DIR / "knn_classifier").joinpath(
                *subdirs, n_components, dataset + ".pickle"
            )
            try:
                metric_df = pd.read_pickle(result_path)
            except FileNotFoundError:
                # Missing results are only reported, not fatal.
                print(result_path)
                continue
            if method_label is not None:
                metric_df['method_'] = method_label
            extra_frames.append(metric_df)

projection_speeds = pd.concat([projection_speeds, *extra_frames])

# Normalise the dataset label: rows stored as 'cassins' are the same dataset as 'cassins_dtw'.
projection_speeds.loc[projection_speeds.dataset == 'cassins', 'dataset'] = 'cassins_dtw'

In [8]:
# Exclude the 64-dimensional scvis rows; every other row is kept.
is_scvis_64d = (projection_speeds.method_ == 'scvis') & (projection_speeds.dimensions == 64)
projection_speeds = projection_speeds[~is_scvis_64d]

In [9]:
# Inspect every result row recorded for the cassins_dtw dataset.
projection_speeds.loc[projection_speeds['dataset'] == 'cassins_dtw']

Unnamed: 0,method_,dimensions,dataset,1NN_acc,5NN_acc
0,network,2,cassins_dtw,0.991,0.995
1,network,64,cassins_dtw,0.988,0.991
2,autoencoder,2,cassins_dtw,0.989,0.993
3,autoencoder,64,cassins_dtw,0.994,0.997
4,umap-learn,2,cassins_dtw,0.986,0.989
5,umap-learn,64,cassins_dtw,0.985,0.986
6,PCA,2,cassins_dtw,0.626,0.691
7,PCA,64,cassins_dtw,0.995,0.992
8,TSNE,2,cassins_dtw,0.988,0.991
0,parametric-tsne,2,cassins_dtw,0.986,0.993


In [10]:
# Sanity check: list the distinct dataset labels present after loading.
projection_speeds['dataset'].unique()

array(['cassins_dtw', 'cifar10', 'fmnist', 'macosko2015', 'mnist'],
      dtype=object)

In [11]:
# 1-NN accuracy table: one row per (dataset, dimensions), one column per method.
# aggfunc="first" keeps the first value when a (dataset, dimensions, method)
# combination occurs more than once.
metrics_df = pd.pivot_table(
    projection_speeds,
    values="1NN_acc",
    index=["dataset", "dimensions"],
    columns="method_",
    aggfunc="first",
)
metrics_df

Unnamed: 0_level_0,method_,PCA,TSNE,ae_only,autoencoder,ivis,network,parametric-tsne,phate,scvis,umap-learn,vae
dataset,dimensions,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
cassins_dtw,2,0.626,0.988,0.874,0.989,0.957,0.991,0.986,0.969,0.987,0.986,0.73
cassins_dtw,64,0.995,,0.995,0.994,0.995,0.988,0.995,0.995,,0.985,0.98
cifar10,2,0.1436,0.2457,0.1696,0.1592,0.1445,0.1512,0.1675,0.1599,0.138,0.1689,0.1665
cifar10,64,0.3829,,0.379,0.2223,0.3571,0.2139,0.3426,0.2249,,0.2375,0.3949
fmnist,2,0.4467,0.7825,0.6816,0.7083,0.6108,0.6941,0.6834,0.6655,0.6165,0.7144,0.6646
fmnist,64,0.8398,,0.8671,0.7772,0.8188,0.7431,0.83,0.7824,,0.7682,0.8747
macosko2015,2,0.808525,0.971658,0.94287,0.858067,0.856728,0.964294,0.966079,0.958268,0.840661,0.966525,0.854497
macosko2015,64,0.975898,,0.975006,0.957599,0.953805,0.968311,0.977237,0.974113,,0.972104,0.966972
mnist,2,0.3765,0.9411,0.7647,0.9403,0.6369,0.9402,0.9118,0.7684,0.688,0.9317,0.7241
mnist,64,0.9707,,0.9748,0.9481,0.9588,0.9518,0.9697,0.9634,,0.9449,0.9785


In [12]:
def can_float(x):
    """Return True when ``x`` can be converted to a float that is not NaN.

    Parameters
    ----------
    x : object
        Value to test, e.g. one whitespace-separated token of a LaTeX table row.

    Returns
    -------
    bool
        True if ``float(x)`` succeeds and the result is not NaN; False if the
        conversion fails or yields NaN.
    """
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return False
    return not np.isnan(value)

In [13]:
# Column order of the methods in the rendered table.
method_order = [
    'TSNE', 'parametric-tsne', 'umap-learn', 'network', 'autoencoder',
    'scvis', 'ivis', 'phate', 'vae', 'ae_only', 'PCA',
]

# Ordered (old, new) text substitutions applied to the LaTeX source.
# Order matters: "fmnist" must be handled before "mnist" and "scvis" before
# "ivis", otherwise the shorter key would rewrite part of the longer name.
# Raw strings are used for the keys containing "\_" (an escaped underscore in
# the LaTeX text) because "\_" is not a valid Python escape sequence.
# NOTE(review): these two keys rely on to_latex() escaping underscores, as in
# the recorded output -- confirm if the pandas version changes.
latex_replacements = [
    (r"cassins\_dtw", "Cassin's"),
    ("cifar10", "CIFAR10"),
    ("fmnist", "FMNIST"),
    ("mnist", "MNIST"),
    ("macosko2015", "Retina"),
    ("autoencoder", "UMAP/AE"),
    (r"ae\_only", "AE"),
    ("network", "P. UMAP"),
    ("umap-learn", "UMAP"),
    ("vae", "VAE"),
    ("pca", "PCA"),
    ("parametric-tsne", "P. t-SNE"),
    ("TSNE", "t-SNE"),
    ("NaN", "-"),  # missing results are shown as a dash
    ("scvis", "SCVIS"),
    ("ivis", "IVIS"),
    ("phate", "PHATE"),
]

metric_string = metrics_df[method_order].round(4).to_latex()
for old, new in latex_replacements:
    metric_string = metric_string.replace(old, new)

In [14]:
# Print the LaTeX table with the best accuracy of every data row wrapped in \textbf{}.
lines = metric_string.split('\n')
skip = 1  # the first numeric token of a row is the `dimensions` value, not a score
for row in lines:
    tokens = row.split(' ')
    # Positions of the tokens that parse as (non-NaN) numbers.
    numeric_positions = [pos for pos, token in enumerate(tokens) if can_float(token)]
    if len(numeric_positions) > 1:
        # max() returns the first maximal entry, so ties go to the left-most column.
        winner = max(numeric_positions[skip:], key=lambda pos: float(tokens[pos]))
        tokens[winner] = '\\textbf{' + tokens[winner] + '}'
    print(' '.join(tokens))

\begin{tabular}{llrrrrrrrrrrr}
\toprule
      & method\_ &    t-SNE &  P. t-SNE &  UMAP &  P. UMAP &  UMAP/AE &   SCVIS &    IVIS &   PHATE &     VAE &  AE &     PCA \\
dataset & dimensions &         &                  &             &          &              &         &         &         &         &          &         \\
\midrule
Cassin's & 2  &  0.9880 &           0.9860 &      0.9860 &   \textbf{0.9910} &       0.9890 &  0.9870 &  0.9570 &  0.9690 &  0.7300 &   0.8740 &  0.6260 \\
      & 64 &     - &           \textbf{0.9950} &      0.9850 &   0.9880 &       0.9940 &     - &  0.9950 &  0.9950 &  0.9800 &   0.9950 &  0.9950 \\
CIFAR10 & 2  &  \textbf{0.2457} &           0.1675 &      0.1689 &   0.1512 &       0.1592 &  0.1380 &  0.1445 &  0.1599 &  0.1665 &   0.1696 &  0.1436 \\
      & 64 &     - &           0.3426 &      0.2375 &   0.2139 &       0.2223 &     - &  0.3571 &  0.2249 &  \textbf{0.3949} &   0.3790 &  0.3829 \\
FMNIST & 2  &  \textbf{0.7825} &           0.6834 &      0.

In [15]:
# 5-NN accuracy table: one row per (dataset, dimensions), one column per method.
# aggfunc="first" keeps the first value when a (dataset, dimensions, method)
# combination occurs more than once.
metrics_df = pd.pivot_table(
    projection_speeds,
    values="5NN_acc",
    index=["dataset", "dimensions"],
    columns="method_",
    aggfunc="first",
)

# Column order of the methods in the rendered table.
method_order = [
    'TSNE', 'parametric-tsne', 'umap-learn', 'network', 'autoencoder',
    'scvis', 'ivis', 'phate', 'vae', 'ae_only', 'PCA',
]

# Ordered (old, new) text substitutions applied to the LaTeX source.
# Order matters: "fmnist" must be handled before "mnist" and "scvis" before
# "ivis", otherwise the shorter key would rewrite part of the longer name.
# Raw strings are used for the keys containing "\_" (an escaped underscore in
# the LaTeX text) because "\_" is not a valid Python escape sequence.
# NOTE(review): these two keys rely on to_latex() escaping underscores, as in
# the recorded output -- confirm if the pandas version changes.
latex_replacements = [
    (r"cassins\_dtw", "Cassin's"),
    ("cifar10", "CIFAR10"),
    ("fmnist", "FMNIST"),
    ("mnist", "MNIST"),
    ("macosko2015", "Retina"),
    ("autoencoder", "UMAP/AE"),
    (r"ae\_only", "AE"),
    ("network", "P. UMAP"),
    ("umap-learn", "UMAP"),
    ("vae", "VAE"),
    ("pca", "PCA"),
    ("parametric-tsne", "P. t-SNE"),
    ("TSNE", "t-SNE"),
    ("NaN", "-"),  # missing results are shown as a dash
    ("scvis", "SCVIS"),
    ("ivis", "IVIS"),
    ("phate", "PHATE"),
]

metric_string = metrics_df[method_order].round(4).to_latex()
for old, new in latex_replacements:
    metric_string = metric_string.replace(old, new)

# Print the table with the best accuracy of every data row wrapped in \textbf{}.
lines = metric_string.split('\n')
skip = 1  # the first numeric token of a row is the `dimensions` value, not a score
for line in lines:
    line_elements = line.split(' ')
    # Positions of the tokens that parse as (non-NaN) numbers.
    numeric_positions = [pos for pos, token in enumerate(line_elements) if can_float(token)]
    if len(numeric_positions) > 1:
        # max() returns the first maximal entry, so ties go to the left-most column.
        best_position = max(
            numeric_positions[skip:], key=lambda pos: float(line_elements[pos])
        )
        line_elements[best_position] = '\\textbf{' + line_elements[best_position] + '}'
    print(' '.join(line_elements))

\begin{tabular}{llrrrrrrrrrrr}
\toprule
      & method\_ &    t-SNE &  P. t-SNE &  UMAP &  P. UMAP &  UMAP/AE &   SCVIS &    IVIS &   PHATE &     VAE &  AE &     PCA \\
dataset & dimensions &         &                  &             &          &              &         &         &         &         &          &         \\
\midrule
Cassin's & 2  &  0.9910 &           0.9930 &      0.9890 &   \textbf{0.9950} &       0.9930 &  0.9880 &  0.9740 &  0.9840 &  0.7740 &   0.9090 &  0.6910 \\
      & 64 &     - &           0.9950 &      0.9860 &   0.9910 &       0.9970 &     - &  0.9940 &  \textbf{0.9980} &  0.9880 &   0.9930 &  0.9920 \\
CIFAR10 & 2  &  \textbf{0.2608} &           0.2017 &      0.1936 &   0.1722 &       0.1833 &  0.1584 &  0.1592 &  0.1815 &  0.1941 &   0.2007 &  0.1503 \\
      & 64 &     - &           0.3556 &      0.2694 &   0.2519 &       0.2477 &     - &  \textbf{0.3800} &  0.2517 &  0.3777 &   0.3728 &  0.3769 \\
FMNIST & 2  &  \textbf{0.8039} &           0.7361 &      0.