In [1]:
import numpy as np
import pandas as pd

from sklearn.manifold import trustworthiness
from sklearn.metrics import silhouette_score

from tqdm.autonotebook import tqdm

from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

# Directory under MODEL_DIR where projections are kept.
output_dir = MODEL_DIR / "projections"



In [2]:
# Base table of projection metrics; the cells below append further rows to it.
# Alternative input file: DATA_DIR / 'projection_metrics_test.pickle'
metrics_path = DATA_DIR / 'projection_metrics.pickle'
metrics_df = pd.read_pickle(metrics_path)

In [3]:
# Peek at the first three rows of the metrics table.
metrics_df.head(3)

Unnamed: 0,dataset,class_,dim,trustworthiness,silhouette_score,silhouette_samples
0,mnist,umap-learn,2,0.960056,0.518626,"[0.639216248568865, 0.781801993664271, 0.44323..."
1,mnist,direct,2,0.960416,0.519683,"[0.6649555493948871, 0.7389523838324524, 0.464..."
2,mnist,network,2,0.957346,0.555885,"[0.6068383792564549, 0.8137081492037734, 0.522..."


In [4]:
# Dataset identifiers used to build the per-dataset metric file names below.
datasets = ["cassins_dtw", "cifar10", "fmnist", "macosko2015", "mnist"]

In [5]:
from IPython.display import display

In [6]:
# Load the per-dataset metric tables for parametric t-SNE / vae / ae and append
# them to `metrics_df`.
#
# Files live at
#   DATA_DIR / "projection_metrics" / [<model subdir>] / "train" / <dim> / "<dataset>.pickle"
# where parametric t-SNE sits directly under "projection_metrics" and the VAE /
# AE-only results sit under the "vae" / "ae_only" sub-directories.
#
# NOTE: this cell extends `metrics_df` in place, so running it more than once
# appends the same rows again.
# NOTE(review): pd.read_pickle executes arbitrary code from the file; only load
# pickles produced by this project.
metrics_root = DATA_DIR / "projection_metrics"
model_roots = [
    metrics_root,  # parametric t-SNE
    metrics_root / "vae",
    metrics_root / "ae_only",
]

extra_frames = []
for dataset in datasets:
    for n_components in ["2", "64"]:
        # Same visiting order as before: parametric t-SNE, then vae, then ae_only.
        for model_root in model_roots:
            save_loc = model_root / "train" / n_components / (dataset + ".pickle")
            try:
                extra_frames.append(pd.read_pickle(save_loc))
            except FileNotFoundError:
                # Missing results are tolerated; report the path so the gap is visible.
                print(save_loc)

# Concatenate once rather than re-copying the growing frame on every iteration.
metrics_df = pd.concat([metrics_df] + extra_frames)

In [7]:
# Rows labelled 'cassins' are relabelled 'cassins_dtw' so they share one dataset key.
is_cassins = metrics_df["dataset"] == "cassins"
metrics_df.loc[is_cassins, "dataset"] = "cassins_dtw"

In [75]:
# Silhouette score laid out as one row per (dataset, dim) and one column per
# projection method (`class_`). aggfunc="first" takes the first entry when a
# (dataset, dim, class_) combination appears more than once.
sil_columns = ["dataset", "class_", "dim", "silhouette_score"]
sil_long = metrics_df[sil_columns].set_index(["dataset", "dim"])
metrics_df_sil = sil_long.pivot_table(
    index=["dataset", "dim"],
    columns="class_",
    values="silhouette_score",
    aggfunc="first",
)
metrics_df_sil

Unnamed: 0_level_0,class_,PCA,TSNE,ae_only,autoencoder,direct,network,parametric-tsne,umap-learn,vae
dataset,dim,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cassins_dtw,2,0.073145,0.543132,0.112507,0.771364,0.730696,0.801333,0.743944,0.774884,0.085296
cassins_dtw,64,0.291449,,0.241136,0.827067,0.742424,0.817335,0.229868,0.853613,0.158339
cifar10,2,-0.114246,-0.121588,-0.143642,-0.131968,-0.134247,-0.135945,-0.275726,-0.133964,-0.11143
cifar10,64,-0.058005,,-0.064358,-0.11718,-0.11504,-0.116342,-0.053554,-0.116621,-0.052915
fmnist,2,-0.033089,0.125117,0.042654,0.206028,0.194861,0.213923,0.201303,0.193571,0.106445
fmnist,64,0.061794,,0.065501,0.230527,0.229873,0.231502,0.054349,0.219534,0.0376
macosko2015,2,0.40088,0.015058,0.43944,0.397324,0.186503,0.451885,0.25781,0.27998,0.44486
macosko2015,64,0.418754,,0.428861,0.466169,0.349337,0.465248,-0.021432,0.352174,0.387342
mnist,2,0.022789,0.349754,-0.025757,0.463664,0.519683,0.555885,0.371015,0.518626,0.062725
mnist,64,0.056866,,0.065334,0.516617,0.541104,0.557076,0.0488,0.527598,0.043106


In [84]:
# Render the selected method columns as a LaTeX table and swap the internal
# identifiers for display labels.
#
# * escape=True is passed explicitly: the `\_` replacements below only match
#   when pandas escapes underscores, which is no longer the default in
#   pandas 2.x.
# * Patterns containing a backslash are raw strings; `"\_"` in a normal string
#   is an invalid escape sequence and triggers a warning on recent Python.
# * Replacement order matters: "fmnist" must be handled before "mnist", and
#   "parametric-tsne" before "TSNE".
metric_string = (
    metrics_df_sil[["TSNE", 'parametric-tsne', "umap-learn", "network", "autoencoder", "ae_only", "vae", "PCA"]]
    .round(4)
    .to_latex(escape=True)
    .replace(r"cassins\_dtw", "Cassin's")
    .replace("cifar10", "CIFAR10")
    .replace("fmnist", "FMNIST")
    .replace("mnist", "MNIST")
    .replace("macosko2015", "Retina")
    .replace("autoencoder", "UMAP/AE")
    .replace(r"ae\_only", "AE")
    .replace("network", "P. UMAP")
    .replace("umap-learn", "UMAP")
    .replace("vae", "VAE")
    .replace("pca", "PCA")
    .replace("parametric-tsne", "P. t-SNE")
    .replace("TSNE", "t-SNE")
    .replace("NaN", "-")
)

In [91]:
def can_float(x):
    """Return True if ``x`` can be converted to a float that is not NaN.

    Parameters
    ----------
    x : object
        Value to probe, typically a whitespace-separated token from a
        LaTeX table row.

    Returns
    -------
    bool
        ``True`` when ``float(x)`` succeeds and the result is not NaN;
        ``False`` when the conversion fails or yields NaN (e.g. ``"NaN"``).
    """
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures count as "not a float"; unrelated exceptions
        # such as KeyboardInterrupt are allowed to propagate.
        return False
    return not np.isnan(value)

In [92]:
# Show the raw LaTeX so it can be copied out as-is.
print(metric_string)

\begin{tabular}{llrrrrrrrr}
\toprule
      & class\_ &    TSNE &  parametric-tsne &  UMAP &  P. UMAP &  UMAP/AE &  AE &     VAE &     PCA \\
dataset & dim &         &                  &             &          &              &          &         &         \\
\midrule
Cassin's & 2  &  0.9949 &           0.9867 &      0.9758 &   0.9756 &       0.9777 &   0.9488 &  0.8976 &  0.8380 \\
      & 64 &     NaN &           0.9990 &      0.9831 &   0.9840 &       0.9907 &   0.9981 &  0.9949 &  0.9999 \\
CIFAR10 & 2  &  0.9216 &           0.7773 &      0.8310 &   0.8187 &       0.8273 &   0.8564 &  0.8510 &  0.8202 \\
      & 64 &     NaN &           0.9971 &      0.9209 &   0.9140 &       0.9199 &   0.9992 &  0.9913 &  0.9996 \\
FMNIST & 2  &  0.9906 &           0.9827 &      0.9777 &   0.9733 &       0.9842 &   0.9803 &  0.9751 &  0.9126 \\
      & 64 &     NaN &           0.9991 &      0.9897 &   0.9894 &       0.9913 &   0.9991 &  0.9960 &  0.9995 \\
Retina & 2  &  0.9702 &           0.9463 & 

In [93]:
# Print the LaTeX table with the largest score of every row wrapped in \textbf{}.
lines = metric_string.split('\n')
skip = 1  # the first numeric token of a data row is the `dim` label (2 / 64), not a score
for line in lines:
    line_elements = line.split(' ')
    floatables = [can_float(token) for token in line_elements]
    floats = [float(token) for token, is_number in zip(line_elements, floatables) if is_number]
    if len(floats) <= 1:
        # Header, rule and empty lines carry no scores: emit them unchanged.
        print(' '.join(line_elements))
        continue
    # NB: despite its name, `lowest` is the position of the LARGEST score.
    lowest = np.argmax(floats[skip:])
    numeric_positions = np.where(floatables)[0]
    replace_element = numeric_positions[skip + lowest]
    best_cell = line_elements[replace_element]
    line_elements[replace_element] = '\\textbf{' + best_cell + '}'
    print(' '.join(line_elements))

\begin{tabular}{llrrrrrrrr}
\toprule
      & class\_ &    TSNE &  parametric-tsne &  UMAP &  P. UMAP &  UMAP/AE &  AE &     VAE &     PCA \\
dataset & dim &         &                  &             &          &              &          &         &         \\
\midrule
Cassin's & 2  &  \textbf{0.9949} &           0.9867 &      0.9758 &   0.9756 &       0.9777 &   0.9488 &  0.8976 &  0.8380 \\
      & 64 &     NaN &           0.9990 &      0.9831 &   0.9840 &       0.9907 &   0.9981 &  0.9949 &  \textbf{0.9999} \\
CIFAR10 & 2  &  \textbf{0.9216} &           0.7773 &      0.8310 &   0.8187 &       0.8273 &   0.8564 &  0.8510 &  0.8202 \\
      & 64 &     NaN &           0.9971 &      0.9209 &   0.9140 &       0.9199 &   0.9992 &  0.9913 &  \textbf{0.9996} \\
FMNIST & 2  &  \textbf{0.9906} &           0.9827 &      0.9777 &   0.9733 &       0.9842 &   0.9803 &  0.9751 &  0.9126 \\
      & 64 &     NaN &           0.9991 &      0.9897 &   0.9894 &       0.9913 &   0.9991 &  0.9960 &  \textbf{

In [94]:
# Debug peek: `floats` still holds whatever the last iteration of the bolding
# loop assigned to it.
floats

[]

In [95]:
# Display the current pivot table.
# NOTE(review): the saved output of this cell is identical to the
# trustworthiness pivot shown further down, not the silhouette pivot built
# above, which suggests out-of-order execution. Confirm with Restart & Run All.
metrics_df_sil

Unnamed: 0_level_0,class_,PCA,TSNE,ae_only,autoencoder,direct,network,parametric-tsne,umap-learn,vae
dataset,dim,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cassins_dtw,2,0.837998,0.994897,0.948826,0.977733,0.975666,0.975571,0.986715,0.975799,0.897559
cassins_dtw,64,0.99986,,0.998053,0.990695,0.987792,0.983971,0.998983,0.983063,0.994884
cifar10,2,0.820158,0.921586,0.856425,0.827345,0.845165,0.818671,0.777304,0.830992,0.851039
cifar10,64,0.999552,,0.999158,0.91994,0.910493,0.913951,0.99712,0.920946,0.991305
fmnist,2,0.912567,0.99055,0.980272,0.984236,0.979961,0.973298,0.98272,0.977682,0.975138
fmnist,64,0.999465,,0.999142,0.991316,0.986902,0.9894,0.99909,0.989691,0.996019
macosko2015,2,0.74454,0.970151,0.806296,0.917325,0.951348,0.943548,0.946283,0.949427,0.75334
macosko2015,64,1.0,,0.99215,0.95422,0.964634,0.962785,0.991776,0.970824,0.93418
mnist,2,0.743419,0.987363,0.96633,0.967531,0.960416,0.957346,0.965484,0.960056,0.951298
mnist,64,0.999917,,0.999713,0.990507,0.984672,0.987972,0.999686,0.989493,0.999385


In [96]:
# Trustworthiness laid out as one row per (dataset, dim) and one column per
# projection method (`class_`). aggfunc="first" takes the first entry when a
# (dataset, dim, class_) combination appears more than once.
# NOTE: the result is stored under `metrics_df_sil` even though it holds
# trustworthiness, because the table-building code further down reads that name.
trust_columns = ["dataset", "class_", "dim", "trustworthiness"]
trust_long = metrics_df[trust_columns].set_index(["dataset", "dim"])
metrics_df_sil = trust_long.pivot_table(
    index=["dataset", "dim"],
    columns="class_",
    values="trustworthiness",
    aggfunc="first",
)
metrics_df_sil

Unnamed: 0_level_0,class_,PCA,TSNE,ae_only,autoencoder,direct,network,parametric-tsne,umap-learn,vae
dataset,dim,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
cassins_dtw,2,0.837998,0.994897,0.948826,0.977733,0.975666,0.975571,0.986715,0.975799,0.897559
cassins_dtw,64,0.99986,,0.998053,0.990695,0.987792,0.983971,0.998983,0.983063,0.994884
cifar10,2,0.820158,0.921586,0.856425,0.827345,0.845165,0.818671,0.777304,0.830992,0.851039
cifar10,64,0.999552,,0.999158,0.91994,0.910493,0.913951,0.99712,0.920946,0.991305
fmnist,2,0.912567,0.99055,0.980272,0.984236,0.979961,0.973298,0.98272,0.977682,0.975138
fmnist,64,0.999465,,0.999142,0.991316,0.986902,0.9894,0.99909,0.989691,0.996019
macosko2015,2,0.74454,0.970151,0.806296,0.917325,0.951348,0.943548,0.946283,0.949427,0.75334
macosko2015,64,1.0,,0.99215,0.95422,0.964634,0.962785,0.991776,0.970824,0.93418
mnist,2,0.743419,0.987363,0.96633,0.967531,0.960416,0.957346,0.965484,0.960056,0.951298
mnist,64,0.999917,,0.999713,0.990507,0.984672,0.987972,0.999686,0.989493,0.999385


In [98]:
# Render the selected method columns as a LaTeX table and swap the internal
# identifiers for display labels.
#
# * escape=True is passed explicitly: the `\_` replacements below only match
#   when pandas escapes underscores, which is no longer the default in
#   pandas 2.x.
# * Patterns containing a backslash are raw strings; `"\_"` in a normal string
#   is an invalid escape sequence and triggers a warning on recent Python.
# * Replacement order matters: "fmnist" must be handled before "mnist", and
#   "parametric-tsne" before "TSNE".
metric_string = (
    metrics_df_sil[["TSNE", 'parametric-tsne', "umap-learn", "network", "autoencoder", "ae_only", "vae", "PCA"]]
    .round(4)
    .to_latex(escape=True)
    .replace(r"cassins\_dtw", "Cassin's")
    .replace("cifar10", "CIFAR10")
    .replace("fmnist", "FMNIST")
    .replace("mnist", "MNIST")
    .replace("macosko2015", "Retina")
    .replace("autoencoder", "UMAP/AE")
    .replace(r"ae\_only", "AE")
    .replace("network", "P. UMAP")
    .replace("umap-learn", "UMAP")
    .replace("vae", "VAE")
    .replace("pca", "PCA")
    .replace("parametric-tsne", "P. t-SNE")
    .replace("TSNE", "t-SNE")
    .replace("NaN", "-")
)

# Print the LaTeX table with the largest score of every row wrapped in \textbf{}.
lines = metric_string.split('\n')
skip = 1  # the first numeric token of a data row is the `dim` label (2 / 64), not a score
for line in lines:
    line_elements = line.split(' ')
    floatables = [can_float(token) for token in line_elements]
    floats = [float(token) for token, is_number in zip(line_elements, floatables) if is_number]
    if len(floats) <= 1:
        # Header, rule and empty lines carry no scores: emit them unchanged.
        print(' '.join(line_elements))
        continue
    # NB: despite its name, `lowest` is the position of the LARGEST score.
    lowest = np.argmax(floats[skip:])
    numeric_positions = np.where(floatables)[0]
    replace_element = numeric_positions[skip + lowest]
    best_cell = line_elements[replace_element]
    line_elements[replace_element] = '\\textbf{' + best_cell + '}'
    print(' '.join(line_elements))

\begin{tabular}{llrrrrrrrr}
\toprule
      & class\_ &    t-SNE &  P. t-SNE &  UMAP &  P. UMAP &  UMAP/AE &  AE &     VAE &     PCA \\
dataset & dim &         &                  &             &          &              &          &         &         \\
\midrule
Cassin's & 2  &  \textbf{0.9949} &           0.9867 &      0.9758 &   0.9756 &       0.9777 &   0.9488 &  0.8976 &  0.8380 \\
      & 64 &     - &           0.9990 &      0.9831 &   0.9840 &       0.9907 &   0.9981 &  0.9949 &  \textbf{0.9999} \\
CIFAR10 & 2  &  \textbf{0.9216} &           0.7773 &      0.8310 &   0.8187 &       0.8273 &   0.8564 &  0.8510 &  0.8202 \\
      & 64 &     - &           0.9971 &      0.9209 &   0.9140 &       0.9199 &   0.9992 &  0.9913 &  \textbf{0.9996} \\
FMNIST & 2  &  \textbf{0.9906} &           0.9827 &      0.9777 &   0.9733 &       0.9842 &   0.9803 &  0.9751 &  0.9126 \\
      & 64 &     - &           0.9991 &      0.9897 &   0.9894 &       0.9913 &   0.9991 &  0.9960 &  \textbf{0.9995} \\
R