first we need to train a parametric-umap network for each dataset... (5 datasets x 2 dimensions)
For umap-learn, UMAP AE, Param. UMAP, PCA
- load dataset
- load network
- compute reconstruction MSE
- count time

In [1]:
# reload packages
%load_ext autoreload
%autoreload 2

### Choose GPU (this may not be needed on your computer)

In [2]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [3]:
import tensorflow as tf
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if len(gpu_devices)>0:
    tf.config.experimental.set_memory_growth(gpu_devices[0], True)

In [4]:
gpu_devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
import numpy as np
import pickle
import pandas as pd
import time
from umap import UMAP

In [6]:
from tfumap.umap import tfUMAP
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score



In [7]:
from tqdm.autonotebook import tqdm

In [8]:
from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

In [9]:
output_dir = MODEL_DIR/'projections' 

In [10]:
reconstruction_acc_df = pd.DataFrame(columns = ['method_', 'dimensions', 'dataset', 'MSE', 'MAE', 'MedAE', 'R2'])

In [11]:
reconstruction_speed_df = pd.DataFrame(columns = ['method_', 'dimensions', 'dataset', 'embed_time', 'recon_time', 'speed', 'nex'])

### MNIST

In [12]:
dataset = 'cifar10'
dims = (32,32,3)

##### load dataset

In [13]:
from tensorflow.keras.datasets import cifar10

# load dataset
(train_images, Y_train), (test_images, Y_test) = cifar10.load_data()
X_train = (train_images/255.).astype('float32')
X_test = (test_images/255.).astype('float32')
X_train = X_train.reshape((len(X_train), np.product(np.shape(X_train)[1:])))
X_test = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

# subset a validation set
n_valid = 10000
X_valid = X_train[-n_valid:]
Y_valid = Y_train[-n_valid:].flatten()
X_train = X_train[:-n_valid]
Y_train = Y_train[:-n_valid].flatten()
Y_test = Y_test.flatten()

print(len(X_train), len(X_valid), len(X_test))

X_test_flat = X_test
X_train_flat = X_train

40000 10000 10000


In [14]:
X_test = X_test.reshape((10000, 32,32,3))

### AE 

##### 2 dims

In [15]:
load_loc = output_dir / dataset / 'autoencoder' 

In [16]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [17]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [18]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [19]:
X_recon = decoder(encoder(X_test)).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [20]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [21]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['AE', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903


##### 64 dims

In [22]:
load_loc = output_dir / dataset / '64' / 'autoencoder' 

In [23]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [24]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [25]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [26]:
X_recon = decoder(encoder(X_test)).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [27]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [28]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['AE', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418


### Network

##### 2 dims

In [29]:
load_loc = output_dir / dataset / 'recon-network' 

In [30]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "network",
    batch_size = 100,
    dims = dims
)

In [31]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [32]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [33]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = encoder(X_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = decoder(z_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    recon_time = end_time - start_time
    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "network",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.11485811113379896
seconds:  0.09156258194707334
seconds:  0.08690015599131584
seconds:  0.003268125932663679
seconds:  0.12388642504811287
seconds:  0.003580765100196004
seconds:  0.11969384318217635
seconds:  0.0032865761313587427
seconds:  0.1210703831166029
seconds:  0.0034980319906026125
seconds:  0.12065843096934259
seconds:  0.0032458449713885784
seconds:  0.11982168722897768
seconds:  0.00336117809638381
seconds:  0.1206874018535018
seconds:  0.003353468142449856
seconds:  0.11967774201184511
seconds:  0.003355917986482382
seconds:  0.12496622698381543
seconds:  0.0034395901020616293



In [34]:
with tf.device('/CPU:0'):
    n_repeats = 10
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        z_test = encoder(X_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        embed_time = end_time - start_time

        start_time = time.monotonic()
        x_test_recon = decoder(z_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        recon_time = end_time - start_time
        reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
            "network-cpu",
            2,
            dataset,
            embed_time,
            recon_time,
            embed_time + recon_time,
        len(X_test_flat)
        ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.22668297495692968
seconds:  0.928623448126018
seconds:  0.15982816508039832
seconds:  0.886087556835264
seconds:  0.1591781449969858
seconds:  0.8952258629724383
seconds:  0.16626196214929223
seconds:  0.8977729480247945
seconds:  0.16659339098259807
seconds:  0.8925167541019619
seconds:  0.1672428110614419
seconds:  0.9846575430128723
seconds:  0.1640205371659249
seconds:  0.899480827851221
seconds:  0.16422246280126274
seconds:  1.1240649910178035
seconds:  0.18245329381898046
seconds:  0.9477057550102472
seconds:  0.17511625005863607
seconds:  0.910248912172392



In [35]:
X_recon = decoder(encoder(X_test)).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [36]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [37]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['network', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821


##### 64 dims

In [38]:
load_loc = output_dir / dataset / '64' / 'recon-network' 

In [39]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [40]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [41]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [42]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = encoder(X_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = decoder(z_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    recon_time = end_time - start_time
    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "network",
        64,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.11657201196067035
seconds:  0.09330560197122395
seconds:  0.07561178598552942
seconds:  0.0032574948854744434
seconds:  0.12267446890473366
seconds:  0.0035495131742209196
seconds:  0.12008333392441273
seconds:  0.00329367583617568
seconds:  0.12001422210596502
seconds:  0.005914292996749282
seconds:  0.11670337500981987
seconds:  0.0029000239446759224
seconds:  0.12145541282370687
seconds:  0.002969837049022317
seconds:  0.1248045212123543
seconds:  0.0028595540206879377
seconds:  0.11898522218689322
seconds:  0.003568924032151699
seconds:  0.1207721249666065
seconds:  0.0034503908827900887



In [43]:
with tf.device("/CPU:0"):
    n_repeats = 10
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        z_test = encoder(X_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        embed_time = end_time - start_time

        start_time = time.monotonic()
        x_test_recon = decoder(z_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        recon_time = end_time - start_time
        reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
            "network-cpu",
            64,
            dataset,
            embed_time,
            recon_time,
            embed_time + recon_time,
        len(X_test_flat)
        ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.20894349785521626
seconds:  0.9680911200121045
seconds:  0.16628512204624712
seconds:  0.8796881511807442
seconds:  0.15982554387301207
seconds:  0.8832851140759885
seconds:  0.16065200814045966
seconds:  0.9567414498887956
seconds:  0.1698929681442678
seconds:  0.9116363830398768
seconds:  0.16931391088292003
seconds:  0.8939382159151137
seconds:  0.1601123819127679
seconds:  0.909452049061656
seconds:  0.16660675196908414
seconds:  0.9099991449620575
seconds:  0.1702697779983282
seconds:  0.8896017700899392
seconds:  0.15965068899095058
seconds:  0.890405602985993



In [44]:
reconstruction_speed_df

Unnamed: 0,method_,dimensions,dataset,embed_time,recon_time,speed,nex
0,network,2,cifar10,0.114858,0.091563,0.206421,10000
1,network,2,cifar10,0.0869,0.003268,0.090168,10000
2,network,2,cifar10,0.123886,0.003581,0.127467,10000
3,network,2,cifar10,0.119694,0.003287,0.12298,10000
4,network,2,cifar10,0.12107,0.003498,0.124568,10000
5,network,2,cifar10,0.120658,0.003246,0.123904,10000
6,network,2,cifar10,0.119822,0.003361,0.123183,10000
7,network,2,cifar10,0.120687,0.003353,0.124041,10000
8,network,2,cifar10,0.119678,0.003356,0.123034,10000
9,network,2,cifar10,0.124966,0.00344,0.128406,10000


In [45]:
X_recon = decoder(encoder(X_test)).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [46]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [47]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['network', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113


#### UMAP-learn

##### 2 dims

In [48]:
embedder = UMAP(n_components = 2, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, verbose=True)
Construct fuzzy simplicial set
Fri Jul 17 15:48:24 2020 Finding Nearest Neighbors
Fri Jul 17 15:48:24 2020 Building RP forest with 15 trees
Fri Jul 17 15:48:26 2020 parallel NN descent for 15 iterations
	 0  /  15
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
Fri Jul 17 15:48:37 2020 Finished Nearest Neighbor Search
Fri Jul 17 15:48:40 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Fri Jul 17 15:49:06 2020 Finished embedding


In [49]:
x_test_samples= []
x_test_recon_samples= []
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = embedder.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    nex = 10 # it would take far too long to reconstruct the entire dataset
    samp_idx = np.random.randint(len(z_test),  size= nex)
    sample = np.array(z_test)[samp_idx]
    x_test_samples.append(samp_idx)
    start_time = time.monotonic()
    x_test_recon = embedder.inverse_transform(sample);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)*len(z_test)/nex

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "umap-learn",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]
    x_test_recon_samples.append(x_test_recon)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  23.39345172396861
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  78.7236777881626
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  10.731074622832239
	completed  0  /  1

In [50]:
x_recon = np.concatenate(x_test_recon_samples)

In [51]:
x_real = np.array(X_test_flat)[np.concatenate(x_test_samples)]

In [52]:

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['umap-learn', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.047413,0.174495,0.149409,0.254802


##### PCA

##### 2 dims


In [53]:
pca = PCA(n_components=2)
z = pca.fit_transform(X_train_flat)

In [54]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = pca.inverse_transform(z_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "pca",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.06524737412109971
seconds:  0.061934988014400005
seconds:  0.04584545409306884
seconds:  0.0626235178206116
seconds:  0.045959727140143514
seconds:  0.06004590401425958
seconds:  0.04591950587928295
seconds:  0.0617062218952924
seconds:  0.045964577002450824
seconds:  0.06206604209728539
seconds:  0.04595038597472012
seconds:  0.059127748012542725
seconds:  0.04589886497706175
seconds:  0.061126384884119034
seconds:  0.04601204791106284
seconds:  0.06070400308817625
seconds:  0.04586034291423857
seconds:  0.061398983001708984
seconds:  0.045916936127468944
seconds:  0.06117443693801761



In [55]:
X_recon = pca.inverse_transform(pca.transform(X_test_flat))
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['pca', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.047413,0.174495,0.149409,0.254802
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544


##### 64 dims

In [56]:
pca = PCA(n_components=64)
z = pca.fit_transform(X_train_flat)

In [57]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = pca.inverse_transform(z_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "pca",
        64,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.08457015198655427
seconds:  0.07463448494672775
seconds:  0.06355920480564237
seconds:  0.07206980115734041
seconds:  0.06367199798114598
seconds:  0.0750897778198123
seconds:  0.0641158721409738
seconds:  0.07459666416980326
seconds:  0.06427785591222346
seconds:  0.07422176306135952
seconds:  0.06382953305728734
seconds:  0.07524773199111223
seconds:  0.0634234210010618
seconds:  0.07289729500189424
seconds:  0.06402558879926801
seconds:  0.07427029497921467
seconds:  0.06378019182011485
seconds:  0.08525394299067557
seconds:  0.06958561902865767
seconds:  0.07472653803415596



In [58]:
X_recon = pca.inverse_transform(pca.transform(X_test_flat))
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['pca', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.047413,0.174495,0.149409,0.254802
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544
6,pca,64,cifar10,0.008396,0.066066,0.047329,0.859842


In [61]:
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.047413,0.174495,0.149409,0.254802
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544
6,pca,64,cifar10,0.008396,0.066066,0.047329,0.859842


### Save

In [77]:
save_loc = DATA_DIR / 'reconstruction_speed' / (dataset + '.pickle')
ensure_dir(save_loc)
reconstruction_speed_df.to_pickle(save_loc)

In [60]:
save_loc = DATA_DIR / 'reconstruction_acc' / (dataset + '.pickle')
ensure_dir(save_loc)
reconstruction_acc_df.to_pickle(save_loc)