first we need to train a parametric-umap network for each dataset... (5 datasets x 2 dimensions)
For umap-learn, UMAP AE, Param. UMAP, PCA
- load dataset
- load network
- compute reconstruction MSE
- count time

In [1]:
# reload packages
%load_ext autoreload
%autoreload 2

### Choose GPU (this may not be needed on your computer)

In [2]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [3]:
import tensorflow as tf
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
if len(gpu_devices)>0:
    tf.config.experimental.set_memory_growth(gpu_devices[0], True)

In [4]:
gpu_devices

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
import numpy as np
import pickle
import pandas as pd
import time
from umap import UMAP

In [6]:
from tfumap.umap import tfUMAP
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score



In [7]:
from tqdm.autonotebook import tqdm

In [8]:
from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

In [9]:
output_dir = MODEL_DIR/'projections' 

In [10]:
reconstruction_acc_df = pd.DataFrame(columns = ['method_', 'dimensions', 'dataset', 'MSE', 'MAE', 'MedAE', 'R2'])

In [11]:
reconstruction_speed_df = pd.DataFrame(columns = ['method_', 'dimensions', 'dataset', 'embed_time', 'recon_time', 'speed', 'nex'])

### MNIST

In [12]:
dataset = 'cifar10'
dims = (32,32,3)

##### load dataset

In [13]:
from tensorflow.keras.datasets import cifar10

# load dataset
(train_images, Y_train), (test_images, Y_test) = cifar10.load_data()
X_train = (train_images/255.).astype('float32')
X_test = (test_images/255.).astype('float32')
X_train = X_train.reshape((len(X_train), np.product(np.shape(X_train)[1:])))
X_test = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

# subset a validation set
n_valid = 10000
X_valid = X_train[-n_valid:]
Y_valid = Y_train[-n_valid:].flatten()
X_train = X_train[:-n_valid]
Y_train = Y_train[:-n_valid].flatten()
Y_test = Y_test.flatten()

print(len(X_train), len(X_valid), len(X_test))

X_test_flat = X_test
X_train_flat = X_train

40000 10000 10000


In [14]:
X_test = X_test.reshape((10000, 32,32,3))

### AE 

##### 2 dims

In [15]:
load_loc = output_dir / dataset / 'autoencoder' 

In [16]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [17]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [18]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [19]:
X_recon = tf.nn.relu(decoder(encoder(X_test))).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [20]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [21]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['AE', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903


##### 64 dims

In [22]:
load_loc = output_dir / dataset / '64' / 'autoencoder' 

In [23]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [24]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [25]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [26]:
X_recon = tf.nn.relu(decoder(encoder(X_test))).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [27]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [28]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['AE', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418


### Network

##### 2 dims

In [29]:
load_loc = output_dir / dataset / 'recon-network' 

In [30]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "network",
    batch_size = 100,
    dims = dims
)

In [31]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [32]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [33]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = encoder(X_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = decoder(z_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    recon_time = end_time - start_time
    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "network",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.11679400899447501
seconds:  0.08252820000052452
seconds:  0.08703693095594645
seconds:  0.0033805971033871174
seconds:  0.13302278704941273
seconds:  0.0034420290030539036
seconds:  0.12098562996834517
seconds:  0.0034550491254776716
seconds:  0.12563082412816584
seconds:  0.003728098003193736
seconds:  0.1211826961953193
seconds:  0.003611805150285363
seconds:  0.12212398299016058
seconds:  0.0033756971824914217
seconds:  0.11870035296306014
seconds:  0.003424158087000251
seconds:  0.12043608399108052
seconds:  0.003429229138419032
seconds:  0.12483409116975963
seconds:  0.004978144075721502



In [34]:
with tf.device('/CPU:0'):
    n_repeats = 10
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        z_test = encoder(X_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        embed_time = end_time - start_time

        start_time = time.monotonic()
        x_test_recon = decoder(z_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        recon_time = end_time - start_time
        reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
            "network-cpu",
            2,
            dataset,
            embed_time,
            recon_time,
            embed_time + recon_time,
        len(X_test_flat)
        ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.23068737308494747
seconds:  0.9231758571695536
seconds:  0.1575746249873191
seconds:  0.8764556588139385
seconds:  0.16150903794914484
seconds:  0.8700761960353702
seconds:  0.1626434710342437
seconds:  0.8703491028863937
seconds:  0.15731652709655464
seconds:  0.8661779828835279
seconds:  0.15920106205157936
seconds:  0.8594287689775229
seconds:  0.16214963607490063
seconds:  1.1919325289782137
seconds:  0.16024967189878225
seconds:  0.9024080480448902
seconds:  0.17168135195970535
seconds:  0.8785134789068252
seconds:  0.16232791217043996
seconds:  0.8657087199389935



In [35]:
X_recon = tf.nn.relu(decoder(encoder(X_test))).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [36]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [37]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['network', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821


##### 64 dims

In [38]:
load_loc = output_dir / dataset / '64' / 'recon-network' 

In [39]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    decoding_method = "autoencoder",
    batch_size = 100,
    dims = dims
)

In [40]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [41]:
decoder = tf.keras.models.load_model((load_loc / 'decoder').as_posix())
embedder.decoder = decoder

In [42]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = encoder(X_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = decoder(z_test)
    end_time = time.monotonic()
    print("seconds: ", end_time - start_time)
    recon_time = end_time - start_time
    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "network",
        64,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.11812027613632381
seconds:  0.09298711200244725
seconds:  0.07644069497473538
seconds:  0.0031570810824632645
seconds:  0.12470955704338849
seconds:  0.003587723011150956
seconds:  0.12004045280627906
seconds:  0.003513262141495943
seconds:  0.12039426201954484
seconds:  0.0029313841369003057
seconds:  0.1211978062056005
seconds:  0.002724118996411562
seconds:  0.12220531492494047
seconds:  0.0031696120277047157
seconds:  0.12069964106194675
seconds:  0.002737279050052166
seconds:  0.12039983319118619
seconds:  0.002949264831840992
seconds:  0.12216116394847631
seconds:  0.003241343889385462



In [43]:
with tf.device("/CPU:0"):
    n_repeats = 10
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        z_test = encoder(X_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        embed_time = end_time - start_time

        start_time = time.monotonic()
        x_test_recon = decoder(z_test)
        end_time = time.monotonic()
        print("seconds: ", end_time - start_time)
        recon_time = end_time - start_time
        reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
            "network-cpu",
            64,
            dataset,
            embed_time,
            recon_time,
            embed_time + recon_time,
        len(X_test_flat)
        ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.2089722971431911
seconds:  0.9210848759394139
seconds:  0.163468113867566
seconds:  1.0379351479932666
seconds:  0.17269370099529624
seconds:  0.8708737690467387
seconds:  0.16924117086455226
seconds:  0.8840357079170644
seconds:  0.16331415087915957
seconds:  0.891037309076637
seconds:  0.183066519908607
seconds:  0.870317252818495
seconds:  0.1669135638512671
seconds:  0.873598787933588
seconds:  0.16841599810868502
seconds:  0.8653330879751593
seconds:  0.16915245912969112
seconds:  0.8631360351573676
seconds:  0.16732385591603816
seconds:  0.8706716930028051



In [44]:
reconstruction_speed_df

Unnamed: 0,method_,dimensions,dataset,embed_time,recon_time,speed,nex
0,network,2,cifar10,0.116794,0.082528,0.199322,10000
1,network,2,cifar10,0.087037,0.003381,0.090418,10000
2,network,2,cifar10,0.133023,0.003442,0.136465,10000
3,network,2,cifar10,0.120986,0.003455,0.124441,10000
4,network,2,cifar10,0.125631,0.003728,0.129359,10000
5,network,2,cifar10,0.121183,0.003612,0.124795,10000
6,network,2,cifar10,0.122124,0.003376,0.1255,10000
7,network,2,cifar10,0.1187,0.003424,0.122125,10000
8,network,2,cifar10,0.120436,0.003429,0.123865,10000
9,network,2,cifar10,0.124834,0.004978,0.129812,10000


In [45]:
X_recon = tf.nn.relu(decoder(encoder(X_test))).numpy()
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

In [46]:
MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

In [47]:
reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['network', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113


#### UMAP-learn

##### 2 dims

In [48]:
embedder = UMAP(n_components = 2, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, verbose=True)
Construct fuzzy simplicial set
Sat Jul 18 11:28:25 2020 Finding Nearest Neighbors
Sat Jul 18 11:28:25 2020 Building RP forest with 15 trees
Sat Jul 18 11:28:28 2020 parallel NN descent for 15 iterations
	 0  /  15
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
Sat Jul 18 11:28:39 2020 Finished Nearest Neighbor Search
Sat Jul 18 11:28:42 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Sat Jul 18 11:29:08 2020 Finished embedding


In [49]:
x_test_samples= []
x_test_recon_samples= []
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = embedder.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    nex = 10 # it would take far too long to reconstruct the entire dataset
    samp_idx = np.random.randint(len(z_test),  size= nex)
    sample = np.array(z_test)[samp_idx]
    x_test_samples.append(samp_idx)
    start_time = time.monotonic()
    x_test_recon = embedder.inverse_transform(sample);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)*len(z_test)/nex

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "umap-learn",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]
    x_test_recon_samples.append(x_test_recon)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  23.646549155935645
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  73.80770940799266
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  10.786347480956465
	completed  0  / 

In [50]:
x_recon = np.concatenate(x_test_recon_samples)

In [51]:
x_real = np.array(X_test_flat)[np.concatenate(x_test_samples)]

In [52]:

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['umap-learn', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.052772,0.182507,0.155053,0.195293


##### PCA

##### 2 dims


In [53]:
pca = PCA(n_components=2)
z = pca.fit_transform(X_train_flat)

In [54]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = pca.inverse_transform(z_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "pca",
        2,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.05136263114400208
seconds:  0.06317429197952151
seconds:  0.045511292992159724
seconds:  0.062314426992088556
seconds:  0.04579224088229239
seconds:  0.05906378314830363
seconds:  0.04555217386223376
seconds:  0.061885114992037416
seconds:  0.04603867814876139
seconds:  0.06060622679069638
seconds:  0.04569631698541343
seconds:  0.0619364760350436
seconds:  0.04559281491674483
seconds:  0.06216520210728049
seconds:  0.04589757299982011
seconds:  0.06215196312405169
seconds:  0.04593922500498593
seconds:  0.06461698398925364
seconds:  0.046300975838676095
seconds:  0.06280132103711367



In [55]:
X_recon = pca.inverse_transform(pca.transform(X_test_flat))
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['pca', 2, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.052772,0.182507,0.155053,0.195293
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544


##### 64 dims

In [56]:
pca = PCA(n_components=64)
z = pca.fit_transform(X_train_flat)

In [57]:
n_repeats = 10
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    z_test = pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    embed_time = end_time - start_time

    start_time = time.monotonic()
    x_test_recon = pca.inverse_transform(z_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    recon_time = (end_time - start_time)

    reconstruction_speed_df.loc[len(reconstruction_speed_df)] = [
        "pca",
        64,
        dataset,
        embed_time,
        recon_time,
        embed_time + recon_time,
        len(X_test_flat)
    ]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.0697757019661367
seconds:  0.07490651006810367
seconds:  0.064160370035097
seconds:  0.07128763478249311
seconds:  0.06396984402090311
seconds:  0.07524386001750827
seconds:  0.06420253100804985
seconds:  0.07431941293179989
seconds:  0.06474336702376604
seconds:  0.07503970409743488
seconds:  0.0641822898760438
seconds:  0.07473708502948284
seconds:  0.06381790991872549
seconds:  0.07520176796242595
seconds:  0.06439739698544145
seconds:  0.07499905303120613
seconds:  0.06352232187055051
seconds:  0.07520194817334414
seconds:  0.06431677401997149
seconds:  0.07513528689742088



In [58]:
X_recon = pca.inverse_transform(pca.transform(X_test_flat))
x_real = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))
x_recon = X_recon.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

MSE = mean_squared_error(
    x_real, 
    x_recon
)
MAE = mean_absolute_error(
    x_real, 
    x_recon
)
MedAE = median_absolute_error(
    x_real, 
    x_recon
)
R2 = r2_score(
    x_real, 
    x_recon
)

reconstruction_acc_df.loc[len(reconstruction_acc_df)] = ['pca', 64, dataset, MSE, MAE, MedAE, R2]
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.052772,0.182507,0.155053,0.195293
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544
6,pca,64,cifar10,0.008396,0.066063,0.047322,0.859826


In [59]:
reconstruction_acc_df

Unnamed: 0,method_,dimensions,dataset,MSE,MAE,MedAE,R2
0,AE,2,cifar10,0.03641,0.14936,0.122907,0.3903
1,AE,64,cifar10,0.009391,0.068816,0.048234,0.843418
2,network,2,cifar10,0.036922,0.151219,0.125664,0.381821
3,network,64,cifar10,0.029988,0.132275,0.10409,0.498113
4,umap-learn,2,cifar10,0.052772,0.182507,0.155053,0.195293
5,pca,2,cifar10,0.037042,0.151388,0.126452,0.379544
6,pca,64,cifar10,0.008396,0.066063,0.047322,0.859826


### Save

In [60]:
#save_loc = DATA_DIR / 'reconstruction_speed' / (dataset + '.pickle')
#ensure_dir(save_loc)
#reconstruction_speed_df.to_pickle(save_loc)

In [61]:
save_loc = DATA_DIR / 'reconstruction_acc' / (dataset + '.pickle')
ensure_dir(save_loc)
reconstruction_acc_df.to_pickle(save_loc)