In [None]:
from models import *

import numpy as np
import pandas as pd

In [22]:
# Define helper function
    
def save_reconstruction(data, vae_path, pca_path, cosine_kpca_path, sigmoid_kpca_path, rbf_kpca_path, poly_kpca_path):
    """Reconstruct ``data`` with every model and write each result to its own CSV.

    The models are read from notebook-level globals, so ``mtmodel``,
    ``PCA_model_``, ``cosine_KPCA_model_``, ``sigmoid_KPCA_model_``,
    ``rbf_KPCA_model_`` and ``poly_KPCA_model_`` must be defined before this
    function is called.

    Parameters
    ----------
    data : pandas.DataFrame
        Samples to reconstruct. Its column labels become the header of every
        output file.
    vae_path, pca_path, cosine_kpca_path, sigmoid_kpca_path, rbf_kpca_path, poly_kpca_path
        Destination of the CSV holding the reconstruction produced by the
        correspondingly named model.

    Notes
    -----
    Files are written without the index column. When a destination is a
    string or path-like object, missing parent directories are created first,
    so a fresh checkout without the output folder no longer fails in
    ``to_csv``. Other destinations (e.g. open buffers) are passed through
    untouched.
    """
    import os

    def _write_csv(reconstruction, destination):
        # Wrap one reconstruction in a DataFrame and store it at `destination`.
        if isinstance(destination, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(destination))
            if parent_dir:  # bare file names have no directory to create
                os.makedirs(parent_dir, exist_ok=True)
        frame = pd.DataFrame(reconstruction, columns=data.columns)
        frame.to_csv(destination, index=False)

    # The VAE receives the raw array; the PCA/KPCA models receive the frame itself.
    _write_csv(mtmodel.reconstruct(data.values), vae_path)
    _write_csv(PCA_model_.reconstruct(data), pca_path)
    _write_csv(cosine_KPCA_model_.reconstruct(data), cosine_kpca_path)
    _write_csv(sigmoid_KPCA_model_.reconstruct(data), sigmoid_kpca_path)
    _write_csv(rbf_KPCA_model_.reconstruct(data), rbf_kpca_path)
    _write_csv(poly_KPCA_model_.reconstruct(data), poly_kpca_path)

In [23]:
# Root folders for the stored models and for the reconstruction outputs
model_path = 'models/'
recon_path = 'results/reconstructions/'

# Weight files for the full VAE and for its encoder / decoder parts
path_vae = f'{model_path}VAE.h5'
path_encoder = f'{model_path}VAE_encoder.h5'
path_decoder = f'{model_path}VAE_decoder.h5'

# TwinsUK Excel workbook
twins_path = 'data/TwinsUK.xls'

In [24]:
# Load datasets
# Open the workbook once and read both sheets from the same handle instead of
# re-opening and re-parsing the file for every sheet; the context manager
# closes the workbook when both sheets are loaded.
with pd.ExcelFile(twins_path) as twins_workbook:
    twins_train_df = pd.read_excel(twins_workbook, sheet_name='Training Set')
    twins_test_df = pd.read_excel(twins_workbook, sheet_name='Testing Set')

# Training rows followed by testing rows, re-indexed from 0
twins_full_data = pd.concat([twins_train_df, twins_test_df], ignore_index=True)

In [25]:
# Data & model configuration
input_dim = twins_train_df.shape[1]  # number of columns in the training sheet
intermediate_dim = 200
latent_dim = 18

# Note: to reproduce Figures 2a and 2b, which show the correlation-matrix MSE
# curve for varying latent dimensionalities d, run the following code in a
# loop with
# latent_dims = [5, 10, 15, 18, 20, 30, 40, 60, 80, 100, 120, 160, 200]

kl_beta = 1e-2
learning_rate = 1e-3

# ---------------------------------------------------------------
# VAE: instantiate, then load the stored weights of each component
# ---------------------------------------------------------------
mtmodel = mtVAE(input_dim, intermediate_dim, latent_dim, kl_beta, learning_rate)

for _part_name, _weights_file in (('vae', path_vae),
                                  ('encoder', path_encoder),
                                  ('decoder', path_decoder)):
    getattr(mtmodel, _part_name).load_weights(_weights_file)

# ---------------------------------------------------------------
# PCA model, built from the training values with the VAE's latent_dim
# ---------------------------------------------------------------
PCA_model_ = PCA_model(twins_train_df.values, latent_dim)

# ---------------------------------------------------------------
# KPCA models, one per kernel
# NOTE(review): the four trailing positional numbers are presumably kernel
# hyper-parameters; confirm their meaning against KPCA_model's signature.
# ---------------------------------------------------------------
poly_KPCA_model_ = KPCA_model(
    twins_train_df.values, latent_dim, "poly", 2, 0.001, 3, 5.0
)
cosine_KPCA_model_ = KPCA_model(
    twins_train_df.values, latent_dim, "cosine", 1, 0, 0, 0
)
sigmoid_KPCA_model_ = KPCA_model(
    twins_train_df.values, latent_dim, "sigmoid", 1, 0.05, 0, 0
)
rbf_KPCA_model_ = KPCA_model(
    twins_train_df.values, latent_dim, "rbf", 1, 0.005, 0, 0
)

## Create and save data reconstructions

In [None]:
# Create save paths for reconstructed data
def _recon_csv(split, method):
    """Return the CSV path for one data split and one reconstruction method."""
    return f'{recon_path}Twins_{split}_{method}_reconstruction_d_{latent_dim}.csv'


# Training-set outputs
train_vae_recon = _recon_csv('train', 'VAE')
train_pca_recon = _recon_csv('train', 'PCA')
train_cosine_kpca_recon = _recon_csv('train', 'cosine_KPCA')
train_sigmoid_kpca_recon = _recon_csv('train', 'sigmoid_KPCA')
train_rbf_kpca_recon = _recon_csv('train', 'rbf_KPCA')
train_poly_kpca_recon = _recon_csv('train', 'poly_KPCA')

# Testing-set outputs
test_vae_recon = _recon_csv('test', 'VAE')
test_pca_recon = _recon_csv('test', 'PCA')
test_cosine_kpca_recon = _recon_csv('test', 'cosine_KPCA')
test_sigmoid_kpca_recon = _recon_csv('test', 'sigmoid_KPCA')
test_rbf_kpca_recon = _recon_csv('test', 'rbf_KPCA')
test_poly_kpca_recon = _recon_csv('test', 'poly_KPCA')

In [None]:
# Twins reconstruction: training split first, then the testing split
save_reconstruction(
    data=twins_train_df,
    vae_path=train_vae_recon,
    pca_path=train_pca_recon,
    cosine_kpca_path=train_cosine_kpca_recon,
    sigmoid_kpca_path=train_sigmoid_kpca_recon,
    rbf_kpca_path=train_rbf_kpca_recon,
    poly_kpca_path=train_poly_kpca_recon,
)
save_reconstruction(
    data=twins_test_df,
    vae_path=test_vae_recon,
    pca_path=test_pca_recon,
    cosine_kpca_path=test_cosine_kpca_recon,
    sigmoid_kpca_path=test_sigmoid_kpca_recon,
    rbf_kpca_path=test_rbf_kpca_recon,
    poly_kpca_path=test_poly_kpca_recon,
)