In [None]:
import numpy as np
import pandas as pd

from sklearn import preprocessing

from framework.common.dataset import Dataset
from framework.keras.autoencoder import VariationalAutoencoder as VAE
from framework.common.util import save_data_table

In [None]:
# --- Input data ---------------------------------------------------------
# Table read by load_dataset() with a tab separator.
# Dataset folders used with this notebook:
#   GSE57872_GBM; GSE72056_Melanoma; GSE75688_Breast_Cancer
dataset_file = "data/GSE57872_GBM/processed/gbm.1000g.centered.txt"
# First column (counted after the index column has been consumed) that is
# treated as a feature. Known values: melanoma = 1; gbm = 1; breast = 2
dataset_features_start_idx = 1

# --- Model --------------------------------------------------------------
# NOTE(review): absolute, machine-specific output directory.
model_dir = "/Users/kevin/Desktop/GBMVAE_Final"
# Start from a previously saved configuration, then redirect its model
# directory to `model_dir`.
model_config = VAE.load_config("results/gbm/vae/Train1000gGBMVAE/GBMVAE_Final")
model_config["model_dir"] = model_dir

In [None]:
# Replace the encoder layer specification that came with the loaded config.
# NOTE(review): spec strings look like "<layer>:<units>:<kwargs>" -- confirm
# the exact grammar against the framework's layer parser.
encoder_layer_specs = [
    "Dense:1000:activation='elu'",
    "Dense:100:activation='elu'",
]
model_config["encoder_layers"] = encoder_layer_specs

In [None]:
def load_dataset(path=None, features_start_idx=None):
    """Read a tab-separated table and split it into ids and a feature matrix.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to the notebook-level ``dataset_file``.
    features_start_idx : int, optional
        Position of the first feature column, counted after the first file
        column has been consumed as the index. Defaults to the notebook-level
        ``dataset_features_start_idx``.

    Returns
    -------
    dataset : pandas.DataFrame
        The full table, indexed by the first file column.
    cell_ids : numpy.ndarray
        The index values of ``dataset``.
    features : numpy.ndarray
        Columns from ``features_start_idx`` onward, cast to float64.
    """
    # Fall back to the notebook configuration so `load_dataset()` keeps
    # working exactly as before; explicit arguments make the function usable
    # (and testable) without relying on global state.
    if path is None:
        path = dataset_file
    if features_start_idx is None:
        features_start_idx = dataset_features_start_idx

    dataset = pd.read_csv(path, sep="\t", header=0, index_col=0)
    features = dataset.iloc[:, features_start_idx:].values.astype(dtype=np.float64)
    cell_ids = dataset.index.values

    return dataset, cell_ids, features

dataset, cell_ids, features = load_dataset()

# Standardize each feature column with statistics fitted on this same matrix.
scaler = preprocessing.StandardScaler().fit(features)
features_scaled = scaler.transform(features)

# The scaled matrix is passed as both positional arguments; the cell ids ride
# along as the single entry of `sample_data`.
train_dataset = Dataset(
    features_scaled,
    features_scaled,
    sample_data=[cell_ids],
)

In [None]:
# Build the model from the (modified) configuration and train it briefly.
vae = VAE(model_config)
training_options = {
    "epochs": 3,  # short run; this notebook only checks save/restore
    "batch_size": model_config["batch_size"],
}
vae.train(train_dataset, **training_options)

In [None]:
import json

# Evaluate the freshly trained model on the training data and show the result.
results = vae.evaluate(train_dataset)
print(results)

# Store the metrics inside `model_dir`, built the same way as the latent
# representation path, instead of repeating the absolute path literal. With
# the current configuration this resolves to the same file as before, but it
# now follows `model_dir` if that setting is changed.
with open(model_dir + "/results.json", "w") as f:
    json.dump(results, f)

In [None]:
# Snapshot the weights of all three sub-models before the session is reset,
# so they can be compared against the restored model later on.
ae_weights, enc_weights, gen_weights = [
    getattr(vae, model_attr).get_weights()
    for model_attr in ("autoencoder_model", "encoder_model", "generator_model")
]

In [None]:
# Encode the training features with the freshly trained model.
latent_reps = vae.encode(train_dataset.features)

# Header row: an id column followed by one "dimN" column per latent dimension
# (1-based), as configured in `latent_size`.
header = np.array(
    ["cell_ids"]
    + ["dim{}".format(dim) for dim in range(1, model_config["latent_size"] + 1)]
)

# One row per sample: its entry from `sample_data[0]` followed by its latent
# coordinates, with the header stacked on top.
# The table gets its own name so that the evaluation metrics stored in
# `results` by the evaluation cell are not overwritten by an array of a
# completely different kind.
latent_table = np.vstack((
    header,
    np.hstack((
        np.expand_dims(train_dataset.sample_data[0], axis=1),
        latent_reps
    ))
))
save_data_table(latent_table,
                model_config["model_dir"] + "/latent_representations.txt")

In [None]:
import tensorflow as tf
import keras.backend as K
# Discard the in-memory model state so that the restore below has to rebuild
# the model rather than reuse live objects.
# NOTE(review): `tf.reset_default_graph` is the TF 1.x spelling; under TF 2.x
# it is only available as `tf.compat.v1.reset_default_graph` -- confirm the
# TensorFlow version this notebook targets.
tf.reset_default_graph()
K.clear_session()
# Remove the notebook's reference to the trained model; the name is rebound
# by the restore step.
del vae

In [None]:
# Rebuild the model from what was written to `model_dir`.
vae = VAE.restore(model_dir)

In [None]:
# Evaluate the restored model on the same data; the value is displayed as the
# cell output so it can be compared by eye with the pre-restore evaluation.
vae.evaluate(train_dataset)

In [None]:
# Weights of the restored sub-models, keyed the same way as the originals.
restored_weights = {
    "ae": vae.autoencoder_model.get_weights(),
    "enc": vae.encoder_model.get_weights(),
    "gen": vae.generator_model.get_weights(),
}

# Individual names kept for interactive inspection.
restored_ae_weights = restored_weights["ae"]
restored_enc_weights = restored_weights["enc"]
restored_gen_weights = restored_weights["gen"]

In [None]:
# Compare every weight array captured before the session reset with its
# counterpart in the restored model.
orig_weights = dict(zip(["ae", "enc", "gen"], [ae_weights, enc_weights, gen_weights]))

mismatch_count = 0
for name, weight_list in orig_weights.items():
    restored_list = restored_weights[name]

    # A differing number of arrays would otherwise raise IndexError (fewer
    # restored arrays) or go unnoticed (more restored arrays).
    if len(weight_list) != len(restored_list):
        print("Mismatch in number of weight arrays:", name,
              "-", len(weight_list), "original vs", len(restored_list), "restored")
        mismatch_count += 1

    for index, (weights, restored) in enumerate(zip(weight_list, restored_list)):
        # np.allclose broadcasts its inputs, so shapes are checked first:
        # differently shaped arrays must never be reported as equal.
        same_shape = np.shape(weights) == np.shape(restored)
        if not same_shape or not np.allclose(weights, restored):
            print("Mismatch in weights:", name, "- Layer", index)
            mismatch_count += 1

# Make success explicit; an empty output is indistinguishable from a cell
# that was never run.
if mismatch_count == 0:
    print("All restored weights match the originals.")

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

def fit_tsne(features):
    """Embed `features` in two dimensions with t-SNE.

    The embedding uses PCA initialization, a perplexity of 30 and a fixed
    `random_state` of 0. Returns the output of `TSNE.fit_transform`.
    """
    tsne_options = dict(n_components=2, init='pca', random_state=0, perplexity=30)
    return TSNE(**tsne_options).fit_transform(features)

# Embedding of the latent codes produced before the model was reset.
tsne_output_1 = fit_tsne(latent_reps)

# Embedding of the latent codes produced by the restored model.
restored_latent_reps = vae.encode(train_dataset.features)
tsne_output_2 = fit_tsne(restored_latent_reps)

In [None]:
# One scatter plot per embedding, in separate numbered figures.
embeddings_to_plot = [
    (tsne_output_1, "Original VAE Latent Space"),
    (tsne_output_2, "Restored VAE Latent Space"),
]

for figure_number, (embedding, figure_title) in enumerate(embeddings_to_plot, start=1):
    plt.figure(figure_number)
    plt.scatter(embedding[:, 0], embedding[:, 1])
    plt.title(figure_title)

plt.show()

In [None]:
# Debugging aid: display the restored model object's instance attributes.
vae.__dict__

In [None]:
# Show the summary of the restored autoencoder model.
vae.autoencoder_model.summary()

In [None]:
# Run the restored model on the scaled features; the return value is shown as
# the cell output.
# NOTE(review): this displays the complete result -- consider showing only a
# slice once the return type of `predict` is confirmed.
vae.predict(features_scaled)