In [1]:
%matplotlib inline

import h5py
import numpy as np
import matplotlib.pyplot as plt

from autoencoder.model import MoleculeVAE
from autoencoder.utils import one_hot_array, one_hot_index, from_one_hot_array, \
    decode_smiles_from_indexes, load_dataset

from keras.models import Model
from keras.layers import Input

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

from IPython.display import SVG
from keras.utils.visualize_util import model_to_dot

Using TensorFlow backend.


In [2]:
# Load the dataset and its charset from the HDF5 file; with split=False the
# helper is unpacked here as a 2-tuple (data, charset).
data, charset = load_dataset(
    'data/all_smiles_120_one_hot.h5',
    split=False,
)

In [3]:
# Instantiate the VAE wrapper, then load the saved model file for this charset.
model = MoleculeVAE()
model.load(
    charset,
    'target/model.h5',
    latent_rep_size=292,
)

In [4]:
# Take the first entry of the dataset and give it a leading axis of size 1
# so it can be passed to `predict` as a single-item batch.
mol = np.reshape(data[0], (1, 120, 55))

# Sanity check: decode the per-row argmax back into a SMILES string.
decode_smiles_from_indexes(np.argmax(mol[0], axis=1), charset)

'COc1cc2C(=O)c3ccccc3Oc2cc1O'

# encoder/decoder not functioning properly

There are three `Model`s defined in `autoencoder/model.py`: autoencoder, encoder, and decoder. They all share weights. I've manually checked that the weights are being loaded properly. Running the autoencoder model forward works as expected (the reconstruction below differs from the input by a single character):

In [5]:
# Forward pass of the single-molecule batch through the combined autoencoder model.
y = model.autoencoder.predict(mol)
# Bare last expression so the notebook displays the predicted array.
y

array([[[  9.96431845e-06,   6.24905488e-06,   2.08488107e-08, ...,
           9.50873618e-06,   1.27682796e-07,   1.37490214e-10],
        [  2.64823520e-05,   1.10168279e-04,   2.88211510e-07, ...,
           8.31406197e-08,   2.01843545e-06,   5.11892369e-11],
        [  8.30612294e-07,   6.02378520e-08,   1.85686980e-10, ...,
           3.79500875e-09,   2.59887997e-06,   2.54319829e-12],
        ..., 
        [  9.99999881e-01,   4.83881372e-11,   1.30446365e-09, ...,
           4.54286525e-10,   2.41011603e-11,   7.87176964e-13],
        [  9.99999404e-01,   8.72303757e-11,   2.35188136e-09, ...,
           2.70053735e-09,   7.95984043e-11,   5.27387758e-13],
        [  9.99999046e-01,   1.50223139e-10,   4.28040003e-09, ...,
           5.68195491e-09,   5.57447644e-10,   4.44262438e-13]]], dtype=float32)

In [6]:
# Decode the autoencoder's reconstruction: argmax over each row of the first
# (only) batch item, mapped back to characters via the charset.
decode_smiles_from_indexes(np.argmax(y[0], axis=1), charset)

'COc1cc2C(=O)c3ccccc3)c2cc1O'

# But running the encoder and decoder models back to back doesn't work at all, and I'm at a loss as to why:

In [7]:
# Same input as the autoencoder cell, but routed through the two standalone
# models: encode first, then feed the encoder's output to the decoder.
x_latent = model.encoder.predict(mol)
y_hat = model.decoder.predict(x_latent)
# Bare last expression so the notebook displays the decoder output for comparison with `y`.
y_hat

array([[[ 0.0196742 ,  0.01981948,  0.01690715, ...,  0.01669697,
          0.01540486,  0.01897484],
        [ 0.01967454,  0.01930014,  0.01631894, ...,  0.01600836,
          0.01431536,  0.01968929],
        [ 0.0193207 ,  0.01795789,  0.0159542 , ...,  0.01556105,
          0.01336346,  0.02021892],
        ..., 
        [ 0.01481041,  0.00914408,  0.01681479, ...,  0.01505758,
          0.01204179,  0.01918893],
        [ 0.01481041,  0.00914408,  0.01681479, ...,  0.01505758,
          0.01204179,  0.01918893],
        [ 0.01481041,  0.00914408,  0.0168148 , ...,  0.01505758,
          0.01204179,  0.01918893]]], dtype=float32)

In [8]:
# Decode the standalone decoder's output the same way as the autoencoder's:
# argmax over each row of the first batch item, mapped through the charset.
decode_smiles_from_indexes(np.argmax(y_hat[0], axis=1), charset)



# the weight matrices seem to be loaded totally fine:

In [9]:
# Element-wise comparison of the first weight array of the "z_mean" layer as
# seen by the encoder model and by the autoencoder model.
# NOTE(review): the displayed repr is truncated and only get_weights()[0] is
# compared, so this does not by itself prove every weight array is identical.
(
    model.encoder.get_layer("z_mean").get_weights()[0]
    == model.autoencoder.get_layer("z_mean").get_weights()[0]
)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ..., 
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]], dtype=bool)

In [10]:
# Element-wise comparison of the first weight array of the "decoded_mean"
# layer as seen by the decoder model and by the autoencoder model.
# NOTE(review): the displayed repr is truncated and only get_weights()[0] is
# compared, so this does not by itself prove every weight array is identical.
(
    model.decoder.get_layer("decoded_mean").get_weights()[0]
    == model.autoencoder.get_layer("decoded_mean").get_weights()[0]
)

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ..., 
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]], dtype=bool)