# How to use the trained molecule VAE


In [1]:
import os
import sys
sys.path.insert(0, '..')

import molecule_vae

Using TensorFlow backend.


### You need to have a .hdf5 file with trained weights somewhere.

In [2]:
# Path to the trained grammar-VAE weights (.hdf5 file).
# Set the ZINC_GRAMMAR_WEIGHTS environment variable to point at your own copy;
# the fallback is the notebook's original local path, which will not exist on
# other machines.
grammar_weights = os.environ.get(
    "ZINC_GRAMMAR_WEIGHTS",
    "/Users/brookspaige/Downloads/zinc_vae_L56.hdf5",
)

In [3]:
# Build the grammar model from the weights file path defined above.
grammar_model = molecule_vae.ZincGrammarModel(grammar_weights)



### Here are some example smiles strings.

The `encode` method takes a list of SMILES strings and maps each one (deterministically) to its posterior mean estimate.

This returns a matrix of shape `[num_inputs, 56]`: one 56-dimensional vector per input string.

In [4]:
# Example SMILES strings used as inputs to both models below.
# The last entry is written as a raw string because it contains a literal
# backslash; "\c" is not a valid Python escape sequence and triggers a warning
# on newer Python versions when written in a normal string literal.
smiles = ["C[C@@H]1CN(C(=O)c2cc(Br)cn2C)CC[C@H]1[NH3+]",
          "CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br",
          "O=C(Nc1nc[nH]n1)c1cccnc1Nc1cccc(F)c1",
          "Cc1c(/C=N/c2cc(Br)ccn2)c(O)n2c(nc3ccccc32)c1C#N",
          r"CSc1nncn1/N=C\c1cc(Cl)ccc1F"]

In [5]:
# Encode the example SMILES strings with the grammar model and show the
# shape of the result (one row per input string).
z1 = grammar_model.encode(smiles)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(z1.shape)

(5, 56)


### Decoding is stochastic.

Calling the `decode` method will draw a single sample from the decoding distribution.

In [6]:
# Draw one decoded sample per encoded input and print each result.
# Decoding is stochastic, so the printed strings can differ between runs.
for mol in grammar_model.decode(z1):
    print(mol)

C[C@@H]1CN(C(=O)c2cc(Br)cs2C)CC[C@H]1[NH3+]
CC[NH+](CC)[C@](C)(CC(C)(C)(c1)c)scc1BrOCO
O=C(Nc1nc[nH])n1c1cccnc1Nc1cccc(F)c1
Cc1c(/N=N/c2cc(Br)cs)c2c(=O)c3c(nc3ccc)cc422c1
CSc1ncnn1/CN\c1\cc(Cl)ccc1F


In [7]:
# Decode the same encoded inputs a second time; because decoding is
# stochastic, this draw need not match the previous one.
for mol in grammar_model.decode(z1):
    print(mol)

C[C@@H]1CN(C(=O)c2cc(Br)cs2C)CC[C@H]1[NH3+]
CC(=O)NC(C)[C@][C@H](C)[C@H](O)c1nccc1
O=C(Nc1nc[nH])n1c1cccnn1Nc1cccc(F)c1
Cc1c(C=N/c2cc(Br)csc2)c(C)c2c(nc3cccc)cc2c1=C
COc1ccsn1C=N/c1/cc(c)cc/c1Fc


## The character model has the same interface as the grammar model.

In [8]:
# Path to the trained character-VAE weights (.hdf5 file).
# Set the ZINC_CHAR_WEIGHTS environment variable to point at your own copy;
# the fallback is the notebook's original local path, which will not exist on
# other machines.
char_weights = os.environ.get(
    "ZINC_CHAR_WEIGHTS",
    "/Users/brookspaige/Downloads/zinc_str_vae_L56_E99.hdf5",
)

In [9]:
# Build the character model from the weights file path defined above.
char_model = molecule_vae.ZincCharacterModel(char_weights)

In [10]:
# Encode the same example SMILES strings with the character model and show
# the shape of the result (one row per input string).
z2 = char_model.encode(smiles)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(z2.shape)

(5, 56)


The character model also encodes each SMILES string as a 56-dimensional vector.

In [11]:
# Decode each encoded input with the character model and print the result.
for mol in char_model.decode(z2):
    print(mol)

C[C@@H]1CN(C(=O)c2cc(Br)cc2C)CC[C@H]1[NH3+]
CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br
O=C(Nc1nn[nH]n1)c1ccccc1Nc1cccc(F)c1
Cn1c(/N=C/c2cc(Br)cc22)c(C)n2c(nc3ccccc32)c1C#N
CCc1nnnc1/N=C/c1cc(Cl)ccc1F


In [12]:
# Decode the same encoded inputs a second time with the character model;
# the output need not match the previous call.
for mol in char_model.decode(z2):
    print(mol)

C[C@@H]1CN(C(=O)c2cc(Br)cn3C)CC[C@H]1[NH3+]
CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br
O=C(Nc1nc[nC]n1)c1cccnc1Nc1cccc(F)c1
Cc1c(/N=C/c2cc(Br)cc2Fcc(C)n2c(nc3ccccc32)c1C#N
CNc1nnnn1/N=C\c1cc(Cl)ccc1F
