# How to use the trained molecule VAE


In [1]:
import os
import sys

sys.path.insert(0, '..')

import molecule_vae

Using TensorFlow backend.


### You need to have a .hdf5 file with trained weights somewhere.

In [2]:
# Path to the trained grammar-VAE weights (.hdf5 file).
# Set the ZINC_GRAMMAR_WEIGHTS environment variable to point at your own copy;
# when it is unset, the path below is used unchanged.
grammar_weights = os.environ.get("ZINC_GRAMMAR_WEIGHTS",
                                 "/Users/brooks/Downloads/zinc_vae_L56.hdf5")

In [3]:
# Build the grammar model from the trained weights file given by `grammar_weights`.
grammar_model = molecule_vae.ZincGrammarModel(grammar_weights)



### Here are some example SMILES strings.

The `encode` method takes a list of SMILES strings and maps them (deterministically) to the posterior mean estimate.

This returns a matrix of dimension `[num_inputs, 56]`.

In [4]:
# Example SMILES strings used as inputs to both models below.
smiles = [
    "C[C@@H]1CN(C(=O)c2cc(Br)cn2C)CC[C@H]1[NH3+]",
    "CC[NH+](CC)[C@](C)(CC)[C@H](O)c1cscc1Br",
    "O=C(Nc1nc[nH]n1)c1cccnc1Nc1cccc(F)c1",
    "Cc1c(/C=N/c2cc(Br)ccn2)c(O)n2c(nc3ccccc32)c1C#N",
]

In [5]:
# Encode the example SMILES strings with the grammar model.
z1 = grammar_model.encode(smiles)
# print() with a single argument produces the same output on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(z1.shape)

(4, 56)


### Decoding is stochastic.

Calling the `decode` method will draw a single sample from the decoding distribution.

In [6]:
# Draw one sample from the decoder for each encoded point and print it.
# Decoding is stochastic, so the printed strings can differ between runs.
for mol in grammar_model.decode(z1):
    print(mol)  # function-call form works on both Python 2 and Python 3

C[C@@H]1CN(C(=O)c2cc(Br)cs2C)CC[C@H]1[NH3+]
CC[NH+](CC[C@](C))CCC[C@H](O)c1scc
O=C(Nc1nc[nH])n1c1cccnc1Nc1cccc(F)c1
Cc1c(C=N/c2cc(Br)Osc2)c(=C)c2c(nc3cccc)c32c1C#N


In [7]:
# Decode the same encoded points a second time to show that a new call
# draws a new sample (compare with the output of the previous cell).
for mol in grammar_model.decode(z1):
    print(mol)  # function-call form works on both Python 2 and Python 3

C[C@@H]1CN(C(=O)c2cc(Br)cn2C)CC[C@H]1[NH3+]
CC=[NH+](C)C[C@](C)(C)([C@H](O))c1sscc1Br
OC(Nc1nn[nH]c1c1)cccnn1Nc1cccc(F)c1F
cc1c(C#N\\c2cc(Br)Osc2)c(=O)n3c(nc3ccc)cc322c1C


## The character model has the same interface as the grammar model.

In [8]:
# Path to the trained character-VAE weights (.hdf5 file).
# Set the ZINC_CHAR_WEIGHTS environment variable to point at your own copy;
# when it is unset, the path below is used unchanged.
char_weights = os.environ.get("ZINC_CHAR_WEIGHTS",
                              "/Users/brooks/Downloads/zinc_str_vae_L56_E99.hdf5")

In [9]:
# Build the character model from the trained weights file given by `char_weights`.
char_model = molecule_vae.ZincCharacterModel(char_weights)

In [10]:
# Encode the same example SMILES strings with the character model.
z2 = char_model.encode(smiles)
# print() with a single argument produces the same output on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(z2.shape)

(4, 56)


### The character decoder has some "minor issues"

The charset is in the wrong order at the moment, so the decoded strings below are not valid SMILES. (At the very least, it seems like 's' should actually be ' ', and ' ' is probably 'C'...)

In [11]:
# Decode each encoded point with the character model and print the result.
for mol in char_model.decode(z2):
    print(mol)  # function-call form works on both Python 2 and Python 3

  +  S #12(#12(3# ( )-@)# (@)-()-)# (//-( +sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
 C  # 3)+@)@@+ ( +#))@/)# ## ((@@-(++sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
21 +  -# 1   -(   +(  3+   +- 44  ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
 3#   C3(    ( ))@)# ()-)+6 #3(# (((@@- ( -  +ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss


In [12]:
# Decode the same encoded points again with the character model
# (compare with the output of the previous cell).
for mol in char_model.decode(z2):
    print(mol)  # function-call form works on both Python 2 and Python 3

  +  # #12(#12(3# ( )-/)# (@)-()-)# (@/-  +sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
 C    3)+@))@++( +#)-+/)#  # ((@@-( ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
21 +3 -# F1  -( # +(+ 3+   ++(3+   sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
 3#   C3(  #3(  +@)#2()-)#S # (# # (@@# (@+  +ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss
