In [1]:
# tensorflow backend
from os import environ
environ['KERAS_BACKEND'] = 'tensorflow'
# vae stuff
from chemvae.vae_utils import VAEUtils
from chemvae import mol_utils as mu
# import scientific py
import numpy as np
import pandas as pd
# rdkit stuff
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import PandasTools
# plotting stuff
import matplotlib.pyplot as plt
import matplotlib as mpl
from IPython.display import SVG, display
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
# Build the VAEUtils instance that the cells below use as the global `vae`.
# NOTE(review): `directory` is a relative path — presumably resolved against
# the kernel's working directory; confirm before running from elsewhere.
vae = VAEUtils(directory='chemical_vae/models/zinc_properties')

In [None]:
def process(molecule, noise=5.0, decode_attempts=100):
    """Round-trip one molecule through the VAE and report the results.

    The input is canonicalized with ``mu.canon_smiles``, one-hot encoded and
    mapped to a latent vector using the notebook-level ``vae`` object. The
    latent vector is then handed to ``vae.z_to_smiles`` (decoding) and to
    ``vae.predict_prop_Z`` (property prediction). All results are emitted
    with ``print``/``display``; nothing is returned.

    Parameters
    ----------
    molecule : str
        SMILES string of the molecule to process.
    noise : float, optional
        Forwarded to ``vae.z_to_smiles`` as ``noise_norm``. Defaults to 5.0,
        the value that used to be hard-coded here.
    decode_attempts : int, optional
        Forwarded to ``vae.z_to_smiles`` as ``decode_attempts``. Defaults to
        100, the value that used to be hard-coded here.

    Returns
    -------
    None
    """
    smiles = mu.canon_smiles(molecule)

    hot = vae.smiles_to_hot(smiles, canonize_smiles=True)
    encoded = vae.encode(hot)

    print('{:30s} : {}'.format('Input', smiles))
    print('{:30s} : {} with norm {:.3f}'.format(
        'Encoded (Z representation)', encoded.shape, np.linalg.norm(encoded)))

    print('{:30s} : '.format('Decoded (several attempts)'))
    decoded = vae.z_to_smiles(encoded, decode_attempts=decode_attempts, noise_norm=noise)
    print('Found {:d} unique mols, out of {:d}'.format(
        len(set(decoded['smiles'])), sum(decoded['count'])))
    print('SMILES\n', decoded.smiles)

    # Only draw the grid when there is something to draw: an empty result (or
    # one without a 'mol' column) would otherwise raise inside
    # FrameToGridImage and abort before the property prediction is shown.
    # NOTE(review): assumes z_to_smiles can return such a frame when no
    # decode attempt yields a valid molecule — confirm against chemvae.
    if len(decoded) > 0 and 'mol' in decoded.columns:
        display(PandasTools.FrameToGridImage(
            decoded, column='mol', legendsCol='smiles', molsPerRow=5))
    else:
        print('No valid molecules were decoded; skipping grid image.')

    # predict_prop_Z returns an indexable result; only its first entry is shown.
    prop = vae.predict_prop_Z(encoded)[0]
    print('{:30s} : {}'.format('Properties (qed,SAS,logP)', prop))

In [None]:
# Run the encode / decode / property-prediction report for this example SMILES.
process('Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1')

In [None]:
# Run the encode / decode / property-prediction report for this example SMILES.
process('CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C')

In [None]:
# Run the encode / decode / property-prediction report for this example SMILES.
process('COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1')