In [1]:
import pandas as pd
import numpy as np
import joblib

In [2]:
from nfp import GraphModel
from preprocessor_utils import ConcatGraphSequence
from prediction import preprocessor, model, predict_bdes

# Build a truncated model that shares the trained model's inputs but outputs
# the tensor fed into the third-from-last layer.
# NOTE(review): presumably this is the per-bond embedding -- confirm against
# the architecture defined in `prediction`.
embedding_tensor = model.layers[-3].input
embedding_model = GraphModel(model.inputs, [embedding_tensor])

Using TensorFlow backend.


In [3]:
# Query molecule, given as a SMILES string; passed to `predict_bdes` and the
# preprocessor below.
smiles = 'c1ccc2c(c1)CCc1ccccc1C2'
# Value of the `bond_index` column identifying the bond whose neighbors are
# looked up in the cells below.
bond_index = 21

In [4]:
# Table of predictions for the query molecule (one row per bond; see the
# `bond_index` lookup in a later cell).
bde_df = predict_bdes(smiles)

# Featurize the molecule, wrap the features in a non-shuffled batch sequence,
# and run them through the truncated model to obtain the embeddings.
inputs = preprocessor.predict((smiles,))
graph_batches = ConcatGraphSequence(inputs, batch_size=128, shuffle=False)
embeddings = embedding_model.predict_generator(graph_batches, verbose=0)

100%|██████████| 1/1 [00:00<00:00, 499.80it/s]
100%|██████████| 1/1 [00:00<00:00, 598.50it/s]


In [10]:
# Show the prediction record for the selected bond.
bde_by_bond = bde_df.set_index('bond_index')
bde_by_bond.loc[bond_index]

index                                                          4
molecule                                 c1ccc2c(c1)CCc1ccccc1C2
bond_type                                                    C-H
fragment1                                                    [H]
fragment2                               [CH]1Cc2ccccc2Cc2ccccc21
bde_pred                                                 85.9576
svg            <?xml version='1.0' encoding='iso-8859-1'?>\n<...
bde                                                          NaN
has_dft_bde                                                False
Name: 21, dtype: object

In [6]:
# Reference table of bonds; rows of this frame are selected by the neighbor
# indices returned from `nbrs_pipe` in a later cell.
bond_embed_df = pd.read_csv('model_files/20190604_bonds_for_neighbors.csv.gz')
# Pre-fitted pipeline used for the nearest-neighbor search.
# NOTE(review): joblib artifacts are pickle-based -- only load files from a
# trusted source.
nbrs_pipe = joblib.load('model_files/20190604_bond_embedding_nbrs.p.z')

def pipe_kneighbors(pipe, X):
    """Run a nearest-neighbor query through a fitted transformer pipeline.

    Every step before the last is applied to ``X`` via ``transform`` (in
    order), and the transformed data is handed to the final step's
    ``kneighbors`` method. Steps set to ``None`` or ``'passthrough'`` are
    skipped. For a two-step pipeline this is exactly "transform with the
    first step, query the second".

    Parameters
    ----------
    pipe : object
        Pipeline-like object exposing ``steps``, a list of
        ``(name, estimator)`` tuples whose last estimator implements
        ``kneighbors``.
    X : array-like
        Query points in the pipeline's input space.

    Returns
    -------
    Whatever the final estimator's ``kneighbors`` returns (the callers in
    this notebook unpack it as ``distances, indices``).
    """
    *transform_steps, final_step = pipe.steps

    Xt = X
    for step in transform_steps:
        transformer = step[-1]
        # Placeholder steps carry no transformation of their own.
        if transformer is None or (
                isinstance(transformer, str) and transformer == 'passthrough'):
            continue
        Xt = transformer.transform(Xt)

    return final_step[-1].kneighbors(Xt)



In [7]:
# Select the embedding row(s) whose bond index matches the bond of interest,
# then query the neighbor pipeline with them.
is_target_bond = inputs[0]['bond_indices'] == bond_index
target_embeddings = embeddings[is_target_bond]
distances, indices = pipe_kneighbors(nbrs_pipe, target_embeddings)

In [8]:
# Pick the neighbor rows by position from the NaN-free reference table and
# attach their distances in one chained expression. Using `.assign` avoids
# writing a column onto a frame derived via `.iloc`, which can trigger
# pandas' SettingWithCopyWarning.
# NOTE(review): assumes the neighbor index was fit on the rows of
# `bond_embed_df.dropna()` in this order -- confirm.
neighbor_df = (
    bond_embed_df
    .dropna()
    .iloc[indices.flatten()]
    .assign(distance=distances.flatten())
)

In [9]:
# Keep the first occurrence of each (molecule, fragment1, fragment2)
# combination, then order the remaining neighbors from nearest to farthest.
unique_neighbors = neighbor_df.drop_duplicates(['molecule', 'fragment1', 'fragment2'])
reduced_df = unique_neighbors.sort_values('distance')
reduced_df

Unnamed: 0,molecule,bond_index,bde,fragment1,fragment2,distance
665861,O=C1CCc2[nH]ncc2C1,14,86.61695,[H],O=C1C[CH]c2[nH]ncc2C1,3.406792
617876,O=C1OCC2=C1CCCC2,19,84.16088,[H],O=C1OCC2=C1CCC[CH]2,3.767896
198461,c1ncc2c(n1)CCCC2,14,87.016046,[H],[CH]1CCCc2cncnc21,4.105878
821904,CN1CCC(=CCO)CC1,22,85.57152,[H],CN1C[CH]C(=CCO)CC1,4.213024
417890,O=C1NCC2=C1CCCC2,15,85.969988,[H],O=C1NCC2=C1[CH]CCC2,4.239782
644415,c1cnc2c(c1)OCCC2,19,87.013535,[H],[CH]1CCOc2cccnc21,4.304628
432763,c1cc2c(cn1)CCCC2,21,86.195891,[H],[CH]1CCCc2cnccc21,4.335591
114330,C1CCc2nonc2C1,16,88.584191,[H],[CH]1CCCc2nonc21,4.729384
