In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context='talk', style='ticks',
        color_codes=True, rc={'legend.frameon': False})

import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers

# Ask TensorFlow to grow GPU memory on demand rather than reserving it up front.
# The memory-growth setting has to be the same on every visible GPU, so it is
# applied to each one; with no GPU present the loop body simply never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)
        
%matplotlib inline

2023-08-24 17:07:49.827816: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-24 17:07:59.092625: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nopt/slurm/current/lib:
2023-08-24 17:07:59.092804: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-08-24 17:07:59.577808: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-08-24 17:08:

In [2]:
# Load the preprocessor that converts SMILES strings into the graph-structured
# inputs the model consumes.
import nfp
from preprocess_inputs_cfc import preprocessor
# NOTE(review): presumably restores the fitted preprocessor state in place from
# the JSON file saved at training time (the return value is discarded) — confirm.
preprocessor.from_json('model_3_tfrecords_multi_halo_cfc/preprocessor.json')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#load model
class Slice(layers.Layer):
    """Keep the first half of the entries along axis 1 of a rank-3 tensor.

    Given an input of shape ``(batch, n, features)`` the layer returns
    ``inputs[:, :n // 2, :]``. Judging by the local name ``num_bonds``, axis 1
    is presumably the (padded) bond axis — confirm against the model definition.
    """

    def call(self, inputs):
        """Slice ``inputs`` down to the first ``n // 2`` entries along axis 1.

        Args:
            inputs: rank-3 tensor; the size of axis 1 is read at run time.

        Returns:
            Tensor with the same batch and feature dimensions as ``inputs``
            and an unknown static size along axis 1.
        """
        input_shape = tf.shape(inputs)
        # Use floor division so the slice size stays an integer tensor.
        # True division (`/`) on an integer tensor produces a floating-point
        # value, which is not a valid size for tf.slice unless it happens to
        # be cast back implicitly.
        num_bonds = input_shape[1] // 2
        output = tf.slice(inputs, [0, 0, 0], [-1, num_bonds, -1])
        # tf.slice with a dynamic size loses static shape information;
        # restore the known batch and feature dimensions.
        output.set_shape(self.compute_output_shape(inputs.shape))
        return output

    def compute_output_shape(self, input_shape):
        """Return the static output shape: axis 1 is unknown, the others are unchanged."""
        return [input_shape[0], None, input_shape[2]]
    
# Register the locally defined Slice layer next to nfp's custom objects so the
# saved model can be deserialized.
custom_objects = dict(nfp.custom_objects, Slice=Slice)

model = tf.keras.models.load_model(
    'model_3_multi_halo_cfc/best_model.hdf5',
    custom_objects=custom_objects,
)

2023-08-24 17:09:08.081061: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# SMILES strings of the compounds to predict on (propane and n-pentane).
test = np.array(['CCC', 'CCCCC'])

In [5]:
#make the test data graphs

def get_data(smiles):
    """Build the model inputs for one SMILES string and attach size counters.

    Calls the module-level ``preprocessor`` on ``smiles`` and adds two entries
    to the returned mapping: ``n_atom`` (length of its ``'atom'`` entry) and
    ``n_bond`` (length of its ``'bond'`` entry).
    """
    graph = preprocessor(smiles)
    atom_count = len(graph['atom'])
    bond_count = len(graph['bond'])
    graph['n_atom'] = atom_count
    graph['n_bond'] = bond_count
    return graph

# The per-molecule counters added by get_data are scalar int32 values and are
# padded with 0 like any other integer feature.
count_spec = tf.TensorSpec(shape=(), dtype=tf.int32, name=None)
count_padding = tf.constant(0, dtype="int32")

# Stream one feature dict per SMILES string, then pad the variable-length
# entries so that the molecules can be stacked into a single batch.
test_dataset = (
    tf.data.Dataset.from_generator(
        lambda: map(get_data, test),
        output_signature={
            **preprocessor.output_signature,
            'n_atom': count_spec,
            'n_bond': count_spec,
        },
    )
    .padded_batch(
        batch_size=1000,
        padding_values={
            **preprocessor.padding_values,
            'n_atom': count_padding,
            'n_bond': count_padding,
        },
    )
)

In [6]:
# Run the loaded model over the batched test graphs, showing a progress bar.
predicted_bdes = model.predict(test_dataset, verbose=1)

  inputs = self._flatten_to_reference_inputs(inputs)




In [7]:
# comparing model predicted numbers
# Flatten the prediction array into one row per (molecule, bond slot) with the
# two predicted targets as columns. The reshape and the indexing below treat
# `predicted_bdes` as (n_molecules, n_bond_slots, 2).
n_molecules, n_bond_slots = predicted_bdes.shape[0], predicted_bdes.shape[1]
molecule_position = np.repeat(np.arange(n_molecules), n_bond_slots)

df = pd.DataFrame(predicted_bdes.reshape(-1, 2), columns=['pred_bde', 'pred_bdfe'])
df.index = test[molecule_position]


def func(x):
    """Return a copy of one molecule's rows with a 0-based ``bond_index`` column.

    The input frame is not modified; mutating the group passed in by
    ``groupby.apply`` is discouraged by pandas.
    """
    return x.assign(bond_index=np.arange(len(x)))


pred_bdes = df.reset_index().rename(columns={'index': 'molecule'})
# Group by each molecule's position in `test` rather than by its SMILES string,
# so a SMILES that is listed more than once still gets its own 0..n-1 numbering.
pred_bdes = pred_bdes.groupby(molecule_position, group_keys=False).apply(func)
# Drop rows whose predictions are exactly zero — presumably the slots added by
# batch padding. Both targets are checked; the previous filter tested
# `pred_bde` twice and never looked at `pred_bdfe`.
pred_bdes = pred_bdes[(pred_bdes['pred_bde'] != 0.0) & (pred_bdes['pred_bdfe'] != 0.0)]

In [8]:
# Final table: one row per retained (molecule, bond_index) pair with its
# pred_bde and pred_bdfe values.
pred_bdes

Unnamed: 0,molecule,pred_bde,pred_bdfe,bond_index
0,CCC,88.825073,75.127815,0
1,CCC,88.825073,75.127815,1
2,CCC,100.196999,91.183891,2
3,CCC,100.196999,91.183891,3
4,CCC,100.196999,91.183891,4
5,CCC,96.638062,87.245567,5
6,CCC,96.638062,87.245567,6
7,CCC,100.196999,91.183891,7
8,CCC,100.196999,91.183891,8
9,CCC,100.196999,91.183891,9
