In [13]:
import numpy as np
import tensorflow as tf
from pinn import get_network
from pinn.utils import connect_dist_grad
from glob import glob
from ase.collections import g2
from pinn.io import load_qm9, sparse_batch
from pinn.optimizers import get
import time
import psutil


In [2]:
# Run on the CPU only. The CPU device is looked up by type rather than taken as
# element 0 of the unfiltered device list, whose ordering is not guaranteed here.
physical_devices = tf.config.list_physical_devices()
cpu_devices = tf.config.list_physical_devices('CPU')
tf.config.set_visible_devices(cpu_devices[:1], 'CPU')
# An empty list hides every GPU from the runtime.
tf.config.set_visible_devices([], 'GPU')

In [14]:
# Collect every .xyz file of the local QM9 copy and let PiNN build the
# 'train' / 'test' splits (weights 8 and 2 -- presumably an 8:2 ratio; confirm
# against pinn.io.load_qm9).
qm9_pattern = '/Users/miguelnavaharris/Project/QM9/*.xyz'
filelist = glob(qm9_pattern)
dataset = load_qm9(
    filelist,
    splits={'train': 8, 'test': 2},
)

In [15]:
def get_traintest_sets(batch_size):
    """Build the batched train and test pipelines from the global ``dataset``.

    The 'train' split is shuffled with a buffer of 20000 elements and then
    batched with ``sparse_batch(batch_size)``; the 'test' split is batched
    without shuffling.

    Returns:
        A ``(train_set, test_set, batch_size)`` tuple; ``batch_size`` is
        echoed back unchanged.
    """
    shuffled_train = dataset['train'].shuffle(20000)
    batched_train = shuffled_train.apply(sparse_batch(batch_size))
    batched_test = dataset['test'].apply(sparse_batch(batch_size))
    return (batched_train, batched_test, batch_size)

def get_dataset_size(dataset):
    """Return how many elements one full pass over ``dataset`` yields.

    The elements are counted while streaming instead of being collected into
    a list first, so the whole dataset is never held in memory at once.

    Args:
        dataset: any iterable (for a batched pipeline each element is a batch).

    Returns:
        The number of elements as an ``int`` (0 for an empty iterable).
    """
    return sum(1 for _ in dataset)
    
def preprocess_traintest_sets(train_set, test_set):
    """Run every batch of both sets through the global network's preprocessing.

    Each batch is passed to ``network.preprocess`` and the result to
    ``connect_dist_grad``. Nothing is returned or stored, so the pass only
    matters for whatever side effects those calls have (presumably building
    the preprocessing layers -- TODO confirm).

    Args:
        train_set: iterable of training batches (processed first).
        test_set: iterable of test batches (processed second).
    """
    for subset in (train_set, test_set):
        for raw_batch in subset:
            prepared = network.preprocess(raw_batch)
            connect_dist_grad(prepared)

def get_compiled_network():
    """Compile the global ``network`` for training and return it.

    The optimizer is built from ``params['optimizer']`` via ``get``; the loss
    is ``tf.keras.losses.mse`` and the tracked metrics are mean absolute error
    and mean squared error.

    Returns:
        The same global ``network`` object, now compiled.
    """
    tracked_metrics = [
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.MeanSquaredError(),
    ]
    # setting run_eagerly=True was a possible fix for memory leak
    network.compile(
        optimizer=get(params['optimizer']),
        loss=tf.keras.losses.mse,
        metrics=tracked_metrics,
    )
    return network


In [16]:
# import gc
# from tensorflow.keras import backend as k
# from tensorflow.keras.callbacks import Callback


# class ClearMemory(Callback):
#     def on_epoch_end(self, epoch, logs=None):
#         gc.collect()
#         k.clear_session()


In [18]:
class MoleculesPerSec(tf.keras.callbacks.Callback):
    """Keras callback logging per-batch throughput and memory use to TensorBoard.

    After every training batch three scalars are written through a summary
    writer created for ``logdir``: ``batch_moleculespersec``,
    ``batch_ram_usage_mb`` and ``batch_swap_usage_mb``.

    Args:
        no_batches: number of batches; only used to derive ``no_molecules``.
        batch_size: nominal number of molecules per batch. It is the numerator
            of the throughput figure, so a smaller final batch is over-counted.
        logdir: directory handed to ``tf.summary.create_file_writer``.
        tb_callback: optional object stored as ``self.tb_callback``. It used to
            be read from a notebook global of the same name, which made the
            class depend on cell execution order; it now defaults to ``None``.
    """

    def __init__(self, no_batches, batch_size, logdir, tb_callback=None):
        # Let the Keras base class initialise its own attributes.
        super().__init__()
        self.no_batches = no_batches
        self.batch_size = batch_size
        self.tb_callback = tb_callback
        self.no_molecules = self.no_batches * self.batch_size
        self.batch_number = 0
        self.global_step = 0
        self.batch_time_start = None
        self.writer = tf.summary.create_file_writer(logdir)

    def on_train_batch_begin(self, batch, logs=None):
        """Record the start time of the batch."""
        # perf_counter is monotonic, so the interval cannot go negative if the
        # wall clock is adjusted during training.
        self.batch_time_start = time.perf_counter()

    def on_train_batch_end(self, batch, logs=None):
        """Write throughput and memory scalars for the batch that just finished."""
        self.batch_number += 1
        batch_time = time.perf_counter() - self.batch_time_start
        # Guard against a zero interval on coarse timers.
        molecules_per_second = self.batch_size / max(batch_time, 1e-9)
        ram_usage_mb, swap_usage_mb = self.get_ram_and_swap_usage()

        with self.writer.as_default():
            tf.summary.scalar('batch_moleculespersec', molecules_per_second, step=self.global_step)
            tf.summary.scalar('batch_ram_usage_mb', ram_usage_mb, step=self.global_step)
            tf.summary.scalar('batch_swap_usage_mb', swap_usage_mb, step=self.global_step)

        self.global_step += 1

    def on_train_end(self, logs=None):
        """Flush pending summaries so the last batches reach the event file."""
        self.writer.flush()

    def get_ram_and_swap_usage(self):
        """Return ``(ram_used_mb, swap_used_mb)`` as reported by psutil.

        Both numbers are the ``used`` fields of ``psutil.virtual_memory()`` and
        ``psutil.swap_memory()`` converted from bytes to MiB.
        """
        bytes_per_mb = 1024 * 1024
        ram_usage_mb = psutil.virtual_memory().used / bytes_per_mb
        swap_usage_mb = psutil.swap_memory().used / bytes_per_mb
        return ram_usage_mb, swap_usage_mb

In [19]:
# Hyper-parameters: optimizer specification (consumed by `get`) and network
# specification (consumed by `get_network`).
params = {
    'optimizer': {
        'class_name': 'Adam',
        'config': {
            'learning_rate': {
                'class_name': 'ExponentialDecay',
                'config': {
                    'initial_learning_rate': 0.0003,
                    'decay_steps': 10000,
                    'decay_rate': 0.994,
                },
            },
            'clipnorm': 0.01,
        },
    },
    'network': {
        'name': 'PiNet',
        'params': {
            'depth': 4,
            'rc': 4.0,
            'atom_types': [1, 6, 7, 8, 9],
        },
    },
}

In [20]:
# Instantiate the model described by params['network'] (name 'PiNet' plus its
# constructor parameters). The functions defined above read this global.
network = get_network(params['network'])


In [21]:
# Build the batched pipelines and push every batch once through preprocessing.
train_set, test_set, batch_size = get_traintest_sets(batch_size=256)
preprocess_traintest_sets(train_set, test_set)
# Both sizes are numbers of *batches*, not molecules, because the sets are
# already batched when they are counted.
no_batches = get_dataset_size(train_set)
test_set_size = get_dataset_size(test_set)
# Keras documents steps_per_epoch as an integer. Use the counted number of
# training batches instead of the former float `107108 / batch_size`, which
# hard-coded an assumed training-set size.
steps_per_epoch = no_batches


In [22]:
# Compile the model and set up logging: one TensorBoard directory per batch
# size, shared by the stock TensorBoard callback and the throughput callback.
network = get_compiled_network()
log_root = '/Users/miguelnavaharris/New_Benchmarks/PiNet_TF2/'
logdir = log_root + str(batch_size)
tb_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq=1)
moleculespersec_callback = MoleculesPerSec(no_batches, batch_size, logdir)
callbacks = [
    tb_callback,
    moleculespersec_callback,
]

2023-03-31 01:04:12.842101: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2023-03-31 01:04:12.842223: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.
2023-03-31 01:04:12.844615: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.


In [24]:
# Train for a single epoch, validating on the test set and logging through the
# callbacks configured above.
network.fit(
    train_set,
    epochs=1,
    steps_per_epoch=steps_per_epoch,
    validation_data=test_set,
    callbacks=callbacks,
)

Shape mismatch in elems: Tensor("pi_net_1/preprocess_layer_1/cond/Shape:0", shape=(1,), dtype=int32)




Shape mismatch in elems: Tensor("pi_net_1/preprocess_layer_1/cond/Shape:0", shape=(1,), dtype=int32)


2023-03-31 01:04:21.652579: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


  1/418 [..............................] - ETA: 57:08 - loss: 209798.8125 - mean_absolute_error: 456.8308 - mean_squared_error: 209798.8125

2023-03-31 01:04:28.889059: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2023-03-31 01:04:28.889070: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.


  2/418 [..............................] - ETA: 8:13 - loss: 203106.3594 - mean_absolute_error: 448.9636 - mean_squared_error: 203106.3594 

2023-03-31 01:04:30.134519: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2023-03-31 01:04:30.143956: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.
2023-03-31 01:04:30.151980: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: /Users/miguelnavaharris/New_Benchmarks/PiNet_TF2/256/train/plugins/profile/2023_03_31_01_04_30
2023-03-31 01:04:30.157062: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to /Users/miguelnavaharris/New_Benchmarks/PiNet_TF2/256/train/plugins/profile/2023_03_31_01_04_30/ch-gouldmac7.ch.ic.ac.uk.trace.json.gz
2023-03-31 01:04:30.164981: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: /Users/miguelnavaharris/New_Benchmarks/PiNet_TF2/256/train/plugins/profile/2023_03_31_01_04_30
2023-03-31 01:04:30.165281: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzippe



Exception ignored in: <function _EagerDefinedFunctionDeleter.__del__ at 0x13f77d670>
Traceback (most recent call last):
  File "/Users/miguelnavaharris/miniforge3/envs/pinn/lib/python3.9/site-packages/tensorflow/python/eager/function.py", line 406, in __del__
  File "/Users/miguelnavaharris/miniforge3/envs/pinn/lib/python3.9/site-packages/tensorflow/python/eager/context.py", line 2443, in remove_function
  File "/Users/miguelnavaharris/miniforge3/envs/pinn/lib/python3.9/site-packages/tensorflow/python/eager/context.py", line 1208, in remove_function
KeyboardInterrupt: 


# ASE Calculator

In [12]:
def _generator(molecule):
        data = {'coord': molecule.positions,
                'ind_1': np.zeros([len(molecule), 1]),
                'elems': molecule.numbers}
        yield data

def predict_energy(molecule):
    '''Takes an ASE Atoms object and outputs PiNet's energy prediction.

    The molecule is wrapped in a one-element ``tf.data.Dataset`` built from
    ``_generator``, preprocessed by the global ``network``, evaluated with
    ``training=False``, and the network output is summed over all entries that
    share the same ``ind_1`` index.

    Args:
        molecule: the structure to evaluate (anything ``_generator`` accepts).

    Returns:
        The first element of the summed prediction, as a NumPy value.
    '''
    dtypes = {'coord': tf.float32, 'elems': tf.int32, 'ind_1': tf.int32}
    shapes = {'coord': [None, 3], 'elems': [None], 'ind_1': [None, 1]}

    pred_dataset = tf.data.Dataset.from_generator(lambda: _generator(molecule), dtypes, shapes)

    # The loop variable must not be called `molecule`: the lambda above closes
    # over that name, so rebinding it would change what the generator receives
    # if the dataset were iterated again.
    for tensors in pred_dataset:
        tensors = network.preprocess(tensors)
        pred = network(tensors, training=False)
        ind = tensors['ind_1']
        nbatch = tf.reduce_max(ind) + 1
        energy_prediction = tf.math.unsorted_segment_sum(pred, ind[:, 0], nbatch)
        energy_prediction_numpy = energy_prediction.numpy()[0]
    return energy_prediction_numpy

In [13]:
# Sanity check: show the feature dict produced for the g2 collection's 'CH4' entry.
next(_generator(g2['CH4']))

{'coord': array([[ 0.      ,  0.      ,  0.      ],
        [ 0.629118,  0.629118,  0.629118],
        [-0.629118, -0.629118,  0.629118],
        [ 0.629118, -0.629118, -0.629118],
        [-0.629118,  0.629118, -0.629118]]),
 'ind_1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'elems': array([6, 1, 1, 1, 1])}

In [14]:
# Predict the energy of the g2 collection's 'CH4' entry with the global network.
predict_energy(g2['CH4'])

-39.6263