In [1]:
import numpy as np
import tensorflow as tf
from pinn import get_network
from pinn.utils import connect_dist_grad
from glob import glob
from ase.collections import g2
from pinn.io import load_qm9, sparse_batch
from pinn.optimizers import get
import psutil
import os
import time
from pinn.layers import PolynomialBasis, GaussianBasis, ANNOutput
from pinn.networks.pinet import OutLayer, GCBlock, ResUpdate, PreprocessLayer
from pinn.utils import atomic_dress, get_atomic_dress
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
import gc
import csv
from tensorflow.keras import backend as K

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
# glob() returns paths in arbitrary, filesystem-dependent order; sort before
# slicing so the 100-file subset (and therefore the split) is reproducible.
filelist = sorted(glob('/Users/miguelnavaharris/Project/QM9/*.xyz'))[:100]
# The splits mapping is handed to load_qm9; the result is indexed by
# 'train' / 'test' further down in this notebook.
dataset = load_qm9(filelist, splits={'train':8, 'test':2})
# Atomic dress for atomic numbers 1, 6, 7, 8, 9 computed from the training
# split. `dress` is stored in `params` as 'e_dress'; `error` is not used by
# any code shown in this notebook.
dress, error = get_atomic_dress(dataset['train'],[1,6,7,8,9])

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-04-19 19:10:24.331005: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-04-19 19:10:24.331097: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2023-04-19 19:10:24.348323: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2023-04-19 19:10:24.348440: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [3]:
def get_traintest_sets(dataset,batch_size, buffer_size=20000):
    """Build batched train/test pipelines from a mapping of dataset splits.

    Args:
        dataset: mapping with 'train' and 'test' entries; each entry must
            support ``cache()``, ``shuffle()`` and ``apply()``.
        batch_size: forwarded to ``sparse_batch`` for both splits.
        buffer_size: shuffle buffer size, applied to the train split only.

    Returns:
        tuple: ``(train_set, test_set, batch_size)``.
    """
    # Train split: cache -> shuffle -> sparse batch.
    cached_train = dataset['train'].cache()
    shuffled_train = cached_train.shuffle(buffer_size)
    train_batches = shuffled_train.apply(sparse_batch(batch_size))

    # Test split: cache -> sparse batch (no shuffling).
    cached_test = dataset['test'].cache()
    test_batches = cached_test.apply(sparse_batch(batch_size))

    return train_batches, test_batches, batch_size

def get_dataset_size(dataset):
    """Return the number of elements produced by iterating over ``dataset``.

    The elements are counted as they stream past instead of being collected
    into a list first, so only one element is held at a time.

    Args:
        dataset: any iterable.

    Returns:
        int: the number of elements yielded (0 for an empty iterable).
    """
    return sum(1 for _ in dataset)
    
def preprocess_traintest_sets(train_set, test_set, network):
    """Run every batch of both splits through the network's preprocessing.

    Each batch is passed to ``network.preprocess`` and the result is handed
    to ``connect_dist_grad``. The processed batches are not stored or
    returned; the function is called only for its side effects.

    Args:
        train_set: iterable of training batches (consumed first).
        test_set: iterable of test batches (consumed second).
        network: object exposing a ``preprocess(batch)`` callable.
    """
    for split in (train_set, test_set):
        for raw_batch in split:
            processed = network.preprocess(raw_batch)
            connect_dist_grad(processed)

def get_compiled_network(network):
    """Compile ``network`` for training and hand it back.

    The optimizer is built from the module-level ``params['optimizer']``
    spec via ``get``; the loss is ``tf.keras.losses.mse`` and both mean
    absolute error and mean squared error are tracked as metrics.

    Args:
        network: model exposing a Keras-style ``compile`` method.

    Returns:
        The same ``network`` object, after ``compile`` has been called.
    """
    # Dict literal keeps the original construction order:
    # optimizer first, then loss, then the metric objects.
    compile_kwargs = {
        'optimizer': get(params['optimizer']),
        'loss': tf.keras.losses.mse,
        'metrics': [
            tf.keras.metrics.MeanAbsoluteError(),
            tf.keras.metrics.MeanSquaredError(),
        ],
    }
    # setting run_eagerly=True was a possible fix for memory leak
    network.compile(**compile_kwargs)
    return network


In [4]:
class PiNet(tf.keras.Model):
    """Keras model for the PiNet neural network

    The forward pass preprocesses the input with ``PreprocessLayer`` and
    runs ``depth`` graph-convolution blocks, each followed by an output
    layer and a residual update. ``train_step`` and ``test_step`` are
    overridden: they sum the model output per structure (segments given by
    ``tensors['ind_1'][:, 0]``) and compare it with ``tensors['e_data']``
    after applying the dress and scale read from the module-level ``params``
    dict (``params['params']['e_dress']`` and ``params['params']['e_scale']``).
    ``params`` must therefore be defined before the model is fitted or
    evaluated.

    Args:
        atom_types (list): elements for the one-hot embedding.
        rc (float): cutoff radius.
        cutoff_type (string): cutoff function to use with the basis.
        basis_type (string): type of basis function to use,
            can be "polynomial" or "gaussian".
        n_basis (int): number of basis functions to use.
        gamma (float or array): width of gaussian function for gaussian basis.
        center (float or array): center of gaussian function for gaussian basis.
        pp_nodes (list): number of nodes for pp layer (the first block is
            built with an empty pp list).
        pi_nodes (list): number of nodes for pi layer.
        ii_nodes (list): number of nodes for ii layer.
        out_nodes (list): number of nodes for the output layers.
        out_units (int): second argument passed to every ``OutLayer``.
        out_pool: argument passed to ``ANNOutput``.
        act (string): activation function to use.
        depth (int): number of interaction blocks.

    Raises:
        ValueError: if ``basis_type`` is neither "polynomial" nor "gaussian".
    """
    # NOTE: the list defaults below are shared between calls; this class only
    # forwards them to the layer constructors and never mutates them itself.
    def __init__(self, atom_types=[1, 6, 7, 8, 9],  rc=4.0, cutoff_type='f1',
                 basis_type='polynomial', n_basis=4, gamma=3.0, center=None,
                 pp_nodes=[16, 16], pi_nodes=[16, 16], ii_nodes=[16, 16],
                 out_nodes=[16, 16], out_units=1, out_pool=False,
                 act='tanh', depth=4):

        super(PiNet, self).__init__()

        self.depth = depth
        self.preprocess = PreprocessLayer(atom_types, rc)
        self.activation = act

        if basis_type == 'polynomial':
            self.basis_fn = PolynomialBasis(cutoff_type, rc, n_basis)
        elif basis_type == 'gaussian':
            self.basis_fn = GaussianBasis(cutoff_type, rc, n_basis, gamma, center)
        else:
            # Fail at construction time instead of leaving basis_fn undefined
            # and hitting an AttributeError on the first forward pass.
            raise ValueError(
                f"Unknown basis_type {basis_type!r}; "
                "expected 'polynomial' or 'gaussian'")

        self.res_update = [ResUpdate() for i in range(depth)]
        # The first block is built without pp nodes; the remaining depth-1
        # blocks use pp_nodes.
        self.gc_blocks = [GCBlock([], pi_nodes, ii_nodes, activation=act)]
        self.gc_blocks += [GCBlock(pp_nodes, pi_nodes, ii_nodes, activation=act)
                           for i in range(depth-1)]
        self.out_layers = [OutLayer(out_nodes, out_units) for i in range(depth)]
        self.ann_output =  ANNOutput(out_pool)

    def _pooled_pred_and_target(self, tensors, training):
        """Compute the per-structure prediction and the matching target.

        Shared by ``train_step`` and ``test_step`` so both use exactly the
        same pooling, dressing and scaling.

        Args:
            tensors: one batch from the dataset; must provide 'ind_1' and
                'e_data'.
            training (bool): forwarded to the model call.

        Returns:
            tuple: ``(e_data, pred)`` where ``pred`` is the model output
            summed over the segments ``ind_1[:, 0]`` and ``e_data`` is the
            reference value with the dress subtracted (when one is
            configured) and multiplied by the configured scale.
        """
        pred = self(tensors, training=training)
        ind = tensors['ind_1']
        # Number of segments = largest index in ind_1 plus one.
        nbatch = tf.reduce_max(ind)+1
        pred = tf.math.unsorted_segment_sum(pred, ind[:, 0], nbatch)

        model_params = params['params']
        e_data = tensors['e_data']
        if model_params['e_dress']:
            e_data = e_data - atomic_dress(tensors, model_params['e_dress'], dtype=pred.dtype)
        e_data = e_data * model_params['e_scale']
        return e_data, pred

    def train_step(self, tensors):
        """Run one optimisation step on a batch and return the metric values.

        Args:
            tensors: one batch from the training dataset.

        Returns:
            dict: metric name -> current metric result.
        """
        with tf.GradientTape() as tape:
            e_data, pred = self._pooled_pred_and_target(tensors, training=True)
            loss = self.compiled_loss(e_data, pred, regularization_losses=self.losses)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(e_data, pred)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, tensors):
        """Evaluate one batch without updating weights.

        Args:
            tensors: one batch from the evaluation dataset.

        Returns:
            dict: metric name -> current metric result.
        """
        e_data, pred = self._pooled_pred_and_target(tensors, training=False)
        # The return value is not needed here; the call is kept so the
        # compiled loss is still evaluated for this batch (per the Keras
        # custom test_step pattern this updates the tracked loss).
        self.compiled_loss(e_data, pred, regularization_losses=self.losses)
        self.compiled_metrics.update_state(e_data, pred)
        return {m.name: m.result() for m in self.metrics}

    def call(self, tensors):
        """Forward pass.

        Args:
            tensors: input data (nested tensor from dataset).

        Returns:
            The result of ``ANNOutput`` applied to ``ind_1`` and the output
            accumulated over all blocks.
        """
        tensors = self.preprocess(tensors)
        # Insert a singleton axis at position 1 of the basis values.
        basis = self.basis_fn(tensors['dist'])[:, None, :]
        output = 0.0
        for i in range(self.depth):
            prop = self.gc_blocks[i]([tensors['ind_2'], tensors['prop'], basis])
            # Each output layer receives the output accumulated so far.
            output = self.out_layers[i]([tensors['ind_1'], prop, output])
            tensors['prop'] = self.res_update[i]([tensors['prop'], prop])

        output = self.ann_output([tensors['ind_1'], output])
        return output

In [5]:
params = {
    # Optimizer spec; get_compiled_network passes it to get().
    'optimizer': {
        'class_name': 'Adam',
        'config': {
            'learning_rate': {
                'class_name': 'ExponentialDecay',
                'config': {
                    'initial_learning_rate': 1e-3,
                    'decay_steps': 10000,
                    'decay_rate': 0.994,
                },
            },
            # Sibling of 'learning_rate' inside the Adam config.
            'clipnorm': 0.01,
        },
    },
    # PiNet.train_step / test_step read 'e_dress' and 'e_scale' from here;
    # 'learning_rate' is not read by any code shown in this notebook.
    'params': {
        'learning_rate': 1e-3, # Relatively large learning rate
        'e_scale': 627.5, # Here we scale the model to kcal/mol
        'e_dress': dress,
    },
}

In [6]:
def create_and_train_model(batch_size=20, epochs=2, n_files=100,
                           data_pattern='/Users/miguelnavaharris/Project/QM9/*.xyz'):
    """Load a QM9 subset, build a fresh PiNet and train it.

    The energy dress and scale used during training come from the
    module-level ``params`` dict (read inside ``PiNet.train_step`` and
    ``PiNet.test_step``), so no dress is recomputed here.

    Args:
        batch_size (int): batch size passed to ``get_traintest_sets``.
        epochs (int): number of epochs passed to ``fit``.
        n_files (int): number of files kept from the sorted match list.
        data_pattern (str): glob pattern selecting the input ``.xyz`` files.

    Returns:
        The compiled and fitted ``PiNet`` instance.
    """
    # glob() gives no ordering guarantee; sort so the same n_files are
    # selected on every run and on every machine.
    filelist = sorted(glob(data_pattern))[:n_files]
    dataset = load_qm9(filelist, splits={'train':8, 'test':2})

    network = PiNet()
    train_set, test_set, batch_size = get_traintest_sets(dataset, batch_size, buffer_size=100)
    # Push every batch through the preprocessing layer once before compiling.
    preprocess_traintest_sets(train_set, test_set, network)
    network = get_compiled_network(network)
    network.fit(train_set, epochs=epochs, validation_data=test_set)

    return network


def measure_memory(n_iterations=10,
                   csv_path='/Users/miguelnavaharris/New_Benchmarks/Garbage_collection/withclearsession/PiNet_TF2_M1_GPU_100mols_gc_memory_usage.csv'):
    """Train a model repeatedly and log process memory after each run.

    Before the first run and after every run, the resident set size of the
    current process and the system swap usage are printed and appended to a
    CSV file (columns: iteration, memory in MB, swap in MB).

    Args:
        n_iterations (int): how many times ``create_and_train_model`` is run.
        csv_path (str): destination CSV file; it is overwritten, and its
            parent directory is created when missing.
    """
    bytes_per_mb = 1024 * 1024
    process = psutil.Process()
    initial_memory = process.memory_info().rss / bytes_per_mb
    initial_swap = psutil.swap_memory().used / bytes_per_mb
    print(f"Memory usage before training: {initial_memory:.3f} MB")
    print(f"Swap usage before training: {initial_swap:.3f} MB")

    out_dir = os.path.dirname(csv_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Iteration', 'Memory Usage (MB)', 'Swap Usage (MB)'])
        csv_writer.writerow([0, initial_memory, initial_swap])
        # Flush after every row: if the process dies while memory grows,
        # the measurements taken so far are still on disk.
        csvfile.flush()

        for i in range(n_iterations):
            model = create_and_train_model()
            del model
            # Reset the Keras session first, then collect, so objects
            # released by the reset are reclaimed before measuring.
            K.clear_session()
            gc.collect()
            memory_after_iteration = process.memory_info().rss / bytes_per_mb
            swap_after_iteration = psutil.swap_memory().used / bytes_per_mb
            print(f"Memory usage after iteration {i + 1}: {memory_after_iteration:.3f} MB")
            print(f"Swap usage after iteration {i + 1}: {swap_after_iteration:.3f} MB")
            csv_writer.writerow([i + 1, memory_after_iteration, swap_after_iteration])
            csvfile.flush()

# Run the memory benchmark; the log that follows is its captured output.
measure_memory()


Memory usage before training: 378.875 MB
Swap usage before training: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.




Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:26.803403: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 3s 301ms/step - loss: 2198.6025 - mean_absolute_error: 37.6069 - mean_squared_error: 2198.6025Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:28.886076: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 1: 1588.453 MB
Swap usage after iteration 1: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:32.222285: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 3s 356ms/step - loss: 1763.6272 - mean_absolute_error: 34.7459 - mean_squared_error: 1763.6271Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:34.388926: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 2: 2173.734 MB
Swap usage after iteration 2: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:38.266338: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 3s 378ms/step - loss: 2274.0688 - mean_absolute_error: 37.9192 - mean_squared_error: 2274.0686Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:40.591995: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 3: 2677.578 MB
Swap usage after iteration 3: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:44.445728: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 3s 459ms/step - loss: 2083.6619 - mean_absolute_error: 36.8743 - mean_squared_error: 2083.6619Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:47.137809: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 4: 3137.781 MB
Swap usage after iteration 4: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:51.132726: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 4s 546ms/step - loss: 2342.3860 - mean_absolute_error: 38.8772 - mean_squared_error: 2342.3860Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:54.036883: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 5: 3643.234 MB
Swap usage after iteration 5: 967.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:10:58.414852: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 4s 627ms/step - loss: 1764.7405 - mean_absolute_error: 34.3938 - mean_squared_error: 1764.7405Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:01.849494: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 6: 4048.516 MB
Swap usage after iteration 6: 959.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:06.111881: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 4s 647ms/step - loss: 2202.9617 - mean_absolute_error: 37.7333 - mean_squared_error: 2202.9617Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:09.822427: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 7: 4580.891 MB
Swap usage after iteration 7: 959.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:14.498326: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 4s 681ms/step - loss: 2138.7087 - mean_absolute_error: 38.2270 - mean_squared_error: 2138.7087Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:18.042405: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 8: 5064.484 MB
Swap usage after iteration 8: 959.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:22.865271: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 4s 635ms/step - loss: 2361.7888 - mean_absolute_error: 40.7609 - mean_squared_error: 2361.7888Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:26.534106: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 9: 5544.891 MB
Swap usage after iteration 9: 959.438 MB
Epoch 1/2
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)
Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:31.274796: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


      4/Unknown - 5s 658ms/step - loss: 2113.5964 - mean_absolute_error: 38.1081 - mean_squared_error: 2113.5964Shape mismatch in elems: Tensor("pi_net/preprocess_layer/cond/Shape:0", shape=(1,), dtype=int32)


2023-04-19 19:11:35.460165: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2
Memory usage after iteration 10: 5429.234 MB
Swap usage after iteration 10: 959.438 MB
