In [1]:
import numpy as np
import tensorflow as tf
from pinn import get_network
from pinn.utils import connect_dist_grad, atomic_dress
from glob import glob
from ase.collections import g2
from pinn.io import load_qm9, sparse_batch
from pinn.optimizers import get

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
# Gather the QM9 .xyz files, then let load_qm9 divide them into the 'train'
# and 'test' subsets (weights 8 and 2 — presumably an 8:2 ratio; confirm
# against the pinn.io.load_qm9 documentation).
filelist = glob('/Users/miguelnavaharris/Project/QM9/*.xyz')
dataset = load_qm9(filelist, splits=dict(train=8, test=2))

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-05-21 18:16:29.685571: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-21 18:16:29.685696: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
def get_traintest_sets(dataset=None, buffer_size=None, batch_size=None):
    """Build the batched train and test pipelines from a split dataset.

    Args:
        dataset: mapping with 'train' and 'test' entries; each entry must
            offer ``shuffle`` / ``apply`` (e.g. a ``tf.data.Dataset``).
        buffer_size: shuffle buffer size used for the training split only.
        batch_size: batch size handed to ``sparse_batch`` for both splits.

    Returns:
        Tuple ``(train_set, test_set, batch_size)``; the batch size is passed
        through unchanged so callers can unpack it next to the datasets.
    """
    # Only the training split is shuffled; the test split keeps its order.
    shuffled_train = dataset['train'].shuffle(buffer_size)
    train_set = shuffled_train.apply(sparse_batch(batch_size))

    held_out = dataset['test']
    test_set = held_out.apply(sparse_batch(batch_size))

    return train_set, test_set, batch_size

### Normal params

In [4]:
# Baseline hyperparameters: optimizer spec, network spec, and the energy
# dress/scale settings read by the training and prediction code.
params = {
    'optimizer': {
        'class_name': 'Adam',
        'config': {
            'learning_rate': {
                'class_name': 'ExponentialDecay',
                'config': {
                    'initial_learning_rate': 0.0003,
                    'decay_steps': 10000,
                    'decay_rate': 0.994,
                },
            },
            'clipnorm': 0.01,
        },
    },
    'network': {
        'name': 'PiNet',
        'params': {'depth': 4, 'rc': 4.0, 'atom_types': [1, 6, 7, 8, 9]},
    },
    'e_dress': {},
    'e_scale': 1.0,
}

In [5]:
# Instantiate the network described by params['network'] and print its repr
# as a quick check that construction succeeded.
network = get_network(params['network'])
print(network)

<pinn.networks.pinet.PiNet object at 0x157a78250>


In [6]:
def preprocess_traintest_sets(train_set, test_set, network=None):
    """Run the network's preprocessing over every batch of both splits.

    Each batch of ``train_set`` and then of ``test_set`` is passed through
    ``network.preprocess`` followed by ``connect_dist_grad``. The processed
    batches are not collected and nothing is returned, so this serves as a
    full pass over the data (e.g. for timing) rather than a transformation.
    """
    for split in (train_set, test_set):
        for raw_batch in split:
            processed = network.preprocess(raw_batch)
            connect_dist_grad(processed)

In [7]:
# import time

# batch_sizes = [256, 512]
# for batch_size in batch_sizes:
#     network = get_network(params['network'])
#     start_time = time.time()
#     train_set, test_set = get_traintest_sets(batch_size)
#     preprocess_traintest_sets(train_set, test_set)
#     print(f'Done in {time.time() - start_time} s')


In [8]:
# Get and preprocess data
import time

print('Beginning preprocessing')
start_time_preprocess = time.time()
train_set, test_set, batch_size = get_traintest_sets(dataset=dataset, buffer_size=1000, batch_size=256)
preprocess_traintest_sets(train_set, test_set, network=network)

# Measure once so the printed and the logged durations agree. The original
# logged `time.time() - start_time`, but `start_time` is not defined here.
preprocess_seconds = time.time() - start_time_preprocess
print(f'Preprocessing time: {preprocess_seconds} s')

# `train_summary_writer` is only created by the batch-size sweep cell further
# down, so on a fresh kernel it does not exist yet. Log to TensorBoard only
# when a writer is available instead of failing with a NameError.
summary_writer = globals().get('train_summary_writer')
if summary_writer is not None:
    with summary_writer.as_default():
        tf.summary.scalar('Preprocessing time', preprocess_seconds, step=batch_size)

Beginning preprocessing
Preprocessing time: 0.09306597709655762 s


2022-05-21 18:16:40.149384: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-21 18:16:40.149633: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [9]:
def _segment_total(network, batch, training):
    """Run ``network`` on ``batch`` and sum its output per ``ind_1`` segment.

    Rows of the network output are grouped by the first column of
    ``batch['ind_1']`` (presumably the structure index of each atom — confirm
    against the data pipeline), yielding one total per segment.
    """
    pred = network(batch, training=training)
    ind = batch['ind_1']
    nbatch = tf.reduce_max(ind) + 1
    return tf.math.unsorted_segment_sum(pred, ind[:, 0], nbatch)


def _scaled_reference(batch, params, dtype):
    """Return ``batch['e_data']`` with the atomic dress removed and scaled."""
    e_data = batch['e_data']
    e_dress = params.get('e_dress')
    if e_dress:
        e_data -= atomic_dress(batch, e_dress, dtype=dtype)
    # Scale whether or not a dress is configured: predict_energy always
    # divides by e_scale, so the training target must always be multiplied.
    e_data *= params.get('e_scale', 1.0)
    return e_data


def train_and_evaluate_network(network=None, params=None, train_set=None, test_set=None, batch_size=None, epochs=1):
    """Train ``network`` with a custom loop and validate after every epoch.

    Args:
        network: callable Keras-style model exposing ``trainable_weights`` and
            ``save_weights``; called as ``network(batch, training=...)``.
        params: dict with an ``'optimizer'`` spec (passed to
            ``pinn.optimizers.get``) and optional ``'e_dress'`` /
            ``'e_scale'`` entries (treated as no dress / 1.0 when absent).
        train_set: iterable of batches; each batch needs ``'ind_1'`` and
            ``'e_data'`` entries.
        test_set: iterable of batches used for validation, same layout.
        batch_size: used only for the "Seen so far" progress message; the
            message is skipped when it is None.
        epochs: number of passes over ``train_set``.

    Returns:
        None. Progress and RMSE values are printed.

    Side effects:
        Writes ``initial_weights.h5`` in the working directory, holding the
        weights as they are before the first optimizer update.
    """
    import time

    # Instantiate an optimizer
    from pinn.optimizers import get
    optimizer = get(params['optimizer'])
    # Define a loss function
    loss_fn = tf.keras.losses.mse
    # Define metrics
    train_loss_metric = tf.keras.metrics.MeanSquaredError()
    val_loss_metric = tf.keras.metrics.MeanSquaredError()
    train_err_metric = tf.keras.metrics.RootMeanSquaredError()
    val_err_metric = tf.keras.metrics.RootMeanSquaredError()

    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch,))
        start_time_epoch = time.time()
        # Time of the previous progress report; the first report therefore
        # measures the time elapsed since the start of the epoch.
        last_log_time = start_time_epoch

        # Iterate over the batches of the dataset.
        for step, batch in enumerate(train_set):

            # Record the forward pass on the tape to enable
            # auto-differentiation of the loss.
            with tf.GradientTape() as tape:
                pred = _segment_total(network, batch, training=True)
                e_data = _scaled_reference(batch, params, pred.dtype)

                # Compute the loss value for this minibatch.
                loss_value = loss_fn(e_data, pred)

            # Gradients of the loss with respect to the trainable variables.
            grads = tape.gradient(loss_value, network.trainable_weights)

            # Save the starting weights so runs with different batch sizes can
            # be reset to the same state. This must happen after the first
            # forward pass (the weights exist by then) but before the first
            # update, and only once, so the file really holds initial weights.
            if epoch == 0 and step == 0:
                network.save_weights('initial_weights.h5')

            # Run one step of gradient descent.
            optimizer.apply_gradients(zip(grads, network.trainable_weights))

            # Update the loss and error metrics
            train_loss_metric.update_state(e_data, pred)
            train_err_metric.update_state(e_data, pred)

            # Report the first batch, then every 20th batch.
            if step == 0:
                print(f"Initial loss (for one batch): {float(loss_value)}")
                if batch_size is not None:
                    print(f"Seen so far: {((step + 1) * batch_size)} molecules")

                # with train_summary_writer.as_default():
                #     tf.summary.scalar('Training loss', train_loss_metric.result(), step=step)
                #     tf.summary.scalar('Training error', train_err_metric.result(), step=step)

            elif step % 20 == 0:
                print(f"Training loss (for one batch) at step {step}: {float(loss_value)}")
                if batch_size is not None:
                    print(f"Seen so far: {((step + 1) * batch_size)} molecules")
                now = time.time()
                print(f'Training time for 20 batches: {now - last_log_time} s')

                # Record tensorboard metrics
                # with train_summary_writer.as_default():
                #     tf.summary.scalar('Training loss', train_loss_metric.result(), step=step)
                #     tf.summary.scalar('Training error', train_err_metric.result(), step=step)
                #     tf.summary.scalar('Training time/20 batches', now - last_log_time, step=step)

                last_log_time = now

        print(f'Training time for epoch {epoch + 1}: {(time.time() - start_time_epoch)} s')

        # Display metrics at the end of each epoch
        train_err = train_err_metric.result()
        print(f"Training err over epoch {(epoch + 1)}: {float(train_err)}")

        # Reset training metrics so the next epoch starts from scratch.
        train_err_metric.reset_states()
        train_loss_metric.reset_states()

        # Run a validation loop at the end of each epoch
        print(f'Starting validation for epoch {(epoch + 1)}')
        for step, batch in enumerate(test_set):
            val_pred = _segment_total(network, batch, training=False)
            # Use the validation prediction's dtype; the training `pred` may
            # not exist at all when the training set is empty.
            e_data = _scaled_reference(batch, params, val_pred.dtype)

            # Update val metrics
            val_loss_metric.update_state(e_data, val_pred)
            val_err_metric.update_state(e_data, val_pred)

            # Record Tensorboard metrics
            # if step % 5 == 0:
            #     with test_summary_writer.as_default():
            #         tf.summary.scalar('Validation loss', val_loss_metric.result(), step=step)
            #         tf.summary.scalar('Validation error', val_err_metric.result(), step=step)

        val_err = val_err_metric.result()
        val_err_metric.reset_states()
        val_loss_metric.reset_states()
        print(f"Validation err for epoch {(epoch + 1)}: {float(val_err)}")
        print(f"Time taken for epoch {epoch + 1}: {(time.time() - start_time_epoch)} s")




In [10]:
# Train the baseline network for one epoch on the batched splits built earlier.
train_and_evaluate_network(
    network=network,
    params=params,
    train_set=train_set,
    test_set=test_set,
    batch_size=batch_size,
    epochs=1,
)


Start of epoch 0
Training time for epoch 1: 0.01108098030090332 s
Training err over epoch 1: 0.0
Starting validation for epoch 1
Validation err for epoch 1: 0.0
Time taken for epoch 1: 0.0794680118560791 s


In [10]:
import time
import gc

# 64, 128, 256, 300, 350, 400, 450, 500,
batch_sizes = [256, 256, 512, 550, 600, 1024, 2048]
for batch_size in batch_sizes:

    # Set up summary writers (one log directory per batch size)
    train_log_dir = '/Users/miguelnavaharris/Project/tensorboard_logs/batch_sizes/train/' + str(batch_size)
    test_log_dir = '/Users/miguelnavaharris/Project/tensorboard_logs/batch_sizes/test/' + str(batch_size)
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # Fresh network per run. A separate name is used so that the `del` below
    # does not remove the global `network` that other cells rely on.
    sweep_network = get_network(params['network'])

    # Re-batch the data with the batch size under test; without this every
    # run would reuse whatever batching an earlier cell happened to set up.
    sweep_train_set, sweep_test_set, _ = get_traintest_sets(dataset=dataset, buffer_size=1000, batch_size=batch_size)

    start_time = time.time()
    # All inputs must be passed explicitly: they default to None.
    train_and_evaluate_network(
        network=sweep_network,
        params=params,
        train_set=sweep_train_set,
        test_set=sweep_test_set,
        batch_size=batch_size,
        epochs=1,
    )
    with train_summary_writer.as_default():
        tf.summary.scalar('Run time', (time.time() - start_time), step=batch_size)

    # Release this run's model and pipelines before starting the next one.
    del sweep_network, sweep_train_set, sweep_test_set
    gc.collect()
    tf.keras.backend.clear_session()
    
    

    

Beginning preprocessing


2022-05-09 18:29:17.070097: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-09 18:29:17.070984: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Preprocessing time: 29.446027994155884 s

Start of epoch 0
Initial loss (for one batch): 163182.375
Seen so far: 256 molecules
Training loss (for one batch) at step 20: 14883.09375
Seen so far: 5376 molecules
Training time for 20 batches: 9.639225959777832 s
Training loss (for one batch) at step 40: 9566.8857421875
Seen so far: 10496 molecules
Training time for 20 batches: 9.715719938278198 s
Training loss (for one batch) at step 60: 3928.34423828125
Seen so far: 15616 molecules
Training time for 20 batches: 9.832310914993286 s
Training loss (for one batch) at step 80: 2708.810546875
Seen so far: 20736 molecules
Training time for 20 batches: 10.863059043884277 s
Training loss (for one batch) at step 100: 1443.9049072265625
Seen so far: 25856 molecules
Training time for 20 batches: 11.278488159179688 s
Training loss (for one batch) at step 120: 1278.47265625
Seen so far: 30976 molecules
Training time for 20 batches: 10.747613906860352 s
Training loss (for one batch) at step 140: 837.166

KeyboardInterrupt: 

In [2]:
# Load the TensorBoard notebook extension and open the dashboard on the
# directory the batch-size sweep writes its logs to.
%load_ext tensorboard
%tensorboard --logdir /Users/miguelnavaharris/Project/tensorboard_logs/batch_sizes

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 41693), started 0:00:14 ago. (Use '!kill 41693' to kill it.)

In [22]:
def _generator(molecule):
        data = {'coord': molecule.positions,
                'ind_1': np.zeros([len(molecule), 1]),
                'elems': molecule.numbers}
        yield data

def predict_energy(molecule, network=None, params=None, verbose=True):
    '''Takes an ASE Atoms object and outputs PiNet's energy prediction.

    Args:
        molecule: object exposing ``positions``, ``numbers`` and ``len()``
            (e.g. an ASE ``Atoms``); it is fed through ``_generator``.
        network: model exposing ``preprocess`` and callable as
            ``network(tensors, training=False)``.
        params: dict with optional ``'e_scale'`` and ``'e_dress'`` entries
            (treated as 1.0 / no dress when absent).
        verbose: when True (default) print the tensors before and after
            preprocessing; pass False to silence this debug output.

    Returns:
        The first element of the per-structure energy as a NumPy scalar.

    Raises:
        ValueError: if ``network`` or ``params`` is not supplied.
    '''
    if network is None or params is None:
        raise ValueError('predict_energy requires both `network` and `params`')

    dtype = tf.float32
    dtypes = {'coord': dtype, 'elems': tf.int32, 'ind_1': tf.int32}
    shapes = {'coord': [None, 3], 'elems': [None], 'ind_1': [None, 1]}

    pred_dataset = tf.data.Dataset.from_generator(lambda: _generator(molecule), dtypes, shapes)

    # _generator yields exactly one sample. Take it under its own name: the
    # lambda above closes over `molecule`, so rebinding that name (as a loop
    # variable would) changes what the generator sees if it is invoked again.
    tensors = next(iter(pred_dataset))
    if verbose:
        print('unprocessed molecule:', tensors)
    tensors = network.preprocess(tensors)
    if verbose:
        print('preprocessed molecule:', tensors)

    pred = network(tensors, training=False)
    ind = tensors['ind_1']
    nbatch = tf.reduce_max(ind) + 1

    # Undo the training transform (e_data - dress) * e_scale in reverse order.
    # Sum to a per-structure energy first: training subtracts atomic_dress
    # from per-structure energies, so adding it to the unsummed output would
    # count the dress once per row of `pred`.
    energy_prediction = tf.math.unsorted_segment_sum(pred, ind[:, 0], nbatch)
    energy_prediction = energy_prediction / params.get('e_scale', 1.0)
    e_dress = params.get('e_dress')
    if e_dress:
        energy_prediction += atomic_dress(tensors, e_dress, dtype=energy_prediction.dtype)

    return energy_prediction.numpy()[0]

In [23]:
# Show the raw sample dict that _generator produces for the 'CH4' entry of
# the ASE g2 collection.
next(_generator(g2['CH4']))

{'coord': array([[ 0.      ,  0.      ,  0.      ],
        [ 0.629118,  0.629118,  0.629118],
        [-0.629118, -0.629118,  0.629118],
        [ 0.629118, -0.629118, -0.629118],
        [-0.629118,  0.629118, -0.629118]]),
 'ind_1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'elems': array([6, 1, 1, 1, 1])}

In [24]:
# Check the coordinate array shape: one row per atom, three columns.
g2['CH4'].positions.shape

(5, 3)

In [None]:
# Build an ASE Atoms object from the formula string 'H3'. `atoms` is not used
# by any other cell visible in this notebook.
from ase import Atoms
atoms = Atoms('H3')

In [25]:
# Predict the energy of the g2 'CH4' structure with the current network/params.
predict_energy(g2['CH4'], network=network, params=params)

unprocessed molecule: {'coord': <tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[ 0.      ,  0.      ,  0.      ],
       [ 0.629118,  0.629118,  0.629118],
       [-0.629118, -0.629118,  0.629118],
       [ 0.629118, -0.629118, -0.629118],
       [-0.629118,  0.629118, -0.629118]], dtype=float32)>, 'elems': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([6, 1, 1, 1, 1], dtype=int32)>, 'ind_1': <tf.Tensor: shape=(5, 1), dtype=int32, numpy=
array([[0],
       [0],
       [0],
       [0],
       [0]], dtype=int32)>}
preprocessed molecule: {'coord': <tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[ 0.      ,  0.      ,  0.      ],
       [ 0.629118,  0.629118,  0.629118],
       [-0.629118, -0.629118,  0.629118],
       [ 0.629118, -0.629118, -0.629118],
       [-0.629118,  0.629118, -0.629118]], dtype=float32)>, 'elems': <tf.Tensor: shape=(5,), dtype=int32, numpy=array([6, 1, 1, 1, 1], dtype=int32)>, 'ind_1': <tf.Tensor: shape=(5, 1), dtype=int32, numpy=
array([[0],
     

-69.87514

# Trying shifted softplus activation function

In [7]:
from keras import backend as K

def shifted_softplus(x):
    """Shifted softplus activation, ``log(0.5 * exp(x) + 0.5)``.

    This equals ``softplus(x) - log(2)``, so the function is zero at
    ``x = 0``. It is evaluated through the backend's ``softplus`` instead of
    calling ``exp`` directly: ``exp(x)`` overflows to ``inf`` for large ``x``,
    which would make the activation (and its gradient) non-finite.

    Args:
        x: input tensor (or value accepted by the Keras backend).

    Returns:
        Tensor of the same shape as ``x``.
    """
    import math

    return K.softplus(x) - math.log(2.0)

In [8]:
# Same hyperparameters as the baseline run, but with the shifted softplus as
# the network activation ('act'). The 'e_dress' and 'e_scale' entries are
# included because the training and prediction code index them directly;
# the values match the baseline params.
params = {
    'optimizer': {
        'class_name': 'Adam',
        'config': {
            'learning_rate': {
                'class_name': 'ExponentialDecay',
                'config': {
                    'initial_learning_rate': 0.0003,
                    'decay_steps': 10000,
                    'decay_rate': 0.994,
                },
            },
            'clipnorm': 0.01,
        },
    },
    'network': {
        'name': 'PiNet',
        'params': {
            'depth': 4,
            'rc': 4.0,
            'atom_types': [1, 6, 7, 8, 9],
            'act': shifted_softplus,
        },
    },
    'e_dress': {},
    'e_scale': 1.0,
}

In [9]:
# Rebuild the network from the updated params (now carrying the custom
# activation) and print its repr.
network = get_network(params['network'])
print(network)

<pinn.networks.pinet.PiNet object at 0x1561291c0>


In [10]:
# Every argument of train_and_evaluate_network defaults to None, so the
# network, params and datasets must be passed explicitly.
train_and_evaluate_network(
    network=network,
    params=params,
    train_set=train_set,
    test_set=test_set,
    batch_size=batch_size,
    epochs=1,
)

Beginning preprocessing


2022-05-02 23:23:43.055585: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-05-02 23:23:43.055804: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Preprocessing time: 30.652596950531006 s

Start of epoch 0
Training loss (for one batch) at step 0: 167856.28125
Seen so far: 256 molecules
Training time for 100 batches: 0.7871158123016357 s
Training loss (for one batch) at step 100: 16509.40234375
Seen so far: 25856 molecules
Training time for 100 batches: 55.31004500389099 s
Training loss (for one batch) at step 200: 9751.76953125
Seen so far: 51456 molecules
Training time for 100 batches: 73.94835209846497 s
Training loss (for one batch) at step 300: 1401.6837158203125
Seen so far: 77056 molecules
Training time for 100 batches: 79.28515601158142 s
Training loss (for one batch) at step 400: 1084.839111328125
Seen so far: 102656 molecules
Training time for 100 batches: 90.64542889595032 s
Training time for epoch 1: 317.0185399055481 s
Calculating training error for epoch 1


KeyboardInterrupt: 

In [12]:
# Print the layer-by-layer summary (output shapes and parameter counts).
network.summary()

Model: "pi_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
preprocess_layer (Preprocess multiple                  0         
_________________________________________________________________
polynomial_basis (Polynomial multiple                  0         
_________________________________________________________________
res_update (ResUpdate)       multiple                  80        
_________________________________________________________________
res_update_1 (ResUpdate)     multiple                  0         
_________________________________________________________________
res_update_2 (ResUpdate)     multiple                  0         
_________________________________________________________________
res_update_3 (ResUpdate)     multiple                  0         
_________________________________________________________________
gc_block (GCBlock)           multiple                  1776 

In [13]:
# Map each layer's position to the layer object. The mapping is deliberately
# not named `dict`: that would shadow the builtin for the rest of the kernel
# session and break any later `dict(...)` call.
layers_by_index = dict(enumerate(network.layers))
layers_by_index

{0: <pinn.networks.pinet.PreprocessLayer at 0x138ea7580>,
 1: <pinn.layers.PolynomialBasis at 0x138ea7dc0>,
 2: <pinn.networks.pinet.ResUpdate at 0x138ea72b0>,
 3: <pinn.networks.pinet.ResUpdate at 0x138e80520>,
 4: <pinn.networks.pinet.ResUpdate at 0x138e80430>,
 5: <pinn.networks.pinet.ResUpdate at 0x138e80f40>,
 6: <pinn.networks.pinet.GCBlock at 0x138e80e80>,
 7: <pinn.networks.pinet.GCBlock at 0x12fc6ed60>,
 8: <pinn.networks.pinet.GCBlock at 0x138f7bdc0>,
 9: <pinn.networks.pinet.GCBlock at 0x138f8e400>,
 10: <pinn.networks.pinet.OutLayer at 0x138ea7250>,
 11: <pinn.networks.pinet.OutLayer at 0x138f95c70>,
 12: <pinn.networks.pinet.OutLayer at 0x138f9edf0>,
 13: <pinn.networks.pinet.OutLayer at 0x138fa6eb0>,
 14: <pinn.layers.ANNOutput at 0x12fc6e490>}

In [12]:
# Confirm which activation function the network ended up with.
network.activation

<function __main__.shifted_softplus(x)>

In [12]:
# Stand-alone running-mean metric named 'train_loss' (float32).
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)

In [None]:
# Read the metric's current value.
train_loss.result()