In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from spektral.data import DisjointLoader
from spektral.datasets import QM9
from spektral.layers import ECCConv, GlobalSumPool
import time
from datetime import datetime
import psutil
import os
import time
import pandas as pd
from mendeleev import element
import csv

In [2]:
# Run this cell to disable GPU
physical_devices = tf.config.list_physical_devices()
tf.config.set_visible_devices(physical_devices[0], 'CPU')
tf.config.set_visible_devices([], 'GPU')

In [3]:
################################################################################
# Config (learning rate decay)
################################################################################
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=1e-3,
            decay_steps=10000,
            decay_rate=0.994)
# learning_rate = 1e-3
epochs = 1 # Number of training epochs
batch_size = 256  # Batch size

In [4]:
################################################################################
# Load data
################################################################################
dataset = QM9(amount=None)  # Set amount=None to train on whole dataset

# Set labels to U0
for i in range(len(dataset)):
    dataset[i].y = dataset[i].y[10]

# Parameters
F = dataset.n_node_features  # Dimension of node features
S = dataset.n_edge_features  # Dimension of edge features
n_out = dataset.n_labels  # Dimension of the target

# Train/test split
idxs = np.random.permutation(len(dataset))
split = int(0.8 * len(dataset))
idx_tr, idx_te = np.split(idxs, [split])
dataset_tr, dataset_te = dataset[idx_tr], dataset[idx_te]

loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_te = DisjointLoader(dataset_te, batch_size=batch_size, epochs=epochs)

num_molecules_tr = loader_tr.steps_per_epoch * batch_size

Loading QM9 dataset.
Reading SDF


100%|█████████████████████████████████| 133885/133885 [01:03<00:00, 2093.75it/s]


In [6]:
################################################################################
# Build model
################################################################################
class Net(Model):
    def __init__(self):
        super().__init__()
        self.conv1 = ECCConv(32, activation="relu")
        self.conv2 = ECCConv(32, activation="relu")
        self.global_pool = GlobalSumPool()
        self.dense = Dense(n_out)

    def call(self, inputs):
        x, a, e, i = inputs
        x = self.conv1([x, a, e])
        x = self.conv2([x, a, e])
        output = self.global_pool([x, i])
        output = self.dense(output)

        return output

model = Net()
optimizer = Adam(lr_schedule)
loss_fn = MeanSquaredError()

In [7]:
# class CustomEarlyStopping(EarlyStopping):
#     def __init__(self, patience=100, **kwargs):
#         super().__init__(patience=patience, **kwargs)
#         self.val_losses = []
#         self.best = float('inf') if self.monitor_op == np.less else -float('inf')


#     def on_epoch_end(self, epoch, logs=None):
#         val_loss = logs.get("val_loss")
#         self.val_losses.append(val_loss)
#         super().on_epoch_end(epoch, logs)

#         if self.wait >= self.patience:
#             if self.stopped_epoch > 0 and self.restore_best_weights:
#                 self.model.set_weights(self.best_weights)

#             self.model.stop_training = True
#             print(f"Early stopping triggered. No improvement in validation loss for the past {self.patience} epochs.")


In [8]:
train_loss_metric = tf.keras.metrics.MeanSquaredError()
train_mae_metric = tf.keras.metrics.MeanAbsoluteError()
train_rmse_metric = tf.keras.metrics.RootMeanSquaredError()
val_loss_metric = tf.keras.metrics.MeanSquaredError()
val_mae_metric = tf.keras.metrics.MeanAbsoluteError()
val_rmse_metric = tf.keras.metrics.RootMeanSquaredError()
train_log_dir = '/Users/miguelnavaharris/New_Benchmarks/Prediction_accuracy/M1/Spektral_predvstrue/' + str(batch_size) + '/train'
test_log_dir = '/Users/miguelnavaharris/New_Benchmarks/Prediction_accuracy/M1/Spektral_predvstrue/' + str(batch_size) + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)
# early_stopping = CustomEarlyStopping(patience=2, restore_best_weights=True)

In [9]:
# Get the current process
current_process = psutil.Process(os.getpid())
# early_stopping.set_model(model)

@tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions) + sum(model.losses)
        train_mae_metric.update_state(target, predictions)
        train_rmse_metric.update_state(target, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

def train_and_evaluate():
    step = loss = total_batches = 0
    epoch = 1
    val_losses = []
    start_epoch_time = time.time()
    print(f'Starting epoch {epoch}')
    for batch in loader_tr:
        step += 1
        total_batches += 1
        loss += train_step(*batch)


        if total_batches % 20 == 0 and total_batches != 0:


            # Get the memory information of the current process
            process_memory_info = current_process.memory_info()
            ram_usage = process_memory_info.rss / (1024 ** 2)  # Convert to MB
            swap_info = psutil.swap_memory()
            swap_usage = swap_info.used / (1024 ** 2)  # Convert to MB

            with train_summary_writer.as_default():
                tf.summary.scalar('batch_Loss', (loss / step), step=total_batches)
                tf.summary.scalar('batch_MAE', train_mae_metric.result(), step=total_batches)
                tf.summary.scalar('batch_RMSE', train_rmse_metric.result(), step=total_batches)
                tf.summary.scalar('RAM_usage_MB', ram_usage, step=total_batches)
                tf.summary.scalar('Swap_usage_MB', swap_usage, step=total_batches)
              


        if step == loader_tr.steps_per_epoch:
            step = 0
            epoch_train_time = time.time() - start_epoch_time
            molecules_per_second = num_molecules_tr / epoch_train_time
            print("molecules per second:", molecules_per_second)
            print("Loss: {}".format(loss / loader_tr.steps_per_epoch))
            print(f"MAE: {float(train_mae_metric.result())}")
            print(f"RMSE: {float(train_rmse_metric.result())}")
            with train_summary_writer.as_default():
                tf.summary.scalar(f'epoch_Loss', (loss / loader_tr.steps_per_epoch), step=total_batches)
                tf.summary.scalar(f'epoch_MAE', train_mae_metric.result(), step=total_batches)
                tf.summary.scalar(f'epoch_RMSE', train_rmse_metric.result(), step=total_batches)
                tf.summary.scalar(f'epoch_moleculespersec', molecules_per_second, step=total_batches)

            train_mae_metric.reset_states()
            train_rmse_metric.reset_states()
            loss = 0

            ################################################################################
            # Evaluate model
            ################################################################################
            print("Testing model")
            val_step = val_loss = 0
            for batch in loader_te:
                val_step += 1
                inputs, target = batch
                predictions = model(inputs, training=False)
                val_loss += loss_fn(target, predictions)

                val_mae_metric.update_state(target, predictions)
                val_rmse_metric.update_state(target, predictions)
            
                if val_step == loader_te.steps_per_epoch:
                    val_step = 0
                    val_loss /= loader_te.steps_per_epoch
                    val_losses.append(val_loss)
                    print("Validation loss: {}".format(val_loss))
                    print('Validation MAE:', float(val_mae_metric.result()))
                    print('Validation RMSE:', float(val_rmse_metric.result()))
                    with test_summary_writer.as_default():
                        tf.summary.scalar('epoch_validation_Loss', val_loss, step=total_batches)
                        tf.summary.scalar('epoch_validation_MAE', val_mae_metric.result(), step=total_batches)
                        tf.summary.scalar('epoch_validation_RMSE', val_rmse_metric.result(), step=total_batches)
            
                        
                    # logs = {"val_loss": val_loss, "val_mae": val_mae_metric.result()}
                    # early_stopping.on_epoch_end(epoch - 1, logs)
                    # if early_stopping.restore_best_weights:
                    #     model.set_weights(early_stopping.best_weights)
                    # if model.stop_training:
                    #     return f"Early stopping triggered. Training stopped at epoch {epoch}"

                    val_loss = 0
                    val_mae_metric.reset_states()
                    val_rmse_metric.reset_states()
                    break

            epoch += 1
            if epoch <= epochs:
                print(f'Starting epoch {epoch}')
            start_epoch_time = time.time()



In [10]:
train_and_evaluate()

Starting epoch 1


  np.random.shuffle(a)
  return py_builtins.overload_of(f)(*args)
2023-04-15 15:18:28.659200: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


molecules per second: 6909.385929008912
Loss: 61708.85546875
MAE: 220.37619018554688
RMSE: 249.21556091308594
Testing model




Validation loss: 11796.859375
Validation MAE: 85.43062591552734
Validation RMSE: 108.90721893310547
Starting epoch 2
molecules per second: 10030.991253729051
Loss: 8621.796875
MAE: 74.0617446899414
RMSE: 92.91338348388672
Testing model
Validation loss: 6818.890625
Validation MAE: 66.14544677734375
Validation RMSE: 82.68148040771484
Starting epoch 3
molecules per second: 10136.127091822469
Loss: 5525.3466796875
MAE: 59.38648986816406
RMSE: 74.4217300415039
Testing model
Validation loss: 4398.1728515625
Validation MAE: 52.76784133911133
Validation RMSE: 66.37173461914062
Starting epoch 4
molecules per second: 10112.904642527019
Loss: 3519.953857421875
MAE: 47.2893180847168
RMSE: 59.39862060546875
Testing model
Validation loss: 2837.042724609375
Validation MAE: 41.620384216308594
Validation RMSE: 53.25392150878906
Starting epoch 5
molecules per second: 10108.075047439223
Loss: 2236.09912109375
MAE: 37.57269287109375
RMSE: 47.34202194213867
Testing model
Validation loss: 1824.600341796875


# Predictions

In [26]:
model = tf.keras.models.load_model('/Users/miguelnavaharris/New_Benchmarks/NVIDIA/Spektral/14_epochs/test')



In [27]:
def convert_to_elem_nums(atomsonehot):
    elem_nums = [1, 6, 7, 8, 9]
    elems = []
    for i in range(len(atomsonehot)):
        elems.append(elem_nums[np.nonzero(atomsonehot[:,:5][i])[0][0]])
    return np.array(elems)

loader_te = DisjointLoader(dataset_te, batch_size=batch_size, epochs=epochs)
all_pred_energies = []
all_true_energies = []
num_heavy_atoms = []
molecule_masses = []
for batch in loader_te:
    inputs, target = batch
    predictions = model(inputs, training=False)
    predictions = predictions.numpy()
    all_pred_energies.extend(predictions[:,0].tolist())
    all_true_energies.extend(target[:,0].tolist())
    x, a, e, i = inputs
    atomsonehot = x
    elems = convert_to_elem_nums(atomsonehot)
    molecule_indices = i

    unique_elements = np.unique(elems)
    element_masses = {int(elem_num): element(int(elem_num)).mass for elem_num in unique_elements}

    for idx in range(len(target)):
        molecule_atoms = elems[molecule_indices == idx]
        num_heavy = np.sum(molecule_atoms > 1)
        num_heavy_atoms.append(num_heavy)

        molecule_mass = sum(element_masses[int(elem_num)] for elem_num in molecule_atoms)
        molecule_masses.append(molecule_mass)
        
results = pd.DataFrame({
    'true_energy': all_pred_energies,
    'pred_energy': all_true_energies,
    'num_heavy_atoms': num_heavy_atoms,
    'molecule_mass': molecule_masses,
})

  np.random.shuffle(a)


In [29]:
results.to_csv('/Users/miguelnavaharris/New_Benchmarks/NVIDIA/Spektral/14_epochs/test/predvstrue.csv')

In [28]:
mae = np.mean(np.abs(results['true_energy'] - results['pred_energy']))
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 1.8714138595301442
