In [1]:
# --- Imports -----------------------------------------------------------------
# Each name is imported exactly once (the earlier version imported
# `seed_torch` and `load_model_lightning` twice). The relative import order of
# the modules is kept as it was.
import json
import torch
import pandas as pd
import seaborn as sns

from torch.utils.data import DataLoader
import pytorch_lightning as pl
from torchmetrics import R2Score

from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import (
    LearningRateMonitor,
    EarlyStopping,
    ModelCheckpoint,
)

from bondnet.utils import seed_torch
from bondnet.model.training_utils import (
    LogParameters,
    load_model_lightning,
)
from bondnet.data.datamodule import BondNetLightningDataModule

# --- Global runtime setup ----------------------------------------------------
# Plot styling for any seaborn figures produced later in the notebook.
sns.set(style="whitegrid")

# NOTE(review): seed_torch is defined in bondnet.utils and is not visible here;
# presumably it seeds the RNGs used for training -- confirm which ones.
seed_torch()
torch.set_float32_matmul_precision("high")  # might have to disable on older GPUs
# Share tensors between processes through the file system.
torch.multiprocessing.set_sharing_strategy("file_system")

In [2]:
# List the dataset file so a missing or mistyped path shows up before any loading is attempted.
! ls ~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim.json

/home/santiagovargas/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim.json


In [3]:
#df_100 = pd.read_json("~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_100.json")
#df_1000 = pd.read_json("~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_1000.json")
#df_10000 = pd.read_json("~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_10000.json")
#df_full = pd.read_json("~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim.json")
#df_test = pd.read_json("~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_test_learning.json")

In [4]:
# Experiment configuration. Later cells pass the whole dict to
# BondNetLightningDataModule, pass config["model"] to load_model_lightning, and
# read individual "optim"/"dataset"/"model" entries when building the Trainer.
#
# Each key appears exactly once: "transfer" used to be listed twice under
# "model", where the second entry silently overwrote the first.
config = {
    "model": {
        "conv": "GatedGCNConv",
        "readout": "Set2SetThenCat",
        "initializer": "kaiming",
        "augment": False,
        "classifier": False,
        "classif_categories": 3,
        "cat_weights": [1.0, 1.0, 1.0],
        "embedding_size": 8,
        # NOTE(review): the Trainer cell reads "max_epochs", not "epochs";
        # whether "epochs" is consumed anywhere else is not visible here.
        "epochs": 2000,
        "extra_features": {
            "atom": ["grad_norm", "esp_total", "Hamiltonian_K"],
            "bond": [
                "grad_norm",
                "ellip_e_dens",
                "ave_loc_ion_E",
                "e_loc_func",
                "esp_total",
                "Hamiltonian_K",
            ],
            "global": ["dHrxn298"],
            "mappings": ["indices_qtaim"],
        },
        "extra_info": [],
        "feature_filter": False,
        "filter_species": [3, 6],
        "fc_activation": "ReLU",
        "fc_batch_norm": True,
        "fc_dropout": 0.2,
        "fc_hidden_size_1": 512,
        "fc_hidden_size_shape": "flat",
        "fc_num_layers": 3,
        "gated_activation": "ReLU",
        "gated_batch_norm": True,
        "gated_dropout": 0.1,
        "gated_graph_norm": False,
        "gated_hidden_size_1": 128,
        "gated_hidden_size_shape": "cone",
        "gated_num_fc_layers": 2,
        "gated_num_layers": 1,
        "gated_residual": False,
        "learning_rate": 0.001,
        # Forwarded to pl.Trainer(precision=...); "16"/"32" are cast to int
        # in a later cell, any other value is passed through as-is.
        "precision": "bf16-mixed",
        "loss": "mse",
        "num_lstm_iters": 13,
        "num_lstm_layers": 1,
        "restore": False,
        "weight_decay": 0.000001,
        "max_epochs": 1000,
        "max_epochs_transfer": 10,
        "transfer": False,
        "filter_outliers": True,
        "filter_sparse_rxns": False,
        "freeze": False,
        "target_var": "dE0",
        "target_var_transfer": "dHrxn298",
        "reactant_only": False,
    },
    "optim": {
        "batch_size": 64,
        "num_devices": 1,
        # NOTE(review): forwarded verbatim to pl.Trainer(num_nodes=...), whose
        # documented default is 1 -- confirm that 0 is intended.
        "num_nodes": 0,
        "num_workers": 0,
        "val_size": 0.1,
        "test_size": 0.1,
        "strategy": "auto",
        "gradient_clip_val": 1000.0,
        "accumulate_grad_batches": 1,
        "pin_memory": False,
        "persistent_workers": False,
    },
    "dataset": {
        "log_save_dir": "./model_log/",
        "lmdb_dir": "./lmdb_data/",
        "target_var": "dE0",
        "overwrite": True,
    },
    "dataset_transfer": {
        "log_save_dir": "./model_log_transfer/",
        "lmdb_dir": "./lmdb_data_transfer/",
        "target_var": "dHrxn298",
        "overwrite": True,
    },
}

In [5]:
# Reaction dataset to train on, and the directory later handed to
# load_model_lightning as `load_dir`.
dataset_loc = "~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_1000.json"
log_save_dir = "./100/"

# Purely numeric precision strings become ints; anything else
# (e.g. "bf16-mixed") is left exactly as configured.
if config["model"]["precision"] in ("16", "32"):
    config["model"]["precision"] = int(config["model"]["precision"])

# Model-side switches overridden for this run.
config["model"].update(filter_sparse_rxns=False, debug=False)
extra_keys = config["model"]["extra_features"]

# Both dataset sections read from the same file.
config["dataset"]["data_dir"] = dataset_loc
config["dataset_transfer"]["data_dir"] = dataset_loc

# Build the data module; prepare_data() returns the feature sizes and names
# that the next cell writes back into the config.
dm = BondNetLightningDataModule(config)
feature_size, feature_names = dm.prepare_data()

fg_list None
reading file from: ~/dev/bondnet/bondnet/dataset/20231110_parse/green/2022/ccsdtf_121423_qtaim_1000.json
rxn raw len: 1000
Program finished in 4.738968463003403 seconds
.............failures.............
reactions len: 996
valid ind len: 996
bond break fail count: 		0
default fail count: 		4
sdf map fail count: 		0
product bond fail count: 	0
about to group and organize
number of grouped reactions: 996
---> generating grouped reactions


grouped reactions: 100%|██████████| 996/996 [00:17<00:00, 55.48it/s]


--> generating labels


labeled reactions: 100%|██████████| 996/996 [00:00<00:00, 26935.10it/s]


features: 2309
labels: 996
molecules: 2309
constructing graphs & features....


mol graphs: 100%|██████████| 2309/2309 [00:05<00:00, 447.64it/s]


number of graphs valid: 2309
number of graphs: 2309


In [6]:
# Feed the sizes/names reported by the data module back into the config so
# the model is built with matching input dimensions.
config["dataset"]["feature_names"] = feature_names
config["model"]["in_feats"] = feature_size

# Dump every top-level config section between two identical banner lines.
print(f"{'>' * 40}config_settings{'<' * 40}")
for section, settings in config.items():
    print(f"{str(section).ljust(20)}\t\t\t{str(settings).ljust(20)}")
print(f"{'>' * 40}config_settings{'<' * 40}")

# Build the model from the model section, using log_save_dir as load_dir.
model = load_model_lightning(config["model"], load_dir=log_save_dir)

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>config_settings<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
model               			{'conv': 'GatedGCNConv', 'readout': 'Set2SetThenCat', 'initializer': 'kaiming', 'augment': False, 'classifier': False, 'classif_categories': 3, 'cat_weights': [1.0, 1.0, 1.0], 'embedding_size': 8, 'epochs': 2000, 'extra_features': {'atom': ['grad_norm', 'esp_total', 'Hamiltonian_K'], 'bond': ['grad_norm', 'ellip_e_dens', 'ave_loc_ion_E', 'e_loc_func', 'esp_total', 'Hamiltonian_K'], 'global': ['dHrxn298'], 'mappings': ['indices_qtaim']}, 'extra_info': [], 'feature_filter': False, 'filter_species': [3, 6], 'fc_activation': 'ReLU', 'fc_batch_norm': True, 'fc_dropout': 0.2, 'fc_hidden_size_1': 512, 'fc_hidden_size_shape': 'flat', 'fc_num_layers': 3, 'gated_activation': 'ReLU', 'gated_batch_norm': True, 'gated_dropout': 0.1, 'gated_graph_norm': False, 'gated_hidden_size_1': 128, 'gated_hidden_size_shape': 'cone', 'gated_num_fc_layers': 2, 'gated_num_layers': 1, 'gated_resid

In [7]:
# Report the model size: element count summed over every parameter tensor.
print(f"{'>' * 40}model_parameters{'<' * 40}")
print(
    "Total number of parameters: ",
    sum(map(torch.Tensor.numel, model.parameters())),
)

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>model_parameters<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Total number of parameters:  1443137


In [8]:
# --- Logging and callbacks ---------------------------------------------------
# LogParameters is instantiated but deliberately left out of the Trainer's
# callback list below (its entry there is commented out).
log_parameters = LogParameters()
logger_tb = TensorBoardLogger(config["dataset"]["log_save_dir"], name="test_logs")
lr_monitor = LearningRateMonitor(logging_interval="step")

# Checkpoints are ranked by the lowest "val_l1"; the most recent one is also
# kept (save_last=True).
checkpoint_callback = ModelCheckpoint(
    monitor="val_l1",
    mode="min",
    dirpath=config["dataset"]["log_save_dir"],
    filename="model_lightning_{epoch:02d}-{val_l1:.2f}",
    auto_insert_metric_name=True,
    save_last=True,
)

# Stop once "val_l1" has failed to improve for 500 consecutive checks.
early_stopping_callback = EarlyStopping(
    monitor="val_l1",
    mode="min",
    min_delta=0.00,
    patience=500,
    verbose=False,
)

# --- Trainer -----------------------------------------------------------------
trainer = pl.Trainer(
    # hardware / distribution
    accelerator="gpu",
    devices=config["optim"]["num_devices"],
    num_nodes=config["optim"]["num_nodes"],
    strategy=config["optim"]["strategy"],
    precision=config["model"]["precision"],
    # optimisation schedule
    max_epochs=config["model"]["max_epochs"],
    gradient_clip_val=config["optim"]["gradient_clip_val"],
    accumulate_grad_batches=config["optim"]["accumulate_grad_batches"],
    # logging / checkpointing
    logger=[logger_tb],
    default_root_dir=config["dataset"]["log_save_dir"],
    enable_checkpointing=True,
    enable_progress_bar=True,
    callbacks=[
        early_stopping_callback,
        lr_monitor,
        #log_parameters,
        checkpoint_callback,
    ],
)

# Train, then evaluate the in-memory model on the data module's test split.
trainer.fit(model, dm)
trainer.test(model, dm)

Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name            | Type              | Params
-------------------------------------------------------
0  | embedding       | UnifySize         | 320   
1  | gated_layers    | ModuleList        | 190 K 
2  | readout_layer   | Set2SetThenCat    | 395 K 
3  | fc_layers       | ModuleList        | 857 K 
4  | loss            | MeanSquaredError  | 0     
5  | train_r2        | R2Score           | 0     
6  | train_torch_l1  | MeanAbsoluteError | 0     
7  | train_torch_mse | MeanSquaredError  | 0     
8  | val_r2          | R2Score           | 0     
9  | val_torch_l1    | MeanAbsoluteError | 0     
10 | val_torch_mse   | MeanSquaredError  | 0     
11 | test_r2         | 

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 00053: reducing learning rate of group 0 to 4.0000e-04.


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]