In [1]:
import wandb, argparse, torch, json
import numpy as np

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import (
    LearningRateMonitor,
    EarlyStopping,
    ModelCheckpoint,
)

from bondnet.data.dataset import ReactionNetworkDatasetPrecomputed
from bondnet.data.dataloader import DataLoaderPrecomputedReactionGraphs
from bondnet.data.dataset import train_validation_test_split
from bondnet.utils import seed_torch
from bondnet.model.training_utils import (
    get_grapher,
    LogParameters,
    load_model_lightning,
)

seed_torch()
torch.set_float32_matmul_precision("high")  # might have to disable on older GPUs

import torch.multiprocessing

torch.multiprocessing.set_sharing_strategy("file_system")

  from .autonotebook import tqdm as notebook_tqdm


# check cuda


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# 1. model config


In [15]:
config = {
    "augment": True,
    "batch_size": 4,
    "debug": False,
    "classifier": False,
    "classif_categories": 3,
    "cat_weights": [1.0, 1.0, 1.0],
    "embedding_size": 24,
    "epochs": 100,
    "extra_features": ["bond_length"],
    "extra_info": [],
    "filter_species": [3, 5],
    "fc_activation": "ReLU",
    "fc_batch_norm": True,
    "fc_dropout": 0.2,
    "fc_hidden_size_1": 256,
    "fc_hidden_size_shape": "flat",
    "fc_num_layers": 1,
    "gated_activation": "ReLU",
    "gated_batch_norm": False,
    "gated_dropout": 0.1,
    "gated_graph_norm": False,
    "gated_hidden_size_1": 512,
    "gated_hidden_size_shape": "flat",
    "gated_num_fc_layers": 1,
    "gated_num_layers": 2,
    "gated_residual": True,
    "learning_rate": 0.003,
    "precision": 32,
    "loss": "mse",
    "num_lstm_iters": 3,
    "num_lstm_layers": 1,
    "on_gpu": True,
    "restore": False,
    "target_var": "ts",
    "target_var_transfer": "diff",
    "weight_decay": 0.0,
    "max_epochs": 100,
    "max_epochs_transfer": 100,
    "transfer": False,
    "filter_outliers": True,
}

dataset_loc = "../../../tests/data/testdata/barrier_100.json"

on_gpu = config["on_gpu"]
extra_keys = config["extra_features"]
debug = config["debug"]
precision = config["precision"]

if precision == "16" or precision == "32":
    precision = int(precision)

if on_gpu:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")

extra_keys = config["extra_features"]

# 2. load json and processing it into Reaction networks graphs in cpu


In [16]:
dataset = ReactionNetworkDatasetPrecomputed(
    grapher=get_grapher(extra_keys),
    file=dataset_loc,
    target=config["target_var"],
    classifier=config["classifier"],
    classif_categories=config["classif_categories"],
    filter_species=config["filter_species"],
    filter_outliers=config["filter_outliers"],
    filter_sparse_rxns=False,
    debug=debug,
    device="cpu",
    extra_keys=extra_keys,
    extra_info=config["extra_info"],
)

reading file from: ../../../tests/data/testdata/barrier_100.json
rxn raw len: 100
Program finished in 0.6499089018907398 seconds
.............failures.............
reactions len: 89
valid ind len: 89
bond break fail count: 		0
default fail count: 		11
sdf map fail count: 		0
product bond fail count: 	0
about to group and organize
number of grouped reactions: 89
features: 214
labels: 89
molecules: 214
constructing graphs & features....
number of graphs valid: 214
number of graphs: 214
prebuilding reaction graphs


# 3. Write Reaction networks graphs to lmdb files in parallel


In [17]:
import sys

sys.path.insert(0, sys.path[0] + "/../")
from bondnet.data.lmdb_dataset import LmdbDataset, CRNs2lmdb

In [24]:
config = {
    "out_path": "./lmdb_data/",  # Update the directory path
    "num_workers": 3,
    "output_file": "merged_data.lmdb",
}
CRNs2lmdb(
    dataset,
    lmdb_dir=config["out_path"],
    num_workers=4,
    lmdb_name=config["output_file"],
)

Worker 0: Writing CRNs Objects into LMDBs: 100%|██████████| 23/23 [00:00<00:00, 612.65it/s]
Worker 1: Writing CRNs Objects into LMDBs: 100%|██████████| 22/22 [00:00<00:00, 598.31it/s]

Worker 2: Writing CRNs Objects into LMDBs: 100%|██████████| 22/22 [00:00<00:00, 616.88it/s]


Worker 3: Writing CRNs Objects into LMDBs: 100%|██████████| 22/22 [00:00<00:00, 618.33it/s]


Deleted file: ./lmdb_data/_tmp_data.0001.lmdb
Deleted file: ./lmdb_data/_tmp_data.0000.lmdb-lock
Deleted file: ./lmdb_data/_tmp_data.0002.lmdb-lock
Deleted file: ./lmdb_data/_tmp_data.0001.lmdb-lock
Deleted file: ./lmdb_data/_tmp_data.0003.lmdb
Deleted file: ./lmdb_data/_tmp_data.0003.lmdb-lock
Deleted file: ./lmdb_data/_tmp_data.0000.lmdb
Deleted file: ./lmdb_data/_tmp_data.0002.lmdb


# 4. Load lmdb files


In [19]:
_lmdb = LmdbDataset({"src": "./lmdb_data/merged_data.lmdb"})

In [20]:
print(_lmdb.num_samples, _lmdb.dtype, _lmdb.feature_size, _lmdb.feature_name)

89 float32 {'atom': 20, 'bond': 8, 'global': 7} {'atom': ['total degree', 'is in ring', 'total H', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'chemical symbol', 'ring size', 'ring size', 'ring size', 'ring size', 'ring size'], 'bond': ['metal bond', 'ring inclusion', 'ring size', 'ring size', 'ring size', 'ring size', 'ring size', 'bond_length'], 'global': ['num atoms', 'num bonds', 'molecule weight', 'charge one hot', 'charge one hot', 'charge one hot', 'charge one hot']}


# 5. Train BondNet


In [21]:
log_save_dir = "./logs_lightning/"
dict_for_model = {
    "extra_features": extra_keys,
    "classifier": False,
    "classif_categories": config["classif_categories"],
    "filter_species": config["filter_species"],
    "filter_outliers": config["filter_outliers"],
    "filter_sparse_rxns": False,
    "debug": debug,
    "in_feats": dataset.feature_size,
}
config["batch_size"] = 24

config.update(dict_for_model)
#! 2. split dataset.  train_validation_test_split is in bondnet dataset.
trainset, valset, testset = train_validation_test_split(
    _lmdb, validation=0.15, test=0.15
)

print(">" * 40 + "config_settings" + "<" * 40)
for k, v in config.items():
    print("{}\t\t\t{}".format(str(k).ljust(20), str(v).ljust(20)))

print(">" * 40 + "config_settings" + "<" * 40)

#! 3. dataloader
val_loader = DataLoaderPrecomputedReactionGraphs(
    valset, batch_size=len(valset), shuffle=False
)
test_loader = DataLoaderPrecomputedReactionGraphs(
    testset, batch_size=len(testset), shuffle=False
)
train_loader = DataLoaderPrecomputedReactionGraphs(
    trainset, batch_size=config["batch_size"], shuffle=True
)

model = load_model_lightning(config, device=device, load_dir=log_save_dir)

KeyError: 'classif_categories'

In [22]:
project_name = "test"
with wandb.init(project=project_name) as run:
    log_parameters = LogParameters()
    logger_tb = TensorBoardLogger(log_save_dir, name="test_logs")
    logger_wb = WandbLogger(project=project_name, name="test_logs")
    lr_monitor = LearningRateMonitor(logging_interval="step")

    checkpoint_callback = ModelCheckpoint(
        dirpath=log_save_dir,
        filename="model_lightning_{epoch:02d}-{val_loss:.2f}",
        monitor="val_loss",  # TODO
        mode="min",
        auto_insert_metric_name=True,
        save_last=True,
    )
    early_stopping_callback = EarlyStopping(
        monitor="val_loss", min_delta=0.00, patience=500, verbose=False, mode="min"
    )

    trainer = pl.Trainer(
        max_epochs=config["max_epochs"],
        accelerator="gpu",
        devices=[0],
        accumulate_grad_batches=5,
        enable_progress_bar=True,
        gradient_clip_val=1.0,
        callbacks=[
            early_stopping_callback,
            lr_monitor,
            log_parameters,
            checkpoint_callback,
        ],
        enable_checkpointing=True,
        default_root_dir=log_save_dir,
        logger=[logger_tb, logger_wb],
        precision=precision,
    )

    trainer.fit(model, train_loader, val_loader)
    trainer.test(model, test_loader)

run.finish()

[34m[1mwandb[0m: Currently logged in as: [33msanti[0m ([33mhydro_homies[0m). Use [1m`wandb login --relogin`[0m to force relogin


  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"


KeyError: 'max_epochs'