In [1]:
import os
import numpy as np
import lmdb
import pickle
from pathlib import Path

from copy import deepcopy


import dgl
from dgl import heterograph
from dgl import DGLGraph

import torch
from torch.utils.data import Dataset
import pytorch_lightning as pl

from bondnet.data.reaction_network import ReactionLMDB
from bondnet.data.utils import construct_rxn_graph_empty, create_rxn_graph
from bondnet.model.training_utils import load_model_lightning
from bondnet.test_utils import get_defaults
from bondnet.data.dataset import ReactionDatasetLMDBDataset, LmdbMoleculeDataset, LmdbReactionDataset
from bondnet.data.lmdb import TransformMol
from bondnet.data.dataloader import DataLoaderReactionLMDB
from bondnet.model.training_utils import get_grapher
from bondnet.data.dataset import ReactionDatasetGraphs
from bondnet.data.dataloader import DataLoaderReaction
from bondnet.data.datamodule import BondNetLightningDataModuleLMDB

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Open the reaction LMDB file for inspection only. The handle is read-only:
# nothing in this cell writes, and with lock=False LMDB performs no locking of
# its own, so a writable handle would be unsafe next to any other process.
env = lmdb.open(
    "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/reaction.lmdb",
    subdir=False,
    readonly=True,
    lock=False,
    readahead=True,
    meminit=False,
    max_readers=1,
)

# Fetch the raw value stored under the "length" key. The context manager ends
# the read transaction as soon as the value has been read, which matters
# because max_readers=1 leaves room for only a single open reader.
with env.begin() as txn:
    length_entry = txn.get("length".encode("ascii"))
            

In [3]:
# Open the molecule LMDB file for inspection only. The handle is read-only:
# the following cells just read metadata keys, and with lock=False LMDB does
# no locking of its own, so a writable handle would be unsafe to share.
env = lmdb.open(
    "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/molecule.lmdb",
    subdir=False,
    readonly=True,
    lock=False,
    readahead=True,
    meminit=False,
    max_readers=1,
)

In [4]:
# Raw value stored under the "length" key of the currently open environment.
# Using the transaction as a context manager releases the reader slot
# immediately instead of leaving the transaction open.
with env.begin() as txn:
    length_entry = txn.get("length".encode("ascii"))

In [5]:
# Read the pickled "elements" entry inside a short-lived read transaction.
with env.begin() as txn:
    elements = txn.get("elements".encode("ascii"))

# txn.get returns None for a missing key; fail with a clear message instead of
# letting pickle.loads(None) raise an opaque TypeError.
if elements is None:
    raise KeyError("'elements' entry not found in the LMDB file")

# SECURITY: pickle.loads can execute arbitrary code while deserialising.
# Only run this against LMDB files that come from a trusted source.
pickle.loads(elements)

{'C', 'Cl', 'F', 'H', 'Li', 'Mg', 'N', 'O', 'P', 'S'}

In [6]:
# Directory holding one sub-folder per split (train/, val/, test/), each with
# a molecule.lmdb and a reaction.lmdb file.
LMDB_ROOT = "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter"


def load_lmdb_split(split):
    """Load the molecule/reaction LMDB pair of one split.

    Args:
        split: name of the sub-folder under ``LMDB_ROOT`` ("train", "val" or
            "test").

    Returns:
        Tuple ``(molecules, reactions, network, dataset)``: the two LMDB
        datasets, the ``ReactionLMDB`` built from them, and the
        ``ReactionDatasetLMDBDataset`` wrapping that network.
    """
    molecules = LmdbMoleculeDataset(
        config={"src": f"{LMDB_ROOT}/{split}/molecule.lmdb"},
        transform=TransformMol,
    )
    reactions = LmdbReactionDataset(
        config={"src": f"{LMDB_ROOT}/{split}/reaction.lmdb"}
    )
    network = ReactionLMDB(molecules, reactions)
    return molecules, reactions, network, ReactionDatasetLMDBDataset(network)


# Each variable is loaded from the folder that matches its name: the *_val
# objects read val/ and the *_test objects read test/.
mol, reaction, rxn_ntwk, dataset = load_lmdb_split("train")
mol_val, reaction_val, rxn_ntwk_val, dataset_val = load_lmdb_split("val")
mol_test, reaction_test, rxn_ntwk_test, dataset_test = load_lmdb_split("test")

# Training batches are shuffled; the evaluation loaders deliver their whole
# split as a single, unshuffled batch.
dataloader = DataLoaderReactionLMDB(
    dataset, batch_size=100, shuffle=True, num_workers=4
)

dataloader_test = DataLoaderReactionLMDB(
    dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=4
)

dataloader_val = DataLoaderReactionLMDB(
    dataset_val, batch_size=len(dataset_val), shuffle=False, num_workers=4
)

In [7]:


# Assemble the run configuration and build the model for the LMDB dataset.
dataset_loc = "../../../tests/data/testdata/barrier_100.json"
config = {
    "dataset": {
        "data_dir": dataset_loc,
        "target_var": "ts",
    },
    "model": {
        "extra_features": ["bond_length"],
        "extra_info": [],
        "debug": False,
        "classifier": False,
        "classif_categories": 3,
        "filter_species": [3, 6],
        "filter_outliers": False,
        "filter_sparse_rxns": False,
        "restore": False,
    },
    "optim": {
        "val_size": 0.2,
        "test_size": 0.2,
        "batch_size": 4,
        "num_workers": 1,
    },
}

# Overlay the default model settings exactly once. Keys present in both
# dictionaries end up with the default's value.
config_model = get_defaults()
config["model"].update(config_model["model"])

# The input feature sizes come from the dataset itself and are set after the
# merge so that the defaults cannot replace them.
config["model"]["in_feats"] = dataset.feature_size
model = load_model_lightning(config["model"], load_dir="./test_lmdb/")

NB: using GatedGCNConv
NB: using Set2SetThenCat
:::NO INITIALIZER USED:::


In [8]:

# Smoke test: push every training batch through the model once. The model
# outputs are discarded; the loop only checks that the forward pass runs on
# batches produced by the LMDB data loader.
nodes = ["atom", "bond", "global"]  # node types whose "ft" features are fed to the model

for it, (batched_graph, label) in enumerate(dataloader):
    print(it)
    feats = {nt: batched_graph.nodes[nt].data["ft"] for nt in nodes}

    norm_atom = label["norm_atom"]
    norm_bond = label["norm_bond"]
    stdev = label["scaler_stdev"]
    reactions = label["reaction"]

    # Initialise the model's stdev from the first batch that provides one.
    if model.stdev is None:
        model.stdev = stdev[0]

    model(
        graph=batched_graph,
        feats=feats,
        reactions=reactions,
        norm_atom=norm_atom,
        norm_bond=norm_bond,
        reverse=False,
    )
    


0
1
2
3
4
5
6
7
8
9
10
11


In [9]:
project_name = "test_multi_gpu"

# Short training run (2 epochs) to check the training loop end to end.
# accelerator="auto" with devices=1 uses a single GPU when one is available
# and falls back to the CPU otherwise, instead of failing on machines
# without CUDA.
trainer = pl.Trainer(
    max_epochs=2,
    accelerator="auto",
    devices=1,
    accumulate_grad_batches=5,
    enable_progress_bar=True,
    gradient_clip_val=1.0,
    enable_checkpointing=True,
    precision=32,
)

# Validate on the validation loader; the test loader is kept out of the
# training loop so it stays available for a final, unbiased evaluation.
trainer.fit(model, train_dataloaders=dataloader, val_dataloaders=dataloader_val)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A5000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name            | Type              | Params
-------------------------------------------------------
0  | embedding       | UnifySize         | 124   
1  | gated_layers    | ModuleList        | 1.6 K 
2  | readout_layer   | Set2SetThenCat    | 2.6 K 
3  | fc_layers       | ModuleList        | 3.3 K 
4  | loss            | MeanSquaredError  | 0     
5  | train_r2        | R2Score           | 0     
6  | train_torch_l1  | MeanAbsoluteError | 0     
7  | train_torch_mse | MeanSquaredError  | 0     
8  | val_r2          | R2Score           | 0     
9  | val_torch_l1    | MeanAbsoluteError | 0     
10 | val_torch_mse   | MeanSquaredError  | 0     
11 | test_r2         | R2Score           | 0     
12 | test_torch_l1   | MeanAbsoluteError | 0     
13 | test_torch_mse  | MeanSquaredError  | 0     
-------------------------------------------------------
7.6 K     Trainable params
0         Non-trainable params
7.6 K     Total params
0.030     To

                                                                           

/home/santiagovargas/anaconda3/envs/bondnet_new/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:293: The number of training batches (12) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 1: 100%|██████████| 12/12 [00:14<00:00,  0.80it/s, v_num=137, val_loss=0.979, val_r2=-324., val_l1=0.754, val_mse=0.944, train_loss=0.998, train_r2=-212., train_l1=0.770, train_mse=0.976]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 12/12 [00:14<00:00,  0.80it/s, v_num=137, val_loss=0.979, val_r2=-324., val_l1=0.754, val_mse=0.944, train_loss=0.998, train_r2=-212., train_l1=0.770, train_mse=0.976]


In [None]:

# Build the in-memory reaction dataset from a pickled reaction file.
dataset_loc = "/home/santiagovargas/dev/bondnet/bondnet/dataset/rapter_new_parse/qtaim/test_rapter_filtered_species.pkl"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# A precision given as "16" or "32" is converted to the matching integer.
precision = "32"
if precision in ("16", "32"):
    precision = int(precision)

# NOTE(review): the grapher receives an empty extra_keys dict while the
# dataset itself is given {"bond": ["bond_length"]} -- confirm that this
# mismatch is intentional.
extra_keys = {}

dataset = ReactionDatasetGraphs(
    grapher=get_grapher(extra_keys),
    file=dataset_loc,
    target="ts",
    classifier=False,
    classif_categories=3,
    filter_species=[5, 5],
    filter_outliers=False,
    filter_sparse_rxns=False,
    debug=True,
    extra_keys={"bond": ["bond_length"]},
    extra_info={},
)

fg_list None
reading file from: /home/santiagovargas/dev/bondnet/bondnet/dataset/rapter_new_parse/qtaim/test_rapter_filtered_species.pkl
rxn raw len: 100
Program finished in 0.5151092829182744 seconds
.............failures.............
reactions len: 100
valid ind len: 100
bond break fail count: 		0
default fail count: 		0
sdf map fail count: 		0
product bond fail count: 	0
about to group and organize
number of grouped reactions: 100
---> generating grouped reactions


grouped reactions: 100%|██████████| 100/100 [00:01<00:00, 58.39it/s]


--> generating labels


labeled reactions: 100%|██████████| 100/100 [00:00<00:00, 33904.32it/s]


features: 287
labels: 100
molecules: 287
constructing graphs & features....


mol graphs: 100%|██████████| 287/287 [00:00<00:00, 575.18it/s]

number of graphs valid: 287
number of graphs: 287





In [None]:
# Display the dataset's feature sizes (a dict keyed by node type).
dataset.feature_size

{'atom': 20, 'bond': 7, 'global': 3}

In [None]:
# Shuffled loader over the in-memory reaction dataset, 100 reactions per batch.
dataloader_normal = DataLoaderReaction(dataset, batch_size=100, shuffle=True)

In [None]:
# Pull a single batch from the loader and display it as a quick sanity check.
next(iter(dataloader_normal))

(Graph(num_nodes={'atom': 2614, 'bond': 2458, 'global': 287},
       num_edges={('atom', 'a2a', 'atom'): 2614, ('atom', 'a2b', 'bond'): 4914, ('atom', 'a2g', 'global'): 2614, ('bond', 'b2a', 'atom'): 4914, ('bond', 'b2b', 'bond'): 2458, ('bond', 'b2g', 'global'): 2458, ('global', 'g2a', 'atom'): 2614, ('global', 'g2b', 'bond'): 2458, ('global', 'g2g', 'global'): 287},
       metagraph=[('atom', 'atom', 'a2a'), ('atom', 'bond', 'a2b'), ('atom', 'global', 'a2g'), ('bond', 'atom', 'b2a'), ('bond', 'bond', 'b2b'), ('bond', 'global', 'b2g'), ('global', 'atom', 'g2a'), ('global', 'bond', 'g2b'), ('global', 'global', 'g2g')]),
 {'value': tensor([[-9.5158e-01],
          [-9.0933e-01],
          [-1.9626e-01],
          [-1.1252e+00],
          [-2.5112e-01],
          [ 1.3215e+00],
          [-4.3222e-01],
          [ 2.8052e-01],
          [ 4.9866e-01],
          [ 1.3765e+00],
          [ 8.6168e-01],
          [ 4.0345e-01],
          [-4.3456e-02],
          [-1.0290e+00],
          [-9

In [None]:
# Assemble the run configuration and build the model for the in-memory
# dataset.
dataset_loc = "../../../tests/data/testdata/barrier_100.json"
config = {
    "dataset": {
        "data_dir": dataset_loc,
        "target_var": "ts",
    },
    "model": {
        "extra_features": [],
        "extra_info": [],
        "debug": False,
        "classifier": False,
        "classif_categories": 3,
        "filter_species": [3, 6],
        "filter_outliers": False,
        "filter_sparse_rxns": False,
        "restore": False,
    },
    "optim": {
        "val_size": 0.2,
        "test_size": 0.2,
        "batch_size": 4,
        "num_workers": 1,
    },
}

# Overlay the default model settings exactly once. Keys present in both
# dictionaries end up with the default's value.
config_model = get_defaults()
config["model"].update(config_model["model"])

# The input feature sizes come from the dataset itself and are set after the
# merge so that the defaults cannot replace them.
config["model"]["in_feats"] = dataset.feature_size
model = load_model_lightning(config["model"], load_dir="./test_lmdb/")

NameError: name 'dataset' is not defined

In [None]:
# Smoke test: push every batch of the in-memory dataset through the model
# once. The outputs are discarded; the loop only checks that the forward pass
# runs. No atom/bond normalisation tensors are supplied for this loader.
nodes = ["atom", "bond", "global"]  # node types whose "ft" features are fed to the model

for batched_graph, label in dataloader_normal:
    feats = {nt: batched_graph.nodes[nt].data["ft"] for nt in nodes}
    model(
        graph=batched_graph,
        feats=feats,
        reactions=label["reaction"],
        norm_atom=None,
        norm_bond=None,
        reverse=False,
    )

NameError: name 'dataloader_normal' is not defined

In [2]:
# Run configuration, assembled section by section.
config = {}

# Locations of the LMDB splits and dataset-level options.
config["dataset"] = {
    "log_save_dir": "./model_log/",
    "train_lmdb": "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/train/",
    "val_lmdb": "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/val/",
    "test_lmdb": "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/test/",
    "target_var": "value",
    "overwrite": False,
    "no_splits": False,
}

# Model / featurisation options.
config["model"] = {
    "extra_features": ["bond_length"],
    "extra_info": [],
    "debug": False,
    "classifier": False,
    "classif_categories": 3,
    "filter_species": [3, 6],
    "filter_outliers": False,
    "filter_sparse_rxns": False,
    "restore": False,
}

# Optimisation and data-loading options.
config["optim"] = {
    "batch_size": 16,
    "num_devices": 1,
    "num_nodes": 1,
    "num_workers": 1,
    "val_size": 0.15,
    "test_size": 0.1,
    "strategy": "auto",
    "gradient_clip_val": 5.0,
    "accumulate_grad_batches": 3,
    "pin_memory": False,
    "persistent_workers": False,
}

In [3]:
# Build the LMDB data module; prepare_data() reports the feature sizes that
# the model needs as its input dimensions.
dm = BondNetLightningDataModuleLMDB(config)
feature_size, feature_names = dm.prepare_data()
print(feature_size)
config["model"]["in_feats"] = feature_size
dm.setup(stage=None)

# Overlay the default model settings exactly once. "in_feats" is skipped so
# that the sizes measured from the data above can never be replaced by a
# value coming from the defaults.
config_model = get_defaults()
config["model"].update(
    {
        key: value
        for key, value in config_model["model"].items()
        if key != "in_feats"
    }
)

model = load_model_lightning(config["model"], load_dir="./test_lmdb/")

{'atom': 20, 'bond': 8, 'global': 3}
NB: using GatedGCNConv
NB: using Set2SetThenCat
:::NO INITIALIZER USED:::


In [6]:
# Display the full configuration dictionary for inspection.
config

{'dataset': {'log_save_dir': './model_log/',
  'train_lmdb': '/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/train/',
  'val_lmdb': '/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/val/',
  'test_lmdb': '/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter/test/',
  'target_var': 'value',
  'overwrite': False,
  'no_splits': False},
 'model': {'extra_features': {'bond': ['bond_length']},
  'extra_info': [],
  'debug': False,
  'classifier': False,
  'classif_categories': 3,
  'filter_species': [3, 5],
  'filter_outliers': True,
  'filter_sparse_rxns': False,
  'restore': False,
  'in_feats': {'atom': 20, 'bond': 8, 'global': 3},
  'conv': 'GatedGCNConv',
  'readout': 'Set2SetThenCat',
  'augment': False,
  'cat_weights': [1.0, 1.0, 1.0],
  'embedding_size': 4,
  'epochs': 100,
  'fc_activation': 'ReLU',
  'fc_batch_norm': False,
  'fc_dropout': 0.2,
  'fc_hidden_size_1': 64,
  'fc_hidden_size_shape': 'flat',
  'fc_num_layers': 

In [4]:

# Directory holding one sub-folder per split (train/, val/, test/), each with
# a molecule.lmdb and a reaction.lmdb file.
LMDB_ROOT = "/home/santiagovargas/dev/bondnet/bondnet/scripts/helpers/test_rapter"


def load_lmdb_split(split):
    """Load the molecule/reaction LMDB pair of one split.

    Args:
        split: name of the sub-folder under ``LMDB_ROOT`` ("train", "val" or
            "test").

    Returns:
        Tuple ``(molecules, reactions, network, dataset)``: the two LMDB
        datasets, the ``ReactionLMDB`` built from them, and the
        ``ReactionDatasetLMDBDataset`` wrapping that network.
    """
    molecules = LmdbMoleculeDataset(
        config={"src": f"{LMDB_ROOT}/{split}/molecule.lmdb"},
        transform=TransformMol,
    )
    reactions = LmdbReactionDataset(
        config={"src": f"{LMDB_ROOT}/{split}/reaction.lmdb"}
    )
    network = ReactionLMDB(molecules, reactions)
    return molecules, reactions, network, ReactionDatasetLMDBDataset(network)


mol, reaction, rxn_ntwk, dataset = load_lmdb_split("train")
mol_test, reaction_test, rxn_ntwk_test, dataset_test = load_lmdb_split("test")
mol_val, reaction_val, rxn_ntwk_val, dataset_val = load_lmdb_split("val")

# None of the loaders shuffle; the evaluation loaders deliver their whole
# split as a single batch.
dataloader = DataLoaderReactionLMDB(
    dataset, batch_size=100, shuffle=False, num_workers=4
)

dataloader_test = DataLoaderReactionLMDB(
    dataset_test, batch_size=len(dataset_test), shuffle=False, num_workers=4
)

dataloader_val = DataLoaderReactionLMDB(
    dataset_val, batch_size=len(dataset_val), shuffle=False, num_workers=4
)



In [5]:


# Assemble the run configuration and build the model for the LMDB dataset.
config = {
    "dataset": {
    },
    "model": {
        "extra_features": ["bond_length"],
        "extra_info": [],
        "debug": False,
        "classifier": False,
        "classif_categories": 3,
        "filter_species": [3, 6],
        "filter_outliers": False,
        "filter_sparse_rxns": False,
        "restore": False,
    },
    "optim": {
        "val_size": 0.2,
        "test_size": 0.2,
        "batch_size": 128,
        "num_workers": 4,
    },
}

# Overlay the default model settings exactly once. Keys present in both
# dictionaries end up with the default's value.
config_model = get_defaults()
config["model"].update(config_model["model"])

# The input feature sizes come from the dataset itself and are set after the
# merge so that the defaults cannot replace them.
config["model"]["in_feats"] = dataset.feature_size
model = load_model_lightning(config["model"], load_dir="./test_lmdb/")

print(dataset.feature_size)

NB: using GatedGCNConv
NB: using Set2SetThenCat
:::NO INITIALIZER USED:::
{'atom': 20, 'bond': 8, 'global': 3}


In [7]:
# Display the defaults dictionary returned by get_defaults().
config_model

{'model': {'conv': 'GatedGCNConv',
  'readout': 'Set2SetThenCat',
  'augment': False,
  'classifier': False,
  'classif_categories': 3,
  'cat_weights': [1.0, 1.0, 1.0],
  'embedding_size': 4,
  'epochs': 100,
  'extra_features': {'bond': ['bond_length']},
  'extra_info': [],
  'filter_species': [3, 5],
  'fc_activation': 'ReLU',
  'fc_batch_norm': False,
  'fc_dropout': 0.2,
  'fc_hidden_size_1': 64,
  'fc_hidden_size_shape': 'flat',
  'fc_num_layers': 1,
  'gated_activation': 'ReLU',
  'gated_batch_norm': False,
  'gated_dropout': 0.1,
  'gated_graph_norm': False,
  'gated_hidden_size_1': 10,
  'gated_hidden_size_shape': 'flat',
  'gated_num_fc_layers': 1,
  'gated_num_layers': 2,
  'gated_residual': True,
  'learning_rate': 0.001,
  'precision': 'bf16',
  'loss': 'mse',
  'num_lstm_iters': 3,
  'num_lstm_layers': 1,
  'restore': False,
  'weight_decay': 0.0,
  'max_epochs': 1000,
  'max_epochs_transfer': 10,
  'transfer': False,
  'filter_outliers': True,
  'freeze': True,
  'reacta

In [5]:
# Training run driven by the LMDB data module.
# accelerator="auto" with devices=1 uses a single GPU when one is available
# and falls back to the CPU otherwise, instead of failing on machines
# without CUDA.
trainer = pl.Trainer(
    max_epochs=20,
    accelerator="auto",
    devices=1,
    accumulate_grad_batches=5,
    enable_progress_bar=True,
    gradient_clip_val=1.0,
    enable_checkpointing=True,
    precision=32,
)

# The data module supplies both the training and the validation loaders.
trainer.fit(model, datamodule=dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A5000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name            | Type              | Params
-------------------------------------------------------
0  | embedding       | UnifySize         | 124   
1  | gated_layers    | ModuleList        | 1.6 K 
2  | readout_layer   | Set2SetThenCat    | 2.6 K 
3  | fc_layers       | ModuleList        | 3.3 K 
4  | loss            | MeanSquaredError  | 0     
5  | train_r2        | R2Score           | 0     
6  | train_torch_l1  | MeanAbsoluteError | 0     
7  | train_torch_mse | MeanSquaredError  | 0     
8  | val_r2          | R2Score           | 0     
9  | val_torch_l1    | MeanAbsoluteError | 0     
10 | val_torch_mse   | MeanSquaredError  | 0     
11 | test_r2         | R2Score           | 0     
12 | test_torch_l1   | MeanAbsoluteError | 0     
13 | test_torch_mse  | MeanSquaredError  | 0     
-------------------------------------------------------
7.6 K     Trainable params
0         Non-trainable params
7.6 K     Total params
0.030     To

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/santiagovargas/anaconda3/envs/bondnet_new/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

/home/santiagovargas/anaconda3/envs/bondnet_new/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 2:  43%|████▎     | 32/75 [00:03<00:05,  8.33it/s, v_num=145, val_loss=0.940, val_r2=-33.0, val_l1=0.718, val_mse=0.907, train_loss=0.965, train_r2=-39.2, train_l1=0.733, train_mse=0.931]

/home/santiagovargas/anaconda3/envs/bondnet_new/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
