In [1]:
# baseline GNN model for graph-level regression

import os

import pytorch_lightning as pl
import torch
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import (
    LearningRateMonitor,
    EarlyStopping,
    ModelCheckpoint,
)

from qtaim_embed.core.datamodule import QTAIMGraphTaskDataModule
from qtaim_embed.models.graph_level.base_gcn import GCNGraphPred
from qtaim_embed.models.utils import load_graph_level_model_from_config
from qtaim_embed.utils.data import get_default_graph_level_config

In [8]:
# Dataset configuration: start from the project's graph-level defaults and
# override the preprocessing switches used for this experiment.
config = get_default_graph_level_config()

# Features are log-scaled and then standardized; targets are standardized only.
config["dataset"]["log_scale_features"] = True
config["dataset"]["log_scale_targets"] = False
config["dataset"]["standard_scale_features"] = True
config["dataset"]["standard_scale_targets"] = True

# Debug mode keeps the run small (the recorded run below processed 100
# molecules) -- NOTE(review): switch off for a full training run.
config["dataset"]["debug"] = True

# The training pickle location can be overridden without editing the notebook
# by exporting QTAIM_EMBED_TRAIN_DATASET; the fallback is the original path so
# existing setups keep working unchanged.
DEFAULT_TRAIN_DATASET = (
    "/home/santiagovargas/dev/qtaim_embed/data/xyz_qm8/molecules_qtaim_labelled.pkl"
)
config["dataset"]["train_dataset_loc"] = os.environ.get(
    "QTAIM_EMBED_TRAIN_DATASET", DEFAULT_TRAIN_DATASET
)

In [18]:
# Show which label(s) the model will be trained to predict.
target_names = config["dataset"]["target_list"]
print(target_names)

['extra_feat_global_E1_CAM']


In [10]:
# Build the data module from the full config. QTAIMGraphTaskDataModule is
# already imported in the import cell at the top of the notebook, so it is not
# re-imported here.
dm = QTAIMGraphTaskDataModule(
    config=config,
)

# prepare_data returns the per-node-type feature names and sizes; the sizes
# are copied into the model config so the network's input widths match the
# featurized graphs.
feature_names, feature_size = dm.prepare_data(stage="fit")
config["model"]["atom_feature_size"] = feature_size["atom"]
config["model"]["bond_feature_size"] = feature_size["bond"]
config["model"]["global_feature_size"] = feature_size["global"]

# All regression targets live on the "global" node type; build the mapping in
# one step instead of creating an empty dict and filling it afterwards.
config["model"]["target_dict"] = {"global": config["dataset"]["target_list"]}

# Start from freshly initialised weights rather than restoring a saved model.
# NOTE(review): meaning inferred from the flag name -- confirm in qtaim_embed.
config["model"]["restore"] = False

... > running in debug mode
... > creating MoleculeWrapper objects


100%|██████████| 100/100 [00:00<00:00, 10518.37it/s]


... > bond_feats_error_count:  0
... > atom_feats_error_count:  0
element set {'N', 'H', 'O', 'C'}
selected atomic keys ['extra_feat_atom_esp_total']
selected bond keys ['extra_feat_bond_esp_total', 'bond_length']
selected global keys ['extra_feat_global_E1_CAM']
... > Building graphs and featurizing


100%|██████████| 100/100 [00:00<00:00, 330.07it/s]


included in labels
{'global': ['extra_feat_global_E1_CAM']}
included in graph features
{'atom': ['total_degree', 'total_H', 'is_in_ring', 'ring_size_3', 'ring_size_4', 'ring_size_5', 'ring_size_6', 'ring_size_7', 'chemical_symbol_N', 'chemical_symbol_H', 'chemical_symbol_O', 'chemical_symbol_C', 'extra_feat_atom_esp_total'], 'bond': ['metal bond', 'ring inclusion', 'ring size_3', 'ring size_4', 'ring size_5', 'ring size_6', 'ring size_7', 'bond_length', 'extra_feat_bond_esp_total'], 'global': ['num atoms', 'num bonds', 'molecule weight']}
original loader node types: dict_keys(['atom', 'bond', 'global'])
original loader label types: dict_keys([])
include names:  dict_keys(['global'])
... > parsing labels and features in graphs


100%|██████████| 100/100 [00:00<00:00, 33423.41it/s]


original loader node types: dict_keys(['atom', 'bond', 'global'])
original loader label types: dict_keys(['global'])
... > Log scaling features
... > Log scaling features complete
... > Scaling features
mean [1.03348744 0.29579238 0.1931437  0.02300363 0.03993083 0.06814284
 0.04296905 0.01909736 0.04557323 0.35547122 0.05555594 0.23654678
 9.03963532]
std [0.3914866  0.46054609 0.3107612  0.12416012 0.16150379 0.2063724
 0.16714525 0.11345734 0.17179068 0.34645936 0.18820728 0.32864473
 5.66985947]
mean [0.         0.19579709 0.02420918 0.04417114 0.07007922 0.04587003
 0.0208114  0.81517279 0.66599706]
std [0.         0.31205721 0.12725739 0.16931041 0.2089596  0.17230968
 0.11828885 0.09367724 0.18940919]
Standard deviation for feature 0 is 0.0, smaller than 0.001. You may want to exclude this feature.
mean [2.81851833 2.8375313  4.69764359]
std [0.16318695 0.17198598 0.06353343]
... > Scaling features complete
... > feature mean(s): 
 {'atom': tensor([1.0335, 0.2958, 0.1931, 0.0230

In [13]:
# Instantiate the graph-level model from the "model" section of the config
# (feature sizes and targets were filled in after the data was prepared).
model_config = config["model"]
model = load_graph_level_model_from_config(model_config)

:::REGRESSION MODEL:::


In [14]:
# `torch` is imported in the import cell at the top of the notebook.
# Permit faster, slightly lower-precision float32 matmul kernels.
torch.set_float32_matmul_precision("high")

# Log the learning rate on every logged step; the callback only takes effect
# once it is handed to the Trainer below (previously it was created but unused).
lr_monitor = LearningRateMonitor(logging_interval="step")

trainer_transfer = pl.Trainer(
    max_epochs=10,
    accelerator="gpu",
    devices=1,
    enable_progress_bar=True,
    gradient_clip_val=3.0,  # clip gradients to stabilise training
    default_root_dir="./test/",  # logs and checkpoints are written here
    precision="32",
    log_every_n_steps=10,
    enable_checkpointing=True,
    callbacks=[lr_monitor],
)

# No manual `model.cuda()` is needed: the Trainer moves the model to the
# configured accelerator before fitting.
trainer_transfer.fit(model, dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name            | Type               | Params
--------------------------------------------------------
0  | embedding       | UnifySize          | 500   
1  | conv_layers     | ModuleList         | 29.0 K
2  | readout         | SumPoolingThenCat  | 0     
3  | loss            | MultioutputWrapper | 0     
4  | fc_layers       | ModuleList         | 52.9 K
5  | train_r2        | MultioutputWrapper | 0     
6  | train_torch_l1  | MultioutputWrapper | 0     
7  | train_torch_mse | MultioutputWrapper | 0     
8  | val_r2          | MultioutputWrapper | 0     
9  | val_torch_l1    | MultioutputWrapper | 0     
10 | val_torch_mse   | MultioutputWrapper | 0     
11 | test_r2         | MultioutputWrapper | 0     
12 | test_torch_l1   | MultioutputWrapper | 0     
13 | test_torch_mse  | Multioutp

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
