In [1]:
# model dir
import os

model_dir = "../../save_models/qm9/1027/"
from qtaim_embed.models.graph_level.base_gcn import GCNGraphPred
from qtaim_embed.models.utils import load_graph_level_model_from_config
from qtaim_embed.utils.data import get_default_graph_level_config

In [2]:
model_list = os.listdir(model_dir)
model_list = [os.path.join(model_dir, model) for model in model_list]
model_list_ordered = sorted(
    model_list, key=lambda x: int(x.split("=")[-1].split(".")[0])
)
config = get_default_graph_level_config()
for model_path in model_list_ordered:
    config["restore"] = True
    # model_path = "./top_models/model_lightning_epoch=208-val_l1=1.94.ckpt"
    config["restore_path"] = model_path
    model_restart = load_graph_level_model_from_config(config)
    # load model to gpu
    model_restart.cuda()
    model_list.append(model_restart)

:::RESTORING MODEL FROM EXISTING FILE:::


  rank_zero_warn(


TypeError: attribute name must be string, not 'ReLU'

In [3]:
import wandb, argparse, torch, json
import numpy as np
from copy import deepcopy

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import (
    LearningRateMonitor,
    EarlyStopping,
    ModelCheckpoint,
)
from qtaim_embed.core.datamodule import QTAIMGraphTaskDataModule
from qtaim_embed.models.utils import LogParameters, load_graph_level_model_from_config
from qtaim_embed.utils.data import get_default_graph_level_config


torch.set_float32_matmul_precision("high")  # might have to disable on older GPUs
torch.multiprocessing.set_sharing_strategy("file_system")


on_gpu = bool(True)
debug = bool(False)
project_name = "qm9_eval"
dataset_loc = "../../../data/qm9_qtaim_1025_labelled.pkl"
log_save_dir = "./qm9_eval/"
config = None
if config is None:
    config = get_default_graph_level_config()

config["dataset"]["log_scale_features"] = True
config["dataset"]["standard_scale_features"] = True
config["dataset"]["standard_scale_targets"] = True
config["dataset"]["target_list"] = ["u0"]
config["dataset"]["train_batch_size"] = 512
config["dataset"]["extra_keys"] = {
    "atom": ["extra_feat_atom_esp_total"],
    "bond": [
        "extra_feat_bond_esp_total",
        "bond_length",
    ],
    "global": ["u0"],
}

config["model"] = {
    "n_conv_layers": 4,
    "resid_n_graph_convs": 1,
    "conv_fn": "GraphConvDropoutBatch",
    "global_pooling_fn": "SumPoolingThenCat",
    "dropout": 0.2,
    "batch_norm": True,
    "activation": "ReLU",
    "bias": True,
    "norm": "both",
    "aggregate": "sum",
    "lr": 0.0002,
    "scheduler_name": "reduce_on_plateau",
    "weight_decay": 0.00001,
    "lr_plateau_patience": 50,
    "lr_scale_factor": 0.75,
    "loss_fn": "mse",
    "embedding_size": 50,
    # "fc_layer_size": [256, 128],
    "shape_fc": "cone",
    "fc_hidden_size_1": 512,
    "fc_num_layers": 1,
    "fc_dropout": 0.2,
    "fc_batch_norm": True,
    "lstm_iters": 3,
    "lstm_layers": 2,
    "output_dims": 1,
    "pooling_ntypes": ["atom", "bond", "global"],
    "pooling_ntypes_direct": ["global"],
    "restore": False,
    "max_epochs": 10,
}

if config["optim"]["precision"] == "16" or config["optim"]["precision"] == "32":
    config["optim"]["precision"] = int(config["optim"]["precision"])

# set log save dir
config["dataset"]["log_save_dir"] = log_save_dir

# dataset
if dataset_loc is not None:
    config["dataset"]["train_dataset_loc"] = dataset_loc
extra_keys = config["dataset"]["extra_keys"]

if debug:
    config["dataset"]["debug"] = debug
print(">" * 40 + "config_settings" + "<" * 40)

# for k, v in config.items():
#    print("{}\t\t\t{}".format(str(k).ljust(20), str(v).ljust(20)))
dm = QTAIMGraphTaskDataModule(config=config)

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>config_settings<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


In [4]:
feature_names, feature_size = dm.prepare_data(stage="fit")
config["model"]["atom_feature_size"] = feature_size["atom"]
config["model"]["bond_feature_size"] = feature_size["bond"]
config["model"]["global_feature_size"] = feature_size["global"]
config["model"]["target_dict"] = {}
config["model"]["target_dict"]["global"] = config["dataset"]["target_list"]
# config["dataset"]["feature_names"] = feature_names

print(">" * 40 + "config_settings" + "<" * 40)
for k, v in config.items():
    print("{}\t\t\t{}".format(str(k).ljust(20), str(v).ljust(20)))

print(">" * 40 + "config_settings" + "<" * 40)

model = load_graph_level_model_from_config(config["model"])
print("model constructed!")

with wandb.init(project=project_name) as run:
    log_parameters = LogParameters()
    logger_tb = TensorBoardLogger(config["dataset"]["log_save_dir"], name="test_logs")
    logger_wb = WandbLogger(project=project_name, name="test_logs")
    lr_monitor = LearningRateMonitor(logging_interval="step")

    checkpoint_callback = ModelCheckpoint(
        dirpath=config["dataset"]["log_save_dir"],
        filename="model_lightning_{epoch:02d}-{val_mae:.2f}",
        monitor="val_mae",
        mode="min",
        auto_insert_metric_name=True,
        save_last=True,
    )

    early_stopping_callback = EarlyStopping(
        monitor="val_mae", min_delta=0.00, patience=200, verbose=False, mode="min"
    )

    trainer = pl.Trainer(
        max_epochs=config["model"]["max_epochs"],
        accelerator="gpu",
        devices=config["optim"]["num_devices"],
        num_nodes=config["optim"]["num_nodes"],
        gradient_clip_val=config["optim"]["gradient_clip_val"],
        accumulate_grad_batches=config["optim"]["accumulate_grad_batches"],
        enable_progress_bar=True,
        callbacks=[
            early_stopping_callback,
            lr_monitor,
            log_parameters,
            checkpoint_callback,
        ],
        enable_checkpointing=True,
        strategy=config["optim"]["strategy"],
        default_root_dir=config["dataset"]["log_save_dir"],
        logger=[logger_tb, logger_wb],
        precision=config["optim"]["precision"],
    )

    trainer.fit(model, dm)
    trainer.test(model, dm)
run.finish()

... > creating MoleculeWrapper objects


100%|██████████| 133848/133848 [00:11<00:00, 11959.49it/s]


... > bond_feats_error_count:  0
... > atom_feats_error_count:  0
element set {'O', 'F', 'H', 'C', 'N'}
selected atomic keys ['extra_feat_atom_esp_total']
selected bond keys ['extra_feat_bond_esp_total', 'bond_length']
selected global keys ['u0']
... > Building graphs and featurizing


100%|██████████| 133848/133848 [03:57<00:00, 564.52it/s]


included in labels
{'global': ['u0']}
included in graph features
{'atom': ['total_degree', 'total_H', 'is_in_ring', 'ring_size_3', 'ring_size_4', 'ring_size_5', 'ring_size_6', 'ring_size_7', 'chemical_symbol_O', 'chemical_symbol_F', 'chemical_symbol_H', 'chemical_symbol_C', 'chemical_symbol_N', 'extra_feat_atom_esp_total'], 'bond': ['metal bond', 'ring inclusion', 'ring size_3', 'ring size_4', 'ring size_5', 'ring size_6', 'ring size_7', 'bond_length', 'extra_feat_bond_esp_total'], 'global': ['num atoms', 'num bonds', 'molecule weight']}
original loader node types: dict_keys(['atom', 'bond', 'global'])
original loader label types: dict_keys([])
include names:  dict_keys(['global'])
... > parsing labels and features in graphs


100%|██████████| 133848/133848 [00:04<00:00, 33130.61it/s]


original loader node types: dict_keys(['atom', 'bond', 'global'])
original loader label types: dict_keys(['global'])
... > Log scaling features
... > Log scaling features complete
... > Scaling features
mean [7.15019050e-01 2.09609839e-01 4.42638150e-02 5.92276711e-03
 1.63601755e-02 1.57111131e-02 5.19192391e-03 1.07783534e-03
 5.41201195e-02 9.54300379e-04 3.54128949e-01 2.43707123e-01
 4.02366904e-02 8.24172281e+00]
std [0.6036383  0.37273362 0.16947582 0.06379867 0.10522525 0.10316625
 0.05976463 0.02731184 0.18596833 0.02570136 0.34649123 0.3309558
 0.16208318 5.5485659 ]
mean [0.         0.05763829 0.00770921 0.02136993 0.02054987 0.00689768
 0.00150661 0.89991691 0.68164747]
std [0.         0.19138873 0.07269242 0.11981584 0.11756609 0.06880064
 0.0322805  0.37276119 0.22762408]
Standard deviation for feature 0 is 0.0, smaller than 0.001. You may want to exclude this feature.
mean [2.93093013 2.69683748 4.81626082]
std [0.16152484 0.13399224 0.06767423]
... > Scaling features co

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


model constructed!


[34m[1mwandb[0m: Currently logged in as: [33msanti[0m ([33mhydro_homies[0m). Use [1m`wandb login --relogin`[0m to force relogin


  rank_zero_warn(
  rank_zero_warn(
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name            | Type               | Params
--------------------------------------------------------
0  | embedding       | UnifySize          | 1.3 K 
1  | conv_layers     | ModuleList         | 95.4 K
2  | readout         | SumPoolingThenCat  | 0     
3  | loss            | MultioutputWrapper | 0     
4  | fc_layers       | ModuleList         | 78.8 K
5  | train_r2        | MultioutputWrapper | 0     
6  | train_torch_l1  | MultioutputWrapper | 0     
7  | train_torch_mse | MultioutputWrapper | 0     
8  | val_r2          | MultioutputWrapper | 0     
9  | val_torch_l1    | MultioutputWrapper | 0     
10 | val_torch_mse   | Mu

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



VBox(children=(Label(value='0.006 MB of 0.028 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.226765…

0,1
epoch,▁▁▂▂▂▂▃▃▄▄▅▅▅▅▆▆▇▇▇▇█
lr-Adam,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_loss,▁
test_mae,▁
test_mse,▁
test_r2,▁
train_loss,█▃▂▂▂▁▁▁▁▁
train_mae,█▄▃▃▂▂▂▁▁▁
train_mse,█▄▃▃▂▂▂▁▁▁
train_r2,▁▆▇▇▇█████

0,1
epoch,10.0
lr-Adam,0.0002
test_loss,0.005
test_mae,0.04592
test_mse,0.07073
test_r2,0.99498
train_loss,0.03301
train_mae,0.12893
train_mse,0.18169
train_r2,0.96699


In [17]:
import dgl

dl_test = dm.val_dataloader()
r2_list, mae_list, mse_list = [], [], []
# iterate over batches
for batch in dl_test:
    print("batch: ", len(batch))
    # batch_graphs
    batch_graphs = batch[0]
    
    batch_label = batch_graphs.ndata["labels"]
    scaler_list = dm.test_dataset.dataset.label_scalers
    r2_val, mae_val, mse_val = model.evaluate_manually(
        batch_graphs, batch_label, scaler_list
    )
    r2_list.append(r2_val)
    mae_list.append(mae_val)
    mse_list.append(mse_val)

print("r2: ", np.mean(r2_list))
print("mae: ", np.mean(mae_list))
print("mse: ", np.mean(mse_list))

batch:  2
r2:  0.9751056
mae:  4.5093374
mse:  6.3279834


In [14]:
mae_list

[tensor(4.5498)]

In [18]:
import torch_geometric

ModuleNotFoundError: No module named 'torch_geometric'

In [19]:
from dgl.data import QM9Dataset

In [23]:
QM9Dataset(label_keys=["u0"],transform=None)

Downloading /home/santiagovargas/.dgl/qm9_eV.npz from https://data.dgl.ai/dataset/qm9_eV.npz...


KeyError: 'u0 is not a file in the archive'