Load packages

In [1]:
import os
import schnetpack as spk
from schnetpack.datasets import QM9
import schnetpack.transform as trn

import torch
import torchmetrics
import pytorch_lightning as pl

qm9tut = './qm9tut'
if not os.path.exists('qm9tut'):
    os.makedirs(qm9tut)

Load the data

In [2]:
PROPERTY = QM9.homo
PROPERTIES = [QM9.homo, QM9.lumo, QM9.zpve, QM9.Cv]

In [3]:
%rm split.npz

qm9data = QM9(
    './qm9.db', 
    batch_size=100,
    num_train=110000,
    num_val=10000,
    transforms=[
        trn.ASENeighborList(cutoff=5.),
        # trn.RemoveOffsets(PROPERTY, remove_mean=True, remove_atomrefs=True), #remove for homo and lumo
        trn.CastTo32()
    ],
    num_workers=2,
    split_file=os.path.join(qm9tut, "split.npz"),
    pin_memory=True, # set to false, when not using a GPU
    load_properties=[PROPERTY] #only load relevant properties
)
qm9data.prepare_data()
qm9data.setup()

rm: cannot remove 'split.npz': No such file or directory


INFO:root:Downloading GDB-9 atom references...
INFO:root:Done.
INFO:root:Downloading GDB-9 data...
INFO:root:Done.
INFO:root:Extracting files...
INFO:root:Done.
INFO:root:Parse xyz files...
100%|██████████| 133885/133885 [01:53<00:00, 1175.01it/s]
INFO:root:Write atoms to db...
INFO:root:Done.


In [4]:
# get all properties in db
for property, value in qm9data.dataset[0].items():
    print(property, value.size)

_idx <built-in method size of Tensor object at 0x7fcac70c9170>
homo <built-in method size of Tensor object at 0x7fcac70c93f0>
_n_atoms <built-in method size of Tensor object at 0x7fcac70c9710>
_atomic_numbers <built-in method size of Tensor object at 0x7fcac70c9210>
_positions <built-in method size of Tensor object at 0x7fcac70c9440>
_cell <built-in method size of Tensor object at 0x7fcac70c92b0>
_pbc <built-in method size of Tensor object at 0x7fcac70c9350>


Setup the model

In [5]:
cutoff = 5.
n_atom_basis = 30

pairwise_distance = spk.atomistic.PairwiseDistances() # calculates pairwise distances between atoms
radial_basis = spk.nn.GaussianRBF(n_rbf=20, cutoff=cutoff)
schnet = spk.representation.SchNet(
    n_atom_basis=n_atom_basis, n_interactions=3,
    radial_basis=radial_basis,
    cutoff_fn=spk.nn.CosineCutoff(cutoff)
)
pred_property = spk.atomistic.Atomwise(n_in=n_atom_basis, output_key=PROPERTY)

nnpot = spk.model.NeuralNetworkPotential(
    representation=schnet,
    input_modules=[pairwise_distance],
    output_modules=[pred_property],
    postprocessors=[trn.CastTo64()] #, trn.AddOffsets(PROPERTY, add_mean=True, add_atomrefs=True)] #remove AddOffsets for homo and lumo
)

output = spk.task.ModelOutput(
    name=PROPERTY,
    loss_fn=torch.nn.MSELoss(),
    loss_weight=1.,
    metrics={
        "MAE": torchmetrics.MeanAbsoluteError()
    }
)

task = spk.task.AtomisticTask(
    model=nnpot,
    outputs=[output],
    optimizer_cls=torch.optim.AdamW,
    optimizer_args={"lr": 1e-4}
)

/home/oda/Desktop/DeepLearningProject/.venv/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.


In [6]:
logger = pl.loggers.TensorBoardLogger(save_dir=qm9tut)
callbacks = [
    spk.train.ModelCheckpoint(
        model_path=os.path.join(qm9tut, "best_inference_model"),
        save_top_k=1,
        monitor="val_loss"
    )
]

trainer = pl.Trainer(
    callbacks=callbacks,
    logger=logger,
    default_root_dir=qm9tut,
    max_epochs=1, # for testing, we restrict the number of epochs
)
trainer.fit(task, datamodule=qm9data)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: ./qm9tut/lightning_logs

  | Name    | Type                   | Params
---------------------------------------------------
0 | model   | NeuralNetworkPotential | 16.4 K
1 | outputs | ModuleList             | 0     
---------------------------------------------------
16.4 K    Trainable params
0         Non-trainable params
16.4 K    Total params
0.066     Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:01<00:00,  1.15it/s]

/home/oda/Desktop/DeepLearningProject/.venv/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 100. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Epoch 0:  63%|██████▎   | 696/1100 [12:08<07:02,  0.96it/s, v_num=0]       

/home/oda/Desktop/DeepLearningProject/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
