In [1]:
import os
from schnetpack.datasets import QM9
import schnetpack.transform as trn

import torch

qm9tut = './qm9tut'
if not os.path.exists('qm9tut'):
    os.makedirs(qm9tut)


**NOTE:** Set batch size to 1!

In [2]:
# %rm split.npz

DB_PATH = "./qm9.db"
PROPERTIES = [QM9.homo, QM9.lumo, QM9.Cv, QM9.zpve]
BATCH_SIZE = 1
NUM_TRAIN = 110000
NUM_VALIDATION = 10000
CUTOFF = 5.
N_ATOM_BASIS = 32
T = 3
EPOCHS = 3
LR = 1e-4
NUM_WORKERS = 1
PIN_MEMORY = True

torch.manual_seed(0)

<torch._C.Generator at 0x7f4afa1bf0f0>

In [3]:
import os

models_folder = "./models"
model_dict = {}

for filename in os.listdir(models_folder):
    if filename.endswith(".pt"):
        name = os.path.splitext(filename)[0]
        model_dict[name] = torch.load(os.path.join(models_folder, filename), map_location=torch.device('cpu'))

print(model_dict)


{'zpve_model': NeuralNetworkPotential(
  (postprocessors): ModuleList(
    (0): CastTo64()
  )
  (representation): SchNet(
    (radial_basis): GaussianRBF()
    (cutoff_fn): CosineCutoff()
    (embedding): Embedding(100, 32, padding_idx=0)
    (interactions): ModuleList(
      (0-2): 3 x SchNetInteraction(
        (in2f): Dense(
          in_features=32, out_features=32, bias=False
          (activation): Identity()
        )
        (f2out): Sequential(
          (0): Dense(in_features=32, out_features=32, bias=True)
          (1): Dense(
            in_features=32, out_features=32, bias=True
            (activation): Identity()
          )
        )
        (filter_network): Sequential(
          (0): Dense(in_features=20, out_features=32, bias=True)
          (1): Dense(
            in_features=32, out_features=32, bias=True
            (activation): Identity()
          )
        )
      )
    )
  )
  (input_modules): ModuleList(
    (0): PairwiseDistances()
  )
  (output_modules):

In [4]:
qm9data = QM9(
    DB_PATH,
    batch_size=BATCH_SIZE,
    num_train=NUM_TRAIN,
    num_val=NUM_VALIDATION,
    transforms=[
        trn.ASENeighborList(cutoff=float(CUTOFF)),
        trn.CastTo32()
    ],
    num_workers=NUM_WORKERS,
    split_file=os.path.join(qm9tut, "split.npz"),
    pin_memory=PIN_MEMORY, # set to false, when not using a GPU
    load_properties=PROPERTIES, #only load U0 property
)
qm9data.prepare_data()
qm9data.setup()

In [5]:
from data_handler import QM9DataHandler

dh = QM9DataHandler(qm9data)

dh.fetch_data(PROPERTIES)

100%|██████████| 10000/10000 [01:37<00:00, 102.18it/s]


When data is fetch, you can parse a trained model to the `fetch_model_output`-function. This will return the output of the model for the given data. The output will update the previous `dict`, such that it will contain the following keys:
- `positions`: The positions of the atoms in the molecule
- `atom_numbers`: The atomic numbers in sequence in the given molecule.
- `atom_mask`: A mask to indicate the positions of the chosen atom in the molecule.
- `properties`: The propery values of the molecule.
- `embeddings`: The embeddings of the atoms in the molecule.
- `predictions`: The output of the model for the given molecule.

In [6]:
for name, model in model_dict.items():
    print(f"Fetching outputs for: {name}")
    dh.fetch_model_outputs(model)
    dh.save(name)

Fetching outputs for: zpve_model


100%|██████████| 10000/10000 [00:49<00:00, 201.59it/s]


Fetching outputs for: TLF_Cv_zpve_model


100%|██████████| 10000/10000 [00:45<00:00, 218.82it/s]


Fetching outputs for: TL_lumo_homo_model


100%|██████████| 10000/10000 [00:47<00:00, 211.98it/s]


Fetching outputs for: TL_Cv_zpve_model


100%|██████████| 10000/10000 [00:50<00:00, 199.69it/s]


Fetching outputs for: TL_homo_lumo_model


100%|██████████| 10000/10000 [00:47<00:00, 208.69it/s]


Fetching outputs for: homo_model


100%|██████████| 10000/10000 [00:49<00:00, 200.20it/s]


Fetching outputs for: TLF_zpve_Cv_model


100%|██████████| 10000/10000 [00:47<00:00, 210.97it/s]


Fetching outputs for: TL_zpve_Cv_model


100%|██████████| 10000/10000 [00:51<00:00, 193.91it/s]


Fetching outputs for: Cv_model


100%|██████████| 10000/10000 [00:50<00:00, 196.86it/s]


Fetching outputs for: homo_lumo_model


100%|██████████| 10000/10000 [00:51<00:00, 192.73it/s]


Fetching outputs for: lumo_model


100%|██████████| 10000/10000 [00:51<00:00, 193.38it/s]


Fetching outputs for: Cv_zpve_model


100%|██████████| 10000/10000 [00:54<00:00, 184.26it/s]


Fetching outputs for: TLF_lumo_homo_model


100%|██████████| 10000/10000 [00:49<00:00, 201.69it/s]


Fetching outputs for: TLF_homo_lumo_model


100%|██████████| 10000/10000 [00:51<00:00, 194.56it/s]


In [13]:
import pickle

data_folder = os.path.join(os.getcwd(), "data")
files = os.listdir(data_folder)
file_paths = [os.path.join(data_folder, basename) for basename in files]

for data_file in file_paths:
    with open(data_file, "rb") as file:
        loaded_data = pickle.load(file)

    # Print the loaded data
    print(data_file)
    print(loaded_data[3230]["properties"])
    print(loaded_data[3230]["predictions"])
    print()


/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_TLF_lumo_homo_model.pkl
[-0.252       0.0542     36.32600021  0.180738  ]
[-0.24687959253787994 None None None]

/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_TL_lumo_homo_model.pkl
[-0.252       0.0542     36.32600021  0.180738  ]
[-0.24681030213832855 None None None]

/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_TL_Cv_zpve_model.pkl
[-0.252       0.0542     36.32600021  0.180738  ]
[None None None 0.18087434768676758]

/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_homo_lumo_model.pkl
[-0.252       0.0542     36.32600021  0.180738  ]
[-0.23666533827781677 0.05192480608820915 None None]

/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_zpve_model.pkl
[-0.252       0.0542     36.32600021  0.180738  ]
[None None None 0.18036428093910217]

/home/aimas/dtu/dl/DeepLearningProject/data_handling/data/outputs_Cv_zpve_model.pkl
[-0.252       0.0542     