In [55]:
import random
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
from rdkit import Chem
from pathlib import Path
import torch.nn.functional as F
from torch.utils.data import Subset
from torch_geometric.data import Data
from rdkit.Chem import SDMolSupplier
from torch_geometric.data import InMemoryDataset
from torch_geometric.loader import DataLoader
from sklearn.metrics import mean_squared_error, r2_score
from torch_geometric.nn import SAGEConv, global_mean_pool

In [73]:
SEED = 42

In [68]:
class BaselineGNN(nn.Module):
    def __init__(self, in_channels, hidden_channels=64, num_layers=2):
        super().__init__()
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        self.convs.append(SAGEConv(in_channels, hidden_channels))
        self.bns.append(nn.BatchNorm1d(hidden_channels))
        for i in range(num_layers-1):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
            self.bns.append(nn.BatchNorm1d(hidden_channels))

        self.pool = global_mean_pool
        
        self.head = nn.Sequential(
            nn.Linear(hidden_channels, hidden_channels//2),
            nn.ReLU(),
            nn.Linear(hidden_channels//2, 1)
        )

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        for conv, bn in zip(self.convs, self.bns):
            x = conv(x, edge_index)
            x = bn(x)
            x = F.relu(x)
        batch = data.batch if hasattr(data, "batch") else torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        h = self.pool(x, batch)
        out = self.head(h).squeeze(-1)
        return out


In [95]:
COMMON_ATOMS = ["H", "C", "N", "O", "F", "P", "S", "Cl", "Br", "I", "B", "Si", "Se"]
ATOM_MAP = {a: i for i, a in enumerate(COMMON_ATOMS)}

HYBRIDIZATION_MAP = {
    Chem.rdchem.HybridizationType.SP: 0,
    Chem.rdchem.HybridizationType.SP2: 1,
    Chem.rdchem.HybridizationType.SP3: 2,
    Chem.rdchem.HybridizationType.SP3D: 3,
    Chem.rdchem.HybridizationType.SP3D2: 4
}

def one_hot(x, choices):
    out = [0]*len(choices)
    if x in choices:
        out[choices.index(x)] = 1
    return out

def atom_to_feature_vector(atom):
    symbol = atom.GetSymbol()
    atom_onehot = one_hot(symbol, COMMON_ATOMS)
    degree = atom.GetDegree()  # int
    formal_charge = atom.GetFormalCharge()
    num_hs = atom.GetTotalNumHs()
    aromatic = 1 if atom.GetIsAromatic() else 0
    hybrid = HYBRIDIZATION_MAP.get(atom.GetHybridization(), -1)
    hybrid_oh = one_hot(hybrid, list(range(len(HYBRIDIZATION_MAP)+1)))  # include -1 as index 0?
    vec = atom_onehot + [degree, formal_charge, num_hs, aromatic] + hybrid_oh
    return np.array(vec, dtype=np.float32)

def featurize_rdkit_mol(mol: Chem.Mol, use_explicit_hs: bool = True):
    if mol is None:
        return None

    if use_explicit_hs:
        try:
            mol = Chem.AddHs(mol)
        except Exception:
            pass

    num_atoms = mol.GetNumAtoms()
    node_feats = np.vstack([atom_to_feature_vector(a) for a in mol.GetAtoms()]) if num_atoms > 0 else np.zeros((0, len(COMMON_ATOMS)+5+len(HYBRIDIZATION_MAP)+1), dtype=np.float32)

    edges = []
    edge_attrs = []
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        bt = bond.GetBondType()
        is_aromatic = 1 if bond.GetIsAromatic() else 0
        bt_oh = [
            1 if bt == Chem.rdchem.BondType.SINGLE else 0,
            1 if bt == Chem.rdchem.BondType.DOUBLE else 0,
            1 if bt == Chem.rdchem.BondType.TRIPLE else 0,
            1 if bt == Chem.rdchem.BondType.AROMATIC else 0,
        ]
        edge_attr = bt_oh + [is_aromatic]
        edges.append((i, j))
        edges.append((j, i))
        edge_attrs.append(edge_attr)
        edge_attrs.append(edge_attr)

    if len(edges) > 0:
        edge_index = np.array(edges, dtype=np.int64).T  # [2, E]
        edge_attr = np.array(edge_attrs, dtype=np.float32)
    else:
        edge_index = np.zeros((2,0), dtype=np.int64)
        edge_attr = np.zeros((0,5), dtype=np.float32)

    pos = np.zeros((num_atoms, 3), dtype=np.float32)
    if mol.GetNumConformers() > 0:
        conf = mol.GetConformer(0)
        for i in range(num_atoms):
            p = conf.GetAtomPosition(i)
            pos[i] = [p.x, p.y, p.z]
    else:
        try:
            AllChem.EmbedMolecule(mol, randomSeed=0xf00d)
            conf = mol.GetConformer()
            for i in range(num_atoms):
                p = conf.GetAtomPosition(i)
                pos[i] = [p.x, p.y, p.z]
        except Exception:
            pass

    node_feats = torch.tensor(node_feats)
    edge_index = torch.tensor(edge_index, dtype=torch.long)
    edge_attr = torch.tensor(edge_attr)
    pos = torch.tensor(pos, dtype=torch.float32)

    return {
        "x": node_feats,
        "edge_index": edge_index,
        "edge_attr": edge_attr,
        "pos": pos
    }

In [96]:
class PDBBindLigandDataset(InMemoryDataset):
    def __init__(self, metadata_csv="data/processed/refined_dataset_metadata.csv", 
                 root="data/processed/graphs", transform=None, 
                 pre_transform=None, force_rebuild=False):
        self.metadata_csv = Path(metadata_csv)
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        self.cache_file = self.root / "dataset.pt"
        self.force_rebuild = force_rebuild
        super().__init__(self.root, transform, pre_transform)
        if self.cache_file.exists() and not self.force_rebuild:
            self.data, self.slices = torch.load(self.cache_file)
        else:
            self.process()
            self.data, self.slices = torch.load(self.cache_file)

    @property
    def raw_file_names(self):
        return [self.metadata_csv.name]

    @property
    def processed_file_names(self):
        return [self.cache_file.name]

    def process(self):
        df = pd.read_csv(self.metadata_csv)
        data_list = []
        skipped = 0
        for idx, row in df.iterrows():
            cid = row["complex_id"]
            ligand_path = Path(row["ligand_file"])
            affinity = float(row["affinity"])
            if not ligand_path.exists():
                skipped += 1
                continue
                
            mol = None
            if ligand_path.suffix.lower() == ".sdf":
                supplier = SDMolSupplier(str(ligand_path), removeHs=False)
                mol = supplier[0]
            else:
                mol = Chem.MolFromMolFile(str(ligand_path), removeHs=False)

            if mol is None:
                skipped += 1
                continue

            feats = featurize_rdkit_mol(mol)
            if feats is None:
                skipped += 1
                continue

            data = Data(x = feats["x"], edge_index = feats["edge_index"],
                edge_attr = feats["edge_attr"], pos = feats["pos"] if feats["pos"].shape[0] > 0 else None,
                y = torch.tensor([affinity], dtype=torch.float32), complex_id = cid)
            data_list.append(data)

        print(f"done processing. total processed: {len(data_list)}. skipped: {skipped}")
        if len(data_list) == 0:
            raise RuntimeError("no data processed")

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.cache_file)
        print(f"saved processed dataset to {self.cache_file}")

    def get(self, idx):
        return super().get(idx)

    def len(self):
        return super().len()


In [97]:
def set_seed(seed=SEED):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

In [98]:
def scaffold_split(dataset, frac_train=0.8, frac_val=0.1, frac_test=0.1, seed=SEED):
    num = len(dataset)
    idx = list(range(num))
    random.Random(seed).shuffle(idx)
    ntrain = int(frac_train * num)
    nval = int(frac_val * num)
    train_idx = idx[:ntrain]
    val_idx = idx[ntrain:ntrain+nval]
    test_idx = idx[ntrain+nval:]
    return train_idx, val_idx, test_idx

In [99]:
dataset = PDBBindLigandDataset(metadata_csv="data/processed/refined_dataset_metadata.csv", 
                                   root="data/processed/graphs", force_rebuild=False)

scaffold_split(dataset)

Processing...
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13
[11:28:36] ERROR: Could not sanitize molecule ending on line 52
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
[11:28:36] ERROR: Could not sanitize molecule ending on line 81
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[11:28:36] ERROR: Could not sanitize molecule ending on line 75
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:36] ERROR: Could not sanitize molecule ending on line 42
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 4 10 11 12 13
[11:28:36] ERROR: Could not sanitize molecule ending on line 60
[11:28:36] ERROR: Can't kekulize mol.  Un

[11:28:36] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
[11:28:36] ERROR: Could not sanitize molecule ending on line 36
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
[11:28:36] ERROR: Could not sanitize molecule ending on line 44
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 5 6 7 8 9 10
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[11:28:36] ERROR: Could not sanitize molecule ending on line 65
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 0 1 3 4 5 6 7 9 10
[11:28:36] ERROR: Could not sanitize molecule ending on line 34
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 3 4 5 6 7 9 10
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 26
[11:28:36] ERROR: Could not sanitize molecule ending on line 107
[11:28:36] ERROR: Can't kekulize mol.  

[11:28:36] Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25 26 27 28 29
[11:28:36] ERROR: Could not sanitize molecule ending on line 110
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25 26 27 28 29
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[11:28:36] ERROR: Could not sanitize molecule ending on line 66
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 0 1 9 10 11 12 13 14 15
[11:28:36] ERROR: Could not sanitize molecule ending on line 63
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 9 10 11 12 13 14 15
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:36] ERROR: Could not sanitize molecule ending on line 67
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 21 23 24 25 26
[11:28:36] ERROR: Could not sanitize molecule ending on line 117
[11:

[11:28:36] Can't kekulize mol.  Unkekulized atoms: 0 1 4 5 7 9 10 11 14
[11:28:36] ERROR: Could not sanitize molecule ending on line 99
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 4 5 7 9 10 11 14
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[11:28:36] ERROR: Could not sanitize molecule ending on line 196
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
[11:28:36] ERROR: Could not sanitize molecule ending on line 47
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 6 7 8 9 10
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
[11:28:36] ERROR: Could not sanitize molecule ending on line 101
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
[11:28:36] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
[11:28:36] ERROR: Could not sanitize molecule ending on line 45
[11:28:36] ERROR: Can't kekulize mol.  Unkekulized atoms:

[11:28:37] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[11:28:37] ERROR: Could not sanitize molecule ending on line 91
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 10 11 12 13
[11:28:37] ERROR: Could not sanitize molecule ending on line 57
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 10 11 12 13
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 10 11 12 13
[11:28:37] ERROR: Could not sanitize molecule ending on line 69
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 10 11 12 13
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[11:28:37] ERROR: Could not sanitize molecule ending on line 67
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:37] ERROR: Could not sanitize molecule ending on line 99
[11:28:37] ERROR: Can

[11:28:37] Can't kekulize mol.  Unkekulized atoms: 8 12 13 14 15
[11:28:37] ERROR: Could not sanitize molecule ending on line 76
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 12 13 14 15
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:37] ERROR: Could not sanitize molecule ending on line 101
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[11:28:37] ERROR: Could not sanitize molecule ending on line 139
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:37] ERROR: Could not sanitize molecule ending on line 116
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
[11:28:37] ERROR: Could not sanitize molecule ending on line 72
[11:28:37] ERROR: Can't kekulize mol.  Unke

[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 11
[11:28:37] ERROR: Could not sanitize molecule ending on line 56
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 11
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9 10 11 12 13
[11:28:37] ERROR: Could not sanitize molecule ending on line 94
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9 10 11 12 13
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 9 10 11 12 13
[11:28:37] ERROR: Could not sanitize molecule ending on line 47
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 9 10 11 12 13
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8 9 10 11 12
[11:28:37] ERROR: Could not sanitize molecule ending on line 44
[11:28:37] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8 9 10 11 12
[11:28:37] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 11 17 18
[11:28:37] ERROR: Could not sanitize molecule ending on line 63
[11:28:37] ERROR: Ca

[11:28:38] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 11 17 18 19 20 21
[11:28:38] ERROR: Could not sanitize molecule ending on line 103
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 11 17 18 19 20 21
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 42 43 44 45 46 47 48 49 50
[11:28:38] ERROR: Could not sanitize molecule ending on line 270
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 42 43 44 45 46 47 48 49 50
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
[11:28:38] ERROR: Could not sanitize molecule ending on line 101
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:38] ERROR: Could not sanitize molecule ending on line 93
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[11:28:38] ERROR: Could not sanitize molecule ending on lin

[11:28:38] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:38] ERROR: Could not sanitize molecule ending on line 77
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 8 9
[11:28:38] ERROR: Could not sanitize molecule ending on line 32
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 8 9
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 10 11 14 15 17
[11:28:38] ERROR: Could not sanitize molecule ending on line 74
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 10 11 14 15 17
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 49 50 51 52 53 54 55 56 57
[11:28:38] ERROR: Could not sanitize molecule ending on line 260
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 49 50 51 52 53 54 55 56 57
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 20 21 22 23
[11:28:38] ERROR: Could not sanitize molecule ending on line 93
[11:28:38] ERR

[11:28:38] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
[11:28:38] ERROR: Could not sanitize molecule ending on line 132
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11 12 13 14 15
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[11:28:38] ERROR: Could not sanitize molecule ending on line 119
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 14 17 19
[11:28:38] ERROR: Could not sanitize molecule ending on line 78
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 14 17 19
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[11:28:38] ERROR: Could not sanitize molecule ending on line 77
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 17 18 19 21 22
[11:28:38] ERROR: Could not sanitize molecule ending on line 127
[11:28:38] ERROR: Can

[11:28:38] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[11:28:38] ERROR: Could not sanitize molecule ending on line 277
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[11:28:38] ERROR: Could not sanitize molecule ending on line 105
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 3 4 5 6
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 35 36 37 38 39
[11:28:38] ERROR: Could not sanitize molecule ending on line 144
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 35 36 37 38 39
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 1 2 3 12 13 14 15
[11:28:38] ERROR: Could not sanitize molecule ending on line 135
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 12 13 14 15
[11:28:38] Can't kekulize mol.  Unkekulized atoms: 10 11 12 23 24
[11:28:38] ERROR: Could not sanitize molecule ending on line 114
[11:28:38] ERROR: Can't kekulize mol.  Unkekulized atoms:

[11:28:39] Can't kekulize mol.  Unkekulized atoms: 1 11 13 25 26
[11:28:39] ERROR: Could not sanitize molecule ending on line 128
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 11 13 25 26
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 39 40 41 42 43
[11:28:39] ERROR: Could not sanitize molecule ending on line 171
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 39 40 41 42 43
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25
[11:28:39] ERROR: Could not sanitize molecule ending on line 194
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 2 3 5 9 15 16 17 18 19 21 22 23 24
[11:28:39] ERROR: Could not sanitize molecule ending on line 122
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 3 5 9 15 16 17 18 19 21 22 23 24
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:39] ERROR: Could not sanitize molecule ending on line 103
[11:28:39] E

[11:28:39] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[11:28:39] ERROR: Could not sanitize molecule ending on line 99
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:39] ERROR: Could not sanitize molecule ending on line 101
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18 19 20 52 53
[11:28:39] ERROR: Could not sanitize molecule ending on line 221
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18 19 20 52 53
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 18
[11:28:39] ERROR: Could not sanitize molecule ending on line 73
[11:28:39] ERROR: Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 18
[11:28:39] Can't kekulize mol.  Unkekulized atoms: 0 9 11 16 19
[11:28:39] ERROR: Could not sanitize molecule ending on line 120
[11:28:39] ERROR: Can't

done processing. total processed: 4640. skipped: 676
saved processed dataset to data/processed/graphs/dataset.pt


([3477,
  742,
  221,
  2021,
  232,
  3940,
  3575,
  1899,
  3369,
  2215,
  2279,
  3179,
  2728,
  4368,
  3913,
  4037,
  2890,
  2176,
  1995,
  1620,
  4217,
  3640,
  533,
  3120,
  2051,
  1011,
  2178,
  2477,
  112,
  3191,
  3569,
  1635,
  3596,
  1766,
  3034,
  3518,
  1253,
  431,
  3118,
  1200,
  1611,
  1371,
  3916,
  3839,
  4423,
  3440,
  1079,
  1597,
  4064,
  2754,
  1553,
  4623,
  4156,
  415,
  1471,
  3244,
  545,
  1572,
  3862,
  2062,
  1162,
  1830,
  337,
  1342,
  2658,
  1187,
  1661,
  1452,
  2223,
  3451,
  2627,
  1191,
  3629,
  3992,
  4462,
  3490,
  2099,
  1312,
  296,
  2688,
  636,
  1232,
  2608,
  1041,
  1435,
  1021,
  2287,
  1416,
  1538,
  2766,
  3653,
  571,
  730,
  3765,
  4235,
  4318,
  3637,
  684,
  3442,
  126,
  716,
  1778,
  2631,
  3918,
  100,
  657,
  39,
  2984,
  4309,
  2636,
  3827,
  530,
  4633,
  42,
  3270,
  3526,
  3590,
  754,
  71,
  2924,
  1098,
  1930,
  273,
  158,
  622,
  2735,
  2523,
  412,
  3944

In [82]:
def train_one_epoch(model, loader, optim, device):
    model.train()
    total_loss = 0.0
    for batch in loader:
        batch = batch.to(device)
        optim.zero_grad()
        pred = model(batch)
        loss = torch.nn.functional.mse_loss(pred, batch.y.view(-1).to(pred.dtype))
        loss.backward()
        optim.step()
        total_loss += loss.item() * batch.num_graphs
    return total_loss / len(loader.dataset)

In [83]:
device = ("cuda" if torch.cuda.is_available() else "cpu")

train_idx, val_idx, test_idx = scaffold_split(dataset, seed=SEED)
train_ds = Subset(dataset, train_idx)
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True)

sample = dataset.get(0)
in_channels = sample.x.shape[1]
model = BaselineGNN(in_channels=in_channels, hidden_channels=128, num_layers=3).to(device)
optim = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)

train_one_epoch(model, train_loader, optim, device)

4.034849620054893

In [None]:
def evaluate(model, loader, device):
    model.eval()
    ys, preds = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            out = model(batch)
            ys.append(batch.y.cpu().numpy().reshape(-1))
            preds.append(out.cpu().numpy().reshape(-1))
    y = np.concatenate(ys)
    p = np.concatenate(preds)
    rmse = mean_squared_error(y, p, squared=False)
    r2 = r2_score(y, p)
    return {"rmse": float(rmse), "r2": float(r2)}

In [85]:
test_ds = Subset(dataset, test_idx)
test_loader = DataLoader(test_ds, batch_size=4, shuffle=False)

evaluate(model, test_loader, device)

{'rmse': 1.7373987436294556, 'r2': 0.19658255134808622}

In [88]:
def train(batch_size=32, epochs=3, lr=1e-3, device=None):
    set_seed()
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Starting ligand-only baseline training on device={device}")

    dataset = PDBBindLigandDataset(metadata_csv="data/processed/refined_dataset_metadata.csv", 
                                   root="data/processed/graphs", force_rebuild=False)
    print(f"Dataset size: {len(dataset)}")

    train_idx, val_idx, test_idx = scaffold_split(dataset, seed=SEED)

    train_ds = Subset(dataset, train_idx)
    val_ds = Subset(dataset, val_idx)
    test_ds = Subset(dataset, test_idx)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    sample = dataset.get(0)
    in_channels = sample.x.shape[1]
    model = BaselineGNN(in_channels=in_channels, hidden_channels=128, num_layers=3).to(device)
    optim = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)

    best_val = float("inf")
    for epoch in range(1, epochs+1):
        train_loss = train_one_epoch(model, train_loader, optim, device)
        val_metrics = evaluate(model, val_loader, device)
        test_metrics = evaluate(model, test_loader, device)

        print(f"Epoch {epoch:03d} | train_loss {train_loss:.4f} | val_rmse {val_metrics['rmse']:.4f} | test_rmse {test_metrics['rmse']:.4f}")
    
        if val_metrics["rmse"] < best_val:
            best_val = val_metrics["rmse"]

    print("Training complete. Best val rmse: %.4f" % best_val)

In [89]:
train()

Starting ligand-only baseline training on device=cpu


Processing...
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13
[10:36:52] ERROR: Could not sanitize molecule ending on line 52
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
[10:36:52] ERROR: Could not sanitize molecule ending on line 81
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 19 23
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[10:36:52] ERROR: Could not sanitize molecule ending on line 75
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:52] ERROR: Could not sanitize molecule ending on line 42
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 4 10 11 12 13
[10:36:52] ERROR: Could not sanitize molecule ending on line 60
[10:36:52] ERROR: Can't kekulize mol.  Un

[10:36:52] Can't kekulize mol.  Unkekulized atoms: 0 6 7 8 9
[10:36:52] ERROR: Could not sanitize molecule ending on line 54
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 6 7 8 9
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:52] ERROR: Could not sanitize molecule ending on line 46
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:52] ERROR: Could not sanitize molecule ending on line 56
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 19 20 21 22 23
[10:36:52] ERROR: Could not sanitize molecule ending on line 150
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 19 20 21 22 23
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 19 20 21 22 23
[10:36:52] ERROR: Could not sanitize molecule ending on line 136
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 19 20 

[10:36:52] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 10 13 14 15 16
[10:36:52] ERROR: Could not sanitize molecule ending on line 64
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 10 13 14 15 16
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 21 23 24 25 26
[10:36:52] ERROR: Could not sanitize molecule ending on line 122
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 23 24 25 26
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:52] ERROR: Could not sanitize molecule ending on line 38
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
[10:36:52] ERROR: Could not sanitize molecule ending on line 45
[10:36:52] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
[10:36:52] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 14
[10:36:52] ERROR: Could not sanitize molecule ending on line 67
[10:36:52] ERROR: Can't kekulize mol.  Unkekulize

[10:36:53] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[10:36:53] ERROR: Could not sanitize molecule ending on line 66
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 0 1 9 10 11 12 13 14 15
[10:36:53] ERROR: Could not sanitize molecule ending on line 63
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 9 10 11 12 13 14 15
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:53] ERROR: Could not sanitize molecule ending on line 67
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 21 23 24 25 26
[10:36:53] ERROR: Could not sanitize molecule ending on line 117
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 23 24 25 26
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 29 30 31 32 33 34 35 36 37
[10:36:53] ERROR: Could not sanitize molecule ending on line 177
[10:36:53] ERROR

[10:36:53] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 6 7 8 9
[10:36:53] ERROR: Could not sanitize molecule ending on line 34
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 6 7 8 9
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 7 8 10 11 14 18 20 21 22
[10:36:53] ERROR: Could not sanitize molecule ending on line 83
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 7 8 10 11 14 18 20 21 22
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:53] ERROR: Could not sanitize molecule ending on line 52
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 8 9 10 13 14
[10:36:53] ERROR: Could not sanitize molecule ending on line 69
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 10 13 14
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 0 1 2 4 5 6 8 9 10
[10:36:53] ERROR: Could not sanitize molecule ending on line 65
[10:36:53] ERROR: Can't ke

[10:36:53] Can't kekulize mol.  Unkekulized atoms: 32 33 34 35 36
[10:36:53] ERROR: Could not sanitize molecule ending on line 228
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 32 33 34 35 36
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 21 22 25 28 32
[10:36:53] ERROR: Could not sanitize molecule ending on line 109
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 22 25 28 32
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[10:36:53] ERROR: Could not sanitize molecule ending on line 91
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:53] ERROR: Could not sanitize molecule ending on line 56
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 22 23 24 27 31
[10:36:53] ERROR: Could not sanitize molecule ending on line 101
[10:36:53] ERROR: Can't kekulize mol.  Unkekulize

[10:36:53] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
[10:36:53] ERROR: Could not sanitize molecule ending on line 60
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
[10:36:53] Explicit valence for atom # 0 O, 2, is greater than permitted
[10:36:53] ERROR: Could not sanitize molecule ending on line 199
[10:36:53] ERROR: Explicit valence for atom # 0 O, 2, is greater than permitted
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 18
[10:36:53] ERROR: Could not sanitize molecule ending on line 101
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 18
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
[10:36:53] ERROR: Could not sanitize molecule ending on line 54
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 10 11 12 13
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 26 27 28 31 35
[10:36:53] ERROR: Could not sanitize molecule ending on line 121
[10:36:53] ERROR: Can

[10:36:53] Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:53] ERROR: Could not sanitize molecule ending on line 57
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7
[10:36:53] ERROR: Could not sanitize molecule ending on line 37
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 4 5 7
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 8 9 12 15 19
[10:36:53] ERROR: Could not sanitize molecule ending on line 109
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 12 15 19
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 2 9 10 13 15 16
[10:36:53] ERROR: Could not sanitize molecule ending on line 75
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 9 10 13 15 16
[10:36:53] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[10:36:53] ERROR: Could not sanitize molecule ending on line 91
[10:36:53] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27


[10:36:54] Can't kekulize mol.  Unkekulized atoms: 4 5 8 9 13 15 19 21 22
[10:36:54] ERROR: Could not sanitize molecule ending on line 83
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 5 8 9 13 15 19 21 22
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
[10:36:54] ERROR: Could not sanitize molecule ending on line 36
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 10 11
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12 13 14 15 16
[10:36:54] ERROR: Could not sanitize molecule ending on line 71
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12 13 14 15 16
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16 17 18 19 23
[10:36:54] ERROR: Could not sanitize molecule ending on line 110
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16 17 18 19 23
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 8 9 10
[10:36:54] ERROR: Could not sanitize molecule ending on

[10:36:54] Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[10:36:54] ERROR: Could not sanitize molecule ending on line 139
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 7 8 9 10 11
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:54] ERROR: Could not sanitize molecule ending on line 116
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
[10:36:54] ERROR: Could not sanitize molecule ending on line 72
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 3 4 5 6 7 8 9 10
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[10:36:54] ERROR: Could not sanitize molecule ending on line 111
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 23 27
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 22 23 24 25 26
[10:36:54] ERROR: Could not sanitize molecule ending on line 92
[10:36:54] ERROR: Can't kekulize mol.  Unke

[10:36:54] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 11 12
[10:36:54] ERROR: Could not sanitize molecule ending on line 55
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 8 9 11 12
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[10:36:54] ERROR: Could not sanitize molecule ending on line 32
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 9 10 14 15 16
[10:36:54] ERROR: Could not sanitize molecule ending on line 92
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 9 10 14 15 16
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:54] ERROR: Could not sanitize molecule ending on line 146
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 6 7 8 13 15 16
[10:36:54] ERROR: Could not sanitize molecule ending on line 69
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized at

[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 11
[10:36:54] ERROR: Could not sanitize molecule ending on line 56
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 11
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9 10 11 12 13
[10:36:54] ERROR: Could not sanitize molecule ending on line 94
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9 10 11 12 13
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 9 10 11 12 13
[10:36:54] ERROR: Could not sanitize molecule ending on line 47
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 8 9 10 11 12 13
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8 9 10 11 12
[10:36:54] ERROR: Could not sanitize molecule ending on line 44
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 4 5 7 8 9 10 11 12
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 3 4 5 6 7 9 11 17 18
[10:36:54] ERROR: Could not sanitize molecule ending on line 63
[10:36:54] ERROR: Ca

[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 7 12 14 15 16
[10:36:54] ERROR: Could not sanitize molecule ending on line 110
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 7 12 14 15 16
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 0 4 14 15 16 17 18 19 20
[10:36:54] ERROR: Could not sanitize molecule ending on line 90
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 4 14 15 16 17 18 19 20
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[10:36:54] ERROR: Could not sanitize molecule ending on line 200
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
[10:36:54] ERROR: Could not sanitize molecule ending on line 294
[10:36:54] ERROR: Can't kekulize mol.  Unkekulized atoms: 23 24 25 26 27
[10:36:54] Can't kekulize mol.  Unkekulized atoms: 22 23 24 25 26 27 28 29 30
[10:36:54] ERROR: Could not sanitize molecule ending on line 124
[10:

[10:36:55] Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
[10:36:55] ERROR: Could not sanitize molecule ending on line 90
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 5 6 7 8 9 10 11 12
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:55] ERROR: Could not sanitize molecule ending on line 82
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 8 9 10 11 12
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 9
[10:36:55] ERROR: Could not sanitize molecule ending on line 73
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 9
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] ERROR: Could not sanitize molecule ending on line 112
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] ERROR: Could not sanitize molecule ending on line 73
[10:36:55] ERROR: Can't kekulize mol.  Unkek

[10:36:55] Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
[10:36:55] ERROR: Could not sanitize molecule ending on line 147
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 14 15 18 22
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 10 11 12 13 14 15 16 17 18
[10:36:55] ERROR: Could not sanitize molecule ending on line 84
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 10 11 12 13 14 15 16 17 18
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 13 15 16 17 18 25
[10:36:55] ERROR: Could not sanitize molecule ending on line 119
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 13 15 16 17 18 25
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] ERROR: Could not sanitize molecule ending on line 90
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 30 31 32 35 39
[10:36:55] ERROR: Could not sanitize molecule ending on line 138
[10:36:55] ERRO

[10:36:55] Can't kekulize mol.  Unkekulized atoms: 17 18 21 24 28
[10:36:55] ERROR: Could not sanitize molecule ending on line 99
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 17 18 21 24 28
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] ERROR: Could not sanitize molecule ending on line 67
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
[10:36:55] ERROR: Could not sanitize molecule ending on line 45
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 24 28 29 30 41
[10:36:55] ERROR: Could not sanitize molecule ending on line 166
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 24 28 29 30 41
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 5 6 7 8 9 18 19 20 21
[10:36:55] ERROR: Could not sanitize molecule ending on line 109
[10:36:55] ERROR: Can't kekulize mol.  Unkekulize

[10:36:55] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 20 21 22 24 25
[10:36:55] ERROR: Could not sanitize molecule ending on line 133
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 20 21 22 24 25
[10:36:55] Explicit valence for atom # 0 C, 4, is greater than permitted
[10:36:55] ERROR: Could not sanitize molecule ending on line 194
[10:36:55] ERROR: Explicit valence for atom # 0 C, 4, is greater than permitted
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:55] ERROR: Could not sanitize molecule ending on line 50
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 24 25 26 27 28 29 30 31 32
[10:36:55] ERROR: Could not sanitize molecule ending on line 141
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 24 25 26 27 28 29 30 31 32
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 6 8 11 12 15 16 18
[10:36:55] ERROR: Could not sanitize molecule endin

[10:36:55] Can't kekulize mol.  Unkekulized atoms: 9
[10:36:55] ERROR: Could not sanitize molecule ending on line 58
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 9
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 16 17 18 24
[10:36:55] ERROR: Could not sanitize molecule ending on line 102
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 16 17 18 24
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 10 11 12 13 14 15 16 17 18
[10:36:55] ERROR: Could not sanitize molecule ending on line 95
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 10 11 12 13 14 15 16 17 18
[10:36:55] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4
[10:36:55] ERROR: Could not sanitize molecule ending on line 63
[10:36:55] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 7 11 12
[10:36:56] ERROR: Could not sanitize molecule ending on line 76
[10:36:56] ERROR: Can't kekulize mol.  Unkekul

[10:36:56] Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[10:36:56] ERROR: Could not sanitize molecule ending on line 119
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 4 12 14 15 16
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 16 17 18 19 20 21 22 23 24
[10:36:56] ERROR: Could not sanitize molecule ending on line 209
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 16 17 18 19 20 21 22 23 24
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] ERROR: Could not sanitize molecule ending on line 94
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 0
[10:36:56] ERROR: Could not sanitize molecule ending on line 117
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 0
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 12 13 14 15 16
[10:36:56] ERROR: Could not sanitize molecule ending on line 83
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 1

[10:36:56] Can't kekulize mol.  Unkekulized atoms: 1 2 3 12 13 14 15
[10:36:56] ERROR: Could not sanitize molecule ending on line 135
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 12 13 14 15
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 10 11 12 23 24
[10:36:56] ERROR: Could not sanitize molecule ending on line 114
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 10 11 12 23 24
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 1 8 9 19 30
[10:36:56] ERROR: Could not sanitize molecule ending on line 134
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 8 9 19 30
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 49 50 51 52 53
[10:36:56] ERROR: Could not sanitize molecule ending on line 230
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 49 50 51 52 53
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 9 10 11
[10:36:56] ERROR: Could not sanitize molecule ending on line 65
[10:36:56] ERROR: Can't kekulize mol.  Unkekulize

[10:36:56] Can't kekulize mol.  Unkekulized atoms: 20 21 22 23 24 25 26 27 28
[10:36:56] ERROR: Could not sanitize molecule ending on line 99
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 20 21 22 23 24 25 26 27 28
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 17 18 20 21 22
[10:36:56] ERROR: Could not sanitize molecule ending on line 103
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 17 18 20 21 22
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 18 19 20 21 22 23 24 25 26
[10:36:56] ERROR: Could not sanitize molecule ending on line 85
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 18 19 20 21 22 23 24 25 26
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] ERROR: Could not sanitize molecule ending on line 96
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 7 8 9 41 42
[10:36:56] ERROR: Could not sanitize molecule ending on line

[10:36:56] Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:56] ERROR: Could not sanitize molecule ending on line 56
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 2 3 4 5 6 7 8 9
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 2 6 7 17 18 19 22 23 24 25
[10:36:56] ERROR: Could not sanitize molecule ending on line 108
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 6 7 17 18 19 22 23 24 25
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[10:36:56] ERROR: Could not sanitize molecule ending on line 114
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] ERROR: Could not sanitize molecule ending on line 114
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 15 16 17 18 19
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 11 12 13 14 15 22 23 25 26
[10:36:56] ERROR: Could not sanitize molecule ending on line 109
[10:36:56] E

[10:36:56] Can't kekulize mol.  Unkekulized atoms: 1 11 13 25 26
[10:36:56] ERROR: Could not sanitize molecule ending on line 128
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 1 11 13 25 26
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 39 40 41 42 43
[10:36:56] ERROR: Could not sanitize molecule ending on line 171
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 39 40 41 42 43
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25
[10:36:56] ERROR: Could not sanitize molecule ending on line 194
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 21 22 23 24 25
[10:36:56] Can't kekulize mol.  Unkekulized atoms: 2 3 5 9 15 16 17 18 19 21 22 23 24
[10:36:56] ERROR: Could not sanitize molecule ending on line 122
[10:36:56] ERROR: Can't kekulize mol.  Unkekulized atoms: 2 3 5 9 15 16 17 18 19 21 22 23 24
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 4 5 6 7 8
[10:36:57] ERROR: Could not sanitize molecule ending on line 103
[10:36:57] E

[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] ERROR: Could not sanitize molecule ending on line 85
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18 19 20 21 22
[10:36:57] ERROR: Could not sanitize molecule ending on line 91
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 14 15 16 17 18 19 20 21 22
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 4 5 8 9 10
[10:36:57] ERROR: Could not sanitize molecule ending on line 79
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 4 5 8 9 10
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 33 34 35 37 38 39 40 51 52
[10:36:57] ERROR: Could not sanitize molecule ending on line 234
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 33 34 35 37 38 39 40 51 52
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 3 5 6 7 8 9 10 11 12 13 14
[10:36:57] ERROR: Could not sanitize molecule end

[10:36:57] Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[10:36:57] ERROR: Could not sanitize molecule ending on line 68
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 3 4 7 8 9
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] ERROR: Could not sanitize molecule ending on line 85
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 4 5 6 7 13 14
[10:36:57] ERROR: Could not sanitize molecule ending on line 205
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 4 5 6 7 13 14
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] ERROR: Could not sanitize molecule ending on line 85
[10:36:57] ERROR: Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] Can't kekulize mol.  Unkekulized atoms: 0 1 2 3 11 12 13 15 16
[10:36:57] ERROR: Could not sanitize molecule ending on line 85
[10:36:57] ERROR:

done processing. total processed: 4640. skipped: 676
saved processed dataset to data/processed/graphs/dataset.pt
Dataset size: 4640


Done!


Epoch 001 | train_loss 9.3337 | val_rmse 1.7074 | test_rmse 1.7156
Epoch 002 | train_loss 2.9958 | val_rmse 2.1214 | test_rmse 2.0638
Epoch 003 | train_loss 2.9037 | val_rmse 1.7207 | test_rmse 1.6774
Training complete. Best val rmse: 1.7074
