In [1]:
import torch
from torch_geometric.datasets import MoleculeNet
from torch_geometric.loader import DataLoader
from torch.utils.data import Subset
from torch_geometric.data import Dataset

from torch import nn
from graphormer.model import Graphormer
from graphormer.functional import precalculate_custom_attributes, precalculate_paths

import time

In [3]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2025.9.1-cp310-cp310-manylinux_2_28_x86_64.whl (36.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.2/36.2 MB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: rdkit
Successfully installed rdkit-2025.9.1


In [2]:
# Load the ESOL dataset through MoleculeNet, using the current directory as dataset root.
dataset = MoleculeNet(name="ESOL", root="./")
# Bare trailing expression so the notebook displays the dataset's repr.
dataset

  if osp.exists(f) and torch.load(f) != _repr(self.pre_transform):
  if osp.exists(f) and torch.load(f) != _repr(self.pre_filter):
  return torch.load(f, map_location)


ESOL(1128)

In [3]:
# HYPER-PARAMETERS
SEED = 42  # seed for PyTorch's RNG, so the model's initial weights are reproducible
NUM_LAYERS = 3
NODE_DIM = 128
EDGE_DIM = NODE_DIM  # edge embeddings use the same width as node embeddings
FF_DIM = 256
N_HEADS = 4
MAX_IN_DEGREE = 5
MAX_OUT_DEGREE = 5
MAX_PATH_DISTANCE = 5

# Seed PyTorch *before* the model is constructed: parameter initialization draws from
# the global RNG, so without this every kernel restart starts from different weights.
torch.manual_seed(SEED)

# Create model
model = Graphormer(
    num_layers=NUM_LAYERS,
    input_node_dim=dataset.num_node_features,
    node_dim=NODE_DIM,
    input_edge_dim=dataset.num_edge_features,
    edge_dim=EDGE_DIM,
    # Output width is taken from the second dimension of the first graph's target `y`.
    output_dim=dataset[0].y.shape[1],
    n_heads=N_HEADS,
    ff_dim=FF_DIM,
    max_in_degree=MAX_IN_DEGREE,
    max_out_degree=MAX_OUT_DEGREE,
    max_path_distance=MAX_PATH_DISTANCE,
)

In [4]:
# Run precalculate_custom_attributes once per graph, up front, and collect the results
# in dataset order.
modified_data_list = [
    precalculate_custom_attributes(
        graph,
        max_in_degree=MAX_IN_DEGREE,
        max_out_degree=MAX_OUT_DEGREE,
    )
    for graph in dataset
]

class ModifiedDataset(Dataset):
    """Dataset view over an in-memory list of already-preprocessed items.

    Length and indexing are delegated directly to the wrapped list.
    """

    def __init__(self, data_list):
        # Keep a reference to the caller's list; nothing is copied.
        self.data_list = data_list

    def __len__(self):
        """Return the number of stored items."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return the item stored at position ``idx`` of the wrapped list."""
        return self.data_list[idx]

modified_dataset = ModifiedDataset(modified_data_list)

# Dataset splitting
from sklearn.model_selection import train_test_split

BATCH_SIZE = 8

# train_test_split returns (train, test) in that order. Unpack in that order and state
# the 80/20 split through the size of the *test* part, instead of swapping the names
# and passing the train fraction as `test_size`.
train_ids, test_ids = train_test_split(
    list(range(len(modified_dataset))), test_size=0.2, random_state=42
)
# Sanity check: the two index sets must not share any sample.
assert not set(train_ids) & set(test_ids), "train and test indices overlap"

# Both loaders must keep a fixed batch order (shuffle=False): the path attributes
# computed below are cached per batch position and looked up by that position
# when the loaders are iterated again during training / evaluation.
train_loader = DataLoader(Subset(modified_dataset, train_ids), batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(Subset(modified_dataset, test_ids), batch_size=BATCH_SIZE, shuffle=False)


def _precalculate_loader_paths(loader):
    """Return one ``(node_paths_length, edge_paths_tensor, edge_paths_length)`` tuple per batch.

    The tuples are stored in the order in which ``loader`` yields its batches.
    """
    cached_paths = []
    for batch in loader:
        # precalculate_paths returns five values; only the last three are cached.
        _, _, node_paths_length, edge_paths_tensor, edge_paths_length = precalculate_paths(
            batch, max_path_distance=MAX_PATH_DISTANCE
        )
        cached_paths.append((node_paths_length, edge_paths_tensor, edge_paths_length))
    return cached_paths


# precalculate node_paths_length, edge_paths_tensor and edge_paths_length for each batch
train_node_edge_paths = _precalculate_loader_paths(train_loader)
test_node_edge_paths = _precalculate_loader_paths(test_loader)

# Every batch position must have a cache entry, since later lookups are by position.
assert len(train_node_edge_paths) == len(train_loader)
assert len(test_node_edge_paths) == len(test_loader)

In [5]:
LEARNING_RATE = 3e-4

optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# reduction="sum" returns the summed absolute error of a batch rather than its mean.
loss_function = nn.L1Loss(reduction="sum")
# Backward-compatible alias: other cells refer to the loss under this misspelled name.
loss_functin = loss_function

In [8]:
from tqdm import tqdm
from torch_geometric.nn.pool import global_mean_pool

# Use the GPU when one is available; otherwise fall back to the CPU so the notebook
# still runs on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_EPOCHS = 10
MAX_GRAD_NORM = 1.0  # gradient-norm clipping threshold


def _prepare_batch(batch, cached_paths):
    """Attach the cached path attributes to ``batch`` and return the batch on DEVICE.

    ``cached_paths`` is the ``(node_paths_length, edge_paths_tensor, edge_paths_length)``
    tuple that was stored for this batch's position in its loader.
    """
    node_paths_length, edge_paths_tensor, edge_paths_length = cached_paths
    batch.node_paths_length = node_paths_length
    batch.edge_paths_tensor = edge_paths_tensor
    batch.edge_paths_length = edge_paths_length
    # Use the returned object instead of relying on `.to()` working in place.
    return batch.to(DEVICE)


model.to(DEVICE)
for epoch in range(NUM_EPOCHS):
    # ---- training ----
    model.train()
    train_loss_sum = 0.0  # summed (not averaged) loss over the whole epoch
    epoch_start = time.time()

    # Wrap the loader itself (not enumerate) so tqdm knows the number of batches
    # and can render a real progress bar.
    for i, batch in enumerate(tqdm(train_loader)):
        batch = _prepare_batch(batch, train_node_edge_paths[i])

        optimizer.zero_grad()
        # The model output is mean-pooled per graph using the `batch.batch` assignment vector.
        output = global_mean_pool(model(batch), batch.batch)
        loss = loss_functin(output, batch.y)
        train_loss_sum += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
        optimizer.step()

    epoch_time = time.time() - epoch_start
    # The loss is summed per batch, so dividing by the sample count gives a per-sample value.
    print(f"Epoch {epoch+1} - TRAIN_LOSS: {train_loss_sum / len(train_ids):.6f}, Time: {epoch_time:.2f}s")

    # ---- evaluation ----
    model.eval()
    eval_loss_sum = 0.0
    # No gradients are needed anywhere in the evaluation pass.
    with torch.no_grad():
        for i, batch in enumerate(tqdm(test_loader)):
            batch = _prepare_batch(batch, test_node_edge_paths[i])
            output = global_mean_pool(model(batch), batch.batch)
            eval_loss_sum += loss_functin(output, batch.y).item()

    print("EVAL LOSS", eval_loss_sum / len(test_ids))

    

113it [00:03, 34.57it/s]


Epoch 1 - TRAIN_LOSS: 1.229842, Time: 3.27s


29it [00:00, 68.98it/s]


EVAL LOSS 1.365941055085924


113it [00:03, 34.85it/s]


Epoch 2 - TRAIN_LOSS: 1.179976, Time: 3.24s


29it [00:00, 68.34it/s]


EVAL LOSS 1.1474688413408067


113it [00:03, 34.87it/s]


Epoch 3 - TRAIN_LOSS: 1.139024, Time: 3.24s


29it [00:00, 68.12it/s]


EVAL LOSS 1.1391176467471653


113it [00:03, 34.86it/s]


Epoch 4 - TRAIN_LOSS: 1.103325, Time: 3.24s


29it [00:00, 68.34it/s]


EVAL LOSS 1.3508825672997369


113it [00:03, 34.72it/s]


Epoch 5 - TRAIN_LOSS: 1.001362, Time: 3.26s


29it [00:00, 69.06it/s]


EVAL LOSS 1.0079428895314535


113it [00:03, 35.00it/s]


Epoch 6 - TRAIN_LOSS: 1.065582, Time: 3.23s


29it [00:00, 68.18it/s]


EVAL LOSS 0.9859108384450277


113it [00:03, 34.86it/s]


Epoch 7 - TRAIN_LOSS: 0.887400, Time: 3.24s


29it [00:00, 68.57it/s]


EVAL LOSS 0.9475120830535889


113it [00:03, 34.68it/s]


Epoch 8 - TRAIN_LOSS: 0.854567, Time: 3.26s


29it [00:00, 68.43it/s]


EVAL LOSS 0.9496096685197618


113it [00:03, 34.94it/s]


Epoch 9 - TRAIN_LOSS: 0.789633, Time: 3.24s


29it [00:00, 68.09it/s]


EVAL LOSS 0.9175022252400716


113it [00:03, 34.68it/s]


Epoch 10 - TRAIN_LOSS: 0.742772, Time: 3.26s


29it [00:00, 69.69it/s]

EVAL LOSS 0.8340490076276991



