In [1]:
from torch_geometric.datasets import QM9
import torch_geometric.transforms as T
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

transform = T.Compose([
    T.NormalizeFeatures(attrs=["y"]),
])

dataset = QM9(root="./data", transform=transform)

#print(f"HOMO mean = {dataset.mean(2)}")
#print(f"LUMO mean = {dataset.mean(3)}")

train_set_size = int(len(dataset) * 0.9)

dataset_train = dataset[:train_set_size]
dataset_val = dataset[train_set_size:]

qm9_sample = dataset[1]
qm9_sample.y

cuda


tensor([[6.4551e-02, 6.4879e-02, 6.4189e-02, 6.4577e-02, 6.4870e-02, 6.5579e-02,
         6.4522e-02, 2.1752e-05, 2.5016e-05, 2.6090e-05, 0.0000e+00, 6.4747e-02,
         6.3979e-02, 6.3976e-02, 6.3973e-02, 6.4011e-02, 7.6787e-02, 7.6784e-02,
         7.2503e-02]])

In [4]:
qm9_sample

Data(x=[4, 11], edge_index=[2, 6], edge_attr=[6, 4], y=[1, 19], pos=[4, 3], z=[4], smiles='[H]N([H])[H]', name='gdb_2', idx=[1])

In [5]:
qm9_sample.edge_index

tensor([[0, 0, 0, 1, 2, 3],
        [1, 2, 3, 0, 0, 0]])

In [7]:
qm9_sample.z

tensor([7, 1, 1, 1])

In [6]:
qm9_sample.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])

In [2]:
qm9_sample.x

tensor([[0., 0., 1., 0., 0., 7., 0., 0., 0., 0., 3.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [3]:
qm9_sample.smiles

'[H]N([H])[H]'

In [3]:
print(f"QM9 Dataset Size = {len(dataset_train)}")
print(f"QM9 Number of Target Attributes = {dataset_train.num_classes}")
print(f"QM9 Number of Node features = {dataset_train.num_node_features}")

QM9 Dataset Size = 117747


KeyboardInterrupt: 

In [4]:
# Node attributes (AtomID, ???) TODO
qm9_sample.x

tensor([[0., 0., 1., 0., 0., 7., 0., 0., 0., 0., 3.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset_train.num_node_features, 16)
        self.conv2 = GCNConv(16, 32)
        self.fc1 = nn.Linear(32, dataset_train.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = global_mean_pool(x, batch)
        x = self.fc1(x)
        return x

In [3]:
from torch_geometric.loader import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

model = GCN().to(device)

#total_params = sum(p.numel() for p in model.parameters())
#print(f"Total number of parameters is: {total_params}")

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
train_loader = DataLoader(dataset_train, batch_size=128, shuffle=True)
epochs = 100

writer = SummaryWriter()

model.train()
for epoch in tqdm(range(epochs)):
    epoch_train_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        sample = batch.to(device)
        model_output = model(sample)
        loss = F.mse_loss(model_output, sample.y)
        epoch_train_loss += loss.item()
        loss.backward()
        optimizer.step()

    epoch_train_loss /= len(train_loader.dataset)
    writer.add_scalar('Loss/train', epoch_train_loss, epoch)

  2%|▏         | 2/100 [01:28<1:12:01, 44.09s/it]


KeyboardInterrupt: 