In [6]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the local data_utils helpers) are reloaded before each cell executes
# and edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

from torch_geometric.datasets import QM9
import torch_geometric.transforms as T
import torch
from torch.utils.data import random_split
from data_utils import SelectQM9TargetProperties, create_qm9_data_split, SelectQM9NodeFeatures


# Per-sample transform pipeline. Judging by the helper names, the first step
# keeps only the listed target properties and the second keeps only the listed
# node features; both are applied whenever a sample is fetched.
qm9_transforms = [
    SelectQM9TargetProperties(properties=["homo", "lumo", "r2"]),
    SelectQM9NodeFeatures(features=["atom_type"]),
]
transform = T.Compose(qm9_transforms)

# Load QM9 from (or into) ./data with the transform pipeline attached.
dataset = QM9(root="./data", transform=transform)

# Partition into train / validation / test subsets using the project helper.
train_dataset, val_dataset, test_dataset = create_qm9_data_split(dataset=dataset)

# Sanity check: show the first transformed sample and the node-feature width.
print(dataset[0], dataset.num_features, sep="\n")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Data(x=[5, 5], edge_index=[2, 8], edge_attr=[8, 4], y=[1, 3], pos=[5, 3], z=[5], smiles='[H]C([H])([H])[H]', name='gdb_1', idx=[1])
5


In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network with a graph-level linear head.

    Node features pass through two ``GCNConv`` layers (ReLU and dropout after
    the first), are mean-pooled per graph, and are mapped by a linear layer to
    ``num_targets`` outputs.

    Args:
        num_node_features: Width of the input node-feature vectors.
        num_targets: Number of values predicted per graph.
        hidden_channels: Output width of the first convolution (default 16).
        out_channels: Output width of the second convolution, which is also
            the input width of the linear head (default 32).
        dropout: Dropout probability applied after the first convolution while
            the module is in training mode (default 0.5).

    Raises:
        ValueError: If ``dropout`` is outside the interval [0, 1].
    """

    def __init__(
        self,
        num_node_features: int,
        num_targets: int,
        hidden_channels: int = 16,
        out_channels: int = 32,
        dropout: float = 0.5,
    ):
        super().__init__()
        # Fail at construction time rather than on the first forward pass.
        if not 0.0 <= dropout <= 1.0:
            raise ValueError(f"dropout must be in [0, 1], got {dropout}")
        self.dropout = dropout
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.fc1 = nn.Linear(out_channels, num_targets)

    def forward(self, data):
        """Compute graph-level predictions for a graph or batch of graphs.

        Args:
            data: Object exposing ``x``, ``edge_index`` and ``batch``
                attributes (e.g. a PyTorch Geometric ``Data``/``Batch``).

        Returns:
            The output of the linear head applied to the pooled node
            embeddings; its last dimension has size ``num_targets``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active in training mode (self.training is True).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        # Collapse node embeddings into one vector per graph.
        x = global_mean_pool(x, batch)
        return self.fc1(x)

In [5]:
from torch_geometric.loader import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# Width of the regression target vector, read from one transformed sample
# (dataset[0].y has shape [1, num_targets]); this avoids relying on the
# classification-oriented `num_classes` property for a regression task.
num_targets = dataset[0].y.size(-1)
model = GCN(num_node_features=dataset.num_node_features, num_targets=num_targets).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
epochs = 10

# TensorBoard writer; uses the default log directory.
writer = SummaryWriter()

print("Training...")
for epoch in tqdm(range(epochs)):
    # Re-assert training mode every epoch so that any evaluation code run in
    # between cannot leave dropout disabled.
    model.train()
    epoch_train_loss = 0.0
    for batch in train_loader:
        sample = batch.to(device)
        optimizer.zero_grad()
        model_output = model(sample)
        loss = F.mse_loss(model_output, sample.y)
        loss.backward()
        optimizer.step()
        # mse_loss returns the mean over the batch, so weight it by the number
        # of graphs in the batch; dividing the total by the dataset size below
        # then yields the true per-sample mean even when the last batch is
        # smaller than the others.
        epoch_train_loss += loss.item() * sample.y.size(0)

    epoch_train_loss /= len(train_loader.dataset)
    writer.add_scalar('Loss/train', epoch_train_loss, epoch)

# Make sure all buffered scalars are written to disk.
writer.flush()

Training...


 10%|█         | 1/10 [00:25<03:51, 25.72s/it]