In [1]:
import numpy as np
import torch
import torch_geometric
import torch_geometric.transforms
from sklearn.model_selection import train_test_split
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader

import mygenai
import mygenai.training.training as training
from mygenai.models.graphvae import GraphVAE
from mygenai.utils.transforms import CompleteGraph, SetTarget, PadToFixedSize, ExtractFeatures

# Choose the compute device up front: CUDA when PyTorch reports it as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the library versions and the chosen device so the recorded output
# documents the environment this notebook was run in.
print("PyTorch version {}".format(torch.__version__))
print("PyG version {}".format(torch_geometric.__version__))
print("Using device: {}".format(device))


PyTorch version 2.5.0+cu124
PyG version 2.6.1
Using device: cuda


In [2]:
# Transforms applied to every sample at load time, in this order:
# ExtractFeatures -> PadToFixedSize -> CompleteGraph -> SetTarget.
# NOTE(review): SetTarget() is called without arguments, so `target` below is
# only used to pick the normalization statistics that are kept -- confirm it
# matches the column SetTarget selects by default.
transform = torch_geometric.transforms.Compose([
        ExtractFeatures(),
        PadToFixedSize(),
        CompleteGraph(),
        SetTarget()
    ])
target = 4

# Load the QM9 dataset with the transforms defined above.
dataset = QM9("../data/QM9", transform=transform)

# Standardize each target column to mean = 0 and std = 1. The statistics are
# reduced over dim=0, i.e. across all samples of the dataset (including the
# ones later sliced off for validation and test).
# `_data` is used instead of `data` because the public `data` property of an
# InMemoryDataset emits a warning on every access and points to `_data` for
# deliberate edits of the stored tensors. The dataset was just constructed in
# this cell, so no samples have been accessed yet.
mean = dataset._data.y.mean(dim=0, keepdim=True)
std = dataset._data.y.std(dim=0, keepdim=True)
dataset._data.y = (dataset._data.y - mean) / std

# Keep only the scalar statistics of the selected target column.
mean, std = mean[:, target].item(), std[:, target].item()
print(mean)

6.858491897583008




In [3]:
print(f"Total number of samples: {len(dataset)}.")

# Number of samples per split and the mini-batch size, named so they can be
# tuned in one place.
SPLIT_SIZE = 1000
BATCH_SIZE = 32

# Work on a 3K subset: three consecutive, non-overlapping slices of
# SPLIT_SIZE samples each (train, then validation, then test).
# (An earlier variant for the full dataset used the first 10000 samples for
# test, the next 10000 for validation, and the remainder for training.)
# NOTE(review): the slices are taken in stored order without shuffling --
# confirm the leading samples are representative of the whole dataset.
train_dataset = dataset[:SPLIT_SIZE]
val_dataset = dataset[SPLIT_SIZE:2 * SPLIT_SIZE]
test_dataset = dataset[2 * SPLIT_SIZE:3 * SPLIT_SIZE]
print(f"Created dataset splits with {len(train_dataset)} training, {len(val_dataset)} validation, {len(test_dataset)} test samples.")

# One loader per split; only the training loader shuffles, so validation and
# test batches are visited in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Total number of samples: 130831.
Created dataset splits with 1000 training, 1000 validation, 1000 test samples.


In [4]:
# Build the GraphVAE with its default constructor arguments, then move it to
# the device selected at the top of the notebook.
model = GraphVAE()
model = model.to(device)

In [5]:
# Smoke-test the forward pass: take one mini-batch from the training loader,
# move it to the target device, and run the model with autograd disabled.
batch = next(iter(train_loader)).to(device)
with torch.no_grad():
    outputs = model(batch)
print("Forward pass successful!")

Forward pass successful!


In [6]:
# Run the training routine on the training loader, passing the validation
# loader and the device alongside; it logs one line per epoch with the
# train and validation loss.
# `training` (mygenai.training.training) is imported in the notebook's import
# cell so that all dependencies are declared in one place.
training.train_model(model, train_loader, val_loader, device)

Epoch 000 | Train Loss: 0.4629 | Val Loss: 0.1651
Epoch 001 | Train Loss: 0.1668 | Val Loss: 0.1538
Epoch 002 | Train Loss: 0.1517 | Val Loss: 0.1488
Epoch 003 | Train Loss: 0.1444 | Val Loss: 0.1384
Epoch 004 | Train Loss: 0.1260 | Val Loss: 0.1068
Epoch 005 | Train Loss: 0.0719 | Val Loss: 0.0306
Epoch 006 | Train Loss: 0.0114 | Val Loss: 0.0025
Epoch 007 | Train Loss: 0.0011 | Val Loss: 0.0005
Epoch 008 | Train Loss: 0.0004 | Val Loss: 0.0003
Epoch 009 | Train Loss: 0.0002 | Val Loss: 0.0002
Epoch 010 | Train Loss: 0.0001 | Val Loss: 0.0001
Epoch 011 | Train Loss: 0.0001 | Val Loss: 0.0001
Epoch 012 | Train Loss: 0.0001 | Val Loss: 0.0001
Epoch 013 | Train Loss: 0.0001 | Val Loss: 0.0001
Epoch 014 | Train Loss: 0.0001 | Val Loss: 0.0001
Epoch 015 | Train Loss: 0.0000 | Val Loss: 0.0000
Epoch 016 | Train Loss: 0.0000 | Val Loss: 0.0000
Epoch 017 | Train Loss: 0.0000 | Val Loss: 0.0000
Epoch 018 | Train Loss: 0.0000 | Val Loss: 0.0000
Epoch 019 | Train Loss: 0.0000 | Val Loss: 0.0000


In [7]:
# Pick the sample at index 2 of the (transformed, normalized) dataset for a
# closer look.
# NOTE(review): the name suggests this entry is the water molecule -- confirm.
water = dataset[2]

In [8]:
# Display the edge feature matrix of the selected sample.
water.edge_attr

tensor([[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        ...,
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]])

In [9]:
# Display the node feature matrix of the selected sample.
water.x

tensor([[0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.]])

In [10]:
# Run the single selected sample through the model: move it onto the model's
# device first, then do the forward pass with autograd disabled.
water = water.to(device)
with torch.no_grad():
    outputs = model(water)

# Echo the sample's edge and node features (now living on `device`).
for caption, values in (
    ("Water molecule edge attributes: ", water.edge_attr),
    ("Water molecule node attributes: ", water.x),
):
    print(caption, values)

Water molecule edge attributes:  tensor([[1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        ...,
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1.]], device='cuda:0')
Water molecule node attributes:  tensor([[0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1.],
        [

In [11]:
# Split the result of the preceding forward pass into its four components.
# NOTE(review): mu / logvar are presumably the latent mean and log-variance
# of the VAE -- confirm against GraphVAE.forward.
edge_logits, mu, logvar, property_pred = outputs

In [None]:
# Decode edges from `mu` via the decoder's predict_edges helper, which is
# unpacked into a pair (edge_probs, edge_preds).
# The call is wrapped in torch.no_grad(), matching the other inference cells,
# so that no autograd graph is built through the decoder.
# NOTE(review): the saved output of this cell is
# "ValueError: not enough values to unpack (expected 4, got 1)". The origin
# of that failed 4-way unpack is not visible in this notebook -- check what
# predict_edges expects (here `mu` comes from a single, un-batched sample)
# and re-run from a fresh kernel before relying on this cell.
with torch.no_grad():
    edge_probs, edge_preds = model.decoder.predict_edges(mu)
print("Edge logits: ", edge_logits)
print("Edge probabilities: ", edge_probs)

ValueError: not enough values to unpack (expected 4, got 1)