## Train an Equivariant Diffusion Model on Mini-QM9

### Loading dependencies and datasets

In [9]:
import sys
sys.path.append('../../..')
from probai.src.data.mini_qm9 import MiniQM9Dataset
from torch_geometric.loader import DataLoader 
from probai.src.models.gt.ddpm import DDPM
from probai.src.models.egnn import EGNNScore
from probai.src.training.training_loop import Trainer
import matplotlib.pyplot as plt
import torch
import yaml


In [10]:

# Load train and val dataloaders
# The relative paths below resolve against the kernel's working directory.
# NOTE(review): the inputs are pickle files; presumably MiniQM9Dataset unpickles them,
# so only point this at files from a trusted source — confirm against the dataset class.
BATCH_SIZE = 128  # single source of truth for both loaders

train_dataset = MiniQM9Dataset(file_path="../../raw_data/mini_qm9_train.pickle")
# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

valid_dataset = MiniQM9Dataset(file_path="../../raw_data/mini_qm9_valid.pickle")
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

### Initialize EGNN and DDPM models 

In [11]:
# Initialize EGNN
# Read the YAML configuration once; `config` is reused by later cells.
with open("../../configs/default_config.yml", 'r') as file:
    config = yaml.safe_load(file)

# Pull the EGNN hyperparameters out of their config section.
egnn_config = config['EGNN']
hidden_nf = egnn_config['hidden_nf']
n_layers = egnn_config['n_layers']

# Build the score network from the configured width and depth.
score = EGNNScore(
    in_node_nf=5 + 1,  # 5 for the one hot encoding, 1 for diffusion time
    hidden_nf=hidden_nf,
    n_layers=n_layers,
    out_node_nf=5,  # 5 atom types in QM9
)

In [12]:
# Initialize DDPM
# Wrap the EGNN score network in a DDPM with a linear noise schedule, then build the
# trainer that optimizes it.
ddpm_config = config['DDPM']
# Number of noise levels (the original note says the default config sets this to 100).
N = ddpm_config['N']
ddpm = DDPM(noise_schedule_type="linear", model=score, N=N)
# The trainer receives the learning rate and the checkpoint file path.
trainer = Trainer(ddpm, lr=0.0005, checkpoints_path="../../checkpoints/egnn_checkpoint.pth")


### Train the Diffusion EGNN Model
<small> If the model is training well, the Validation Loss should be below 1.0 when using <code>noise_schedule_type="linear"</code></small>

In [13]:
# Train on the first GPU when CUDA is available; otherwise fall back to the CPU so this
# cell does not fail on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
trainer.train(train_loader, valid_loader, epochs=50, device=device)

# Plot the validation losses recorded by the trainer.
# NOTE(review): the x-axis is labelled "Epoch" on the assumption that one validation
# loss is appended per epoch — confirm against Trainer.
fig, ax = plt.subplots()
ax.plot(trainer.val_losses)
ax.set(title="Validation loss during training", xlabel="Epoch", ylabel="Validation loss")
plt.show()

Epochs Progress:  30%|██████▎              | 15/50 [02:07<04:52,  8.35s/it, Epoch=14, Val_loss=1.08]