# QM9 Pro.
Not sure what I’m supposed to call this file.  
Plan is to keep this repo much neater than before.  
Getting to work now.

In [1]:
import torch_geometric
import torch
import os
from torch.nn import Module, Embedding, Linear, MSELoss
from torch.optim import Adam
from torch_geometric.nn import global_mean_pool
import torch.optim.lr_scheduler as lr_scheduler
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
import wandb

In [2]:
# setting up wandb
os.environ['WANDB_NOTEBOOK_NAME'] = 'main.ipynb'
wandb.login()

# reproducibility
torch.manual_seed(2002)

[34m[1mwandb[0m: Currently logged in as: [33msharshe[0m. Use [1m`wandb login --relogin`[0m to force relogin


<torch._C.Generator at 0x10965c090>

In [3]:
# load in dataset
dataset = QM9(root='QM9/')

# 80/10/10 split
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size

# build train, val, test datasets out of main dataset
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# turn into DataLoaders for batching efficiency
train_loader = DataLoader(train_dataset, batch_size=128)
val_loader = DataLoader(val_dataset, batch_size=128)
test_loader = DataLoader(test_dataset, batch_size=128)



In [4]:
config_1 = {
    "base_learning_rate": 1e-3,
    "architecture": "Sparse 2-layer MPNN",
    "optimizer": "Adam",
    "scheduler": "ReduceLROnPlateau",
    "dataset": "QM9",
    "epochs": 50,
    "batch_size": 128,
    "name": "2LP"
}

config_2 = {
    "base_learning_rate": 1e-3,
    "architecture": "Sparse 2-layer MPNN",
    "optimizer": "Adam",
    "scheduler": "CosineAnnealingWarmRestarts",
    "dataset": "QM9",
    "epochs": 50,
    "batch_size": 128,
    "name": "2LC"
}

config_3 = {
    "base_learning_rate": 1e-3,
    "architecture": "Sparse 1-layer MPNN",
    "optimizer": "Adam",
    "scheduler": "ReduceLROnPlateau",
    "dataset": "QM9",
    "epochs": 50,
    "batch_size": 128,
    "name": "1LP"
}

config_4 = {
    "base_learning_rate": 1e-3,
    "architecture": "Sparse 1-layer MPNN",
    "optimizer": "Adam",
    "scheduler": "CosineAnnealingWarmRestarts",
    "dataset": "QM9",
    "epochs": 50,
    "batch_size": 128,
    "name": "1LC"
}

configs = [config_1, config_2, config_3, config_4]

In [5]:
class DoubleLayerGCN(Module):
    def __init__(self):
        super().__init__()
        
        # hard-coded here
        # could have been a parameter but that
        # would not make anything easier
        self.emb_dim = 32
        
        # initialize layers
        self.embedding = Embedding(118, self.emb_dim)
        self.conv1 = GCNConv(self.emb_dim, self.emb_dim)
        self.conv2 = GCNConv(self.emb_dim, self.emb_dim)
        self.lin1 = torch.nn.Linear(self.emb_dim, 8)
        self.lin2 = torch.nn.Linear(8, 1)

    # define forward pass
    def forward(self, data):
        # get relevant parts from data arg
        edge_index = data.edge_index
        edge_attr = data.edge_attr
        # notes: use rbf: radial basis function to
        # expand the edges d_ij -> [w_ij1, w_ij2,
        # \cdot , w_ijd]
        
        # initialize x
        x = data.x

        # embed x and put it through embedding and
        # conv layers
        x = self.embedding(x)
        x = self.conv1(x, edge_index, edge_attr)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_attr)
        x = x.relu()
        
        # put x through linear layers
        x = self.lin1(x)
        x = x.relu()
        x = self.lin2(x)
        x = x.relu()
        
        # combine representations of all nodes
        # into single graph-level prediction
        x = global_mean_pool(x, data.batch)
        
        # return x
        return x

In [6]:
class SingleLayerGCN(Module):
    def __init__(self):
        super().__init__()
        
        # lower embedding dimension for a smaller model
        self.emb_dim = 8
        
        # initialize layers
        self.embedding = Embedding(118, self.emb_dim)
        self.conv = GCNConv(self.emb_dim, self.emb_dim)
        self.lin = torch.nn.Linear(self.emb_dim, 1)

    # define forward pass
    def forward(self, data):
        # get relevant parts from data arg
        edge_index = data.edge_index
        edge_attr = data.edge_attr
        
        # initialize x
        x = data.x

        # put x through each layer
        x = self.embedding(x)
        x = self.conv(x, edge_index, edge_attr)
        x = self.lin(x)
        x = x.relu()
        
        # combine into single output
        x = global_mean_pool(x, data.batch)
        
        return x

In [7]:
model1 = DoubleLayerGCN()
model2 = DoubleLayerGCN()
model3 = SingleLayerGCN()
model4 = SingleLayerGCN()

models = [model1, model2, model3, model4]

In [9]:
for config, model in zip(configs[0:1], models[0:1]):
    # wandb project init
    wandb.init(
        project = "QM9-Pro-2",
        config = config
    )

    # hyperparameter init
    num_epochs = config['epochs']
    base_learning_rate = config['base_learning_rate']
    loss_fn = MSELoss()
    optimizer = Adam(model.parameters(), base_learning_rate)
    
    # define the scheduler dependig on config
    if config['scheduler'] == 'ReduceLROnPlateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.1, patience=1, threshold=0)
        # bool to control step() call below
        plateau = True
    else:
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=1, T_mult=2, eta_min=1e-5, last_epoch=-1)
        # bool to control step() call below
        plateau = False

    # stats
    val_mean_losses = []
    test_losses = []
    learning_rates = [base_learning_rate]

    for epoch in range(num_epochs):
        # TRAINING
        # track gradients
        model.train()
        
        i = 0
        # loop through loader
        for data in train_loader:
            # clear gradients
            optimizer.zero_grad()
            
            # get actual and predicted values
            y = data.y
            y_hat = model(data).view(-1)
            
            # calculate loss
            loss = loss_fn(y_hat, y)
            
            # save loss every 100 goes
            if i%100 == 0:
                train_loss_item = loss.item()
                wandb.log({"train_losses": train_loss_item})
                
                # save learning rate
                lr = optimizer.param_groups[0]['lr']
                wandb.log({"training_rates": lr})
            i+=1
            
            # calculate gradients
            loss.backward()
            
            # backprop
            optimizer.step()
        
        # VAL
        epoch_losses = []
        
        # do not keep track of gradients
        model.eval()
        
        # loop through val loader
        for data in val_loader:
            # get actual and predicted values
            y = data.y
            y_hat = model(data).view(-1)
            
            # calculate and save loss
            loss = loss_fn(y_hat, y)
            
            # save loss
            epoch_loss_item = loss.item()
            epoch_losses.append(epoch_loss_item)
        
        epoch_mean_loss = torch.mean(torch.tensor(epoch_losses)).item()
        val_mean_losses.append(epoch_mean_loss)
        wandb.log({"epoch_mean_loss": epoch_mean_loss})
        
        # print out the results of the epoch
        print(f'EPOCH {epoch+1} OF {num_epochs} | VAL MEAN LOSS: {epoch_mean_loss}')
        
        # if this is our best val performance yet, save the weights
        if min(val_mean_losses) == epoch_mean_loss:
            torch.save(model, 'models/'+config['name']+'.pth')
            
        # if we are using a scheduler that needs the epoch loss passed in to know 
        # whether or not to change the LR, pass it in
        if plateau:
            scheduler.step(epoch_mean_loss)
        else:
            scheduler.step()
    
    # TEST
    for data in test_loader:
        # get actual and predicted values
        y = data.y
        y_hat = model(data).view(-1)
        
        # calculate and save loss
        loss = loss_fn(y_hat, y)
        
        # save loss
        test_losses.append(loss.item())
    
    # save and print mean test loss
    test_mean_loss = torch.mean(torch.tensor(test_losses)).item()
    wandb.log({"test_mean_loss": test_mean_loss})
    print(f'TEST MEAN LOSS: {test_mean_loss}')

    wandb.finish()

EPOCH 1 OF 50 | VAL MEAN LOSS: 0.006441839504987001
EPOCH 2 OF 50 | VAL MEAN LOSS: 0.006441826466470957
EPOCH 3 OF 50 | VAL MEAN LOSS: 0.006441825069487095
EPOCH 4 OF 50 | VAL MEAN LOSS: 0.006441824603825808
EPOCH 5 OF 50 | VAL MEAN LOSS: 0.00644182413816452
EPOCH 6 OF 50 | VAL MEAN LOSS: 0.00644182413816452
EPOCH 7 OF 50 | VAL MEAN LOSS: 0.00644182413816452
EPOCH 8 OF 50 | VAL MEAN LOSS: 0.00643862783908844
EPOCH 9 OF 50 | VAL MEAN LOSS: 0.006438631098717451
EPOCH 10 OF 50 | VAL MEAN LOSS: 0.006438631564378738


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x105f19150>>
Traceback (most recent call last):
  File "/usr/local/Caskroom/miniconda/base/envs/GDL/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 789, in _clean_thread_parent_frames
    for identity in list(thread_to_parent_header.keys()):
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt: 


EPOCH 11 OF 50 | VAL MEAN LOSS: 0.006438670679926872
EPOCH 12 OF 50 | VAL MEAN LOSS: 0.006438683718442917


KeyboardInterrupt: 

wandb: Network error (ConnectionError), entering retry loop.


In [14]:
wandb.finish()

0,1
epoch_mean_loss,███████▁▁▁▁▁
train_losses,▄▄█▁▄█▁▄▆█▇▆█▇▆█▄▄▇▄▄▇▄▄█▁▄█▁▄█▁▇▆█▇▆█▇▆
training_rates,██████████████████████▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch_mean_loss,0.00644
train_losses,0.00661
training_rates,0.0
