In [4]:
import torch
import torch.nn as nn
from torch_geometric.loader import DataLoader
from networks import FractalNet, FractalNetShared, Net, GNN_no_rel, GNN, TransformerNet
from subgraph import Graph_to_Subgraph
from train import train_model, get_qm9

In [5]:
# GLOBAL VARIABLES FOR THE EXPERIMENT
# Everything a reader might want to tune lives in this cell; the model cells
# below only read these names.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling start from the same state on every "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)

# Training schedule shared by every run.
epochs = 55
batch_size = 32

# Data related (x and y).
Z_ONE_HOT_DIM = 5   # presumably the width of the one-hot atom encoding; passed to train_model and to the GNN cell
LABEL_INDEX = 7     # index of the regressed target; passed to get_qm9 and train_model
EDGE_ATTR_DIM = 4   # used as n_edge_features by the GNN-with-edge-features cell

# Model dimensions.
node_features = Z_ONE_HOT_DIM   # previously a second hard-coded 5; tie it to the constant above
edge_features = 0               # the fractal / plain nets are constructed with zero edge features
hidden_features = 64
out_features = 1

# TRAINING SHARED PARAMETERS FRACTAL NET

In [3]:
# Shared-parameter fractal network: build the model, the optimisation objects
# and the QM9 loaders (graphs are expanded with the Graph_to_Subgraph transform).
model_name = 'FractalNetShared'
model = FractalNetShared(
    node_features, edge_features, hidden_features, out_features,
    depth=1,
    pool='add',
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
# 'min' mode: the learning rate is multiplied by 0.7 when the monitored value
# stops decreasing (patience=3); verbose=True prints each reduction.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.7,
    patience=3,
    verbose=True,
)

train, valid, test = get_qm9(
    "data/qm9",
    device=device,
    LABEL_INDEX=LABEL_INDEX,
    transform=Graph_to_Subgraph(),
)
# Only the training split is shuffled; evaluation splits keep their order.
train_loader = DataLoader(train, shuffle=True, batch_size=batch_size)
valid_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=32) for split in (valid, test)
)

  warn("Using non-standard permutation since permute.pt does not exist.")


In [10]:
# Train and evaluate the shared-parameter model; the returned results are read
# again by the loss-plotting cell.
# NOTE(review): the recorded run of this cell stopped with
# "ValueError: Model name not recognized" - confirm which model_name values
# train_model accepts before relying on this result.
fractalnetshared_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 25281


  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/3125 [00:00<?, ?it/s]

ValueError: Model name not recognized

# TRAINING FRACTAL NET

In [13]:
# Fractal network with its own parameters per level (depth 3, residual skips).
model_name = 'FractalNet'
model = FractalNet(
    node_features, edge_features, hidden_features, out_features,
    depth=3,
    pool='add',
    add_residual_skip=True,
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.7,
    patience=3,
    verbose=True,
)

train, valid, test = get_qm9(
    "data/qm9",
    device=device,
    LABEL_INDEX=LABEL_INDEX,
    transform=Graph_to_Subgraph(),
)
# Keep only the first 10% of the training split.
# NOTE(review): the Net / GNN baseline cells train on the full split, so their
# test losses are not a like-for-like comparison with this run.
train = train[: int(0.1 * len(train))]
train_loader = DataLoader(train, shuffle=True, batch_size=batch_size)
valid_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=32) for split in (valid, test)
)

In [14]:
# Train and evaluate FractalNet; the results are read by the plotting and
# test-loss report cells.
fractalnet_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 298433


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 0, Loss: 0.8165286079453774, Valid Loss: 0.3341747196718527


  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 1, Loss: 0.24079825270313995, Valid Loss: 0.184387219552034


  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 2, Loss: 0.08893119533822988, Valid Loss: 0.06514097188417904


  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 3, Loss: 0.022421466380234262, Valid Loss: 0.021305969437977045


  0%|          | 0/313 [00:00<?, ?it/s]

KeyboardInterrupt: 

# TRAINING A TRANSFORMER SUBGRAPH GNN

In [6]:
# Transformer variant of the subgraph network.
# device = 'cpu'   (optional override, left disabled)
model_name = 'TransformerNet'
mode = 'transformer_10'
# The number after the underscore (10 here) widens the node feature input below.
transformer_size = int(mode.split('_')[1])
model = TransformerNet(
    # Wider input is needed because the unique transformer nodes add extra
    # one-hot encodings on top of node_features.
    node_features + transformer_size,
    edge_features,
    hidden_features,
    out_features,
    depth=3,
    pool='add',
    add_residual_skip=True,
    num_heads=4,
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.7,
    patience=3,
    verbose=True,
)

# The same mode string is handed to the transform that builds the subgraphs.
train, valid, test = get_qm9(
    "data/qm9",
    device=device,
    LABEL_INDEX=LABEL_INDEX,
    transform=Graph_to_Subgraph(mode=mode),
)
# take only 10% of the training data
train = train[: int(0.1 * len(train))]
train_loader = DataLoader(train, shuffle=True, batch_size=batch_size)

valid_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=32) for split in (valid, test)
)

  warn("Using non-standard permutation since permute.pt does not exist.")


In [7]:
# Train and evaluate the transformer subgraph model.
# NOTE(review): the recorded run failed with
# "mat1 and mat2 shapes cannot be multiplied (6237x256 and 64x256)".
# 256 equals hidden_features * num_heads for the model built in the previous
# cell (64 * 4), so presumably a multi-head output is fed to a layer expecting
# hidden_features inputs - verify inside TransformerNet.
fractalnet_transformer_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 406593


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6237x256 and 64x256)

# TRAINING SAME NET AS FRACTAL BUT WITHOUT SUBNODES

In [5]:
# Baseline with the same constructor arguments as the fractal nets but trained
# on the untransformed graphs (transform=None, i.e. no subnodes).
model_name = 'Net'
model = Net(
    node_features, edge_features, hidden_features, out_features,
    depth=3,
    pool='add',
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.7,
    patience=3,
    verbose=True,
)

train, valid, test = get_qm9(
    "data/qm9",
    device=device,
    LABEL_INDEX=LABEL_INDEX,
    transform=None,
)
train_loader = DataLoader(train, shuffle=True, batch_size=batch_size)
valid_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=32) for split in (valid, test)
)

  warn("Using non-standard permutation since permute.pt does not exist.")


In [6]:
# Train and evaluate the no-subnode baseline; the results are read by the
# plotting and test-loss report cells.
# NOTE(review): the recorded output of this cell is flooded with per-attribute
# device prints ("ptr cpu", "edge_attr cpu", ...) that this notebook does not
# emit itself - presumably leftover debugging in train_model or get_qm9.
no_subnode_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 74945


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/3125 [00:00<?, ?it/s]

ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_index cpu
pos cpu
idx cpu
batch cpu
x cpu
z cpu
y cpu
ptr cpu
edge_attr cpu
edge_i

KeyboardInterrupt: 

# TRAINING A NORMAL GNN WITHOUT RELATIONAL INFO

In [17]:
# Plain message-passing GNN baseline on the untransformed graphs (transform=None).
model_name = 'GNN_no_rel'
model = GNN_no_rel(
    node_features,   # was a hard-coded 5; use the config constant like the other model cells
    edge_features,
    hidden_features,
    out_features,
    num_convolution_blocks=3,
    pooling='add',
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
criterion = nn.MSELoss()
# 'min' mode: the learning rate is multiplied by 0.7 when the monitored value
# stops decreasing (patience=3); verbose=True prints each reduction.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.7, patience=3, verbose=True
)
train, valid, test = get_qm9("data/qm9",
                             device=device,
                             LABEL_INDEX=LABEL_INDEX,
                             transform=None)
# Only the training split is shuffled; evaluation splits keep their order.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid, batch_size=32, shuffle=False)
test_loader = DataLoader(test, batch_size=32, shuffle=False)

In [18]:
# Train and evaluate the GNN_no_rel baseline; the results are read by the
# plotting and test-loss report cells.
gnn_no_rel_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 277057


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 0, Loss: 0.34073594618916514, Valid Loss: 0.26796615189804246


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 1, Loss: 0.2002595726749301, Valid Loss: 0.11254382863069495


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 2, Loss: 0.052325787158980966, Valid Loss: 0.01320092515430797


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 3, Loss: 0.004784174772752449, Valid Loss: 0.0029446304483434407


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 4, Loss: 0.0017802835443534422, Valid Loss: 0.0011488691687557143


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 5, Loss: 0.0013485616207064596, Valid Loss: 0.0008311322576402814


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 6, Loss: 0.0012334666186670075, Valid Loss: 0.0003099502361021205


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 7, Loss: 0.0010672309357262566, Valid Loss: 0.0017416033840451104


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 8, Loss: 0.0009332941744074923, Valid Loss: 0.0005364063240841742


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 9, Loss: 0.000706238659650553, Valid Loss: 0.0005293432732171099


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch 00011: reducing learning rate of group 0 to 3.5000e-04.
Epoch: 10, Loss: 0.0005822339579527033, Valid Loss: 0.0005092425674254733


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 11, Loss: 0.0003077914858500299, Valid Loss: 0.0008925487304637476


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 12, Loss: 0.00034889882537550876, Valid Loss: 0.0005732798170199636


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 13, Loss: 0.00028307309053852807, Valid Loss: 0.0005240922351795495


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch 00015: reducing learning rate of group 0 to 2.4500e-04.
Epoch: 14, Loss: 0.00031421407085596004, Valid Loss: 0.0004182716977618711


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 15, Loss: 0.0001408623219979927, Valid Loss: 0.0004023277626471715


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 16, Loss: 0.00015678595537177897, Valid Loss: 0.0004508169166948877


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 17, Loss: 0.00015129269527606085, Valid Loss: 0.00037310238281751114


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch 00019: reducing learning rate of group 0 to 1.7150e-04.
Epoch: 18, Loss: 0.00014093932886949915, Valid Loss: 0.0005385101686695594


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 19, Loss: 7.378145212042e-05, Valid Loss: 0.0006095445816242954


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 20, Loss: 5.1224616796316695e-05, Valid Loss: 0.00043007814446944907


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 21, Loss: 8.91770587641804e-05, Valid Loss: 0.0004898761498408273


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch 00023: reducing learning rate of group 0 to 1.2005e-04.
Epoch: 22, Loss: 6.0796163273807904e-05, Valid Loss: 0.0006306070788185479


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 23, Loss: 2.972445246125062e-05, Valid Loss: 0.0004025591450440065


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 24, Loss: 3.784198907251266e-05, Valid Loss: 0.00041785356562161377


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 25, Loss: 3.283295836803518e-05, Valid Loss: 0.0004213635944069074


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch 00027: reducing learning rate of group 0 to 8.4035e-05.
Epoch: 26, Loss: 4.4157627581244016e-05, Valid Loss: 0.000441299996451655


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 27, Loss: 1.4356185552260285e-05, Valid Loss: 0.00042767308757948847


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 28, Loss: 2.1143508784134612e-05, Valid Loss: 0.0004223199410105259


  0%|          | 0/3125 [00:00<?, ?it/s]

  0%|          | 0/313 [00:00<?, ?it/s]

Epoch: 29, Loss: 1.61127240807582e-05, Valid Loss: 0.00044732314751880724


  0%|          | 0/3125 [00:00<?, ?it/s]

KeyboardInterrupt: 

# TRAINING A GNN WITH EDGE FEATURES

In [12]:
# Create a GNN that also consumes edge attributes, plus its optimiser, loss,
# scheduler and QM9 loaders.
# device is re-derived here exactly as in the config cell, which also undoes
# any manual override made in an earlier cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = 'GNN'
model = GNN(n_node_features=Z_ONE_HOT_DIM,
            n_edge_features=EDGE_ATTR_DIM,
            n_hidden=hidden_features,   # was a literal 64; same value, now tied to the config cell
            n_output=out_features,
            num_convolution_blocks=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
# 'min' mode: the learning rate is multiplied by 0.7 when the monitored value
# stops decreasing (patience=3); verbose=True prints each reduction.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.7, patience=3, verbose=True
)
train, valid, test = get_qm9("data/qm9",
                             device=device,
                             LABEL_INDEX=LABEL_INDEX,
                             transform=None)
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid, batch_size=32, shuffle=False)
# This loader was missing: the training cell below would otherwise pick up
# whatever test_loader an earlier experiment left in the kernel (possibly one
# built with a different transform), or fail with NameError on a fresh kernel.
test_loader = DataLoader(test, batch_size=32, shuffle=False)

  warn("Using non-standard permutation since permute.pt does not exist.")


In [13]:
# Train and evaluate the edge-feature GNN; the results are read by the
# loss-plotting cell.
# NOTE(review): the recorded run stopped with
# "IndexError: too many indices for tensor of dimension 2"; the cause is not
# visible from this notebook - check how train_model / GNN index the batch.
gnn_results = train_model(
    model, model_name, epochs,
    train_loader, valid_loader, test_loader,
    optimizer, criterion, scheduler,
    device, LABEL_INDEX, Z_ONE_HOT_DIM,
)

Total number of parameters: 227329


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/3125 [00:00<?, ?it/s]

IndexError: too many indices for tensor of dimension 2

# PLOTTING LOSS

In [None]:
# Plot loss curves: one figure per metric (training loss, validation loss),
# one line per model, so the runs can be compared epoch by epoch.
import matplotlib.pyplot as plt

# Legend label -> name of the notebook variable holding that run's train_model
# results. Runs are looked up by name because some training cells may have
# failed or been skipped; a missing variable is then left out of the figure
# instead of aborting the whole cell with NameError.
loss_curve_sources = {
    'FractalNetShared': 'fractalnetshared_results',
    'FractalNet': 'fractalnet_results',
    'GNN_no_rel': 'gnn_no_rel_results',
    'GNN': 'gnn_results',
    'No Subnodes': 'no_subnode_results',
    'TransformerNet': 'fractalnet_transformer_results',
}
finished_runs = {
    label: globals()[variable]
    for label, variable in loss_curve_sources.items()
    if variable in globals()
}

for metric, title in (('train_loss', 'Training loss'),
                      ('valid_loss', 'Validation loss')):
    fig, ax = plt.subplots()
    for label, results in finished_runs.items():
        ax.plot(results[metric], label=label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(title)
    if finished_runs:
        # legend() warns when there is nothing to label
        ax.legend()
    plt.show()

In [21]:
# Report the final test loss of each finished run.
# The FractalNetShared and GNN rows stay disabled, as in the original cell;
# the recorded runs of their training cells ended in errors.
test_loss_report = (
    # ('FractalNetShared Test Loss: ', fractalnetshared_results),
    ('FractalNet Test Loss: ', fractalnet_results),
    ('GNN_no_rel Test Loss: ', gnn_no_rel_results),
    # ('GNN Test Loss: ', gnn_results),
    ('No Subnodes Test Loss: ', no_subnode_results),
)
for caption, run_results in test_loss_report:
    print(caption, run_results['test_loss'])

FractalNet Test Loss:  0.0005467538204020275
GNN_no_rel Test Loss:  1.4065201867546444e-05
No Subnodes Test Loss:  3.3362182585271106e-06
