In [None]:
!pip install dgl -f https://data.dgl.ai/wheels/repo.html

!pip install dglgo -f https://data.dgl.ai/wheels-test/repo.html

Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl
  Downloading dgl-1.1.1-cp310-cp310-manylinux1_x86_64.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dgl
Successfully installed dgl-1.1.1
Looking in links: https://data.dgl.ai/wheels-test/repo.html
Collecting dglgo
  Downloading dglgo-0.0.2-py3-none-any.whl (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.5/63.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting isort>=5.10.1 (from dglgo)
  Downloading isort-5.12.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autopep8>=1.6.0 (from dglgo)
  Downloading autopep8-2.0.2-py2.py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:

In [None]:
# Mount Google Drive into the Colab filesystem; the paths configured further down
# all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Import libraries

In [None]:
%matplotlib inline
import os

os.environ["DGLBACKEND"] = "pytorch"     #specifies that DGL should use PyTorch as the backend for operations on graphs
import dgl
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import dgl.function as fn                #contains a collection of built-in message-passing functions
import torch.nn.functional as F          #provides a set of functions that can be used to define the behavior of neural network layers (F.relu())
import shutil                            #provides a set of high-level operations on files and collections of files, such as copying, moving, and deleting files and directories.
from torch.utils.data import DataLoader
import cloudpickle                       #provides the ability to serialize and deserialize complex Python objects.
from dgl.nn import GraphConv
from sklearn.preprocessing import StandardScaler

In [None]:
# Project root on the mounted Drive (note the trailing slash: paths below are built by concatenation).
current_dir = "/content/drive/MyDrive/Graph_regression/"

# Directory that receives 'checkpoint.pth' every time the validation score improves.
checkpoint_path = current_dir + "save_models/model_checkpoints/" + "checkpoint"
os.makedirs(checkpoint_path, exist_ok=True) # create the directory (and parents);
                                            # exist_ok=True makes this a no-op
                                            # when it is already there.

# Directory the checkpoint folder is copied to after training; the test step loads from here.
best_model_path = current_dir + "save_models/best_model/"

folder_data_temp = current_dir +"data_temp/" # scratch folder that holds the extracted dataset files;
                                             # the dataset objects are loaded from this folder.

shutil.rmtree(folder_data_temp, ignore_errors=True) # start from a clean scratch folder;
                                                    # ignore_errors=True tolerates a missing folder.

# Dataset archive; must be extracted after the rmtree above, otherwise the fresh files would be deleted.
path_save = current_dir + "esol.zip"
shutil.unpack_archive(path_save, folder_data_temp) # the archive format is inferred from the file
                                                   # extension; contents are extracted into folder_data_temp.

In [None]:
# Extract the archive with the shell `unzip` tool; no target directory is given, so the
# files land in the current working directory (see the listing printed below).
# NOTE(review): esol.zip is already unpacked into folder_data_temp by shutil.unpack_archive,
# and the datasets are loaded from that folder - this second extraction looks redundant; confirm.
!unzip /content/drive/MyDrive/Graph_regression/esol.zip

Archive:  /content/drive/MyDrive/Graph_regression/esol.zip
  inflating: scaffold_0_smiles_train.pickle  
  inflating: scaffold_0_test.bin     
  inflating: scaffold_0_val.bin      
  inflating: scaffold_0_smiles_val.pickle  
  inflating: scaffold_0_smiles_test.pickle  
  inflating: scaffold_0_train.bin    


Regression Dataset

In [None]:
class DGLDatasetReg(torch.utils.data.Dataset):
    """Graph-regression dataset backed by a DGL graph file.

    The file ``address + "bin"`` is read with ``dgl.load_graphs``; its label
    dictionary must provide the keys ``"labels"``, ``"masks"`` and ``"globals"``.
    Each of them is reshaped to ``(num_graphs, -1)`` so that row ``i`` belongs
    to graph ``i``.

    Args:
        address: Path prefix of the graph file, without the trailing ``"bin"``.
        transform: Stored on the instance; not used by this class.
        train: When True, ``scaler_method`` fits a new ``StandardScaler`` on
            this dataset's labels.
        scaler: Already-fitted scaler used to normalise the labels (for the
            validation/test splits). May be None for the training split until
            ``scaler_method`` has been called.
    """

    def __init__(self, address, transform=None, train=False, scaler=None):
        self.transform = transform
        self.train = train
        self.scaler = scaler
        # (scaler, scaled label tensor) pair, filled lazily by _scaled_labels().
        self._scaled_cache = None
        # Load the graphs together with their label/mask/global tensors.
        self.list_graphs, train_labels_masks_globals = dgl.load_graphs(address + "bin")
        num_graphs = len(self.list_graphs)  # number of graphs in the dataset
        self.labels = train_labels_masks_globals["labels"].view(num_graphs, -1)
        self.masks = train_labels_masks_globals["masks"].view(num_graphs, -1)
        self.globals = train_labels_masks_globals["globals"].view(num_graphs, -1)

    def scaler_method(self):
        """Return the label scaler, fitting it first when ``train`` is True.

        For a training dataset a new ``StandardScaler`` is fitted on
        ``self.labels`` and stored; otherwise the scaler passed to the
        constructor is returned unchanged.
        """
        if self.train:
            self.scaler = StandardScaler().fit(self.labels)
        return self.scaler

    def _scaled_labels(self):
        """Return all labels transformed by the current scaler as a float tensor.

        The transform is applied once to the whole label matrix and the result
        is reused until a different scaler object is installed, instead of
        re-transforming every label for each requested item.

        Raises:
            RuntimeError: If no scaler has been set yet.
        """
        if self.scaler is None:
            raise RuntimeError(
                "No label scaler is set: pass `scaler=` to the constructor or "
                "call scaler_method() on a dataset created with train=True."
            )
        # getattr keeps this working for instances created without __init__.
        cache = getattr(self, "_scaled_cache", None)
        if cache is None or cache[0] is not self.scaler:
            scaled = torch.tensor(self.scaler.transform(self.labels)).float()
            cache = (self.scaler, scaled)
            self._scaled_cache = cache
        return cache[1]

    def __len__(self):
        """Number of graphs in the dataset."""
        return len(self.list_graphs)

    def __getitem__(self, idx):
        """Return ``(graph, scaled_label, mask, globals)`` for example ``idx``."""
        # clone() so callers can never modify the cached label matrix in place.
        scaled_label = self._scaled_labels()[idx].clone()
        return self.list_graphs[idx], scaled_label, self.masks[idx], self.globals[idx]

Train_Validation_Test Set

In [None]:
# All three splits share the file prefix "<folder_data_temp>scaffold_0".
path_data_temp = folder_data_temp + "scaffold" + "_" + str(0)

# The training split fits the label scaler; validation and test reuse that fitted scaler.
train_set = DGLDatasetReg(address=path_data_temp + "_train.", train=True)
scaler = train_set.scaler_method()
val_set, test_set = (
    DGLDatasetReg(address=path_data_temp + split_suffix, scaler=scaler)
    for split_suffix in ("_val.", "_test.")
)

split_sizes = (len(train_set), len(val_set), len(test_set))
print('Size of train set : {}, Size of validation set : {}, Size of test set : {}'.format(*split_sizes))

Size of train set : 902, Size of validation set : 112, Size of test set : 114


Dataloader

In [None]:
#The collate_fn argument specifies the function that should be used to collate the data from each batch into a single batch
def collate(batch):
    """Merge a list of dataset examples into one mini-batch.

    Args:
        batch: List of ``(graph, label, mask, globals)`` tuples.

    Returns:
        ``(batched_graph, labels, masks, globals)`` where the graphs are merged
        with ``dgl.batch`` and each tensor field is stacked along a new leading
        (batch) dimension.
    """
    # Transpose the list of examples into one list per field.
    graph_col, label_col, mask_col, global_col = (
        [example[field] for example in batch] for field in range(4)
    )

    batched_graph = dgl.batch(graph_col)
    stacked_labels = torch.stack(label_col, 0)
    stacked_masks = torch.stack(mask_col, 0)
    stacked_globals = torch.stack(global_col, 0)

    return batched_graph, stacked_labels, stacked_masks, stacked_globals


def loader(batch_size=64):
    """Create the train/validation/test DataLoaders over the notebook-level datasets.

    All three loaders use ``collate`` to assemble batches, keep the last
    (possibly smaller) batch and load with one worker process. Only the
    training loader shuffles its examples.

    Args:
        batch_size: Number of graphs per mini-batch.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader)``.
    """
    # Settings shared by every split.
    common = dict(
        batch_size=batch_size,
        collate_fn=collate,
        drop_last=False,  # keep the final batch even when it is smaller than batch_size
        num_workers=1,    # one background worker process loads the data
    )

    train_dataloader = DataLoader(train_set, shuffle=True, **common)
    val_dataloader = DataLoader(val_set, shuffle=False, **common)
    test_dataloader = DataLoader(test_set, shuffle=False, **common)
    return train_dataloader, val_dataloader, test_dataloader

In [None]:
# Build the three DataLoaders once; train_evaluate() and test_evaluate() read these names as globals.
train_dataloader, val_dataloader, test_dataloader = loader(batch_size=64)

Some variables

In [None]:
num_tasks = 1     # Number of regression targets per graph (the ESOL dataset has 1 task).
global_size = 200 # Size of the global feature of each graph; passed to GNN(...), but the GNN classes visible in this notebook do not use it.
num_epochs = 100  # Upper bound on the number of training epochs.
patience = 10     # Number of consecutive epochs without a validation improvement after which training stops.

# Configuration used to instantiate the model: number of node/edge feature columns kept
# from the graphs, and the width of the hidden node representation.
config = {"node_feature_size":127, "edge_feature_size":12, "hidden_size":100}


Defining GNN

In [None]:
# MODEL(1) :two GraphConv layers
class GNN(nn.Module):
    """MODEL(1): two ``GraphConv`` layers followed by mean pooling over nodes.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted for interface compatibility; not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        # dict.get falls back to the given default when the key is missing.
        self.node_feature_size = self.config.get('node_feature_size', 127)
        self.edge_feature_size = self.config.get('edge_feature_size', 12)
        self.hidden_size = self.config.get('hidden_size', 100)

        # allow_zero_in_degree must be the boolean True (not the string 'True',
        # which only worked because any non-empty string is truthy): it lets
        # GraphConv accept graphs that contain nodes without incoming edges.
        self.conv1 = GraphConv(self.node_feature_size, self.hidden_size, allow_zero_in_degree=True)
        self.conv2 = GraphConv(self.hidden_size, self.num_tasks, allow_zero_in_degree=True)

    def forward(self, mol_dgl_graph, globals):
        """Predict one ``num_tasks``-sized vector per graph in the batch.

        Args:
            mol_dgl_graph: (Batched) DGL graph with node features ``"v"`` and
                edge features ``"e"``.
            globals: Per-graph global features; not used by this model.

        Returns:
            The mean over each graph's nodes of the final node representation.

        Note:
            The graph is modified in place: ``"v"`` and ``"e"`` are truncated to
            the configured number of columns and the node output is stored
            under ``"h"``.
        """
        # Keep only the first node_feature_size / edge_feature_size feature columns.
        mol_dgl_graph.ndata["v"]= mol_dgl_graph.ndata["v"][:,:self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:,:self.edge_feature_size]
        h = self.conv1(mol_dgl_graph, mol_dgl_graph.ndata["v"])
        h = F.relu(h)
        h = self.conv2(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h

        # Aggregate the node-level representation into one graph-level output.
        return dgl.mean_nodes(mol_dgl_graph, "h")

Compute score of the model ---> RMSE

In [None]:
def compute_score(model, data_loader, val_size, num_tasks, label_scaler=None):
    """Evaluate ``model`` on ``data_loader`` and return its RMSE in original label units.

    Predictions and labels are mapped back to the original label scale with the
    scaler's ``inverse_transform`` before the squared errors
    ``sum_i (y_i - y'_i)^2`` are accumulated over all batches.

    Args:
        model: Module called as ``model(graph, globals)``.
        data_loader: Iterable of ``(graph, labels, masks, globals)`` batches.
            The masks are not used by this function.
        val_size: Number of examples in the evaluated split; the summed squared
            error is divided by it before the square root is taken.
        num_tasks: The resulting RMSE is divided by this value.
        label_scaler: Fitted scaler providing ``inverse_transform``. When
            omitted, the notebook-level ``scaler`` is used, so existing
            four-argument calls behave exactly as before.

    Returns:
        ``sqrt(total_squared_error / val_size) / num_tasks``.
    """
    # Fall back to the module-level scaler to stay compatible with existing callers.
    active_scaler = scaler if label_scaler is None else label_scaler

    model.eval()  # layers such as dropout/batch-norm switch to inference behaviour
    loss_sum = nn.MSELoss(reduction='sum')  # summed (not averaged) squared error
    final_loss = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for mol_dgl_graph, labels, masks, graph_globals in data_loader:
            prediction = model(mol_dgl_graph, graph_globals)
            # Undo the label normalisation so the error is in original units.
            prediction = torch.tensor(active_scaler.inverse_transform(prediction.detach().cpu()))
            labels = torch.tensor(active_scaler.inverse_transform(labels.cpu()))
            final_loss += loss_sum(prediction, labels).item()
        final_loss /= val_size
        final_loss = np.sqrt(final_loss)  # RMSE
    return final_loss / num_tasks

Defining loss function

In [None]:
def loss_func(output, label, mask, num_tasks):
    """Masked mean squared error.

    Args:
        output: Model predictions.
        label: Target values, same shape as ``output``.
        mask: Weights multiplied element-wise with the squared errors
            (presumably 1 for present labels and 0 for missing ones - confirm
            against the dataset).
        num_tasks: Unused; kept so existing calls keep working.

    Returns:
        Sum of the masked squared errors divided by ``mask.sum()``. When the
        mask sums to zero the (zero) masked sum is returned instead of
        dividing 0 by 0, which would yield NaN and poison the optimizer step.
    """
    criterion = nn.MSELoss(reduction='none')
    masked_error = mask * criterion(output, label)
    total_weight = mask.sum()
    if total_weight == 0:
        # Nothing to average over in this batch: avoid 0/0 = NaN.
        return masked_error.sum()
    return masked_error.sum() / total_weight

Training

In [None]:
def train_epoch(train_dataloader, model, optimizer):
    """Run one optimisation pass over ``train_dataloader``.

    Args:
        train_dataloader: Iterable of ``(graph, labels, masks, globals)`` batches.
        model: Module called as ``model(graph, globals)``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        The mean of the per-batch training losses of this epoch.
    """
    model.train()  # enable training-mode behaviour before the first batch
    running_loss = 0
    num_batches = 0
    for num_batches, batch in enumerate(train_dataloader, start=1):
        graph, targets, target_masks, graph_globals = batch
        predicted = model(graph, graph_globals)
        batch_loss = loss_func(predicted, targets, target_masks, num_tasks)
        # Drop gradients left over from the previous step, then back-propagate and update.
        optimizer.zero_grad(set_to_none=True)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.detach().item()
    return running_loss / num_batches

In [None]:
def train_evaluate():
    """Train a fresh ``GNN`` with early stopping and keep the best checkpoint.

    A new model and Adam optimizer are created from the notebook-level
    ``config``, ``global_size`` and ``num_tasks``. After every epoch the model
    is scored on the validation loader; whenever the score improves, the model
    and optimizer state are written to ``checkpoint_path/checkpoint.pth``.
    Training stops after ``num_epochs`` epochs, or earlier once ``patience``
    consecutive epochs have brought no improvement. Finally the checkpoint
    directory is copied to ``best_model_path``.

    Returns:
        The best (lowest) validation score that was observed.
    """
    model = GNN(config, global_size, num_tasks)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

    best_val = np.inf  # `np.Inf` was removed in NumPy 2.0; `np.inf` works in every version
    patience_count = 1

    for epoch in range(1, num_epochs + 1):
        if patience_count > patience:
            # Patience exhausted: stop instead of idling through the remaining epochs.
            break

        model.train()
        loss_train = train_epoch(train_dataloader, model, optimizer)
        model.eval()
        score_val = compute_score(model, val_dataloader, len(val_set), num_tasks)

        if score_val < best_val:
            # The new validation score beats the best one so far: save a
            # checkpoint and reset the patience counter.
            best_val = score_val
            print("Save checkpoint")
            path = os.path.join(checkpoint_path, 'checkpoint.pth')
            dict_checkpoint = {"score_val": score_val}
            dict_checkpoint.update({"model_state_dict": model.state_dict(), "optimizer_state": optimizer.state_dict()})
            with open(path, "wb") as outputfile:  # binary mode: the pickle stream is bytes
                cloudpickle.dump(dict_checkpoint, outputfile)
            patience_count = 1
        else:
            print("Patience", patience_count)
            patience_count += 1

        print("Epoch: {}/{} | Training Loss: {:.3f} | Valid Score: {:.3f}".format(
        epoch, num_epochs, loss_train, score_val))

        print(" ")
        print("Epoch: {}/{} | Best Valid Score Until Now: {:.3f}".format(epoch, num_epochs, best_val), "\n")

    # Publish the best checkpoint: replace best_model_path with a copy of the checkpoint folder.
    # NOTE(review): checkpoint_path is not cleared at the start of a run, so if no epoch
    # improves (e.g. NaN scores) a checkpoint left by an earlier run would be copied - confirm.
    shutil.rmtree(best_model_path, ignore_errors=True)  # ignore_errors tolerates a missing directory
    shutil.copytree(checkpoint_path, best_model_path)

    print("Final results:")
    print("Average Valid Score: {:.3f}".format(np.mean(best_val)), "\n")
    return best_val


In [None]:
#evaluates the final GNN model on a test set and prints the test score.
def test_evaluate():
    final_model = GNN(config, global_size, num_tasks)
    path = os.path.join(best_model_path, 'checkpoint.pth') #loads the best model checkpoint from the best_model_path directory using cloudpickle.load,
                                                            #which reads the serialized dictionary from the file and returns it as a Python object.
    with open(path, 'rb') as f:
        checkpoint = cloudpickle.load(f)
    final_model.load_state_dict(checkpoint["model_state_dict"])
    final_model.eval()
    test_score = compute_score(final_model, test_dataloader, len(test_set), num_tasks)#The function then computes the test score using the compute_score function on the test DataLoader


    print("Test Score: {:.3f}".format(test_score), "\n")
    print("Execution time: {:.3f} seconds".format(time.time() - start_time))


In [None]:
import time
start_time = time.time()  # reference point that test_evaluate() reads to print "Execution time"

# Both functions build the model from the global name `GNN`, so this run uses whichever
# GNN class definition was executed most recently.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 1.015 | Valid Score: 2.177
 
Epoch: 1/100 | Best Valid Score Until Now: 2.177 

Patience 1
Epoch: 2/100 | Training Loss: 1.001 | Valid Score: 2.187
 
Epoch: 2/100 | Best Valid Score Until Now: 2.177 

Patience 2
Epoch: 3/100 | Training Loss: 0.953 | Valid Score: 2.188
 
Epoch: 3/100 | Best Valid Score Until Now: 2.177 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.944 | Valid Score: 2.166
 
Epoch: 4/100 | Best Valid Score Until Now: 2.166 

Patience 1
Epoch: 5/100 | Training Loss: 0.933 | Valid Score: 2.169
 
Epoch: 5/100 | Best Valid Score Until Now: 2.166 

Patience 2
Epoch: 6/100 | Training Loss: 0.943 | Valid Score: 2.173
 
Epoch: 6/100 | Best Valid Score Until Now: 2.166 

Patience 3
Epoch: 7/100 | Training Loss: 0.898 | Valid Score: 2.168
 
Epoch: 7/100 | Best Valid Score Until Now: 2.166 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.932 | Valid Score: 2.154
 
Epoch: 8/100 | Best Valid Score Until Now: 2.154 

Save checkpoint
E

In [None]:
class Desired_GNN_1(nn.Module):
    """Message-passing layer: mean of ``u_add_v`` messages, then a linear projection.

    Args:
        in_feat: Number of input features per node.
        out_feat: Number of output features per node.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # The projection sees the node's own features concatenated with the
        # aggregated neighbourhood features, hence twice the input width.
        self.linear = nn.Linear(2 * in_feat, out_feat)

    def forward(self, g, h):
        """Return the projected ``[h, h_N]`` for every node of ``g``.

        Args:
            g: DGL graph.
            h: Node feature tensor, one row per node.
        """
        # local_scope keeps the temporary "h"/"m"/"h_N" fields off the caller's graph.
        with g.local_scope():
            g.ndata["h"] = h
            # Each edge carries (source "h" + destination "h") as message "m";
            # every node stores the mean of its incoming messages in "h_N".
            g.update_all(
                message_func=fn.u_add_v("h", "h", "m"),
                reduce_func=fn.mean("m", "h_N"),
            )
            combined = torch.cat((h, g.ndata["h_N"]), dim=1)
            return self.linear(combined)

In [None]:
#MODEL(2)

class GNN(nn.Module):
    """MODEL(2): two ``Desired_GNN_1`` layers followed by mean pooling over nodes.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted but not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        settings = self.config
        self.node_feature_size = settings.get('node_feature_size', 127)  # node feature columns kept
        self.edge_feature_size = settings.get('edge_feature_size', 12)   # edge feature columns kept
        self.hidden_size = settings.get('hidden_size', 100)              # hidden representation width

        self.conv1 = Desired_GNN_1(self.node_feature_size, self.hidden_size)
        self.conv2 = Desired_GNN_1(self.hidden_size, self.num_tasks)

    def forward(self, mol_dgl_graph, globals):
        """Return one ``num_tasks``-sized prediction per graph.

        The graph is modified in place: ``"v"``/``"e"`` are truncated to the
        configured widths and the node outputs are stored under ``"h"``.
        ``globals`` is not used.
        """
        graph = mol_dgl_graph
        graph.ndata["v"] = graph.ndata["v"][:, :self.node_feature_size]
        graph.edata["e"] = graph.edata["e"][:, :self.edge_feature_size]

        hidden = F.relu(self.conv1(graph, graph.ndata["v"]))
        graph.ndata["h"] = self.conv2(graph, hidden)
        # Average the node outputs of each graph into its prediction.
        return dgl.mean_nodes(graph, "h")

In [None]:
import time
start_time = time.time()  # reset the reference point used by test_evaluate() for "Execution time"

# train_evaluate()/test_evaluate() look up `GNN` when they are called, so this run
# trains and tests the most recently executed GNN definition.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.960 | Valid Score: 2.157
 
Epoch: 1/100 | Best Valid Score Until Now: 2.157 

Patience 1
Epoch: 2/100 | Training Loss: 1.007 | Valid Score: 2.169
 
Epoch: 2/100 | Best Valid Score Until Now: 2.157 

Save checkpoint
Epoch: 3/100 | Training Loss: 0.943 | Valid Score: 2.153
 
Epoch: 3/100 | Best Valid Score Until Now: 2.153 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.921 | Valid Score: 2.141
 
Epoch: 4/100 | Best Valid Score Until Now: 2.141 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.960 | Valid Score: 2.135
 
Epoch: 5/100 | Best Valid Score Until Now: 2.135 

Patience 1
Epoch: 6/100 | Training Loss: 0.921 | Valid Score: 2.138
 
Epoch: 6/100 | Best Valid Score Until Now: 2.135 

Patience 2
Epoch: 7/100 | Training Loss: 0.932 | Valid Score: 2.146
 
Epoch: 7/100 | Best Valid Score Until Now: 2.135 

Patience 3
Epoch: 8/100 | Training Loss: 0.945 | Valid Score: 2.149
 
Epoch: 8/100 | Best Valid Score Until Now: 2.135 

Save checkpo

In [None]:
class Desired_GNN_2(nn.Module):
    """Message-passing layer: sum of ``u_mul_v`` messages, then a linear projection.

    Args:
        in_feat: Number of input features per node.
        out_feat: Number of output features per node.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # Input width is doubled because the node's own features are
        # concatenated with the aggregated neighbourhood features.
        self.linear = nn.Linear(2 * in_feat, out_feat)

    def forward(self, g, h):
        """Return the projected ``[h, h_N]`` for every node of ``g``.

        Args:
            g: DGL graph.
            h: Node feature tensor, one row per node.
        """
        # local_scope keeps the temporary "h"/"m"/"h_N" fields off the caller's graph.
        with g.local_scope():
            g.ndata["h"] = h
            # Each edge carries (source "h" * destination "h") as message "m";
            # every node stores the sum of its incoming messages in "h_N".
            g.update_all(
                message_func=fn.u_mul_v("h", "h", "m"),
                reduce_func=fn.sum("m", "h_N"),
            )
            combined = torch.cat((h, g.ndata["h_N"]), dim=1)
            return self.linear(combined)

In [None]:
#MODEL(3)

class GNN(nn.Module):
    """MODEL(3): two ``Desired_GNN_2`` layers followed by mean pooling over nodes.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted but not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        settings = self.config
        self.node_feature_size = settings.get('node_feature_size', 127)  # node feature columns kept
        self.edge_feature_size = settings.get('edge_feature_size', 12)   # edge feature columns kept
        self.hidden_size = settings.get('hidden_size', 100)              # hidden representation width

        self.conv1 = Desired_GNN_2(self.node_feature_size, self.hidden_size)
        self.conv2 = Desired_GNN_2(self.hidden_size, self.num_tasks)

    def forward(self, mol_dgl_graph, globals):
        """Return one ``num_tasks``-sized prediction per graph.

        The graph is modified in place: ``"v"``/``"e"`` are truncated to the
        configured widths and the node outputs are stored under ``"h"``.
        ``globals`` is not used.
        """
        graph = mol_dgl_graph
        graph.ndata["v"] = graph.ndata["v"][:, :self.node_feature_size]
        graph.edata["e"] = graph.edata["e"][:, :self.edge_feature_size]

        hidden = F.relu(self.conv1(graph, graph.ndata["v"]))
        graph.ndata["h"] = self.conv2(graph, hidden)
        # Average the node outputs of each graph into its prediction.
        return dgl.mean_nodes(graph, "h")

In [None]:
import time
start_time = time.time()  # reset the reference point used by test_evaluate() for "Execution time"

# train_evaluate()/test_evaluate() look up `GNN` when they are called, so this run
# trains and tests the most recently executed GNN definition.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 1.012 | Valid Score: 2.111
 
Epoch: 1/100 | Best Valid Score Until Now: 2.111 

Save checkpoint
Epoch: 2/100 | Training Loss: 1.066 | Valid Score: 2.095
 
Epoch: 2/100 | Best Valid Score Until Now: 2.095 

Save checkpoint
Epoch: 3/100 | Training Loss: 0.949 | Valid Score: 2.078
 
Epoch: 3/100 | Best Valid Score Until Now: 2.078 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.976 | Valid Score: 2.067
 
Epoch: 4/100 | Best Valid Score Until Now: 2.067 

Patience 1
Epoch: 5/100 | Training Loss: 1.047 | Valid Score: 2.078
 
Epoch: 5/100 | Best Valid Score Until Now: 2.067 

Patience 2
Epoch: 6/100 | Training Loss: 0.981 | Valid Score: 2.074
 
Epoch: 6/100 | Best Valid Score Until Now: 2.067 

Patience 3
Epoch: 7/100 | Training Loss: 0.897 | Valid Score: 2.072
 
Epoch: 7/100 | Best Valid Score Until Now: 2.067 

Patience 4
Epoch: 8/100 | Training Loss: 0.901 | Valid Score: 2.070
 
Epoch: 8/100 | Best Valid Score Until Now: 2.067 

Patience 5
E

In [None]:
#MODEL(4)

class GNN(nn.Module):
    """MODEL(4): four ``Desired_GNN_2`` layers followed by mean pooling over nodes.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted but not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        settings = self.config
        self.node_feature_size = settings.get('node_feature_size', 127)  # node feature columns kept
        self.edge_feature_size = settings.get('edge_feature_size', 12)   # edge feature columns kept
        self.hidden_size = settings.get('hidden_size', 100)              # hidden representation width

        width = self.hidden_size
        self.conv1 = Desired_GNN_2(self.node_feature_size, width)
        self.conv2 = Desired_GNN_2(width, width)
        self.conv3 = Desired_GNN_2(width, width)
        self.conv4 = Desired_GNN_2(width, self.num_tasks)

    def forward(self, mol_dgl_graph, globals):
        """Return one ``num_tasks``-sized prediction per graph.

        The graph is modified in place: ``"v"``/``"e"`` are truncated to the
        configured widths and the node outputs are stored under ``"h"``.
        ``globals`` is not used.
        """
        graph = mol_dgl_graph
        graph.ndata["v"] = graph.ndata["v"][:, :self.node_feature_size]
        graph.edata["e"] = graph.edata["e"][:, :self.edge_feature_size]

        hidden = graph.ndata["v"]
        # The first three layers are each followed by a ReLU; the output layer is not.
        for layer in (self.conv1, self.conv2, self.conv3):
            hidden = F.relu(layer(graph, hidden))
        graph.ndata["h"] = self.conv4(graph, hidden)

        return dgl.mean_nodes(graph, "h")

In [None]:
import time
start_time = time.time()  # reset the reference point used by test_evaluate() for "Execution time"

# train_evaluate()/test_evaluate() look up `GNN` when they are called, so this run
# trains and tests the most recently executed GNN definition.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 1.153 | Valid Score: 2.125
 
Epoch: 1/100 | Best Valid Score Until Now: 2.125 

Patience 1
Epoch: 2/100 | Training Loss: 0.962 | Valid Score: 2.125
 
Epoch: 2/100 | Best Valid Score Until Now: 2.125 

Patience 2
Epoch: 3/100 | Training Loss: 0.998 | Valid Score: 2.126
 
Epoch: 3/100 | Best Valid Score Until Now: 2.125 

Patience 3
Epoch: 4/100 | Training Loss: 0.992 | Valid Score: 2.128
 
Epoch: 4/100 | Best Valid Score Until Now: 2.125 

Patience 4
Epoch: 5/100 | Training Loss: 0.961 | Valid Score: 2.130
 
Epoch: 5/100 | Best Valid Score Until Now: 2.125 

Patience 5
Epoch: 6/100 | Training Loss: 0.949 | Valid Score: 2.130
 
Epoch: 6/100 | Best Valid Score Until Now: 2.125 

Patience 6
Epoch: 7/100 | Training Loss: 0.931 | Valid Score: 2.130
 
Epoch: 7/100 | Best Valid Score Until Now: 2.125 

Patience 7
Epoch: 8/100 | Training Loss: 0.969 | Valid Score: 2.130
 
Epoch: 8/100 | Best Valid Score Until Now: 2.125 

Patience 8
Epoch: 9/100 | T

In [None]:
#MODEL(5)

class GNN(nn.Module):
    """MODEL(5): four ``Desired_GNN_2`` layers with batch normalisation, then mean pooling.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted but not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        settings = self.config
        self.node_feature_size = settings.get('node_feature_size', 127)  # node feature columns kept
        self.edge_feature_size = settings.get('edge_feature_size', 12)   # edge feature columns kept
        self.hidden_size = settings.get('hidden_size', 100)              # hidden representation width

        width = self.hidden_size
        self.conv1 = Desired_GNN_2(self.node_feature_size, width)
        self.bn1 = nn.BatchNorm1d(width)
        self.conv2 = Desired_GNN_2(width, width)
        self.bn2 = nn.BatchNorm1d(width)
        self.conv3 = Desired_GNN_2(width, width)
        self.bn3 = nn.BatchNorm1d(width)
        self.conv4 = Desired_GNN_2(width, self.num_tasks)

    def forward(self, mol_dgl_graph, globals):
        """Return one ``num_tasks``-sized prediction per graph.

        The graph is modified in place: ``"v"``/``"e"`` are truncated to the
        configured widths and the node outputs are stored under ``"h"``.
        ``globals`` is not used.
        """
        graph = mol_dgl_graph
        graph.ndata["v"] = graph.ndata["v"][:, :self.node_feature_size]
        graph.edata["e"] = graph.edata["e"][:, :self.edge_feature_size]

        hidden = graph.ndata["v"]
        # Hidden stages: message passing -> batch norm -> ReLU.
        hidden_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for layer, norm in hidden_stages:
            hidden = F.relu(norm(layer(graph, hidden)))
        # Output layer: no normalisation or activation.
        graph.ndata["h"] = self.conv4(graph, hidden)

        return dgl.mean_nodes(graph, "h")

In [None]:
import time
start_time = time.time()  # reset the reference point used by test_evaluate() for "Execution time"

# train_evaluate()/test_evaluate() look up `GNN` when they are called, so this run
# trains and tests the most recently executed GNN definition.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 2.534 | Valid Score: 2.212
 
Epoch: 1/100 | Best Valid Score Until Now: 2.212 

Patience 1
Epoch: 2/100 | Training Loss: 1.503 | Valid Score: 2.820
 
Epoch: 2/100 | Best Valid Score Until Now: 2.212 

Patience 2
Epoch: 3/100 | Training Loss: 1.306 | Valid Score: 5.262
 
Epoch: 3/100 | Best Valid Score Until Now: 2.212 

Patience 3
Epoch: 4/100 | Training Loss: 1.198 | Valid Score: 5.824
 
Epoch: 4/100 | Best Valid Score Until Now: 2.212 

Patience 4
Epoch: 5/100 | Training Loss: 1.346 | Valid Score: 4.119
 
Epoch: 5/100 | Best Valid Score Until Now: 2.212 

Patience 5
Epoch: 6/100 | Training Loss: 1.186 | Valid Score: 2.875
 
Epoch: 6/100 | Best Valid Score Until Now: 2.212 

Patience 6
Epoch: 7/100 | Training Loss: 1.260 | Valid Score: 4.749
 
Epoch: 7/100 | Best Valid Score Until Now: 2.212 

Patience 7
Epoch: 8/100 | Training Loss: 1.169 | Valid Score: 9.598
 
Epoch: 8/100 | Best Valid Score Until Now: 2.212 

Patience 8
Epoch: 9/100 | T

In [None]:
#MODEL(6)

class GNN(nn.Module):
    """MODEL(6): MODEL(5)-style network with dropout after the first and third hidden stage.

    Args:
        config: Dict that may provide ``node_feature_size`` (default 127),
            ``edge_feature_size`` (default 12) and ``hidden_size`` (default 100).
        global_size: Accepted but not used by this model.
        num_tasks: Number of outputs per graph.
    """

    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        settings = self.config
        self.node_feature_size = settings.get('node_feature_size', 127)  # node feature columns kept
        self.edge_feature_size = settings.get('edge_feature_size', 12)   # edge feature columns kept
        self.hidden_size = settings.get('hidden_size', 100)              # hidden representation width

        width = self.hidden_size
        self.conv1 = Desired_GNN_2(self.node_feature_size, width)
        self.bn1 = nn.BatchNorm1d(width)
        self.d1 = nn.Dropout(0.5)
        self.conv2 = Desired_GNN_2(width, width)
        self.bn2 = nn.BatchNorm1d(width)
        self.conv3 = Desired_GNN_2(width, width)
        self.bn3 = nn.BatchNorm1d(width)
        self.d2 = nn.Dropout(0.5)
        self.conv4 = Desired_GNN_2(width, self.num_tasks)

    def forward(self, mol_dgl_graph, globals):
        """Return one ``num_tasks``-sized prediction per graph.

        The graph is modified in place: ``"v"``/``"e"`` are truncated to the
        configured widths and the node outputs are stored under ``"h"``.
        ``globals`` is not used.
        """
        graph = mol_dgl_graph
        graph.ndata["v"] = graph.ndata["v"][:, :self.node_feature_size]
        graph.edata["e"] = graph.edata["e"][:, :self.edge_feature_size]

        # Stage 1: message passing -> batch norm -> ReLU -> dropout.
        hidden = self.d1(F.relu(self.bn1(self.conv1(graph, graph.ndata["v"]))))
        # Stage 2: message passing -> batch norm -> ReLU (no dropout).
        hidden = F.relu(self.bn2(self.conv2(graph, hidden)))
        # Stage 3: message passing -> batch norm -> ReLU -> dropout.
        hidden = self.d2(F.relu(self.bn3(self.conv3(graph, hidden))))
        # Output layer: no normalisation, activation or dropout.
        graph.ndata["h"] = self.conv4(graph, hidden)

        return dgl.mean_nodes(graph, "h")

In [None]:
import time
start_time = time.time()  # reset the reference point used by test_evaluate() for "Execution time"

# train_evaluate()/test_evaluate() look up `GNN` when they are called, so this run
# trains and tests the most recently executed GNN definition.
train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 8.064 | Valid Score: 2.180
 
Epoch: 1/100 | Best Valid Score Until Now: 2.180 

Patience 1
Epoch: 2/100 | Training Loss: 6.841 | Valid Score: 2.195
 
Epoch: 2/100 | Best Valid Score Until Now: 2.180 

Patience 2
Epoch: 3/100 | Training Loss: 8.168 | Valid Score: 2.203
 
Epoch: 3/100 | Best Valid Score Until Now: 2.180 

Patience 3
Epoch: 4/100 | Training Loss: 4.936 | Valid Score: 2.376
 
Epoch: 4/100 | Best Valid Score Until Now: 2.180 

Patience 4
Epoch: 5/100 | Training Loss: 5.693 | Valid Score: 2.379
 
Epoch: 5/100 | Best Valid Score Until Now: 2.180 

Patience 5
Epoch: 6/100 | Training Loss: 6.297 | Valid Score: 2.208
 
Epoch: 6/100 | Best Valid Score Until Now: 2.180 

Patience 6
Epoch: 7/100 | Training Loss: 4.077 | Valid Score: 2.200
 
Epoch: 7/100 | Best Valid Score Until Now: 2.180 

Patience 7
Epoch: 8/100 | Training Loss: 5.307 | Valid Score: 2.324
 
Epoch: 8/100 | Best Valid Score Until Now: 2.180 

Patience 8
Epoch: 9/100 | T