# Installation of requirements

In [21]:
# install requirements

!pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113  --extra-index-url https://download.pytorch.org/whl/cu113


Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113


In [22]:
# install requirements

!pip install torch-geometric==2.2.0 torch-sparse==0.6.16 torch-scatter==2.1.0 -f https://data.pyg.org/whl/torch-1.12.0+cu113.html

Looking in links: https://data.pyg.org/whl/torch-1.12.0+cu113.html


In [23]:
# Connecting the drive with Colab

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Making sure the working directory is the one

%cd /content/drive/MyDrive/AGILE

/content/drive/MyDrive/AGILE


In [25]:
# install requirements

!pip install -r requirements.txt



# The following is the content of the file 'config_pretrain.yaml'

batch_size: 512 # batch size
warm_up: 10 # warm-up epochs
epochs: 2 # total number of epochs

load_model: pretrained_gin # resume training
eval_every_n_epochs: 1 # validation frequency
save_every_n_epochs: 5 # automatic model saving frequecy
log_every_n_steps: 50 # print training log frequency

fp16_precision: False # float precision 16 (i.e. True/False)
init_lr: 0.0005 # initial learning rate for Adam
weight_decay: 1e-5 # weight decay for Adam
gpu: cuda:0 # training GPU

model:
  num_layer: 5 # number of graph conv layers
  emb_dim: 300 # embedding dimension in graph conv layers
  feat_dim: 512 # output feature dimention
  drop_ratio: 0 # dropout ratio
  pool: mean # readout pooling (i.e., mean/max/add)

aug: node # molecule graph augmentation strategy (i.e., node/subgraph/mix)
dataset:
  num_workers: 12 # dataloader number of workers
  valid_size: 0.05 # ratio of validation data
  data_path: data/{yourowndata}.csv # path of pre-training data

loss:
  temperature: 0.1 # temperature of NT-Xent loss
  use_cosine_similarity: True # whether to use cosine similarity in NT-Xent loss (i.e. True/False)


# Further ahead from here, I will try to pre-train the MolCLR model with the file 'pretrain.py'

In [26]:
import os
import shutil
import sys
import torch
import yaml
import numpy as np
from datetime import datetime

In [27]:
# imports the torch.nn.functional module
# The torch.nn.functional module contains various functions that are commonly used in neural network operations, such as activation functions, loss functions, and other operations commonly applied to tensors.

import torch.nn.functional as F

In [28]:
# This line imports the SummaryWriter class from the torch.utils.tensorboard module.
# SummaryWriter is a PyTorch utility that enables you to write TensorBoard-compatible logs.
# TensorBoard is a visualization tool provided with TensorFlow, but PyTorch provides integration to use it with PyTorch models as well.

from torch.utils.tensorboard import SummaryWriter

In [29]:
# imports the CosineAnnealingLR class from the torch.optim.lr_scheduler module in PyTorch.
# The learning rate is a hyperparameter that controls how much we are adjusting the weights of our network during training.
# CosineAnnealingLR is a learning rate scheduler in PyTorch that reduces the learning rate following the cosine annealing schedule.
# The learning rate starts at the initial value and is decreased following a cosine function until it reaches a minimum value and then starts increasing again.
# This kind of schedule is often used to improve the convergence and generalization of neural networks during training.

from torch.optim.lr_scheduler import CosineAnnealingLR

In [30]:
# Changing the working directory to 'utils'

%cd /content/drive/MyDrive/AGILE

/content/drive/MyDrive/AGILE


In [31]:
#  imports the NTXentLoss class from a custom module named nt_xent inside a package or directory called utils.
# This class likely contains the implementation of the NT-Xent loss function, which is commonly used in contrastive learning tasks.

from utils.nt_xent import NTXentLoss

In [32]:
# This code snippet checks if the Apex library is installed and imports it for mixed-precision training if it is available.
# If Apex is not installed, it prints a message indicating that Apex needs to be installed from a specific GitHub repository.

apex_support = False # Initializes the apex_support variable to False.
try: #The code inside the try block attempts to import the amp module from the Apex library.
    sys.path.append("./apex") #  Adds the "./apex" directory to the Python system path, allowing Python to find the Apex module in that directory.
    from apex import amp # Tries to import the amp module from the Apex library.

    apex_support = True # If the import is successful, sets apex_support to True indicating that Apex is available.
except: # If there is an ImportError (i.e., Apex is not installed), the code inside the except block is executed.
    print(
        "Please install apex for mixed precision training from: https://github.com/NVIDIA/apex" # Prints a message indicating that Apex needs to be installed and provides the GitHub repository URL for installation.
    )
    apex_support = False

In [33]:
# Installing Nvidia apex

import os, sys, shutil
import time
import gc
from contextlib import contextmanager
from pathlib import Path
import random
import numpy as np, pandas as pd
from tqdm import tqdm, tqdm_notebook

@contextmanager
def timer(name):
    t0 = time.time()
    yield
    print(f'[{name}] done in {time.time() - t0:.0f} s')

USE_APEX = True

if USE_APEX:
            with timer('install Nvidia apex'):
                # Installing Nvidia Apex
                os.system('git clone https://github.com/NVIDIA/apex; cd apex; pip install -v --no-cache-dir' +
                          ' --global-option="--cpp_ext" --global-option="--cuda_ext" ./')
                os.system('rm -rf apex/.git') # too many files, Kaggle fails


[install Nvidia apex] done in 2 s


In [34]:
from apex import amp


In [35]:
# This function appears to be responsible for saving a configuration file (config.yaml)
# to a specified directory (model_checkpoints_folder).
def _save_config_file(model_checkpoints_folder): # checks if the model_checkpoints_folder directory exists
    if not os.path.exists(model_checkpoints_folder): # If the directory does not exist, it creates it
        os.makedirs(model_checkpoints_folder) # copies a configuration file specified by args.config to this directory
        shutil.copy(args.config, os.path.join(model_checkpoints_folder, "config.yaml")) #  copied file is named "config.yaml"


class PreTrain(object): # initializer for a training process
    def __init__(self, dataset, config): # constructor for the PreTrain class. It takes two arguments: dataset and config.
        self.config = config # assigns the config argument to an instance variable self.config.
        self.device = self._get_device() # It calls the _get_device() method to determine and store the...
                                         # ...computing device (CPU or GPU) to be used for training

        dir_name = datetime.now().strftime("%b%d_%H-%M-%S") # generates a timestamp string in format "MonthDay_Hour-Minute-Second"
        log_dir = os.path.join("ckpt", dir_name)
        # It creates a directory path by joining "ckpt" (presumably a checkpoint directory)...
        # ...and the timestamp generated in the previous step. This path is where logs related to the training process will be saved.
        self.writer = SummaryWriter(log_dir=log_dir) # It initializes a SummaryWriter object...
        # ...typically used for logging and visualization during training. The logs will be saved in the directory specified by log_dir.

        self.dataset = dataset # It assigns the dataset argument to an instance variable 'self.dataset'...
        # ...This presumably represents the dataset used for training.
        self.nt_xent_criterion = NTXentLoss(
            self.device, config["batch_size"], **config["loss"]
        ) #  It initializes an instance of NTXentLoss and assigns it to the self.nt_xent_criterion instance variable.
        # This is likely a loss function used for training,
        # it is configured based on the config dictionary's "batch_size" and "loss" settings.


    # this method is responsible for dynamically selecting the appropriate computing device for training based on system capabilities...
    # ...and the configuration settings.
    # It sets the device to GPU if available and allowed, or to CPU if not.
    def _get_device(self):
        if torch.cuda.is_available() and self.config["gpu"] != "cpu": # checks if CUDA (GPU support for PyTorch) is available on the system.
            # checks if the "gpu" setting in the configuration (self.config) is not explicitly set to "cpu".
            device = self.config["gpu"] # assigns the GPU device specified in the configuration to the device variable.
            torch.cuda.set_device(device) # sets the current CUDA device to the one specified in device.
                                          # This ensures that the specified GPU is used for training
        else:
            device = "cpu" # assigns the string "cpu" to the device variable, indicating CPU usage.
        print("Running on:", device) # prints a message indicating whether the code is running on the CPU or GPU

        return device # method returns the selected device (device) as a string ("cpu" or the GPU identifier).


    # The _step method appears to define a single training step within a training loop for a contrastive learning task.
    def _step(self, model, xis, xjs, n_iter):
        # get the representations and the projections
        ris, zis = model(xis)  # [N,C]
        # This line passes the input data 'xis' through the neural network model. It computes two sets of values:
        # ris: These are the intermediate representations (features) obtained from the model for the input data xis.
        # zis: These are the projection vectors or embeddings corresponding to the representations ris.

        # get the representations and the projections
        rjs, zjs = model(xjs)  # [N,C]
        # this line passes the input data xjs through the same model to obtain representations and projections for xjs.
        #These are stored in rjs and zjs, respectively.

        # normalize projection feature vectors
        zis = F.normalize(zis, dim=1)
        zjs = F.normalize(zjs, dim=1)
        # These lines normalize the projection feature vectors zis and zjs along dimension 1 (usually the channel dimension for image data).
        # Normalization is typically done to ensure that the embeddings have a consistent scale, which can be important for contrastive learning.

        loss = self.nt_xent_criterion(zis, zjs)
        # This line computes the loss between the normalized projection vectors zis and zjs.
        # It uses the nt_xent_criterion, which is likely a custom loss function specifically designed for contrastive learning tasks.
        # Contrastive loss functions aim to minimize the similarity (e.g., cosine similarity) between positive pairs (pairs of similar items)...
        # ...and maximize the similarity between negative pairs (pairs of dissimilar items).
        return loss
        # Finally, the computed loss is returned as the result of this _step method.


# This portion of the train method sets up the training process for the neural network model.
    def train(self):
        train_loader, valid_loader = self.dataset.get_data_loaders()
        # This line obtains training and validation data loaders from the dataset object.
        # It suggests that the dataset object has a method named 'get_data_loaders' that returns data loaders for training and validation data.

        from models.agile_pretrain import AGILE # It imports the AGILE model from a module named 'agile_pretrain' within the 'models' package.
        model = AGILE(**self.config["model"]).to(self.device) # It instantiates the AGILE model using the model configuration specified in self.config["model"].
        # The ** syntax is used to unpack the dictionary and pass its contents as keyword arguments to the AGILE constructor.
        # The resulting model is then moved to the specified device (self.device), which can be either CPU or GPU.
        model = self._load_pre_trained_weights(model) #  It loads pre-trained weights for the model if they exist (as explained earlier in your code).
        #If pre-trained weights are not found, it initializes the model from scratch.
        print(model)

        optimizer = torch.optim.Adam(
            model.parameters(),
            self.config["init_lr"],
            weight_decay=eval(self.config["weight_decay"]),
        )
        scheduler = CosineAnnealingLR(
            optimizer,
            T_max=self.config["epochs"] - self.config["warm_up"],
            eta_min=0,
            last_epoch=-1,
        )

        if apex_support and self.config["fp16_precision"]:
            model, optimizer = amp.initialize(
                model, optimizer, opt_level="O2", keep_batchnorm_fp32=True
            )

        model_checkpoints_folder = os.path.join(self.writer.log_dir, "checkpoints")

        # save config file
        _save_config_file(model_checkpoints_folder)

        n_iter = 0
        valid_n_iter = 0
        best_valid_loss = np.inf

        for epoch_counter in range(self.config["epochs"]):
            for bn, (xis, xjs) in enumerate(train_loader):
                optimizer.zero_grad()

                xis = xis.to(self.device)
                xjs = xjs.to(self.device)

                loss = self._step(model, xis, xjs, n_iter)

                if n_iter % self.config["log_every_n_steps"] == 0:
                    self.writer.add_scalar("train_loss", loss, global_step=n_iter)
                    self.writer.add_scalar(
                        "cosine_lr_decay",
                        scheduler.get_last_lr()[0],
                        global_step=n_iter,
                    )
                    print("Epoch:", epoch_counter, "Iteration:", bn, "Train loss:",loss.item())

                if apex_support and self.config["fp16_precision"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()
                n_iter += 1

            # validate the model if requested
            if epoch_counter % self.config["eval_every_n_epochs"] == 0:
                valid_loss = self._validate(model, valid_loader)
                print("Epoch:", epoch_counter, "Iteration:", bn, "Valid loss:", valid_loss)
                if valid_loss < best_valid_loss:
                    # save the model weights
                    best_valid_loss = valid_loss
                    torch.save(
                        model.state_dict(),
                        os.path.join(model_checkpoints_folder, "model.pth"),
                    )

                self.writer.add_scalar(
                    "validation_loss", valid_loss, global_step=valid_n_iter
                )
                valid_n_iter += 1

            if (epoch_counter + 1) % self.config["save_every_n_epochs"] == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(
                        model_checkpoints_folder,
                        "model_{}.pth".format(str(epoch_counter)),
                    ),
                )

            # warmup for the first few epochs
            if epoch_counter >= self.config["warm_up"]:
                scheduler.step()

    def _load_pre_trained_weights(self, model):
        try:
            checkpoints_folder = os.path.join(
                "./ckpt", self.config["load_model"], "checkpoints"
            )
            state_dict = torch.load(
                os.path.join(checkpoints_folder, "model.pth"),
                map_location=self.device,
            )
            model.load_state_dict(state_dict)
            print("Loaded pre-trained model with success.")
        except FileNotFoundError:
            print("Pre-trained weights not found. Training from scratch.")

        return model

    def _validate(self, model, valid_loader):
        # validation steps
        with torch.no_grad():
            model.eval()

            valid_loss = 0.0
            counter = 0
            for (xis, xjs) in valid_loader:
                xis = xis.to(self.device)
                xjs = xjs.to(self.device)

                loss = self._step(model, xis, xjs, counter)
                valid_loss += loss.item()
                counter += 1
            valid_loss /= counter

        model.train()
        return valid_loss



In [44]:
def main(config):
    if config["aug"] == "node":
        from dataset.dataset import MoleculeDatasetWrapper
    elif config["aug"] == "subgraph":
        from dataset.dataset_subgraph import MoleculeDatasetWrapper
    elif config["aug"] == "mix":
        from dataset.dataset_mix import MoleculeDatasetWrapper
    else:
        raise ValueError("Not defined molecule augmentation!")

    dataset = MoleculeDatasetWrapper(config["batch_size"], **config["dataset"])
    agile_pretrain = PreTrain(dataset, config)
    agile_pretrain.train()
    print(f"Training finished. Checkpoints saved in {agile_pretrain.writer.log_dir}.")

In [50]:

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    print(parser)
    parser.add_argument("config", type=str, help="config_pretrain.yaml")
    args = parser.parse_args()
    print(args)
    config = yaml.load(open(args.config, "r"), Loader=yaml.FullLoader)
    print(config)
    main(config)

NameError: ignored

# Further ahead from here, I will try to finetune the pre-trained MolCLR model with the file 'finetune.py'

In [36]:
import os
import shutil
import sys
import torch
import yaml
import numpy as np
from datetime import datetime

In [37]:
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR

In [38]:
from utils.nt_xent import NTXentLoss

In [39]:
apex_support = False
try:
    sys.path.append("./apex")
    from apex import amp

    apex_support = True
except:
    print(
        "Please install apex for mixed precision training from: https://github.com/NVIDIA/apex"
    )
    apex_support = False

In [40]:
def _save_config_file(model_checkpoints_folder):
    if not os.path.exists(model_checkpoints_folder):
        os.makedirs(model_checkpoints_folder)
        shutil.copy(args.config, os.path.join(model_checkpoints_folder, "config.yaml"))

In [41]:
class PreTrain(object):
    def __init__(self, dataset, config):
        self.config = config
        self.device = self._get_device()

        dir_name = datetime.now().strftime("%b%d_%H-%M-%S")
        log_dir = os.path.join("ckpt", dir_name)
        self.writer = SummaryWriter(log_dir=log_dir)

        self.dataset = dataset
        self.nt_xent_criterion = NTXentLoss(
            self.device, config["batch_size"], **config["loss"]
        )

    def _get_device(self):
        if torch.cuda.is_available() and self.config["gpu"] != "cpu":
            device = self.config["gpu"]
            torch.cuda.set_device(device)
        else:
            device = "cpu"
        print("Running on:", device)

        return device

    def _step(self, model, xis, xjs, n_iter):
        # get the representations and the projections
        ris, zis = model(xis)  # [N,C]

        # get the representations and the projections
        rjs, zjs = model(xjs)  # [N,C]

        # normalize projection feature vectors
        zis = F.normalize(zis, dim=1)
        zjs = F.normalize(zjs, dim=1)

        loss = self.nt_xent_criterion(zis, zjs)
        return loss

    def train(self):
        train_loader, valid_loader = self.dataset.get_data_loaders()

        from models.agile_pretrain import AGILE
        model = AGILE(**self.config["model"]).to(self.device)
        model = self._load_pre_trained_weights(model)
        print(model)

        optimizer = torch.optim.Adam(
            model.parameters(),
            self.config["init_lr"],
            weight_decay=eval(self.config["weight_decay"]),
        )
        scheduler = CosineAnnealingLR(
            optimizer,
            T_max=self.config["epochs"] - self.config["warm_up"],
            eta_min=0,
            last_epoch=-1,
        )

        if apex_support and self.config["fp16_precision"]:
            model, optimizer = amp.initialize(
                model, optimizer, opt_level="O2", keep_batchnorm_fp32=True
            )

        model_checkpoints_folder = os.path.join(self.writer.log_dir, "checkpoints")

        # save config file
        _save_config_file(model_checkpoints_folder)

        n_iter = 0
        valid_n_iter = 0
        best_valid_loss = np.inf

        for epoch_counter in range(self.config["epochs"]):
            for bn, (xis, xjs) in enumerate(train_loader):
                optimizer.zero_grad()

                xis = xis.to(self.device)
                xjs = xjs.to(self.device)

                loss = self._step(model, xis, xjs, n_iter)

                if n_iter % self.config["log_every_n_steps"] == 0:
                    self.writer.add_scalar("train_loss", loss, global_step=n_iter)
                    self.writer.add_scalar(
                        "cosine_lr_decay",
                        scheduler.get_last_lr()[0],
                        global_step=n_iter,
                    )
                    print("Epoch:", epoch_counter, "Iteration:", bn, "Train loss:",loss.item())

                if apex_support and self.config["fp16_precision"]:
                    with amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                optimizer.step()
                n_iter += 1

            # validate the model if requested
            if epoch_counter % self.config["eval_every_n_epochs"] == 0:
                valid_loss = self._validate(model, valid_loader)
                print("Epoch:", epoch_counter, "Iteration:", bn, "Valid loss:", valid_loss)
                if valid_loss < best_valid_loss:
                    # save the model weights
                    best_valid_loss = valid_loss
                    torch.save(
                        model.state_dict(),
                        os.path.join(model_checkpoints_folder, "model.pth"),
                    )

                self.writer.add_scalar(
                    "validation_loss", valid_loss, global_step=valid_n_iter
                )
                valid_n_iter += 1

            if (epoch_counter + 1) % self.config["save_every_n_epochs"] == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(
                        model_checkpoints_folder,
                        "model_{}.pth".format(str(epoch_counter)),
                    ),
                )

            # warmup for the first few epochs
            if epoch_counter >= self.config["warm_up"]:
                scheduler.step()

    def _load_pre_trained_weights(self, model):
        try:
            checkpoints_folder = os.path.join(
                "./ckpt", self.config["load_model"], "checkpoints"
            )
            state_dict = torch.load(
                os.path.join(checkpoints_folder, "model.pth"),
                map_location=self.device,
            )
            model.load_state_dict(state_dict)
            print("Loaded pre-trained model with success.")
        except FileNotFoundError:
            print("Pre-trained weights not found. Training from scratch.")

        return model

    def _validate(self, model, valid_loader):
        # validation steps
        with torch.no_grad():
            model.eval()

            valid_loss = 0.0
            counter = 0
            for (xis, xjs) in valid_loader:
                xis = xis.to(self.device)
                xjs = xjs.to(self.device)

                loss = self._step(model, xis, xjs, counter)
                valid_loss += loss.item()
                counter += 1
            valid_loss /= counter

        model.train()
        return valid_loss

In [42]:
def main(config):
    if config["aug"] == "node":
        from dataset.dataset import MoleculeDatasetWrapper
    elif config["aug"] == "subgraph":
        from dataset.dataset_subgraph import MoleculeDatasetWrapper
    elif config["aug"] == "mix":
        from dataset.dataset_mix import MoleculeDatasetWrapper
    else:
        raise ValueError("Not defined molecule augmentation!")

    dataset = MoleculeDatasetWrapper(config["batch_size"], **config["dataset"])
    agile_pretrain = PreTrain(dataset, config)
    agile_pretrain.train()
    print(f"Training finished. Checkpoints saved in {agile_pretrain.writer.log_dir}.")

In [43]:
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("config", type=str, help="config_finetune.yaml")
    args = parser.parse_args()
    config = yaml.load(open(args.config, "r"), Loader=yaml.FullLoader)
    print(config)
    main(config)

usage: colab_kernel_launcher.py [-h] config
colab_kernel_launcher.py: error: unrecognized arguments: -f


SystemExit: ignored