In [3]:
import os
import numpy as np
import torch
import torch.nn as nn
import evoaug
from evoaug import utils, augment, robust_model
from evoaug.robust_model import RobustModel
import pytorch_lightning as pl

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [4]:

class CNN(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        
        self.activation1 = nn.ReLU()
        self.activation = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.2)
        self.dropout3 = nn.Dropout(0.2)
        self.dropout4 = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        self.output_activation = nn.Sigmoid()

        # Layer 1 (convolutional), constituent parts
        self.conv1_filters = torch.nn.Parameter(torch.zeros(64, 4, 7))
        torch.nn.init.kaiming_uniform_(self.conv1_filters)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.maxpool1 = nn.MaxPool1d(4)
        
        # Layer 3 (convolutional), constituent parts
        self.conv2_filters = torch.nn.Parameter(torch.zeros(96, 64, 5))
        torch.nn.init.kaiming_uniform_(self.conv2_filters)
        self.batchnorm2 = nn.BatchNorm1d(96)
        self.maxpool2 = nn.MaxPool1d(4)
        
        # Layer 4 (convolutional), constituent parts
        self.conv3_filters = torch.nn.Parameter(torch.zeros(128, 96, 5))
        torch.nn.init.kaiming_uniform_(self.conv3_filters)
        self.batchnorm3 = nn.BatchNorm1d(128)
        self.maxpool3 = nn.MaxPool1d(2)
        
        # Layer 5 (fully connected), constituent parts
        self.fc4 = nn.LazyLinear(256, bias=True)
        self.batchnorm4 = nn.BatchNorm1d(256)
        
        # Output layer (fully connected), constituent parts
        self.fc5 = nn.LazyLinear(output_dim, bias=True)
    
    def forward(self, x):
        # Layer 1
        cnn = torch.conv1d(x, self.conv1_filters, stride=1, padding="same")
        cnn = self.batchnorm1(cnn)
        cnn = self.activation1(cnn)
        cnn = self.maxpool1(cnn)
        cnn = self.dropout1(cnn)
        
        # Layer 2
        cnn = torch.conv1d(cnn, self.conv2_filters, stride=1, padding="same")
        cnn = self.batchnorm2(cnn)
        cnn = self.activation(cnn)
        cnn = self.maxpool2(cnn)
        cnn = self.dropout2(cnn)
        
        # Layer 3
        cnn = torch.conv1d(cnn, self.conv3_filters, stride=1, padding="same")
        cnn = self.batchnorm3(cnn)
        cnn = self.activation(cnn)
        cnn = self.maxpool3(cnn)
        cnn = self.dropout3(cnn)
        
        # Layer 4
        cnn = self.flatten(cnn)
        cnn = self.fc4(cnn)
        cnn = self.batchnorm4(cnn)
        cnn = self.activation(cnn)
        cnn = self.dropout4(cnn)
        
        # Output layer
        logits = self.fc5(cnn) 
        y_pred = self.output_activation(logits)
        
        return y_pred

# Load dataset

In [5]:
expt_name = 'GABPA'

# load data
data_path = '../data' 
filepath = os.path.join(data_path, expt_name+'_200.h5')
data_module = evoaug.utils.H5DataModule(filepath, batch_size=100, lower_case=True)


# Train model with EvoAug augmentations

In [None]:
output_dir = '../results'
utils.make_directory(output_dir)

trial = 0

In [6]:
cnn_model = CNN(1).to(device)
loss = torch.nn.BCELoss()
optimizer_dict = utils.configure_optimizer(cnn_model, lr=0.001, weight_decay=1e-6, decay_factor=0.1, patience=5, monitor='val_loss')

augment_list = [
    augment.RandomDeletion(delete_min=0, delete_max=20),
    augment.RandomRC(rc_prob=0.5),
    augment.RandomInsertion(insert_min=0, insert_max=20),
    augment.RandomTranslocation(shift_min=0, shift_max=20),
    augment.RandomNoise(noise_mean=0, noise_std=0.2),
]
robust_cnn = RobustModel(cnn_model, 
                         criterion=loss, 
                         optimizer=optimizer_dict, 
                         augment_list=augment_list, 
                         max_augs_per_seq=2, 
                         hard_aug=True, 
                         finetune=False,
                         inference_aug=False)

# create pytorch lightning trainer
ckpt_aug_path = expt_name+"_aug_"+str(trial)
callback_topmodel = pl.callbacks.ModelCheckpoint(monitor='val_loss', 
                                                 save_top_k=1, 
                                                 dirpath=output_dir, 
                                                 filename=ckpt_aug_path)
callback_es = pl.callbacks.early_stopping.EarlyStopping(monitor='val_loss', patience=10)
trainer = pl.Trainer(gpus=1, max_epochs=100, auto_select_gpus=True, logger=None, 
                    callbacks=[callback_es, callback_topmodel])

# fit model
trainer.fit(robust_cnn, datamodule=data_module)

  "pytorch_lightning.core.lightning.LightningModule has been deprecated in v1.7"
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.trainer.connectors.accelerator_connector:Auto select gpus: [0]
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "A layer with UninitializedParameter was found. "
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type    | Params
--------------------------------------
0 | model     | CNN     | 95.0 K
1 | criterion | BCELoss | 0     
-------------------------------

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [7]:
# load checkpoint for model with best validation performance
robust_cnn = robust_model.load_model_from_checkpoint(robust_cnn, ckpt_aug_path+'.ckpt')

# evaluate best model
pred = utils.get_predictions(robust_cnn, data_module.x_test, batch_size=100)
results = utils.evaluate_model(data_module.y_test, pred, task='binary')   # task is 'binary' or 'regression'

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Test AUROC: 0.8466
Test AUPR : 0.8537


# Fine-tune model (without augmentations)

In [8]:
# Load best EvoAug model from checkpoint
robust_cnn.finetune = True
robust_cnn.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, cnn_model.parameters()),
                                 lr=0.0001, weight_decay=1e-6)

# setup trainer fro fine-tuning
ckpt_finetune_path = filename=expt_name+"_finetune_"+str(trial)
callback_topmodel = pl.callbacks.ModelCheckpoint(monitor='val_loss', 
                                                 save_top_k=1, 
                                                 dirpath=output_dir, 
                                                 filename=ckpt_finetune_path)
trainer = pl.Trainer(gpus=1, max_epochs=5, auto_select_gpus=True, logger=None, 
                    callbacks=[callback_topmodel])


# Fine tune model
trainer.fit(robust_cnn, datamodule=data_module)

  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.trainer.connectors.accelerator_connector:Auto select gpus: [0]
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type    | Params
--------------------------------------
0 | model     | CNN     | 292 K 
1 | criterion | BCELoss | 0     
--------------------------------------
292 K     Trainable params
0         Non-trainable params
292 K     Total params
1.169     Total estimated model params size 

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


In [9]:
# load checkpoint for model with best validation performance
robust_cnn = robust_model.load_model_from_checkpoint(robust_cnn, ckpt_finetune_path+'.ckpt')

# evaluate best model
pred = utils.get_predictions(robust_cnn, data_module.x_test, batch_size=100)
results = utils.evaluate_model(data_module.y_test, pred, task='binary') # task is 'binary' or 'regression'

  "pytorch_lightning.core.lightning.LightningModule has been deprecated in v1.7"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Test AUROC: 0.8472
Test AUPR : 0.8551
