In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl

import evoaug
from evoaug import utils, augment, robust_model
from evoaug.robust_model import RobustModel

# Run on the first CUDA device when one is visible to torch; otherwise stay on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [2]:

class DeepSTARR(nn.Module):
    """DeepSTARR model from de Almeida et al., 2022;
        see <https://www.nature.com/articles/s41588-022-01048-5>

    Four 1-D convolutional layers (each: convolution with "same" padding,
    batch norm, ReLU, max-pool by 2) feed two fully connected layers of width
    256 (each: linear, batch norm, ReLU, dropout 0.4) and a final linear
    output layer.

    The convolution kernels live directly on the module as
    ``conv1_filters`` ... ``conv4_filters`` rather than inside ``nn.Conv1d``
    layers, so that every layer can independently be

    * freshly initialised and trained (``convN_filters=None``),
    * initialised from supplied values and trained further
      (``convN_filters`` given, ``learn_convN_filters=True``), or
    * supplied and frozen (``convN_filters`` given,
      ``learn_convN_filters=False``; stored as a buffer, not a parameter).

    Parameters
    ----------
    output_dim : int
        Number of units of the final linear layer.
    d : int, default 256
        Number of first-layer filters when ``conv1_filters`` is not supplied.
        Also sets the channel count of ``batchnorm1`` and the input-channel
        count of the default second-layer filters.
    conv1_filters, conv2_filters, conv3_filters, conv4_filters : array-like or None
        Initial kernel values for the corresponding layer; converted with
        ``torch.Tensor(...)``. ``None`` requests a Kaiming-normal kernel of the
        default shape ``(d, 4, 7)``, ``(60, d, 3)``, ``(60, 60, 5)`` and
        ``(120, 60, 3)`` respectively.
    learn_conv1_filters, ..., learn_conv4_filters : bool, default True
        Whether the layer's kernel is trainable.

    Raises
    ------
    AssertionError
        If a layer is asked to stay frozen (``learn_convN_filters=False``)
        without initial values being supplied for it.
    """
    def __init__(self, output_dim, d=256,
                 conv1_filters=None, learn_conv1_filters=True,
                 conv2_filters=None, learn_conv2_filters=True,
                 conv3_filters=None, learn_conv3_filters=True,
                 conv4_filters=None, learn_conv4_filters=True):
        super().__init__()

        if d != 256:
            print("NB: number of first-layer convolutional filters in original DeepSTARR model is 256; current number of first-layer convolutional filters is not set to 256")

        # Stateless pieces shared by several layers.
        self.activation = nn.ReLU()
        self.dropout4 = nn.Dropout(0.4)
        self.flatten = nn.Flatten()

        # Keep the caller-supplied values so get_which_conv_layers_transferred()
        # can report which layers were initialised from outside.
        self.init_conv1_filters = conv1_filters
        self.init_conv2_filters = conv2_filters
        self.init_conv3_filters = conv3_filters
        self.init_conv4_filters = conv4_filters

        # A frozen layer without initial values would be a fixed random kernel;
        # reject that for every layer before anything is registered.
        requested = (
            (1, conv1_filters, learn_conv1_filters),
            (2, conv2_filters, learn_conv2_filters),
            (3, conv3_filters, learn_conv3_filters),
            (4, conv4_filters, learn_conv4_filters),
        )
        for idx, filters, learn in requested:
            assert (not (filters is None and not learn)), f"initial conv{idx}_filters cannot be set to None while learn_conv{idx}_filters is set to False"

        # Layer 1 (convolutional), constituent parts
        self._register_conv_filters("conv1_filters", conv1_filters, learn_conv1_filters, (d, 4, 7))
        self.batchnorm1 = nn.BatchNorm1d(d)
        self.activation1 = nn.ReLU() # name the first-layer activation function for hook purposes
        self.maxpool1 = nn.MaxPool1d(2)

        # Layer 2 (convolutional), constituent parts
        self._register_conv_filters("conv2_filters", conv2_filters, learn_conv2_filters, (60, d, 3))
        self.batchnorm2 = nn.BatchNorm1d(60)
        self.maxpool2 = nn.MaxPool1d(2)

        # Layer 3 (convolutional), constituent parts
        self._register_conv_filters("conv3_filters", conv3_filters, learn_conv3_filters, (60, 60, 5))
        self.batchnorm3 = nn.BatchNorm1d(60)
        self.maxpool3 = nn.MaxPool1d(2)

        # Layer 4 (convolutional), constituent parts
        self._register_conv_filters("conv4_filters", conv4_filters, learn_conv4_filters, (120, 60, 3))
        self.batchnorm4 = nn.BatchNorm1d(120)
        self.maxpool4 = nn.MaxPool1d(2)

        # Layer 5 (fully connected), constituent parts
        # LazyLinear infers its input width from the first batch it sees.
        self.fc5 = nn.LazyLinear(256, bias=True)
        self.batchnorm5 = nn.BatchNorm1d(256)

        # Layer 6 (fully connected), constituent parts
        self.fc6 = nn.Linear(256, 256, bias=True)
        self.batchnorm6 = nn.BatchNorm1d(256)

        # Output layer (fully connected), constituent parts
        self.fc7 = nn.Linear(256, output_dim)

    def _register_conv_filters(self, name, initial, learn, default_shape):
        """Attach one layer's convolution kernel to the module as attribute ``name``.

        ``initial=None`` creates a trainable Kaiming-normal kernel of
        ``default_shape``; otherwise ``initial`` becomes a trainable parameter
        when ``learn`` is true and a (non-trainable) buffer when it is false.
        """
        if initial is None:
            filters = nn.Parameter(torch.zeros(*default_shape))
            nn.init.kaiming_normal_(filters)
            setattr(self, name, filters)
        elif learn: # continue modifying the supplied filters through learning
            setattr(self, name, nn.Parameter(torch.Tensor(initial)))
        else:
            self.register_buffer(name, torch.Tensor(initial))

    @staticmethod
    def _conv_block(x, filters, batchnorm, activation, maxpool):
        """Apply convolution ("same" padding, stride 1) -> batch norm -> activation -> max-pool."""
        x = torch.conv1d(x, filters, stride=1, padding="same")
        x = batchnorm(x)
        x = activation(x)
        return maxpool(x)

    def get_which_conv_layers_transferred(self):
        """Return the 1-based indices of conv layers that were given initial filters."""
        supplied = (
            self.init_conv1_filters,
            self.init_conv2_filters,
            self.init_conv3_filters,
            self.init_conv4_filters,
        )
        return [idx for idx, filters in enumerate(supplied, start=1) if filters is not None]

    def forward(self, x):
        """Map a batch of inputs to predictions of width ``output_dim``.

        ``x`` is fed straight into a 1-D convolution, so it must be laid out as
        (batch, channels, length) with ``channels`` matching the first-layer
        filters (4 for the default kernel).
        """
        # Layers 1-4 (layer 1 uses its own named activation so hooks can target it)
        cnn = self._conv_block(x, self.conv1_filters, self.batchnorm1, self.activation1, self.maxpool1)
        cnn = self._conv_block(cnn, self.conv2_filters, self.batchnorm2, self.activation, self.maxpool2)
        cnn = self._conv_block(cnn, self.conv3_filters, self.batchnorm3, self.activation, self.maxpool3)
        cnn = self._conv_block(cnn, self.conv4_filters, self.batchnorm4, self.activation, self.maxpool4)

        # Layer 5
        cnn = self.flatten(cnn)
        cnn = self.fc5(cnn)
        cnn = self.batchnorm5(cnn)
        cnn = self.activation(cnn)
        cnn = self.dropout4(cnn)

        # Layer 6
        cnn = self.fc6(cnn)
        cnn = self.batchnorm6(cnn)
        cnn = self.activation(cnn)
        cnn = self.dropout4(cnn)

        # Output layer
        y_pred = self.fc7(cnn)

        return y_pred

# Load dataset

In [3]:
# Locate the experiment's HDF5 file under the data directory and hand it to
# EvoAug's H5DataModule (batches of 100, lower_case=False).
expt_name = 'DeepSTARR'
data_path = '../data'
filepath = os.path.join(data_path, f'{expt_name}_data.h5')
data_module = utils.H5DataModule(filepath, batch_size=100, lower_case=False)

# Train model with EvoAug augmentations

In [None]:
# Index of this training run; it is appended to the checkpoint file names below.
trial = 0

# Directory that receives the model checkpoints.
output_dir = '../results'
utils.make_directory(output_dir)

In [4]:
# Build the base model (one output unit per column of y_train), its loss and
# the optimizer configuration that RobustModel will use.
# NOTE: DeepSTARR.fc5 is an nn.LazyLinear, so its weight shape is only fixed by
# the first forward pass; the optimizer is configured here before that happens.
deepstarr = DeepSTARR(data_module.y_train.shape[-1]).to(device)
loss = torch.nn.MSELoss()
optimizer_dict = utils.configure_optimizer(deepstarr,
                                           lr=0.001,
                                           weight_decay=1e-6,
                                           decay_factor=0.1,
                                           patience=5,
                                           monitor='val_loss')

# Augmentations applied during training; the commented-out entries are disabled.
augment_list = [
    augment.RandomDeletion(delete_min=0, delete_max=30),
#     augment.RandomRC(rc_prob=0.5),
    augment.RandomInsertion(insert_min=0, insert_max=30),
    augment.RandomTranslocation(shift_min=0, shift_max=30),
#     augment.RandomNoise(noise_mean=0, noise_std=0.3),
]
robust_deepstarr = RobustModel(deepstarr,
                               criterion=loss,
                               optimizer=optimizer_dict,
                               augment_list=augment_list,
                               max_augs_per_seq=2,
                               hard_aug=True,
                               inference_aug=False)

# create pytorch lightning trainer
# The best checkpoint (lowest val_loss) is written to
# <output_dir>/<ckpt_aug_path>.ckpt; training stops early after 10 epochs
# without val_loss improvement.
ckpt_aug_path = expt_name + "_aug_" + str(trial)
callback_topmodel = pl.callbacks.ModelCheckpoint(monitor='val_loss',
                                                 save_top_k=1,
                                                 dirpath=output_dir,
                                                 filename=ckpt_aug_path)
callback_es = pl.callbacks.early_stopping.EarlyStopping(monitor='val_loss', patience=10)

# Request a GPU only when one is actually available, mirroring the fallback
# used for `device`; with a GPU present this is the same configuration as
# gpus=1, auto_select_gpus=True.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(gpus=1 if use_gpu else None,
                     max_epochs=100,
                     auto_select_gpus=use_gpu,
                     logger=None,
                     callbacks=[callback_es, callback_topmodel])

# fit model
trainer.fit(robust_deepstarr, datamodule=data_module)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]

  | Name      | Type      | Params
----------------------------------------
0 | model     | DeepSTARR | 161 K 
1 | criterion | MSELoss   | 0     
----------------------------------------
161 K     Trainable params
0         Non-trainable params
161 K     Total params
0.645     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [7]:
# load checkpoint for model with best validation performance
# ModelCheckpoint was configured with dirpath=output_dir, so the checkpoint file
# lives inside output_dir rather than in the current working directory.
# NOTE(review): if a file of this name already existed when training started,
# Lightning may have saved under a versioned name instead -- check output_dir.
best_aug_ckpt = os.path.join(output_dir, ckpt_aug_path + '.ckpt')
robust_deepstarr = robust_model.load_model_from_checkpoint(robust_deepstarr, best_aug_ckpt)

# evaluate best model
# NOTE(review): the recorded output of this cell ends in
# "TypeError: only size-1 arrays can be converted to Python scalars"; the cause
# is inside the EvoAug helpers and is not visible from this notebook -- confirm
# the types/shapes of data_module.y_test and pred before relying on `results`.
pred = utils.get_predictions(robust_deepstarr, data_module.x_test, batch_size=100)
results = utils.evaluate_model(data_module.y_test, pred, task='regression')   # task is 'binary' or 'regression'

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]
  rank_zero_warn(


Predicting: 0it [00:00, ?it/s]

TypeError: only size-1 arrays can be converted to Python scalars

# Fine-tune model (without augmentations)

In [8]:
# Load best EvoAug model from checkpoint
robust_deepstarr.finetune = True
robust_deepstarr.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, robust_deepstarr.model.parameters()),
                                           lr=0.0001, weight_decay=1e-6)

# set up trainer for fine-tuning
ckpt_finetune_path = expt_name + "_finetune_" + str(trial)
callback_topmodel = pl.callbacks.ModelCheckpoint(monitor='val_loss', 
                                                 save_top_k=1, 
                                                 dirpath=output_dir, 
                                                 filename=ckpt_finetune_path)
trainer = pl.Trainer(gpus=1, max_epochs=5, auto_select_gpus=True, logger=None, 
                    callbacks=[callback_topmodel])

# Fine-tune model
trainer.fit(robust_deepstarr, datamodule=data_module)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]

  | Name      | Type      | Params
----------------------------------------
0 | model     | DeepSTARR | 683 K 
1 | criterion | MSELoss   | 0     
----------------------------------------
683 K     Trainable params
0         Non-trainable params
683 K     Total params
2.735     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [10]:
# load checkpoint for model with best validation performance
# ModelCheckpoint was configured with dirpath=output_dir, so the checkpoint file
# lives inside output_dir rather than in the current working directory.
# NOTE(review): if a file of this name already existed when fine-tuning started,
# Lightning may have saved under a versioned name instead -- check output_dir.
best_finetune_ckpt = os.path.join(output_dir, ckpt_finetune_path + '.ckpt')
robust_deepstarr = robust_model.load_model_from_checkpoint(robust_deepstarr, best_finetune_ckpt)

# evaluate best model
# NOTE(review): the recorded output of this cell ends in
# "TypeError: only size-1 arrays can be converted to Python scalars"; the cause
# is inside the EvoAug helpers and is not visible from this notebook -- confirm
# the types/shapes of data_module.y_test and pred before relying on `results`.
pred = utils.get_predictions(robust_deepstarr, data_module.x_test, batch_size=100)
results = utils.evaluate_model(data_module.y_test, pred, task='regression') # task is 'binary' or 'regression'

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4]
  rank_zero_warn(


Predicting: 0it [00:00, ?it/s]

TypeError: only size-1 arrays can be converted to Python scalars