In [9]:
!pip install polars
!pip install lightning

!pip install --extra-index-url https://pypi.nvidia.com --upgrade nvidia-dali-cuda120
!pip install --extra-index-url https://pypi.nvidia.com --upgrade nvidia-dali-tf-plugin-cuda120



In [10]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [11]:
import random
import os
import glob

import matplotlib.pyplot as plt
import gc

import numpy as np
import pandas as pd
import polars as pl

import math
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.optim as optim
from torchmetrics.classification import BinaryAveragePrecision
import lightning as L
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor,TQDMProgressBar

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score as APS

import tensorflow as tf


In [4]:
%mkdir /content/drive/MyDrive/BELKA_model/work

In [12]:
class CFG:
    DEBUG = True


    N_ROWS = None
    EPOCHS = 15
    BATCH_SIZE = 4096
    NUM_TRAINS = 91_854_569
    NUM_VALIDS = 6_561_041
    STEPS_PER_EPOCH_TRAIN = (NUM_TRAINS -1) //BATCH_SIZE +1
    STEPS_PER_EPOCH_VALID = (NUM_VALIDS -1) //BATCH_SIZE +1

    NBR_FOLDS = 15

    NUM_CLASSES = 3
    SEQ_LENGTH = 142

    if DEBUG:
        N_ROWS = 10_000_000
        EPOCHS = 6
        NBR_FOLDS = 2

    SELECTED_FOLDS = [0]

    PREPROCESS = False

    SAVE_EVERY = 3

    DATA_SOURCE = '/content/drive/MyDrive/BELKA_model'
    TRAINS = glob.glob(os.path.join(DATA_SOURCE, 'train/*')).sort()
    TRAIN_IDXS = glob.glob(os.path.join(DATA_SOURCE, 'tf_idx', 'train_*.idx')).sort()
    VALIDS = glob.glob(os.path.join(DATA_SOURCE, 'valid/*')).sort()
    VALID_IDXS = glob.glob(os.path.join(DATA_SOURCE, 'tf_idx', 'valid_*.idx')).sort()

    SEED = 2024
    WORK_DIR = '/content/drive/MyDrive/BELKA_model/work'

    FEATURES = [f'enc{i}' for i in range(142)]
    TARGETS = ['bind1', 'bind2', 'bind3']
    COLUMNS = FEATURES + TARGETS


    MODEL_PARAMS = {
        'enc_dict_size': 37,
        'channels': 128
        ,
        'rese_layer_size': [3,3,3

                           ] if DEBUG else [3,3,3]
    }


In [13]:
def set_seeds(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
set_seeds(seed= CFG.SEED)


In [14]:
from nvidia.dali import pipeline_def, Pipeline
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import nvidia.dali.tfrecord as tfrec
from nvidia.dali.plugin.pytorch import DALIGenericIterator

@pipeline_def
def belka_pipeline(num_gpus, device, paths, idxs, is_train=True):
    device_id = Pipeline.current().device_id

    inputs = fn.readers.tfrecord(
        path = paths,
        index_path = idxs,
        features={
            "x": tfrec.FixedLenFeature([CFG.SEQ_LENGTH], tfrec.int64, 0),
            "y": tfrec.FixedLenFeature([CFG.NUM_CLASSES], tfrec.float32, .0)
        },
        random_shuffle=is_train,
        num_shards=num_gpus,
        shard_id=device_id,
        initial_fill=CFG.BATCH_SIZE,
        seed=CFG.SEED
    )
    x = inputs['x']
    y = inputs['y']
    if device=='gpu':
        x.gpu()
        y.gpu()
    return x,y

In [16]:
train_loader = BelkaDataLoader(CFG.TRAINS, CFG.NUM_TRAINS, CFG.STEPS_PER_EPOCH_TRAIN)
valid_loader = BelkaDataLoader(CFG.VALIDS, CFG.NUM_VALIDS, CFG.STEPS_PER_EPOCH_VALID)

In [17]:
class MyModel(L.LightningModule):
    def __init__(self, input_dim=142, input_dim_embedding=37, hidden_dim=128, num_filters=32, output_dim=3, lr=1e-3, weight_decay=1e-6):
        super(MyModel, self).__init__()
        self.save_hyperparameters()

        self.embedding = nn.Embedding(num_embeddings=self.hparams.input_dim_embedding, embedding_dim=self.hparams.hidden_dim, padding_idx=0)
        self.conv1 = nn.Conv1d(in_channels=self.hparams.hidden_dim, out_channels=self.hparams.num_filters, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv1d(in_channels=self.hparams.num_filters, out_channels=self.hparams.num_filters*2, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv1d(in_channels=self.hparams.num_filters*2, out_channels=self.hparams.num_filters*3, kernel_size=3, stride=1, padding=0)
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)
        self.fc1 = nn.Linear(self.hparams.num_filters*3, 1024)
        self.dropout = nn.Dropout(0.1)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.output = nn.Linear(512, self.hparams.output_dim)

    def forward(self, x):
        x = self.embedding(x).permute(0,2,1)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.global_max_pool(x).squeeze(2)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.output(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        x, y = torch.from_numpy(x.copy()).cuda(), torch.from_numpy(y.copy()).cuda()
        logits = self(x)
        loss = F.binary_cross_entropy_with_logits(logits, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x, y = torch.from_numpy(x.copy()).cuda(), torch.from_numpy(y.copy()).cuda()

        logits = self(x)
        loss = F.binary_cross_entropy_with_logits(logits, y)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        return optimizer
demo = MyModel()

In [18]:
early_stop_callback = EarlyStopping(
    monitor= 'val_loss',
    mode= 'min',
    patience= 3,
    verbose= True
)

checkpoint_callback = ModelCheckpoint(
    dirpath= f'{CFG.WORK_DIR}/models/',
    filename= f'model-{{val_loss}}',
    monitor= 'val_loss',
    save_top_k= 1,
    verbose= True,
)

progress_bar_callback = TQDMProgressBar(refresh_rate=1)

lr_monitor = LearningRateMonitor(logging_interval='epoch')

callbacks = [
    early_stop_callback,
    checkpoint_callback,
    progress_bar_callback,
    lr_monitor,
]

In [19]:
fold = 0

trainer = L.Trainer(
        max_epochs= CFG.EPOCHS,
        callbacks= callbacks,
        accelerator= 'auto',
        enable_progress_bar= True,
        devices= 'auto'
    )


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [20]:
print('training begin')
trainer.fit(demo, train_dataloaders= train_loader, val_dataloaders= valid_loader)
print('finished')



training begin


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name            | Type              | Params | Mode 
--------------------------------------------------------------
0 | embedding       | Embedding         | 4.7 K  | train
1 | conv1           | Conv1d            | 12.3 K | train
2 | conv2           | Conv1d            | 6.2 K  | train
3 | conv3           | Conv1d            | 18.5 K | train
4 | global_max_pool | AdaptiveMaxPool1d | 0      | train
5 | fc1             | Linear            | 99.3 K | train
6 | dropout         | Dropout           | 0      | train
7 | fc2             | Linear            | 1.0 M  | train
8 | fc3             | Linear            | 524 K  | train
9 | output          | Linear            | 1.5 K  | train
--------------------------------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.868     Total estimated mo

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

MisconfigurationException: We could not infer the batch_size from the batch. Either simplify its structure or provide the batch_size as `self.log(..., batch_size=batch_size)`.

In [21]:

class SELayer(nn.Module):
    def __init__(self,channel, reduction=16):
        super().__init__()

        self.avg_pooling = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channel, _ = x.shape

        y = self.avg_pooling(x).view(batch, channel)
        y = self.fc(y).view(batch,channel, 1)
        y = x * y.expand_as(x)

        return y



def make_conv1(in_channels, out_channels):
    conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=1, stride=1,padding='same',bias=True)
    nn.init.kaiming_normal_(conv.weight)
    return conv

def make_conv3(in_channels, out_channels, stride=1):
    conv3 = nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=3, stride=1, padding='same', bias=True)
    nn.init.kaiming_normal_(conv3.weight)
    return conv3



class BottleneckReSELayer(nn.Module):

    def __init__(self,input_size, output_size):
        super().__init__()

        self.is_io_same = input_size == output_size

        hidden_size = output_size // 4

        conv1_1 = make_conv1(input_size, hidden_size)
        conv3 = make_conv3(hidden_size,hidden_size)
        conv1_2 = make_conv1(hidden_size, output_size)
        conv1_3 = make_conv1(input_size, output_size)

        self.fc1 = nn.Sequential(
            conv1_1,
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(inplace=True),
            conv3,
            nn.BatchNorm1d(num_features=hidden_size),
            nn.ReLU(inplace=True),
            conv1_2,
            nn.BatchNorm1d(num_features=output_size),
            SELayer(channel=output_size)
        )

        self.fc2 = nn.Sequential(
            conv1_3,
            nn.BatchNorm1d(num_features=output_size)
        )

    def forward(self, x):
        identity = x
        x = self.fc1(x)
        if not self.is_io_same:
            identity = self.fc2(identity)

        out = x + identity
        out = F.relu(out)

        return out


class ReSEModel(nn.Module):
    def __init__(self, enc_dict_size:int, channels:int, rese_layer_size:list, num_class=3):
        super().__init__()

        self.embedding = nn.Embedding(
            num_embeddings=enc_dict_size,
            embedding_dim=channels,
            padding_idx=0,
        )
        self.btl_rese_layers, out_dim = self._make_rese_layers(channels, rese_layer_size)
        # self.btl_rese_layers = BottleneckReSELayer(channels, channels)
        out_dim = channels
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)
        self.mlp_head = nn.Sequential(
            nn.Linear(out_dim, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(1024, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(1024,512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(512, num_class)
        )

    def forward(self, x):
        x = self.embedding(x).permute(0,2,1)
        x = self.btl_rese_layers(x)
        x = self.global_max_pool(x).squeeze(2)
        x = self.mlp_head(x)

        return x


    def _make_rese_layers(self, channels:int, layer_size:list):
        btl_rese_layers = []
        dim = channels
        for i, num_layers in enumerate(layer_size, 1):
            btl_rese_layers.append(BottleneckReSELayer(dim, channels*i))
            for j in range(num_layers-1):
                btl_rese_layers.append(BottleneckReSELayer(channels*i, channels*i))
            dim = channels * i
        btl_rese_layers = nn.Sequential(*btl_rese_layers)
        return btl_rese_layers, dim

In [23]:
model =ReSEModel(37, 64, [1])
model

ReSEModel(
  (embedding): Embedding(37, 64, padding_idx=0)
  (btl_rese_layers): Sequential(
    (0): BottleneckReSELayer(
      (fc1): Sequential(
        (0): Conv1d(64, 16, kernel_size=(1,), stride=(1,), padding=same)
        (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv1d(16, 16, kernel_size=(3,), stride=(1,), padding=same)
        (4): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
        (6): Conv1d(16, 64, kernel_size=(1,), stride=(1,), padding=same)
        (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (8): SELayer(
          (avg_pooling): AdaptiveAvgPool1d(output_size=1)
          (fc): Sequential(
            (0): Linear(in_features=64, out_features=4, bias=False)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=4, out_features=64, bias=False)
           

In [None]:
import torch
import torch.nn.functional as F
from torch.optim import Adam

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
optimizer = Adam(params=model.parameters(), lr=0.0001)
model.to(device)
for epoch in range(3):
    model.train()
    for X,y in train_loader:
        X, y = torch.from_numpy(X.copy()).to(device), torch.from_numpy(y.copy()).to(device)
        optimizer.zero_grad()
        logits = model(X)
        loss = F.binary_cross_entropy_with_logits(logits, y)
        print('train_loss', loss)
        loss.backward()
        optimizer.step()
        del loss
        gc.collect()

print('finished!')

cuda:0
train_loss tensor(0.7765, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.6679, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.5768, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.4986, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.4293, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.3662, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.3086, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.2560, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.2087, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.1674, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tra

Exception ignored in: <function _xla_gc_callback at 0x795bf6e8bf40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/jax/_src/lib/__init__.py", line 98, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 


train_loss tensor(0.0321, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0292, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0260, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0291, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0268, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0266, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0237, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0266, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0193, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0246, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss

Exception ignored in: <function _xla_gc_callback at 0x795bf6e8bf40>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/jax/_src/lib/__init__.py", line 98, in _xla_gc_callback
    def _xla_gc_callback(*args):
KeyboardInterrupt: 


train_loss tensor(0.0261, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0366, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0398, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0450, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0409, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0406, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0549, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0611, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0695, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss tensor(0.0677, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
train_loss

In [None]:
!nvidia-smi
