# Imports

In [None]:
%%capture
%pip install catboost optuna

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import sklearn
import os
import random
import optuna

from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from collections import defaultdict, OrderedDict
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, CatBoostRegressor, Pool

In [None]:
### importing utils from file
from time_frequency_domain_features import *

In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


## seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy's global generator and PyTorch (CPU and all
    CUDA devices), and configures cuDNN for deterministic execution.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Only child processes started after this point see the new value; the
    # hash seed of the already running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm per run, which can
    # select different kernels between runs; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

# Loading Data

In [None]:
%%capture
!gdown 1Rt0I7Svrx77tFMCsNubEQ-cDY8hD-iCk
!gdown 1GWyzUaz_mOwYDbLuopjroIcfngjSJWkD
!unzip r_peaks.zip

In [None]:
"""
 'NORM' : 0,
 'IMI': 1,
 'NDT': 2,
 'NST_': 3,
 'LVH': 4,
 'LAFB': 5,
 'IRBBB': 6,
 'IVCD': 7,
 'ASMI': 8,
 'AMI': 9,
 'ISCAL': 10,
 '1AVB': 11,
 'ILMI': 12,
 'ISC_': 13,
 'CRBBB': 14,
 'CLBBB': 15,
 'LAO/LAE': 16}
 """

"\n 'NORM' : 0,\n 'IMI': 1,\n 'NDT': 2,\n 'NST_': 3,\n 'LVH': 4,\n 'LAFB': 5,\n 'IRBBB': 6,\n 'IVCD': 7,\n 'ASMI': 8,\n 'AMI': 9,\n 'ISCAL': 10,\n '1AVB': 11,\n 'ILMI': 12,\n 'ISC_': 13,\n 'CRBBB': 14,\n 'CLBBB': 15,\n 'LAO/LAE': 16}\n "

# ECGNET

## model

In [None]:
class ECGNet(nn.Module):
  """Multi-branch CNN with squeeze-and-excitation gating and a sigmoid output.

  Pipeline (see `_extract_features`): a 2D convolution stem along the width
  axis, three parallel 2D branches with kernel heights 3/5/7, three 1D
  residual branches over the flattened (channel x height) axis, average
  pooling, and a fully connected head.

  Input is expected as (batch, 1, 12, width): `layer1_conv2d` takes a single
  input channel and the 1D branches expect 32 * height == 384 channels. The
  width must be such that every layer-4 main path and its shortcut produce the
  same length and that length pools down to 1 (the head takes 3 * 384 == 1152
  features); a width of 536 satisfies this.

  Sub-module names and their construction order are kept stable on purpose:
  they define the `state_dict` keys of saved checkpoints and the order in which
  seeded weight initialisation consumes random numbers.

  Args:
    embedding_size: width of every hidden fully connected layer.
    dropout: falsy for no dropout, otherwise the probability of the
      `nn.Dropout` inserted after hidden layers 1, 3, 5, ...
    num_layers: number of `nn.Linear` layers in the head, including the final
      1-unit output layer.
  """

  def __init__(self, embedding_size=264, dropout=False, num_layers=2):
    super(ECGNet, self).__init__()

    self.num_layers = num_layers
    self.dropout = dropout

    #layer1
    self.layer1_conv2d = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(1, 25), stride=(1, 2), bias=True)

    #layer2
    self.layer2_conv2d = self._bn_relu_conv_stack(nn.BatchNorm2d, nn.Conv2d, 32, [
        (64, dict(kernel_size=(1, 15), stride=(1, 1))),
        (64, dict(kernel_size=(1, 15), stride=(1, 2))),
        (32, dict(kernel_size=(1, 15), stride=(1, 1))),
    ])
    self.layer2_seModule = self._se_gate(nn.Conv2d, 32, 16)

    #layer3: three branches that differ only in kernel height (3, 5, 7);
    # "same" padding along the height axis keeps the feature map shape.
    for block_idx, height in enumerate((3, 5, 7), start=1):
      conv_kwargs = dict(kernel_size=(height, 1), stride=(1, 1), padding=(height // 2, 0))
      setattr(self, f"layer3_conv2d_block{block_idx}", self._bn_relu_conv_stack(
          nn.BatchNorm2d, nn.Conv2d, 32, [(64, conv_kwargs), (64, conv_kwargs), (32, conv_kwargs)]))
      setattr(self, f"layer3_seModule_block{block_idx}", self._se_gate(nn.Conv2d, 32, 16))

    #layer4: per branch a stride-9 shortcut, a 4-conv main path and an SE gate.
    # Each entry is (kernel_size, ((stride, padding) for cn1..cn4)).
    layer4_specs = (
        (3, ((2, 0), (1, 0), (2, 0), (2, 0))),
        (5, ((2, 2), (2, 1), (1, 2), (2, 1))),
        (7, ((2, 2), (2, 1), (1, 3), (2, 2))),
    )
    main_channels = (768, 768, 1536, 384)
    for block_idx, (kernel, stride_pad) in enumerate(layer4_specs, start=1):
      setattr(self, f"layer4_conv1d_short_block{block_idx}", self._bn_relu_conv_stack(
          nn.BatchNorm1d, nn.Conv1d, 384, [(384, dict(kernel_size=kernel, stride=9))]))
      setattr(self, f"layer4_conv1d_block{block_idx}", self._bn_relu_conv_stack(
          nn.BatchNorm1d, nn.Conv1d, 384, [
              (out_channels, dict(kernel_size=kernel, stride=stride, padding=padding))
              for out_channels, (stride, padding) in zip(main_channels, stride_pad)
          ]))
      setattr(self, f"layer4_seModule_block{block_idx}", self._se_gate(nn.Conv1d, 384, 48))

    #layer5
    self.layer5_avg_pool1 = nn.AvgPool1d(kernel_size=10)
    self.layer5_avg_pool2 = nn.AvgPool1d(kernel_size=10)
    self.layer5_avg_pool3 = nn.AvgPool1d(kernel_size=10)

    # Fully connected head: (num_layers - 1) hidden layers and a 1-unit output.
    cur_hidden_dim = 1152  # 3 branches x 384 channels, each pooled to length 1
    fc_layers = []
    for i in range(num_layers - 1):
      fc_layers.append((f"ln{i+1}", nn.Linear(cur_hidden_dim, embedding_size)))
      cur_hidden_dim = embedding_size
      fc_layers.append((f"act{i+1}", nn.ReLU()))
      if dropout and i % 2 == 0:
        fc_layers.append((f"dp{i // 2}", nn.Dropout(p=dropout)))

    fc_layers.append((f"ln{num_layers}", nn.Linear(cur_hidden_dim, 1)))
    fc_layers.append(("sigmoid", nn.Sigmoid()))

    self.fc = nn.Sequential(OrderedDict(fc_layers))

  @staticmethod
  def _bn_relu_conv_stack(norm_cls, conv_cls, in_channels, stages):
    """Build a Sequential of (bn{i}, act{i}, cn{i}) triples, one per stage.

    `stages` is a sequence of (out_channels, conv_kwargs) pairs; every
    convolution is created with bias=True.
    """
    layers = []
    channels = in_channels
    for idx, (out_channels, conv_kwargs) in enumerate(stages, start=1):
      layers.append((f"bn{idx}", norm_cls(num_features=channels)))
      layers.append((f"act{idx}", nn.ReLU()))
      layers.append((f"cn{idx}", conv_cls(channels, out_channels, bias=True, **conv_kwargs)))
      channels = out_channels
    return nn.Sequential(OrderedDict(layers))

  @staticmethod
  def _se_gate(conv_cls, channels, reduced):
    """Build the squeeze-and-excitation gate: 1x1 conv -> ReLU -> 1x1 conv -> sigmoid."""
    return nn.Sequential(OrderedDict([
        ("fc1", conv_cls(channels, reduced, kernel_size=1, bias=True)),
        ("act", nn.ReLU()),
        ("fc2", conv_cls(reduced, channels, kernel_size=1, bias=True)),
        ("gate", nn.Sigmoid()),
    ]))

  @staticmethod
  def _apply_se(feature_map, se_module):
    """Rescale each channel of `feature_map` by the gate computed from its mean."""
    batch, channels = feature_map.size(0), feature_map.size(1)
    squeezed = feature_map.view(batch, channels, -1).mean(-1)
    # One singleton axis per spatial dimension so the gate broadcasts over them.
    singleton_axes = [1] * (feature_map.dim() - 2)
    gates = se_module(squeezed.view(batch, channels, *singleton_axes))
    return feature_map * gates

  def _extract_features(self, x):
    """Run layers 1-5 and return the flattened feature vector fed to `self.fc`."""
    #layer1
    x = self.layer1_conv2d(x)

    #layer2
    x = self._apply_se(self.layer2_conv2d(x), self.layer2_seModule)

    pooled = []
    for block_idx in (1, 2, 3):
      #layer3
      branch = getattr(self, f"layer3_conv2d_block{block_idx}")(x)
      branch = self._apply_se(branch, getattr(self, f"layer3_seModule_block{block_idx}"))

      #layer4: merge channel and height axes, then gated main path + shortcut
      branch = torch.flatten(branch, start_dim=1, end_dim=2)
      shortcut = getattr(self, f"layer4_conv1d_short_block{block_idx}")(branch)
      branch = getattr(self, f"layer4_conv1d_block{block_idx}")(branch)
      branch = self._apply_se(branch, getattr(self, f"layer4_seModule_block{block_idx}"))
      branch = branch + shortcut

      #layer5
      pooled.append(getattr(self, f"layer5_avg_pool{block_idx}")(branch))

    return torch.cat(pooled, dim=1).flatten(1)

  def forward(self, x):
    """Return the sigmoid output of the head, shape (batch, 1)."""
    return self.fc(self._extract_features(x))

  def embed(self, x):
    """Return the activations of the last hidden fully connected layer.

    Applies only the `ln{i}`/`act{i}` pairs of the head: dropout layers, the
    output layer and the sigmoid are skipped. With num_layers == 1 this is the
    raw feature vector.
    """
    x = self._extract_features(x)

    for i in range(self.num_layers - 1):
      x = getattr(self.fc, f"ln{i+1}")(x)
      x = getattr(self.fc, f"act{i+1}")(x)

    return x

## worker class

In [None]:
class ECGNetWorker():
  """Drives the ECGNet workflow: one-vs-rest data preparation, dataset
  splitting, model creation/loading, training, inference and Optuna tuning.

  Typical order of calls: `evaluate_data_for_ovr` -> `create_torch_dataset`
  -> `create_model` (or `load_model`) -> `train_model` -> `inference_model`,
  or `tune_model` in place of the last three.
  """

  class MetricMonitor:
    """Keeps a running sum, count and average for each named metric."""

    def __init__(self, float_precision=3):
        self.float_precision = float_precision
        self.reset()

    def reset(self):
        """Forget every metric recorded so far."""
        self.metrics = defaultdict(lambda: {"val": 0, "count": 0, "avg": 0})

    def update(self, metric_name, val):
        """Add one observation of `metric_name` and refresh its average."""
        metric = self.metrics[metric_name]

        metric["val"] += val
        metric["count"] += 1
        metric["avg"] = metric["val"] / metric["count"]

    def __str__(self):
        return " | ".join(
            "{metric_name}: {avg:.{float_precision}f}".format(
                metric_name=metric_name, avg=metric["avg"], float_precision=self.float_precision
            )
            for (metric_name, metric) in self.metrics.items()
        )

  class EcgPTBDataset(Dataset):
    """Dataset of `.npy` signals addressed by the first column of `labels`.

    Args:
        labels: DataFrame whose first column holds file names (without the
            `.npy` suffix) and whose second column holds targets; None or an
            empty container yields an empty dataset.
        path: prefix prepended to each file name (plain string concatenation,
            so it needs its trailing separator).
    """

    def __init__(self, labels=None, path='/'):
        if labels is None or len(labels) == 0:
            self.x_paths, self.labels = [], []
        else:
            # Column-wise extraction instead of one .iloc lookup per cell.
            self.x_paths = labels.iloc[:, 0].tolist()
            self.labels = labels.iloc[:, 1].tolist()
        self.path = path

    def __len__(self):
        return len(self.x_paths)

    def __getitem__(self, idx):
        # A leading singleton axis is added to the loaded array; indexing with
        # [None, :, :] requires the stored array to be 2-D.
        hr = torch.tensor(np.load(self.path + self.x_paths[idx] + '.npy'))[None, :, :]

        target = self.labels[idx]

        return hr, target

  def __init__(self, device="cpu"):
    """Create an empty worker bound to `device` (anything `torch.device` accepts)."""
    self.device = torch.device(device)

    self.labels = None
    self.train_dataset = None
    self.val_dataset = None
    self.test_dataset = None

    self.model = None

    self.fitted_model = False

  def evaluate_data_for_ovr(self, labels, target_class):
    """Build a binary one-vs-rest label table and store it in `self.labels`.

    Rows of `target_class` are kept and relabeled 1. For every other class, up
    to `(2 * n_target) // 15` rows are sampled from records that carry no other
    class and relabeled 0. `labels` itself is not modified.

    Args:
        labels: DataFrame with `record_name` and `result_class` columns.
        target_class: value of `result_class` treated as the positive class.
    """
    is_target = labels.result_class == target_class
    num_others = (int(is_target.sum()) * 2) // 15

    # Work on explicit copies so relabeling never writes into a slice of `labels`.
    positives = labels[is_target].copy()
    positives.loc[:, ["result_class"]] = 1
    positives.index = range(0, len(positives))

    frames = [positives]
    for cur_class in labels.result_class.unique():
      if cur_class == target_class:
        continue
      cur_class_data = labels[labels.result_class == cur_class]
      # Drop records that also appear under any other class.
      shared_records = labels[labels.result_class != cur_class].record_name
      cur_class_data = cur_class_data[~cur_class_data.record_name.isin(shared_records)]
      cur_frame = cur_class_data.sample(n=min(len(cur_class_data), num_others)).copy()
      cur_frame.loc[:, ["result_class"]] = 0
      frames.append(cur_frame)

    # Single concatenation instead of growing the frame inside the loop.
    self.labels = pd.concat(frames, axis=0)

    print("evaluate done")

  def create_torch_dataset(self, ecg_path=None, splits=[.8, .15, .05]):
    """Wrap `self.labels` in an `EcgPTBDataset` and split it randomly.

    Args:
        ecg_path: prefix of the signal files, passed to `EcgPTBDataset`.
        splits: fractions summing to 1; three values give train/val/test,
            otherwise two values give train/val.

    Raises:
        AssertionError: if the fractions do not sum to 1.
    """
    # Tolerant comparison: e.g. 0.7 + 0.2 + 0.1 is not exactly 1.0 in floating point.
    if not np.isclose(np.sum(splits), 1):
      raise AssertionError("incorrect splits values, sum must be equal to 1")

    dataset = self.EcgPTBDataset(self.labels, ecg_path)

    if len(splits) == 3:
      self.train_dataset, self.val_dataset, self.test_dataset = random_split(dataset, lengths=splits)
    else:
      self.train_dataset, self.val_dataset = random_split(dataset, lengths=splits)

    print(f"train set len: {len(self.train_dataset)}, val set len: {len(self.val_dataset)}")

  def create_model(self, params={}):
    """Instantiate a fresh `ECGNet(**params)` on the worker's device.

    Raises:
        Exception: if the model cannot be built; the original error is chained
            as `__cause__`.
    """
    self.fitted_model = False

    try:
      model = ECGNet(**params)
      self.model = model.to(self.device)
      print("model succesfully build")

    except Exception as err:
      # Keep the generic exception type callers may rely on, but preserve the cause.
      raise Exception("error occured during model building") from err

  def load_model(self, path=False, params={}):
    """Build `ECGNet(**params)` and load weights from the checkpoint at `path`."""
    model = ECGNet(**params)
    model = model.to(self.device)

    model.load_state_dict(state_dict=torch.load(path, map_location=self.device))

    self.model = model

  def train_model(self, n_epochs=10, checkpoints=None, checkpoints_strategy="max", loss="BCELoss", optimizer="Adam", lr=1e-3, scheduler="ReduceLROnPlateau", eval_metric="f1_score", num_workers=1, batch_size=64, shuffle_train=True, shuffle_val=False):
    """Train `self.model` on the train split and validate after every epoch.

    Args:
        n_epochs: number of epochs.
        checkpoints: directory for saved state dicts, or None to save nothing.
        checkpoints_strategy: "max" saves whenever the validation score beats
            the best so far; a float saves whenever the score reaches it.
        loss: name of a class in `torch.nn`, built without arguments.
        optimizer: name of a class in `torch.optim`.
        lr: learning rate passed to the optimizer.
        scheduler: name of a class in `torch.optim.lr_scheduler`.
        eval_metric: name of a function in `sklearn.metrics`; higher is
            treated as better.
        num_workers: loader worker processes, used when not running on CUDA.
        batch_size: batch size of both loaders.
        shuffle_train: shuffle the training loader.
        shuffle_val: shuffle the validation loader.
    """
    # A torch.device object is always truthy, so the device type has to be
    # inspected explicitly: on CUDA load in the main process with pinned
    # memory, elsewhere honor `num_workers` and skip pinning.
    use_cuda = self.device.type == "cuda"
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=0 if use_cuda else num_workers,
        pin_memory=use_cuda,
    )
    train_loader = DataLoader(self.train_dataset, shuffle=shuffle_train, **loader_kwargs)
    val_loader = DataLoader(self.val_dataset, shuffle=shuffle_val, **loader_kwargs)

    loss_fn = getattr(torch.nn, loss)()
    optimizer = getattr(torch.optim, optimizer)(self.model.parameters(), lr=lr)

    scheduler_cls = getattr(torch.optim.lr_scheduler, scheduler)
    is_plateau = scheduler_cls is torch.optim.lr_scheduler.ReduceLROnPlateau
    if is_plateau:
      # The monitored value is a score to maximise; the default mode="min"
      # would treat every improvement as a stall.
      scheduler = scheduler_cls(optimizer, mode="max")
    else:
      scheduler = scheduler_cls(optimizer)

    max_score = -1
    best_epoch = 1

    for epoch in range(n_epochs):
      self.train_epoch(train_loader, loss_fn, eval_metric, optimizer, epoch)
      val_score, loss_v = self.validate_epoch(val_loader, loss_fn, eval_metric, epoch)
      if is_plateau:
        scheduler.step(val_score)
      else:
        scheduler.step()

      if checkpoints is not None:

        should_save = False

        if checkpoints_strategy == "max":
          should_save = max_score < val_score
        elif isinstance(checkpoints_strategy, float):
          should_save = val_score >= checkpoints_strategy

        if should_save:
          torch.save(self.model.state_dict(), f'{checkpoints}/score:{val_score}.pth')

      if max_score < val_score:
        best_epoch = epoch
        max_score = val_score

    self.fitted_model = True
    print(f"Training done, best epoch: {best_epoch}, best score: {max_score}")

  def train_epoch(self, train_loader, loss_fn, eval_metric, optimizer, epoch):
    """Run one optimisation pass over `train_loader`, reporting running averages."""
    metric_monitor = self.MetricMonitor(float_precision=4)
    self.model.train()
    metric = getattr(sklearn.metrics, eval_metric)
    stream = tqdm(train_loader)
    for i, batch in enumerate(stream, start=1):
        x_batch, y_batch = batch
        y_batch = y_batch.to(self.device, non_blocking=True)
        x_batch = x_batch.to(self.device, non_blocking=True)
        output = self.model(x_batch.float()).view(1, -1)[0]
        loss = loss_fn(output, y_batch.float())
        # Threshold the sigmoid output at 0.5 to get hard predictions.
        output = (output > 0.5).to(torch.int32)
        score = metric(y_batch.cpu(), output.cpu())
        # Record a plain float: accumulating the tensor would chain every
        # batch's autograd history into the running sum.
        metric_monitor.update("Loss", loss.item())
        metric_monitor.update(eval_metric, score)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        stream.set_description(f"Epoch: {epoch}. Train.  {metric_monitor}")

  def validate_epoch(self, val_loader, loss_fn, eval_metric, epoch):
    """Evaluate on `val_loader` without gradients.

    Returns:
        Tuple of (average per-batch metric, average per-batch loss).
    """
    metric_monitor = self.MetricMonitor(float_precision=4)
    self.model.eval()
    metric = getattr(sklearn.metrics, eval_metric)
    stream = tqdm(val_loader)
    with torch.no_grad():
        for i, batch in enumerate(stream, start=1):
            x_batch, y_batch = batch
            y_batch = y_batch.to(self.device, non_blocking=True)
            x_batch = x_batch.to(self.device, non_blocking=True)
            output = self.model(x_batch.float()).view(1, -1)[0]
            loss = loss_fn(output, y_batch.float())
            output = (output > 0.5).to(torch.int32)
            score = metric(y_batch.cpu(), output.cpu())
            metric_monitor.update("Loss", loss)
            metric_monitor.update(eval_metric, score)
            stream.set_description(f"Epoch: {epoch}. Validation. {metric_monitor}")
    return metric_monitor.metrics[eval_metric]["avg"], metric_monitor.metrics["Loss"]["avg"]

  def inference_model(self, eval_metric="f1_score"):
    """Score `self.model` on the test split, one sample at a time.

    Returns:
        Value of the `sklearn.metrics` function named by `eval_metric`,
        computed over the whole test split.
    """
    targets = []
    preds = []
    self.model.eval()
    metric = getattr(sklearn.metrics, eval_metric)

    with torch.no_grad():
      for hr, target in self.test_dataset:
        targets.append(target)
        # Add the batch axis the model expects.
        hr = hr.to(self.device).unsqueeze(0)
        output = self.model(hr.float()).view(1, -1)[0]
        output = (output > 0.5).to(torch.int32)
        preds.append(output.cpu().item())

    score = metric(np.array(targets), np.array(preds))

    print(f"len test set: {len(targets)}, score: {score}")
    return score

  def optuna_objective(self, trial):
    """Optuna objective: build, train for 5 epochs and score one configuration.

    Note that the returned value comes from `inference_model`, i.e. it is
    measured on the test split.
    """
    lr_base = trial.suggest_float("lr_base", 1e-3, 1e-2)
    optimizer = trial.suggest_categorical("optimizer", ["Adam", "Adagrad", "RMSprop"])
    num_layers = trial.suggest_int("num_layers", 2, 4)
    embedding_size = trial.suggest_int("embedding_size", 200, 400)
    # dropout = trial.suggest_categorical("dropout", [False, 0.1])

    self.create_model(params={"embedding_size" : embedding_size, "dropout" : False, "num_layers" : num_layers})

    self.train_model(n_epochs=5, checkpoints=None, optimizer=optimizer, lr=lr_base, batch_size=256)

    metric = self.inference_model()
    print(metric)

    return metric

  def tune_model(self, n_trials=None):
    """Run an Optuna study that maximises `optuna_objective`; return the best trial."""
    study = optuna.create_study(direction="maximize")
    study.optimize(self.optuna_objective, n_trials=n_trials, show_progress_bar=True)

    return study.best_trial


## Training model

### creating worker object

In [None]:
# Label table; evaluate_data_for_ovr reads its `record_name` and
# `result_class` columns.
labels = pd.read_csv("/content/train_val_labels.csv")
# Prefix of the per-record .npy files; the dataset concatenates it with the
# file name, so the trailing slash is required.
ecg_path = "/content/r_peaks/signals/"

# NOTE(review): the device is hard-coded to CUDA, so this cell needs a GPU runtime.
worker = ECGNetWorker(device="cuda")

### loading data

In [None]:
# Build the one-vs-rest label table with class 12 as the positive class
# (presumably 'ILMI' per the label map listed earlier -- confirm).
worker.evaluate_data_for_ovr(labels, target_class=12)
# Random train/val/test split using the default fractions (.8, .15, .05).
worker.create_torch_dataset(ecg_path=ecg_path)

evaluate done
train set len: 8320, val set len: 1560


### tuning model

In [None]:
# 20 Optuna trials; each trial builds a new model, trains it for 5 epochs and
# is scored by inference_model on the test split (see optuna_objective).
best_trial = worker.tune_model(n_trials=20)

[I 2024-04-08 10:09:00,446] A new study created in memory with name: no-name-e0b235f1-27a4-4ed3-9de2-39174264d328


  0%|          | 0/20 [00:00<?, ?it/s]

model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.8252040445780684
0.856368563685637
[I 2024-04-08 10:12:15,608] Trial 0 finished with value: 0.856368563685637 and parameters: {'lr_base': 0.004386179205939589, 'optimizer': 'Adam', 'num_layers': 2, 'embedding_size': 334}. Best is trial 0 with value: 0.856368563685637.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.0
0.0
[I 2024-04-08 10:14:43,153] Trial 1 finished with value: 0.0 and parameters: {'lr_base': 0.009900997308958579, 'optimizer': 'RMSprop', 'num_layers': 4, 'embedding_size': 379}. Best is trial 0 with value: 0.856368563685637.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.4871965168876233
0.49346879535558785
[I 2024-04-08 10:17:11,420] Trial 2 finished with value: 0.49346879535558785 and parameters: {'lr_base': 0.009868066934495934, 'optimizer': 'Adagrad', 'num_layers': 4, 'embedding_size': 273}. Best is trial 0 with value: 0.856368563685637.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.7428810665650679
0.742857142857143
[I 2024-04-08 10:19:47,193] Trial 3 finished with value: 0.742857142857143 and parameters: {'lr_base': 0.007739554101126395, 'optimizer': 'Adam', 'num_layers': 3, 'embedding_size': 255}. Best is trial 0 with value: 0.856368563685637.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.8601927295027398
0.8967551622418879
[I 2024-04-08 10:22:25,830] Trial 4 finished with value: 0.8967551622418879 and parameters: {'lr_base': 0.00565565213498923, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 209}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.0
0.0
[I 2024-04-08 10:24:53,398] Trial 5 finished with value: 0.0 and parameters: {'lr_base': 0.00646852588309481, 'optimizer': 'RMSprop', 'num_layers': 2, 'embedding_size': 330}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.0
0.0
[I 2024-04-08 10:27:20,921] Trial 6 finished with value: 0.0 and parameters: {'lr_base': 0.001970877172570499, 'optimizer': 'RMSprop', 'num_layers': 3, 'embedding_size': 275}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.4871965168876233
0.49346879535558785
[I 2024-04-08 10:29:48,850] Trial 7 finished with value: 0.49346879535558785 and parameters: {'lr_base': 0.007485271040295854, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 208}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.4871965168876233
0.49346879535558785
[I 2024-04-08 10:32:16,324] Trial 8 finished with value: 0.49346879535558785 and parameters: {'lr_base': 0.009567235277819854, 'optimizer': 'Adagrad', 'num_layers': 2, 'embedding_size': 313}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.8797967299684136
0.8895705521472392
[I 2024-04-08 10:34:54,295] Trial 9 finished with value: 0.8895705521472392 and parameters: {'lr_base': 0.005443997107040949, 'optimizer': 'Adagrad', 'num_layers': 4, 'embedding_size': 241}. Best is trial 4 with value: 0.8967551622418879.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.9185023489875166
0.9398280802292264
[I 2024-04-08 10:37:32,340] Trial 10 finished with value: 0.9398280802292264 and parameters: {'lr_base': 0.0031265442735055114, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 400}. Best is trial 10 with value: 0.9398280802292264.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.9127225931129346
0.9337175792507204
[I 2024-04-08 10:40:11,160] Trial 11 finished with value: 0.9337175792507204 and parameters: {'lr_base': 0.0030251022383772745, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 397}. Best is trial 10 with value: 0.9398280802292264.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 2, best score: 0.890643736469783
0.9192200557103063
[I 2024-04-08 10:42:49,647] Trial 12 finished with value: 0.9192200557103063 and parameters: {'lr_base': 0.002039792764288547, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 400}. Best is trial 10 with value: 0.9398280802292264.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.8915696259124833
0.9436201780415431
[I 2024-04-08 10:45:28,083] Trial 13 finished with value: 0.9436201780415431 and parameters: {'lr_base': 0.003325613288738196, 'optimizer': 'Adagrad', 'num_layers': 3, 'embedding_size': 370}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 4, best score: 0.8655105153606123
0.9425287356321839
[I 2024-04-08 10:48:06,184] Trial 14 finished with value: 0.9425287356321839 and parameters: {'lr_base': 0.003690535038218829, 'optimizer': 'Adagrad', 'num_layers': 2, 'embedding_size': 358}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 3, best score: 0.8146092693514532
0.534412955465587
[I 2024-04-08 10:50:44,800] Trial 15 finished with value: 0.534412955465587 and parameters: {'lr_base': 0.003822142977207979, 'optimizer': 'Adam', 'num_layers': 2, 'embedding_size': 359}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 3, best score: 0.9168597052083103
0.9352941176470588
[I 2024-04-08 10:53:31,748] Trial 16 finished with value: 0.9352941176470588 and parameters: {'lr_base': 0.001239148824686492, 'optimizer': 'Adagrad', 'num_layers': 2, 'embedding_size': 359}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.4871965168876233
0.49346879535558785
[I 2024-04-08 10:56:02,959] Trial 17 finished with value: 0.49346879535558785 and parameters: {'lr_base': 0.004544203445079189, 'optimizer': 'Adagrad', 'num_layers': 2, 'embedding_size': 356}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.0
0.0
[I 2024-04-08 10:58:31,815] Trial 18 finished with value: 0.0 and parameters: {'lr_base': 0.003148325987121643, 'optimizer': 'RMSprop', 'num_layers': 4, 'embedding_size': 304}. Best is trial 13 with value: 0.9436201780415431.
model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 0, best score: 0.4871965168876233
0.49346879535558785
[I 2024-04-08 11:01:00,944] Trial 19 finished with value: 0.49346879535558785 and parameters: {'lr_base': 0.005046731348385693, 'optimizer': 'Adam', 'num_layers': 2, 'embedding_size': 335}. Best is trial 13 with value: 0.9436201780415431.


In [None]:
best_trial.params

{'lr_base': 0.003325613288738196,
 'optimizer': 'Adagrad',
 'num_layers': 3,
 'embedding_size': 370}

### training model

In [None]:
# Retrain ECGNet with the best trial's hyper-parameters shown above
# (embedding_size, num_layers, lr and optimizer are copied by hand as literals,
# so they must be updated manually if the study is re-run).
worker.create_model({"embedding_size" : 370, "dropout" : False, "num_layers" : 3})
worker.train_model(n_epochs=10, checkpoints="/content/drive/MyDrive/БВ/checkpoints", checkpoints_strategy=0.9, loss="BCELoss", lr=0.003325613288738196, optimizer="Adagrad", num_workers=4, eval_metric="f1_score", batch_size=256)

model succesfully build


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

Training done, best epoch: 8, best score: 0.9284939789227805


### loading and testing model

In [None]:
# NOTE(review): this config (embedding_size=512, dropout=0.15) and checkpoint folder differ from the
# model trained above (embedding_size=370, num_layers=3, checkpoints under ".../БВ/checkpoints"),
# while make_dataset is later called with len_embeds=370 — confirm which checkpoint is intended.
worker.load_model("/content/drive/MyDrive/checkpoints/ecgnet_irbbb/score:0.9215240916197165.pth", {"embedding_size" : 512, "dropout" : 0.15})

In [None]:
worker.inference_model()

0.9489489489489489

## Estimating confidence

In [None]:
def create_conf_interval(target, predictions, num_bootstraping, metric=None):
  """Bootstrap a 95% percentile confidence interval for a classification score.

  The paired (target, prediction) samples are resampled with replacement
  `num_bootstraping` times using a fixed RNG seed (12345), the score is
  computed on every resample, and the 2.5th / 97.5th percentiles of those
  scores form the interval.

  Args:
    target: sequence of ground-truth labels.
    predictions: sequence of predicted labels, same length as `target`.
    num_bootstraping: number of bootstrap resamples (must be >= 1).
    metric: optional callable `metric(y_true, y_pred) -> float`. When omitted
      the score is plain accuracy, i.e. the mean of `predictions == target`.

  Returns:
    (mean_score, (lower_bound, upper_bound)) over the bootstrap scores.

  Raises:
    ValueError: if the inputs are empty, differ in length, or
      `num_bootstraping` is smaller than 1.
  """
  target = np.array(target)
  predictions = np.array(predictions)

  if target.shape[0] != predictions.shape[0]:
    raise ValueError(
        f"target and predictions must have the same length, got {target.shape[0]} and {predictions.shape[0]}"
    )
  if target.shape[0] == 0:
    raise ValueError("cannot bootstrap a confidence interval from empty input")
  if num_bootstraping < 1:
    raise ValueError("num_bootstraping must be at least 1")

  if metric is None:
    # Default score: accuracy on the resampled pairs.
    def metric(y_true, y_pred):
      return np.mean(y_pred == y_true)

  # Fixed seed keeps the reported interval reproducible between runs.
  rng = np.random.RandomState(seed=12345)
  idx = np.arange(target.shape[0])
  bootstrap_scores = []

  for _ in tqdm(range(num_bootstraping)):
      # Resample indices so every prediction stays paired with its own target.
      pred_idx = rng.choice(idx, size=idx.shape[0], replace=True)
      bootstrap_scores.append(metric(target[pred_idx], predictions[pred_idx]))

  bootstrap_mean = np.mean(bootstrap_scores)
  return bootstrap_mean, (np.percentile(bootstrap_scores, 2.5), np.percentile(bootstrap_scores, 97.5))

In [None]:
###ECGNet

def create_conf_interval_ecgnet(worker, metric, num_bootstraping):
  """Bootstrap a confidence interval for the ECGNet held by `worker`.

  Each `(signal, target)` item of `worker.test_dataset` is pushed through
  `worker.model` on its own, the output is thresholded at 0.5 into a 0/1
  label, and the collected targets and labels are handed to
  `create_conf_interval`.

  Args:
    worker: object exposing `model`, `device` and `test_dataset`.
    metric: name of an attribute of `sklearn.metrics`.
    num_bootstraping: number of bootstrap resamples.

  Returns:
    Whatever `create_conf_interval` returns for the collected pairs.

  NOTE(review): `metric` is looked up but not used afterwards, so the score
  that gets bootstrapped is the one `create_conf_interval` computes by
  default — confirm whether the named metric was meant to be used.
  """
  worker.model.eval()
  metric = getattr(sklearn.metrics, metric)

  ground_truth, hard_labels = [], []

  with torch.no_grad():
    for signal, label in worker.test_dataset:
      ground_truth.append(label)
      # The model is fed one sample at a time, hence the extra leading axis.
      single_batch = signal.to(worker.device).unsqueeze(0).float()
      scores = worker.model(single_batch).view(1, -1)[0]
      hard_labels.append((scores > 0.5).to(torch.int32).cpu().item())

  return create_conf_interval(ground_truth, hard_labels, num_bootstraping)

In [None]:
# NOTE(review): "f1_score" is only looked up inside create_conf_interval_ecgnet; the bootstrapped
# score itself comes from create_conf_interval — confirm which metric this interval should describe.
mean, estimated_borders = create_conf_interval_ecgnet(worker, "f1_score", 1000)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
print(f"mean score: {mean}, lower bound: {estimated_borders[0]}, upper bound: {estimated_borders[1]}")

mean score: 0.9674816955684007, lower bound: 0.9518304431599229, upper bound: 0.9826589595375722


# Creating dataset for CatBoost

## Extracting features function

In [None]:
def count_time_features(data):
  """Collect the 13 time-domain descriptors of one signal into a NumPy array.

  Every helper is called exactly once with `data`. The helpers are not
  defined in this notebook (presumably they come from the star-import of
  `time_frequency_domain_features`).

  Args:
    data: the signal passed unchanged to every helper.

  Returns:
    np.ndarray holding the helper results in this order: mean_fea, rms_fea,
    var_fea, waveform_index, peak_index, sr_fea, am_fea, skew_fea, kurt_fea,
    max_fea, min_fea, pp_fea, impluse_factor.
  """
  # Helpers are invoked in this order; the output layout below differs from it.
  call_order = (
      ("mean", mean_fea),
      ("rms", rms_fea),
      ("sr", sr_fea),
      ("am", am_fea),
      ("skew", skew_fea),
      ("kurt", kurt_fea),
      ("max", max_fea),
      ("min", min_fea),
      ("pp", pp_fea),
      ("var", var_fea),
      ("waveform_index", waveform_index),
      ("peak_index", peak_index),
      ("impluse_factor", impluse_factor),
  )

  values = {}
  for key, extractor in call_order:
    values[key] = extractor(data)

  output_order = (
      "mean", "rms", "var", "waveform_index", "peak_index", "sr", "am",
      "skew", "kurt", "max", "min", "pp", "impluse_factor",
  )
  return np.array([values[key] for key in output_order])

# Frequency-domain descriptors of a single signal.
def count_freq_features(data):
  """Collect the 9 FFT-based descriptors of one signal into a NumPy array.

  The helpers are not defined in this notebook (presumably they come from the
  star-import of `time_frequency_domain_features`); each one is called once
  with `data`.

  Args:
    data: the signal passed unchanged to every helper.

  Returns:
    np.ndarray holding the helper results in this order: fft_mean, fft_var,
    fft_std, fft_entropy, fft_energy, fft_skew, fft_kurt, fft_shape_mean,
    fft_shape_std.
  """
  spectral_extractors = (
      fft_mean,
      fft_var,
      fft_std,
      fft_entropy,
      fft_energy,
      fft_skew,
      fft_kurt,
      fft_shape_mean,
      fft_shape_std,
  )
  return np.array([extract(data) for extract in spectral_extractors])

### time- and frequency-domain features for every lead of one recording
def get_all_features(ecg_signal):
    """Compute per-lead feature vectors for the first 12 rows of `ecg_signal`.

    Args:
        ecg_signal: array-like whose first axis indexes leads; rows 0..11 are
            read, so fewer than 12 rows raises an IndexError.

    Returns:
        (time_features, freq_features): two arrays with one row per lead, built
        from `count_time_features` and `count_freq_features` respectively.
    """
    leads = np.array(ecg_signal)

    # For each lead the time-domain features are computed before the
    # frequency-domain ones, lead by lead.
    per_lead = [
        (count_time_features(leads[lead_no]), count_freq_features(leads[lead_no]))
        for lead_no in range(12)
    ]

    time_part = np.array([pair[0] for pair in per_lead])
    freq_part = np.array([pair[1] for pair in per_lead])
    return time_part, freq_part

## make dataset function

**`Идеи по составлению датасета:`**



*   добавить в качестве фичи предсказание ЭКГНета
*   брать только фичи с номерами k*n из эмбеддинга (k — параметр)



In [None]:
embeds = []
def make_dataset(model=False, data=False, root_path=False, device=False, len_embeds=False, len_features=False, save_path=False, step=1):
  """Build the tabular dataset for CatBoost: label + hand-crafted features + ECGNet embeddings.

  For every row of `data` the signal `root_path + <column 0> + '.npy'` is
  loaded, embedded with `model.embed`, and described with `get_all_features`.
  Row i of the result holds the label, features and embedding of sample i.

  Args:
    model: network exposing `embed(batch)`; it is switched to eval mode and moved to `device`.
    data: DataFrame whose column 0 holds signal file names (without extension)
      and column 1 the labels.
    root_path: string prefix prepended to every file name.
    device: anything accepted by `torch.device`.
    len_embeds: number of embedding values per sample (used to reshape the embeddings).
    len_features: number of hand-crafted features per sample.
    save_path: if truthy, the dataset is also written to `{save_path}/dataset_cbm.csv`.
    step: accepted for interface compatibility; currently unused.

  Returns:
    DataFrame indexed 0..N-1 with columns `target`, `f_0..f_{len_features-1}`,
    `e_0..e_{len_embeds-1}`.

  Raises:
    ValueError: if a sample does not yield `len_features` features, or the
      number of embedding rows differs from the number of samples.
  """
  # Kept for backward compatibility: the embeddings frame is also published
  # through the module-level `embeds` name.
  global embeds

  def get_signal_embeds(model, loader, device):
    """Run `model.embed` over every batch and return all values as one flat float64 array."""
    model.eval()
    model = model.to(device)
    chunks = []
    print("getting embeds")
    with torch.no_grad():
      for x in tqdm(loader):
        x = x.to(device)
        chunks.append(model.embed(x.float()).cpu().flatten().numpy())

    if not chunks:
      return np.array([])
    # One concatenation instead of re-allocating the array on every batch.
    return np.concatenate(chunks).astype(np.float64)

  device = torch.device(device)

  x_paths = [data.iloc[i, 0] for i in range(len(data))]
  hrs = [torch.tensor(np.load(root_path + x_path + '.npy'))[None, :, :] for x_path in x_paths]
  loader = DataLoader(hrs, batch_size=256, shuffle=False)
  embeds = get_signal_embeds(model, loader, device)
  embeds = pd.DataFrame(embeds.reshape(-1, len_embeds), columns=[f"e_{i}" for i in range(0, len_embeds)])

  labels = [data.iloc[i, 1] for i in range(len(data))]

  print(len(embeds), len(labels), len(hrs))
  print("making dataset")
  rows = []
  for i in tqdm(range(len(hrs))):
    t_f, f_f = get_all_features(hrs[i][0])
    features = np.append(t_f, f_f)
    if features.shape[0] != len_features:
      raise ValueError(f"sample {i} produced {features.shape[0]} features, expected {len_features}")
    rows.append(np.append(np.array(labels[i]), features))

  # No placeholder row is used, so the frame's index is 0..N-1 and lines up
  # with the embeddings frame; concatenation along axis=1 aligns rows by index.
  feature_table = np.array(rows, dtype=np.float64).reshape(-1, len_features + 1)
  cb_dataset = pd.DataFrame(feature_table, columns=["target"]+[f"f_{i}" for i in range(0, len_features)])

  if len(cb_dataset) != len(embeds):
    raise ValueError(f"got {len(embeds)} embedding rows for {len(cb_dataset)} samples; check len_embeds")

  cb_dataset = pd.concat([cb_dataset, embeds], axis=1)

  if save_path:
    cb_dataset.to_csv(f"{save_path}/dataset_cbm.csv", index=False)

  return cb_dataset

In [None]:
# len_features=264 = 12 leads x (13 time-domain + 9 frequency-domain) values from get_all_features.
# len_embeds=370 is used to reshape the output of model.embed, so presumably it must equal the
# embedding_size of the ECGNet held in worker.model.
df = make_dataset(worker.model, worker.labels, root_path="/content/r_peaks/signals/", device="cuda", len_embeds=370, len_features=264, save_path="/content")

getting embeds


  0%|          | 0/41 [00:00<?, ?it/s]

10399 10399 10399
making dataset


  0%|          | 0/10399 [00:00<?, ?it/s]

In [None]:
# Remove rows that lack either the label or the embedding, without hard-coding row labels;
# unlike an in-place drop by label, this cell can be re-run safely.
df = df.dropna(subset=["target", "e_0"])

# Training CatBoost

## worker class

In [None]:
class CatBoostModel:
  """Convenience wrapper around CatBoost: data splitting, fitting, inference and Optuna tuning.

  Typical flow: `init_data` -> (`tune_params`) -> `init_model` -> `fit_model` -> `inference_model`.
  """

  def __init__(self, seed=42, device="CPU"):
    """Create an empty wrapper.

    Args:
      seed: random seed used for data splits, CatBoost models and the Optuna sampler.
      device: value passed to CatBoost as `task_type`.
    """

    # data params
    self.data = None
    self.train_data = None
    self.val_data = None
    self.test_data = None

    # global params
    self.model = None
    self.best_score = None
    self.best_model = None

    self.device = device
    self.seed = seed

    self.fitted_model = False
    self.loaded_model = False

    # optuna params
    self.best_trial = None
    self.best_optuna_model = None
    self.best_optuna_score = None
    self._optuna_direction = "maximize"

  @staticmethod
  def _validate_splits(splits):
    """Assert that `splits` holds 2 or 3 fractions that sum to 1 within float tolerance."""
    assert(len(splits) in (2, 3)), "splits must contain 2 (train, val) or 3 (train, val, test) fractions"
    # np.isclose: sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0 in floating point.
    assert(np.isclose(np.sum(splits), 1.0)), "incorrect splits values, sum must be equal to 1"

  def init_data(self, data, target_column = "target", splits=[.8, .15, .05], shuffle_train=True, shuffle_valid=False, embedding_features=[]):
    """Split `data` into train / validation / (optional) test CatBoost pools.

    Args:
      data: DataFrame with the features and the target column.
      target_column: column label (str) or positional index (int) of the target.
      splits: `[train, val, test]` or `[train, val]` fractions summing to 1.
      shuffle_train: whether the first split (train vs. the rest) shuffles rows.
      shuffle_valid: whether the second split (val vs. test) shuffles rows.
      embedding_features: forwarded to every `Pool`.

    Rows whose target is missing are left out of the pools (a message is
    printed). With two fractions `self.test_data` is set to None.

    Raises:
      AssertionError: if `splits` is malformed.
    """

    self._validate_splits(splits)

    self.data = data

    # Resolve the target to a column label so that the very same column is
    # read into y and removed from X, whichever way it was specified.
    if type(target_column) == str:
      target_name = target_column
    else:
      target_name = data.columns[target_column]

    # A missing label has no meaningful int8 representation, so such rows are skipped.
    labelled = data.loc[data[target_name].notna()]
    n_missing = len(data) - len(labelled)
    if n_missing:
      print(f"init_data: skipped {n_missing} row(s) with missing target")

    y = np.array(labelled.loc[:, target_name], dtype=np.int8)
    X = np.array(labelled.drop(columns=[target_name]), dtype=np.float32)

    has_test_split = len(splits) == 3
    holdout_share = (splits[1] + splits[2]) if has_test_split else splits[1]

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=holdout_share, random_state=self.seed, shuffle=shuffle_train)

    self.train_data = Pool(data=X_train, label=y_train.reshape(-1, 1), embedding_features=embedding_features)

    if has_test_split:
      # The held-out part is divided again; test_size is relative to that part.
      X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=(splits[2] / holdout_share), random_state=self.seed, shuffle=shuffle_valid)
      self.test_data = Pool(data=X_test, label=y_test.reshape(-1, 1), embedding_features=embedding_features)
    else:
      self.test_data = None

    self.val_data = Pool(data=X_val, label=y_val.reshape(-1, 1), embedding_features=embedding_features)


  def init_model(self, parameters={}, task="classification"):
    """Create a fresh, unfitted CatBoost model.

    Args:
      parameters: CatBoost constructor arguments. `random_seed` and `task_type`
        default to `self.seed` / `self.device` and may be overridden here.
      task: "classification" builds a CatBoostClassifier, anything else a CatBoostRegressor.
    """

    self.fitted_model = False

    model_kwargs = {"random_seed": self.seed, "task_type": self.device, **parameters}

    if task == "classification":
      self.model = CatBoostClassifier(**model_kwargs)

    else:
      self.model = CatBoostRegressor(**model_kwargs)

  def load_model(self, path):
    """Create a default classifier and load its state from `path`."""

    self.init_model()
    self.model.load_model(path)
    self.fitted_model = True

  def fit_model(self, parameters={}):
    """Fit `self.model` on the train pool, using the validation pool as eval set.

    Args:
      parameters: extra keyword arguments forwarded to `model.fit`.

    Raises:
      Exception: if the pools or the model have not been initialised.
    """

    if self.train_data is None:
      raise Exception("Training data can not be None")

    if self.val_data is None:
      raise Exception("Validation data can not be None")

    if self.model is None:
      raise Exception("Model is not initialized, call init_model first")

    self.model.fit(X=self.train_data, eval_set=self.val_data, **parameters)

    self.fitted_model = True

    print("Training Done")

  # inf_data: a catboost Pool; it must carry labels when metrics are computed (get_label() is called)
  def inference_model(self, inf_data=None, predict_proba=False, evaluate_metrics=["f1_score", "accuracy_score"], main_metric="f1_score", task_type="GPU"):
    """Predict on `inf_data` (or the test pool) and score the predictions.

    Args:
      inf_data: pool to predict on; defaults to `self.test_data`.
      predict_proba: if True, return class probabilities and skip the metrics.
      evaluate_metrics: names of `sklearn.metrics` functions called as `metric(labels, predictions)`.
      main_metric: metric used to update `self.best_score` / `self.best_model`.
      task_type: accepted for interface compatibility; predictions run with `self.device`.

    Returns:
      The probability array when `predict_proba` is True, otherwise a dict `{metric_name: value}`.

    Raises:
      AssertionError: if no model was fitted or `main_metric` is not in `evaluate_metrics`.
      Exception: if there is no data to predict on.
    """

    assert(self.fitted_model), "there is no trained model"
    assert(main_metric in evaluate_metrics), "main_metric must be one of evaluate_metrics"

    data = self.test_data if inf_data is None else inf_data
    if data is None:
      raise Exception("No data for inference: pass inf_data or call init_data with three splits")

    if predict_proba:
      return self.model.predict_proba(data, task_type=self.device)

    predictions = self.model.predict(data, task_type=self.device)

    metrics = {}
    for metric_name in evaluate_metrics:
      metrics[metric_name] = getattr(sklearn.metrics, metric_name)(data.get_label(), predictions)

    # Higher is assumed to be better for main_metric.
    if self.best_score is None or self.best_score < metrics[main_metric]:
      self.best_score = metrics[main_metric]
      self.best_model = self.model

    return metrics

  #editable
  def optuna_objective(self, metric_name, trial):
    """Optuna objective: fit one CatBoostClassifier with sampled parameters and score it.

    The model is fitted on the train pool with early stopping on the
    validation pool and scored on the test pool (the validation pool is used
    when no test pool exists).

    NOTE(review): selecting hyper-parameters on the test pool makes later test
    scores optimistic — consider scoring trials on validation data only.

    Args:
      metric_name: name of a `sklearn.metrics` function called as `metric(labels, predictions)`.
      trial: the Optuna trial supplying parameter suggestions.

    Returns:
      The metric value of this trial.
    """

    param = {
        "objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
        "depth": trial.suggest_int("depth", 1, 12),
        "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
        "bootstrap_type": trial.suggest_categorical(
            "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
        ),
        "task_type" : self.device
    }

    if param["bootstrap_type"] == "Bayesian":
        param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
    elif param["bootstrap_type"] == "Bernoulli":
        param["subsample"] = trial.suggest_float("subsample", 0.1, 1)

    # Same seed as init_model, so a trial can be reproduced later.
    optuna_gbm = CatBoostClassifier(**param, random_seed=self.seed)

    optuna_gbm.fit(self.train_data, eval_set=self.val_data, verbose=0, early_stopping_rounds=100)

    eval_pool = self.test_data if self.test_data is not None else self.val_data
    predictions = optuna_gbm.predict(eval_pool)
    metric = getattr(sklearn.metrics, metric_name)(eval_pool.get_label(), predictions)

    # Track the best model according to the direction of the running study.
    minimizing = str(self._optuna_direction).lower().endswith("minimize")
    if self.best_optuna_score is None:
      improved = True
    elif minimizing:
      improved = metric < self.best_optuna_score
    else:
      improved = self.best_optuna_score < metric

    if improved:
      self.best_optuna_score = metric
      self.best_optuna_model = optuna_gbm

    return metric

  def tune_params(self, metric_name="f1_score", direction="maximize", n_trials=10, timeout_study=1000, task_type="CPU", show_progress_bar=True):
    """Run a new Optuna study and remember its best trial and model.

    Args:
      metric_name: forwarded to `optuna_objective`.
      direction: "maximize" or "minimize".
      n_trials: maximum number of trials.
      timeout_study: time limit passed to `study.optimize` as `timeout`.
      task_type: accepted for interface compatibility; trials run with `self.device`.
      show_progress_bar: forwarded to `study.optimize`.
    """
    # Every call starts a new study, so the best-so-far bookkeeping restarts too.
    self._optuna_direction = direction
    self.best_optuna_score = None
    self.best_optuna_model = None

    # Seeded sampler keeps the sequence of suggested parameters reproducible.
    study = optuna.create_study(direction=direction, sampler=optuna.samplers.TPESampler(seed=self.seed))
    study.optimize(lambda x: self.optuna_objective(metric_name, x), n_trials=n_trials, timeout=timeout_study, show_progress_bar=show_progress_bar)

    self.best_trial = study.best_trial

    print(f"Count trial: {len(study.trials)}")

  def get_best_params_optuna(self):
    """Print the value and parameters of the best trial of the last study.

    Raises:
      AssertionError: if `tune_params` has not been run.
    """

    assert(self.best_trial is not None), "No study have been done"

    trial = self.best_trial

    print(f"Best metric value: {trial.value}")

    print("Best trial params: ")
    for key, value in trial.params.items():
        print(f"{key}: {value}", end="\n")



## model initialization

In [None]:
# Reload the table that make_dataset wrote to /content/dataset_cbm.csv.
cb_data = pd.read_csv("/content/dataset_cbm.csv")
# Disabled: declaring the e_* columns as CatBoost embedding features.
# NOTE(review): the range (288) does not match len_embeds=370 used when the dataset was built.
# embedding_features = [f"e_{i}" for i in range(0, 288)]

gbm = CatBoostModel(seed=42, device="CPU")
# splits = [train, validation, test] fractions
gbm.init_data(cb_data, target_column="target", splits=[0.75, 0.1, 0.15], embedding_features=[])

  arr = np.asarray(values, dtype=dtype)


## tuning params

In [None]:
# Run an Optuna study limited to 20 trials / timeout_study=3600, then print the best trial.
gbm.tune_params(n_trials=20, timeout_study=3600)
gbm.get_best_params_optuna()

[I 2024-04-08 12:11:12,086] A new study created in memory with name: no-name-0c053556-e4ac-4133-b2a7-6246582d7c94


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2024-04-08 12:11:15,852] Trial 0 finished with value: 0.967992240543162 and parameters: {'objective': 'CrossEntropy', 'colsample_bylevel': 0.07858536406599222, 'depth': 2, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.1408686626948248}. Best is trial 0 with value: 0.967992240543162.
[I 2024-04-08 12:11:26,953] Trial 1 finished with value: 0.97265625 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.030118379260155828, 'depth': 6, 'boosting_type': 'Plain', 'bootstrap_type': 'Bernoulli', 'subsample': 0.5561660458905046}. Best is trial 1 with value: 0.97265625.
[I 2024-04-08 12:12:42,281] Trial 2 finished with value: 0.975609756097561 and parameters: {'objective': 'Logloss', 'colsample_bylevel': 0.042607983947643875, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'subsample': 0.9752328070068176}. Best is trial 2 with value: 0.975609756097561.
[I 2024-04-08 12:16:27,918] Trial 3 finished with value: 0.9718172983479106 and p

In [None]:
# Persist the best model found during tuning.
# NOTE(review): the file name says "optuna50" while the study above is run with n_trials=20 — confirm.
gbm.best_optuna_model.save_model("/content/drive/MyDrive/finals/bigcalls/optuna50_best.cbm")

In [None]:
gbm.best_trial.params

{'objective': 'Logloss',
 'colsample_bylevel': 0.06862982519841486,
 'depth': 8,
 'boosting_type': 'Ordered',
 'bootstrap_type': 'Bernoulli',
 'subsample': 0.9885331636414759}

## fit model on the best Optuna params

In [None]:
# Work on a copy so that editing class_params (e.g. the optional line below)
# never modifies the dict held by the stored trial.
class_params = dict(gbm.best_trial.params)
# class_params["iterations"] = 1000

gbm.init_model(parameters=class_params, task="classification")

In [None]:
# Keyword arguments forwarded to CatBoost's fit(); verbose=50 logs every 50th iteration.
fit_params = {
    "verbose" : 50,
}

gbm.fit_model(parameters=fit_params)

Learning rate set to 0.052633
0:	learn: 0.5805515	test: 0.5779505	best: 0.5779505 (0)	total: 224ms	remaining: 3m 43s
50:	learn: 0.0363867	test: 0.0304287	best: 0.0304287 (50)	total: 11.8s	remaining: 3m 40s
100:	learn: 0.0226356	test: 0.0258995	best: 0.0258995 (100)	total: 22.6s	remaining: 3m 21s
150:	learn: 0.0155648	test: 0.0238599	best: 0.0238267 (148)	total: 33.3s	remaining: 3m 6s
200:	learn: 0.0111168	test: 0.0227637	best: 0.0226911 (199)	total: 44.7s	remaining: 2m 57s
250:	learn: 0.0087817	test: 0.0221533	best: 0.0221533 (250)	total: 56.1s	remaining: 2m 47s
300:	learn: 0.0072226	test: 0.0216010	best: 0.0216010 (300)	total: 1m 8s	remaining: 2m 39s
350:	learn: 0.0064037	test: 0.0212882	best: 0.0212790 (347)	total: 1m 18s	remaining: 2m 25s
400:	learn: 0.0057981	test: 0.0209434	best: 0.0209327 (397)	total: 1m 30s	remaining: 2m 14s
450:	learn: 0.0053008	test: 0.0208651	best: 0.0208229 (436)	total: 1m 41s	remaining: 2m 3s
500:	learn: 0.0047809	test: 0.0208561	best: 0.0208229 (436)	total

## Inference

In [None]:
# Score the fitted model on the test pool (f1_score and accuracy_score by default).
# Note: inference_model accepts task_type but predicts with gbm.device.
gbm.inference_model(task_type="CPU")

{'f1_score': 0.9766536964980543, 'accuracy_score': 0.9846153846153847}

In [None]:
# Save the fitted model; the file name records the f1_score shown above (0.976).
gbm.model.save_model("/content/cb_0.976.cbm")

In [None]:
!python --version

Python 3.10.12


## Estimating confidence

In [None]:
def create_conf_interval_gbm(gbm, num_boostrapping):
  """Bootstrap a confidence interval for the CatBoost model on its test pool.

  Args:
    gbm: wrapper exposing a fitted `model` and a `test_data` pool.
    num_boostrapping: number of bootstrap resamples.

  Returns:
    Whatever `create_conf_interval` returns for the test labels and predictions.
  """
  holdout = gbm.test_data
  predicted = gbm.model.predict(holdout)
  return create_conf_interval(holdout.get_label(), predicted, num_boostrapping)

In [None]:
mean_gbm, estimated_borders_gbm = create_conf_interval_gbm(gbm, 1000)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
print(f"mean score: {mean_gbm}, lower bound: {estimated_borders_gbm[0]}, upper bound: {estimated_borders_gbm[1]}")

mean score: 0.9846910256410256, lower bound: 0.9788301282051282, upper bound: 0.9903846153846154


In [None]:
# Debug helper: collect the loaded modules whose names are also bound as globals in this notebook.
import sys
modulenames = set(sys.modules) & set(globals())
allmodules = [sys.modules[name] for name in modulenames]

In [None]:
allmodules

[<module 'sklearn' from '/usr/local/lib/python3.10/dist-packages/sklearn/__init__.py'>,
 <module 'random' from '/usr/lib/python3.10/random.py'>,
 <module 'sys' (built-in)>,
 <module 'optuna' from '/usr/local/lib/python3.10/dist-packages/optuna/__init__.py'>,
 <module 'tqdm' from '/usr/local/lib/python3.10/dist-packages/tqdm/__init__.py'>,
 <module 'types' from '/usr/lib/python3.10/types.py'>,
 <module 'os' from '/usr/lib/python3.10/os.py'>,
 <module 'torch' from '/usr/local/lib/python3.10/dist-packages/torch/__init__.py'>]

# Blending предсказаний CatBoost и ECGNet

In [None]:
def blend(gbm, ecgnet):
  """Blend CatBoost and ECGNet predictions (not implemented yet).

  The previous stub iterated over a placeholder and called `predict_proba`
  without data, so it could only fail with an unrelated error. Until the
  blending logic exists, the function says so explicitly.

  Args:
    gbm: CatBoost wrapper whose probabilities should be blended.
    ecgnet: ECGNet wrapper whose probabilities should be blended.

  Raises:
    NotImplementedError: always.
  """
  # TODO: both models must be evaluated on the same samples before their
  # probabilities can be combined.
  raise NotImplementedError("blend is not implemented yet: no shared test data for CatBoost and ECGNet is defined")


In [None]:
"""
ECGNet module

classes:
  ECGNetModel:

    properties:


    methods:

    - make dataset(to train EcgNet)
    - split and put data into loaders(to train EcgNet)
    - train EcgNet
    - test EcgNet
    - tune objective of EcgNet
    - tune EcgNet
    - load EcgNet

  CatBoostModel:

    properties:


    methods:

utils:

  functions:


  1. create_dataset_for_catboost(**kwargs)
    - model (EcgNet)
    - step=1 (to get features from embedding)
    - signal_data
    - features=False (add features columns or not)

  2. create_features(signal)

  3. create_time_features(signal)

  4. create_freq_features(signal)

  datasets:

  1. EcgNetDataset() -> signal, target

"""

In [None]:
worker.labels.to_csv("ilmi.csv", index=False)