In [None]:
%%capture
# Install the two extra packages imported below (wandb, focal_loss_torch); %%capture silences the installer output.
!pip install wandb focal_loss_torch

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import scipy.stats as sts
import os

from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from collections import defaultdict, OrderedDict
from focal_loss.focal_loss import FocalLoss
import warnings
warnings.filterwarnings('ignore')

# Подгрузка данных



In [None]:
# Fetch the signal archive and the label file with gdown (presumably Google Drive file ids — verify), then unpack the archive.
!gdown 1Rt0I7Svrx77tFMCsNubEQ-cDY8hD-iCk #r_peaks.zip
!gdown 1GWyzUaz_mOwYDbLuopjroIcfngjSJWkD #labels
!unzip r_peaks.zip

In [None]:
# Diagnostic class name -> integer class id.
# Presumably these ids are the values stored in the `result_class` column of the
# label file — TODO confirm against the labelling script.
CLASS_NAME_TO_INDEX = {
    'NORM': 0,
    'IMI': 1,
    'NDT': 2,
    'NST_': 3,
    'LVH': 4,
    'LAFB': 5,
    'IRBBB': 6,
    'IVCD': 7,
    'ASMI': 8,
    'AMI': 9,
    'ISCAL': 10,
    '1AVB': 11,
    'ILMI': 12,
    'ISC_': 13,
    'CRBBB': 14,
    'CLBBB': 15,
    'LAO/LAE': 16,
}
# Bare last expression: the notebook still displays the mapping for the reader.
CLASS_NAME_TO_INDEX

# Разделение датасета
Для выбранного target_class строится бинарная разметка: объекты целевого класса получают метку 1, объекты всех остальных классов — метку 0.

In [None]:
labels = pd.read_csv("train_val_labels.csv")
# target_class holds the class id that the current one-vs-rest network is trained for
target_class = 8
# Fixed seed so the negative subsample is the same after every Restart & Run All.
SAMPLE_SEED = 42

left_classes = [i for i in labels.result_class.unique() if i != target_class]
# Every other class contributes at most (2/15) * (number of target-class rows) negatives.
num_others = (len(labels[labels.result_class == target_class]) * 2) // 15

# .copy() detaches the positives from `labels`, so relabelling them no longer
# raises SettingWithCopyWarning and can never write through to `labels`.
positives = labels[labels.result_class == target_class].copy()
positives["result_class"] = 1

# Collect the pieces and concatenate once instead of growing `data` inside the loop.
frames = [positives]
for cur_class in left_classes:
  cur_class_data = labels[labels.result_class == cur_class]
  # Keep only records that carry no label other than cur_class.
  records_with_other_labels = labels.loc[labels.result_class != cur_class, "record_name"]
  cur_class_data = cur_class_data[~cur_class_data.record_name.isin(records_with_other_labels)]
  cur_frame = cur_class_data.sample(
      n=min(len(cur_class_data), num_others), random_state=SAMPLE_SEED
  ).copy()
  cur_frame["result_class"] = 0
  frames.append(cur_frame)

# ignore_index gives the result a clean 0..n-1 index (rows are only accessed positionally later on).
data = pd.concat(frames, axis=0, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, ["result_class"]] = 1


## Dataset

In [None]:
class EcgPTBDataset(Dataset):
    """Map-style dataset that loads one ``.npy`` signal per row of a label table.

    Args:
        labels: DataFrame whose first column holds the record name (the stem of
            the ``.npy`` file) and whose second column holds the target value.
        path: Directory containing the ``<record name>.npy`` files; a trailing
            separator is optional.
    """

    def __init__(self, labels, path='/'):
        # Extract whole columns at once instead of one ``iloc`` lookup per row.
        self.x_paths = labels.iloc[:, 0].tolist()
        self.labels = labels.iloc[:, 1].tolist()
        self.path = path

    def __len__(self):
        """Number of records in the label table."""
        return len(self.x_paths)

    def __getitem__(self, idx):
        """Return ``(signal, target)`` for row ``idx``.

        The stored array must be 2-D (the ``[None, :, :]`` indexing requires
        it); a leading singleton axis is added in front of it.
        """
        # os.path.join works whether or not ``path`` ends with a separator.
        file_name = os.path.join(self.path, self.x_paths[idx] + '.npy')
        hr = torch.tensor(np.load(file_name))[None, :, :]

        target = self.labels[idx]

        return hr, target

In [None]:
ptb_set = EcgPTBDataset(data, path="/content/r_peaks/signals/")

# Seeded generator: the 10 % validation / 90 % training membership is then the
# same on every run, so validation scores of different runs are comparable.
SPLIT_SEED = 42
valid_data, train_data = random_split(
    ptb_set,
    lengths=[0.1, 0.9],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

BATCH_SIZE = 64
# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, num_workers=1)
valid_loader = DataLoader(valid_data, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, num_workers=1)

## ECGNET

In [None]:
class ECGNet(nn.Module):
  """Three-branch convolutional network with squeeze-and-excitation (SE) gating.

  Data flow (all of it visible in ``forward``):
    1. ``layer1``/``layer2``: 2-D convolutions with ``(1, k)`` kernels, followed by an SE gate.
    2. ``layer3``: three parallel 2-D branches with ``(3, 1)``, ``(5, 1)`` and ``(7, 1)`` kernels,
       each followed by an SE gate.
    3. ``layer4``: axes 1 and 2 of every branch are merged (32 * 12 = 384 channels) and the
       branch runs through a 1-D convolution stack with an SE gate plus a strided shortcut.
    4. ``layer5``/``fc``: average pooling, concatenation of the three branches
       (3 * 384 = 1152 features) and a two-layer head ending in a sigmoid.

  The input must have one channel on axis 1 and 12 rows on axis 2 (presumably the
  12 ECG leads — TODO confirm); its last axis must be long enough that every
  pooled branch has length 1, otherwise the 1152-feature head does not fit.

  Attribute names, sub-module names and construction order match the original
  hand-written definition, so ``state_dict`` keys of saved checkpoints still load.
  """

  def __init__(self):
    super(ECGNet, self).__init__()
    #layer1
    self.layer1_conv2d = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(1, 25), stride=(1, 2), bias=True)

    #layer2
    self.layer2_conv2d = self._preact_stack(nn.BatchNorm2d, nn.Conv2d, 32, [
        (64, dict(kernel_size=(1, 15), stride=(1, 1))),
        (64, dict(kernel_size=(1, 15), stride=(1, 2))),
        (32, dict(kernel_size=(1, 15), stride=(1, 1))),
    ])
    self.layer2_seModule = self._se_gate(nn.Conv2d, 32, 16)

    #layer3: same structure three times, only the kernel height differs
    self.layer3_conv2d_block1 = self._vertical_block(3)
    self.layer3_seModule_block1 = self._se_gate(nn.Conv2d, 32, 16)

    self.layer3_conv2d_block2 = self._vertical_block(5)
    self.layer3_seModule_block2 = self._se_gate(nn.Conv2d, 32, 16)

    self.layer3_conv2d_block3 = self._vertical_block(7)
    self.layer3_seModule_block3 = self._se_gate(nn.Conv2d, 32, 16)

    #layer4
    self.layer4_conv1d_short_block1 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (384, dict(kernel_size=3, stride=9)),
    ])
    self.layer4_conv1d_block1 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (768, dict(kernel_size=3, stride=2)),
        (768, dict(kernel_size=3, stride=1)),
        (1536, dict(kernel_size=3, stride=2)),
        (384, dict(kernel_size=3, stride=2)),
    ])
    self.layer4_seModule_block1 = self._se_gate(nn.Conv1d, 384, 48)

    self.layer4_conv1d_short_block2 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (384, dict(kernel_size=5, stride=9)),
    ])
    self.layer4_conv1d_block2 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (768, dict(kernel_size=5, stride=2, padding=2)),
        (768, dict(kernel_size=5, stride=2, padding=1)),
        (1536, dict(kernel_size=5, stride=1, padding=2)),
        (384, dict(kernel_size=5, stride=2, padding=1)),
    ])
    self.layer4_seModule_block2 = self._se_gate(nn.Conv1d, 384, 48)

    self.layer4_conv1d_short_block3 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (384, dict(kernel_size=7, stride=9)),
    ])
    self.layer4_conv1d_block3 = self._preact_stack(nn.BatchNorm1d, nn.Conv1d, 384, [
        (768, dict(kernel_size=7, stride=2, padding=2)),
        (768, dict(kernel_size=7, stride=2, padding=1)),
        (1536, dict(kernel_size=7, stride=1, padding=3)),
        (384, dict(kernel_size=7, stride=2, padding=2)),
    ])
    self.layer4_seModule_block3 = self._se_gate(nn.Conv1d, 384, 48)

    self.layer5_avg_pool1 = nn.AvgPool1d(kernel_size=10)
    self.layer5_avg_pool2 = nn.AvgPool1d(kernel_size=10)
    self.layer5_avg_pool3 = nn.AvgPool1d(kernel_size=10)

    self.fc = nn.Sequential(OrderedDict([
        ("ln1", nn.Linear(1152, 288)),
        ("dp", nn.Dropout(p=0.2)),
        ("act", nn.ReLU()),
        ("ln2", nn.Linear(288, 1)),
        ("sigmoid", nn.Sigmoid())
    ]))

  @staticmethod
  def _preact_stack(norm_cls, conv_cls, in_channels, conv_specs):
    """Build a ``bn{i} -> act{i} -> cn{i}`` chain for every ``(out_channels, conv_kwargs)`` spec."""
    layers = OrderedDict()
    channels = in_channels
    for idx, (out_channels, conv_kwargs) in enumerate(conv_specs, start=1):
      layers[f"bn{idx}"] = norm_cls(num_features=channels)
      layers[f"act{idx}"] = nn.ReLU()
      layers[f"cn{idx}"] = conv_cls(channels, out_channels, bias=True, **conv_kwargs)
      channels = out_channels
    return nn.Sequential(layers)

  @staticmethod
  def _se_gate(conv_cls, channels, reduced):
    """Build the SE bottleneck ``fc1 -> act -> fc2 -> gate`` out of 1x1 convolutions."""
    return nn.Sequential(OrderedDict([
        ("fc1", conv_cls(channels, reduced, kernel_size=1, bias=True)),
        ("act", nn.ReLU()),
        ("fc2", conv_cls(reduced, channels, kernel_size=1, bias=True)),
        ("gate", nn.Sigmoid())
    ]))

  @classmethod
  def _vertical_block(cls, kernel_height):
    """Build one layer3 branch: 32 -> 64 -> 64 -> 32 channels with ``(kernel_height, 1)`` kernels.

    Padding ``kernel_height // 2`` on axis 2 keeps that axis' size unchanged.
    """
    conv_kwargs = dict(kernel_size=(kernel_height, 1), stride=(1, 1), padding=(kernel_height // 2, 0))
    return cls._preact_stack(nn.BatchNorm2d, nn.Conv2d, 32, [
        (64, dict(conv_kwargs)),
        (64, dict(conv_kwargs)),
        (32, dict(conv_kwargs)),
    ])

  @staticmethod
  def _apply_se(features, se_module):
    """Scale ``features`` channel-wise by the gate computed from their global average."""
    batch, channels = features.size(0), features.size(1)
    # Squeeze: average over every non-channel position, then restore singleton
    # axes so the gate broadcasts against ``features`` (works for 3-D and 4-D input).
    squeezed = features.view(batch, channels, -1).mean(-1)
    squeezed = squeezed.view(batch, channels, *([1] * (features.dim() - 2)))
    return features * se_module(squeezed)

  def _pooled_features(self, x):
    """Run layers 1-5 and return the flattened, concatenated branch features."""
    #layer1
    x = self.layer1_conv2d(x)

    #layer2
    x = self._apply_se(self.layer2_conv2d(x), self.layer2_seModule)

    branch_modules = (
        (self.layer3_conv2d_block1, self.layer3_seModule_block1, self.layer4_conv1d_short_block1,
         self.layer4_conv1d_block1, self.layer4_seModule_block1, self.layer5_avg_pool1),
        (self.layer3_conv2d_block2, self.layer3_seModule_block2, self.layer4_conv1d_short_block2,
         self.layer4_conv1d_block2, self.layer4_seModule_block2, self.layer5_avg_pool2),
        (self.layer3_conv2d_block3, self.layer3_seModule_block3, self.layer4_conv1d_short_block3,
         self.layer4_conv1d_block3, self.layer4_seModule_block3, self.layer5_avg_pool3),
    )

    pooled = []
    for conv2d, se2d, shortcut1d, conv1d, se1d, avg_pool in branch_modules:
      #layer3
      branch = self._apply_se(conv2d(x), se2d)
      #layer4: merge axes 1 and 2 so the branch becomes a (batch, 384, length) sequence
      branch = torch.flatten(branch, start_dim=1, end_dim=2)
      shortcut = shortcut1d(branch)
      branch = self._apply_se(conv1d(branch), se1d)
      # Residual sum: the strided shortcut must yield the same length as the main path.
      branch = branch + shortcut
      #layer5
      pooled.append(avg_pool(branch))

    return torch.cat(pooled, dim=1).flatten(1)

  def forward(self, x, embeds=False):
    """Return the sigmoid output of shape ``(batch, 1)``.

    Args:
      x: float tensor with a single channel on axis 1.
      embeds: when True, return the output of the first linear layer
        (``(batch, 288)``, before dropout and activation) instead.
    """
    x = self._pooled_features(x)

    if embeds:
        return self.fc.ln1(x)

    return self.fc(x)

  def get_embeds(self, x):
    """Return the 288-dimensional embedding of ``x``; same as ``forward(x, embeds=True)``."""
    return self.forward(x, embeds=True)

## Metrics

In [None]:
def calculate_accuracy(output, target):
    """Return the share of positions at which ``output`` equals ``target``.

    Both arguments are tensors of the same length; the result is a 0-d tensor.
    """
    n_matches = (target == output).sum()
    return n_matches / len(target)

def calculate_f1(preds, labels):
    """Return the F1 score of binary predictions, with 1 as the positive class.

    Recall or precision count as 0 when their denominator is 0; the function
    returns the plain integer 0 when both are 0, otherwise a 0-d tensor.
    """
    true_pos = ((preds == labels) & (preds == 1)).sum()
    predicted_pos = (preds == 1).sum()
    actual_pos = (labels == 1).sum()

    recall = 0
    if actual_pos != 0:
        recall = true_pos / actual_pos

    precision = 0
    if predicted_pos != 0:
        precision = true_pos / predicted_pos

    if recall + precision == 0:
        return 0
    return (2 * recall * precision) / (recall + precision)

class MetricMonitor:
    """Keep a running sum, count and average for any number of named metrics.

    Args:
        float_precision: number of decimals used when the monitor is printed.
    """

    def __init__(self, float_precision=3):
        self.float_precision = float_precision
        self.reset()

    def reset(self):
        """Forget every metric recorded so far."""
        self.metrics = defaultdict(lambda: {"val": 0, "count": 0, "avg": 0})

    def update(self, metric_name, val):
        """Add one observation of ``metric_name``.

        ``val`` may be a Python number or a 0-d tensor. It is stored as a plain
        float, so the running sum never keeps an autograd graph or a device
        tensor alive and the averages are ordinary floats.
        """
        metric = self.metrics[metric_name]

        if hasattr(val, "detach"):
            val = val.detach()
        metric["val"] += float(val)
        metric["count"] += 1
        metric["avg"] = metric["val"] / metric["count"]

    def __str__(self):
        """Render ``name: average`` pairs separated by `` | ``."""
        return " | ".join(
            [
                "{metric_name}: {avg:.{float_precision}f}".format(
                    metric_name=metric_name, avg=metric["avg"], float_precision=self.float_precision
                )
                for (metric_name, metric) in self.metrics.items()
            ]
        )

## Train/Valid part

In [None]:
def train(train_loader, model, criterion, optimizer, epoch, device):
    """Run one training epoch and show running Loss / Accuracy / F1 in the progress bar.

    Args:
        train_loader: iterable of ``(x_batch, y_batch)`` pairs.
        model: network whose flattened output is thresholded at 0.5.
        criterion: loss called as ``criterion(output, y_batch.float())``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress-bar text.
        device: device the batches are moved to.
    """
    metric_monitor = MetricMonitor(float_precision=4)
    model.train()
    stream = tqdm(train_loader)
    for x_batch, y_batch in stream:
        y_batch = y_batch.to(device, non_blocking=True)
        x_batch = x_batch.to(device, non_blocking=True)
        # Flatten the model output to one value per sample.
        output = model(x_batch.float()).view(-1)
        loss = criterion(output, y_batch.float())

        # Metrics are bookkeeping only: detach them and log plain floats so the
        # monitor does not hold on to the autograd graph of every batch.
        preds = (output.detach() > 0.5).to(torch.int32)
        metric_monitor.update("Loss", loss.detach().item())
        metric_monitor.update("Accuracy", float(calculate_accuracy(preds, y_batch)))
        metric_monitor.update("F1", float(calculate_f1(preds, y_batch)))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        stream.set_description(
            "Epoch: {epoch}. Train.  {metric_monitor}".format(epoch=epoch, metric_monitor=metric_monitor)
        )

In [None]:
def validate(val_loader, model, criterion, epoch, device):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Args:
        val_loader: iterable of ``(x_batch, y_batch)`` pairs.
        model: network whose flattened output is thresholded at 0.5.
        criterion: loss called as ``criterion(output, y_batch.float())``.
        epoch: epoch number, used only in the progress-bar text.
        device: device the batches are moved to.

    Returns:
        ``(f1, accuracy, loss)``: per-batch averages as Python floats, so they
        can be logged, compared and formatted (e.g. into a file name) directly.
    """
    metric_monitor = MetricMonitor(float_precision=4)
    model.eval()
    stream = tqdm(val_loader)
    with torch.no_grad():
        for x_batch, y_batch in stream:
            y_batch = y_batch.to(device, non_blocking=True)
            x_batch = x_batch.to(device, non_blocking=True)
            # Flatten the model output to one value per sample.
            output = model(x_batch.float()).view(-1)
            loss = criterion(output, y_batch.float())
            preds = (output > 0.5).to(torch.int32)
            # float(...) turns 0-d (possibly CUDA) tensors into plain numbers.
            metric_monitor.update("Loss", float(loss))
            metric_monitor.update("Accuracy", float(calculate_accuracy(preds, y_batch)))
            metric_monitor.update("F1", float(calculate_f1(preds, y_batch)))
            stream.set_description(
                "Epoch: {epoch}. Validation. {metric_monitor}".format(epoch=epoch, metric_monitor=metric_monitor)
            )
    metrics = metric_monitor.metrics
    return metrics["F1"]["avg"], metrics["Accuracy"]["avg"], metrics["Loss"]["avg"]

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ECGNet()
model = model.to(device)

learning_rate = 3e-5
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# The training loop calls scheduler.step(f1_v), i.e. it monitors the validation
# F1, which should grow. mode="max" makes the scheduler lower the learning rate
# when F1 stops improving; the default mode="min" would treat every F1 gain as
# a deterioration.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max")

loss_fn = FocalLoss(gamma=1.8)

In [None]:
!wandb login #ключ api wandb
import wandb

# Open a wandb run in the "Ecg_one_vs_rest" project; the config dict records
# which architecture and dataset this run uses.
wandb.init(
    project="Ecg_one_vs_rest",

    config={
        "architecture": "ecg_net",
        "dataset": "r_peaks",
    }
)

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the training loop writes its checkpoints
# to /content/drive/MyDrive/models.
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
num_epochs = 10
# A checkpoint is written only when the validation F1 beats the best value so far;
# the initial 0.85 means weaker models are never saved.
max_f1 = 0.85
checkpoint_dir = '/content/drive/MyDrive/models'
# torch.save does not create missing directories.
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
  train(train_loader, model, loss_fn, optimizer, epoch, device)
  f1_v, acc_v, loss_v = validate(valid_loader, model, loss_fn, epoch, device)
  # validate may hand back 0-d tensors. Plain floats keep the log entries numeric
  # and the checkpoint name readable ("0.87.pth" instead of "tensor(0.87, device='cuda:0').pth").
  f1_v, acc_v, loss_v = float(f1_v), float(acc_v), float(loss_v)
  scheduler.step(f1_v)
  wandb.log({"F1": f1_v, "Acc": acc_v, 'loss': loss_v})
  if f1_v > max_f1:
    max_f1 = f1_v
    torch.save(model.state_dict(), f'{checkpoint_dir}/{f1_v}.pth')

## Пример создания CatBoost

In [None]:
# feature extraction
# Small tolerance used to keep denominators away from zero.
epsn = 1e-8


# ---------------------------------------------------------------- time domain

def mean_fea(a):
    """Arithmetic mean of the samples."""
    return np.mean(a)

def rms_fea(a):
    """Root mean square of the samples."""
    return np.sqrt(np.mean(np.square(a)))

def sr_fea(a):
    """Square of the mean of sqrt(|a|)."""
    return np.square(np.mean(np.sqrt(np.abs(a))))

def am_fea(a):
    """Mean absolute value."""
    return np.mean(np.abs(a))

def skew_fea(a):
    """Third central moment (not normalised by the standard deviation)."""
    return np.mean((a-mean_fea(a))**3)

def kurt_fea(a):
    """Fourth central moment (not normalised by the standard deviation)."""
    return np.mean((a-mean_fea(a))**4)

def max_fea(a):
    """Largest sample."""
    return np.max(a)

def min_fea(a):
    """Smallest sample."""
    return np.min(a)

def pp_fea(a):
    """Peak-to-peak range: max minus min."""
    return max_fea(a)-min_fea(a)

def var_fea(a):
    """Sample variance (divisor n - 1); 0.0 when fewer than two samples are given."""
    n = len(a)
    if n < 2:
        # n - 1 would be zero (or negative): the sample variance is undefined.
        return 0.0
    return np.sum((a-mean_fea(a))**2)/(n-1)

def waveform_index(a):
    """RMS divided by the mean absolute value."""
    return rms_fea(a)/(am_fea(a)+epsn)

def peak_index(a):
    """Maximum divided by the RMS."""
    return max_fea(a)/(rms_fea(a)+epsn)

def impluse_factor(a):
    """Maximum divided by the mean absolute value (name kept for existing callers)."""
    return max_fea(a)/(am_fea(a)+epsn)

# Correctly spelled alias; the misspelled name above stays available.
impulse_factor = impluse_factor

def tolerance_index(a):
    """Maximum divided by ``sr_fea``."""
    return max_fea(a)/(sr_fea(a)+epsn)

def skew_index(a):
    """Sum of cubed deviations over (n - 1) * std**3; 0.0 when that denominator is zero."""
    n = len(a)
    temp1 = np.sum((a-mean_fea(a))**3)
    temp2 = (np.sqrt(var_fea(a)))**3
    denominator = (n-1)*temp2
    if denominator == 0:
        # Constant signal or a single sample: avoid 0/0.
        return 0.0
    return temp1/denominator

def kurt_index(a):
    """Sum of 4th-power deviations over (n - 1) * std**4; 0.0 when that denominator is zero."""
    n = len(a)
    temp1 = np.sum((a-mean_fea(a))**4)
    temp2 = (np.sqrt(var_fea(a)))**4
    denominator = (n-1)*temp2
    if denominator == 0:
        # Constant signal or a single sample: avoid 0/0.
        return 0.0
    return temp1/denominator


# ----------------------------------------------------------- frequency domain

def _half_spectrum(sequence_data):
    """Return ``(magnitudes of FFT bins 1..floor(n/2), their sum)``; bin 0 (DC) is excluded."""
    fft_trans = np.abs(np.fft.fft(sequence_data))
    freq_spectrum = fft_trans[1:int(np.floor(len(sequence_data) * 1.0 / 2)) + 1]
    return freq_spectrum, np.sum(freq_spectrum)

def _spectrum_bins(freq_spectrum):
    """Positions 0..len-1 of the half spectrum, used as the 'frequency' axis of the shape features."""
    return np.arange(len(freq_spectrum))

def fft_fft(sequence_data):
    """Return ``(dc, freq_spectrum, freq_sum)``: magnitude of bin 0, the half spectrum and its sum."""
    fft_trans = np.abs(np.fft.fft(sequence_data))
    dc = fft_trans[0]
    freq_spectrum = fft_trans[1:int(np.floor(len(sequence_data) * 1.0 / 2)) + 1]
    freq_sum_ = np.sum(freq_spectrum)
    return dc, freq_spectrum, freq_sum_

def fft_mean(sequence_data):
    """Mean magnitude of the half spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    return np.mean(freq_spectrum)

def fft_var(sequence_data):
    """Variance of the half-spectrum magnitudes."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    return np.var(freq_spectrum)

def fft_std(sequence_data):
    """Standard deviation of the half-spectrum magnitudes."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    return np.std(freq_spectrum)

def fft_std2(sequence_data):
    """Same value as ``fft_std``; kept under its own name for existing callers."""
    return fft_std(sequence_data)

def fft_entropy(sequence_data):
    """Entropy (base 2) of the normalised half spectrum; 0.0 for an all-zero spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    if _freq_sum_ < epsn:
        # Nothing to normalise: avoid dividing by zero.
        return 0.0
    pr_freq = freq_spectrum * 1.0 / _freq_sum_
    # 1e-5 keeps log2 finite for empty bins.
    entropy = -1 * np.sum(np.log2(pr_freq+1e-5) * pr_freq)
    return entropy

def fft_energy(sequence_data):
    """Mean squared magnitude of the half spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    return np.sum(freq_spectrum ** 2) / len(freq_spectrum)

def _standardised_moment(sequence_data, order):
    """Mean of ((magnitude - mean) / std) ** order over the half spectrum; 0.0 when std is ~0."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    _fft_mean, _fft_std = np.mean(freq_spectrum), np.std(freq_spectrum)
    if _fft_std < epsn:
        return 0.0
    return np.mean(np.power((freq_spectrum - _fft_mean) / _fft_std, order))

def fft_skew(sequence_data):
    """Skewness of the half-spectrum magnitudes."""
    return _standardised_moment(sequence_data, 3)

def fft_kurt(sequence_data):
    """Kurtosis (not excess) of the half-spectrum magnitudes."""
    return _standardised_moment(sequence_data, 4)

def fft_shape_mean(sequence_data):
    """Magnitude-weighted mean bin position; 0 for an all-zero spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    shape_sum = np.sum(_spectrum_bins(freq_spectrum) * freq_spectrum)
    return 0 if _freq_sum_ < epsn else shape_sum * 1.0 / _freq_sum_

def fft_shape_std(sequence_data):
    """Magnitude-weighted standard deviation of the bin position; 0.0 for an all-zero spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    if _freq_sum_ < epsn:
        # The guard now covers the final division as well.
        return 0.0
    bins = _spectrum_bins(freq_spectrum)
    shape_mean = np.sum(bins * freq_spectrum) * 1.0 / _freq_sum_
    var = np.sum(np.power((bins - shape_mean), 2) * freq_spectrum) / _freq_sum_
    return np.sqrt(var)

def fft_shape_skew(sequence_data):
    """Magnitude-weighted third central moment of the bin position; 0.0 for an all-zero spectrum."""
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    if _freq_sum_ < epsn:
        return 0.0
    bins = _spectrum_bins(freq_spectrum)
    shape_mean = np.sum(bins * freq_spectrum) * 1.0 / _freq_sum_
    return np.sum(np.power((bins - shape_mean), 3) * freq_spectrum) / _freq_sum_

def fft_shape_kurt(sequence_data):
    """Magnitude-weighted fourth central moment of the bin position; 0.0 for an all-zero spectrum.

    NOTE(review): 3 is subtracted from every term before the sum is normalised.
    That is kept exactly as it was, but it looks unlike the usual
    "kurtosis - 3" correction — confirm the intended formula.
    """
    freq_spectrum, _freq_sum_ = _half_spectrum(sequence_data)
    if _freq_sum_ < epsn:
        return 0.0
    bins = _spectrum_bins(freq_spectrum)
    shape_mean = np.sum(bins * freq_spectrum) * 1.0 / _freq_sum_
    return np.sum(np.power((bins - shape_mean), 4) * freq_spectrum - 3) / _freq_sum_

In [None]:
# aggregation of all features
def count_time_features(data):
    """Return the 13 time-domain descriptors of ``data`` as a 1-D array.

    Output order: mean, RMS, variance, waveform index, peak index, sr, mean
    absolute value, 3rd central moment, 4th central moment, max, min,
    peak-to-peak, impulse factor.
    """
    extractors = (
        mean_fea,
        rms_fea,
        var_fea,
        waveform_index,
        peak_index,
        sr_fea,
        am_fea,
        skew_fea,
        kurt_fea,
        max_fea,
        min_fea,
        pp_fea,
        impluse_factor,
    )
    return np.array([extract(data) for extract in extractors])

def count_freq_features(data):
    """Return the 9 frequency-domain descriptors of ``data`` as a 1-D array.

    Output order: spectrum mean, variance, standard deviation, entropy, energy,
    skewness, kurtosis, shape mean, shape standard deviation.
    """
    extractors = (
        fft_mean,
        fft_var,
        fft_std,
        fft_entropy,
        fft_energy,
        fft_skew,
        fft_kurt,
        fft_shape_mean,
        fft_shape_std,
    )
    return np.array([extract(data) for extract in extractors])

def get_all_features(ecg_signal):
    """Compute time- and frequency-domain features for rows 0..11 of ``ecg_signal``.

    The input is converted with ``np.array`` and must have at least 12 rows
    along its first axis (presumably one row per ECG lead — TODO confirm).

    Returns:
        ``(time_features, freq_features)``: two arrays with one row per
        processed input row.
    """
    signal = np.array(ecg_signal)
    rows = [signal[row_idx] for row_idx in range(0, 12)]

    time_features = [count_time_features(row) for row in rows]
    freq_features = [count_freq_features(row) for row in rows]

    return np.array(time_features), np.array(freq_features)

In [None]:
def get_signal_embed(signal, model, device = torch.device("cuda")):
    """Return the embedding of a single signal as a NumPy array.

    Args:
        signal: tensor for one sample; a batch axis of size 1 is prepended
            before it is passed to the model.
        model: network exposing either ``get_embeds(x)`` or
            ``forward(x, embeds=True)`` (``ECGNet.forward`` accepts the flag).
        device: device used for the forward pass.

    Returns:
        ``np.ndarray`` holding whatever the model returns for the one-sample batch.
    """
    model.eval()
    model = model.to(device)
    batch = signal.to(device).float().unsqueeze(0)
    with torch.no_grad():
        # Only the embedding is needed, so a single forward pass is run.
        if hasattr(model, "get_embeds"):
            embeds = model.get_embeds(batch)
        else:
            embeds = model(batch, embeds=True)

    return np.array(embeds.cpu())

In [None]:
# Record names (first column) and targets (second column) of the balanced frame,
# extracted column-wise instead of with one iloc lookup per row.
x_paths = data.iloc[:, 0].tolist()
# NOTE: from here on `labels` is a plain list of targets, no longer the label
# DataFrame that was read from train_val_labels.csv.
labels = data.iloc[:, 1].tolist()
root_path = "/content/r_peaks/signals/"

In [None]:
# build the CatBoost dataset from the ECGNet embeddings plus the hand-crafted features
# Each row: 288 embedding values + 264 signal features + 1 target = 553 columns.
# Row 0 is an all-zero placeholder that the next cell drops (`drop(index=[0])`);
# it is kept so that cell keeps working unchanged.
cb_rows = [np.zeros((1, 553))]

# Run on the GPU when available instead of assuming CUDA.
embed_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for x_path, label in zip(tqdm(x_paths), labels):
  hr = torch.tensor(np.load(root_path + x_path + '.npy'))[None, :, :]
  t_f, f_f = get_all_features(hr[0])
  features = np.append(t_f, f_f)
  # NOTE(review): `models_dict` is not defined in the visible part of the notebook —
  # it must be created (e.g. by loading the trained networks) before this cell runs.
  X = get_signal_embed(hr, models_dict[0]["model"], embed_device)
  X = np.append(X, features)
  cb_rows.append(np.append(X, np.array(label)).reshape(1, -1))

# One concatenation at the end instead of re-allocating the whole array on every
# iteration; a row of the wrong width now fails here instead of being flattened silently.
cb_dataset = np.concatenate(cb_rows, axis=0)

In [None]:
cb_dataset_reshaped = np.reshape(cb_dataset, (-1, 553))
# e_*: embedding values, f_*: signal features, last column: target.
feature_columns = [f"e_{i}" for i in range(0, 288)] + [f"f_{i}" for i in range(0, 264)] + ["target"]
# A flat list yields an ordinary column Index; wrapping it in a second list made
# pandas build MultiIndex columns, so df["target"] returned a DataFrame rather than a Series.
cb_dataset_reshaped = pd.DataFrame(cb_dataset_reshaped, columns=feature_columns)

# Row 0 is the all-zero placeholder the dataset was seeded with.
cb_dataset_reshaped = cb_dataset_reshaped.drop(index=[0])