## Import Necessary Libraries

In [1]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote, unquote
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

## Import libraries for the model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Set path for saving model training results  
import os
os.makedirs('./result', exist_ok=True)

## Set Cuda for computation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

# Set seed
seed_val = 77
set_seed(seed_val)

cuda


## Selecting Data Columns
* Tag names are loaded in sequential order.
* The process of selecting the required tag names from the tag name list.

In [2]:
# Function to display tag names
def show_column(URL):
    
    # Load tag name data
    df = pd.read_csv(URL)
    
    # Convert to list format
    df = df.values.reshape(-1)
    
    return df.tolist()

In [3]:
## Set parameters for displaying tag names
table = 'ecg'

NAME_URL = f'http://127.0.0.1:5654/db/tql/datahub/api/v1/get-tag-names.tql?table={table}'

## Generate tag name list 
name = show_column(NAME_URL)

In [4]:
name

['mit_bih_test_0',
 'mit_bih_test_1',
 'mit_bih_test_10',
 'mit_bih_test_100',
 'mit_bih_test_101',
 'mit_bih_test_102',
 'mit_bih_test_103',
 'mit_bih_test_104',
 'mit_bih_test_105',
 'mit_bih_test_106',
 'mit_bih_test_107',
 'mit_bih_test_108',
 'mit_bih_test_109',
 'mit_bih_test_11',
 'mit_bih_test_110',
 'mit_bih_test_111',
 'mit_bih_test_112',
 'mit_bih_test_113',
 'mit_bih_test_114',
 'mit_bih_test_115',
 'mit_bih_test_116',
 'mit_bih_test_117',
 'mit_bih_test_118',
 'mit_bih_test_119',
 'mit_bih_test_12',
 'mit_bih_test_120',
 'mit_bih_test_121',
 'mit_bih_test_122',
 'mit_bih_test_123',
 'mit_bih_test_124',
 'mit_bih_test_125',
 'mit_bih_test_126',
 'mit_bih_test_127',
 'mit_bih_test_128',
 'mit_bih_test_129',
 'mit_bih_test_13',
 'mit_bih_test_130',
 'mit_bih_test_131',
 'mit_bih_test_132',
 'mit_bih_test_133',
 'mit_bih_test_134',
 'mit_bih_test_135',
 'mit_bih_test_136',
 'mit_bih_test_137',
 'mit_bih_test_138',
 'mit_bih_test_139',
 'mit_bih_test_14',
 'mit_bih_test_140',
 

## Converting TAG Name Format
* After checking all the Tag Names from the ecg dataset in the previous step, extract only the columns to be used and convert them into parameter format.
* Use tag names related to the mit_bih.

In [5]:
# Set the desired train, test tag names
tags_train = name[188:376]
tags_test = name[:188]

# Wrap each item in the list with single quotes and separate with commas
tags_train = ",".join(f"'{tag}'" for tag in tags_train)
tags_test = ",".join(f"'{tag}'" for tag in tags_test)

# Check the selected train, test tag names
print(tags_train)
print(tags_test)

'mit_bih_train_0','mit_bih_train_1','mit_bih_train_10','mit_bih_train_100','mit_bih_train_101','mit_bih_train_102','mit_bih_train_103','mit_bih_train_104','mit_bih_train_105','mit_bih_train_106','mit_bih_train_107','mit_bih_train_108','mit_bih_train_109','mit_bih_train_11','mit_bih_train_110','mit_bih_train_111','mit_bih_train_112','mit_bih_train_113','mit_bih_train_114','mit_bih_train_115','mit_bih_train_116','mit_bih_train_117','mit_bih_train_118','mit_bih_train_119','mit_bih_train_12','mit_bih_train_120','mit_bih_train_121','mit_bih_train_122','mit_bih_train_123','mit_bih_train_124','mit_bih_train_125','mit_bih_train_126','mit_bih_train_127','mit_bih_train_128','mit_bih_train_129','mit_bih_train_13','mit_bih_train_130','mit_bih_train_131','mit_bih_train_132','mit_bih_train_133','mit_bih_train_134','mit_bih_train_135','mit_bih_train_136','mit_bih_train_137','mit_bih_train_138','mit_bih_train_139','mit_bih_train_14','mit_bih_train_140','mit_bih_train_141','mit_bih_train_142','mit_bih_

## Load ECG Dataset
* Load the entire dataset upon data loading.

    * Label description:

        * N (Normal): 0
            * Normal heartbeat
            * Indicates a normal heart rhythm, reflecting regular electrical activity in the ECG.
        * S (Supraventricular ectopic beat): 1
            * Supraventricular ectopic beat
            * Abnormal heartbeats originating in the atria or atrioventricular node, representing abnormal beats that start from the upper chambers of the heart.
        * V (Ventricular ectopic beat): 2
            * Ventricular ectopic beat
            * Abnormal heartbeats originating from the ventricles, representing fast or abnormal electrical activity in the ventricles.
        * F (Fusion of ventricular and normal beat): 3
            * Fusion of ventricular and normal beat
            * Occurs when a normal heartbeat and a ventricular ectopic beat happen simultaneously, leading to a fused heartbeat appearance.
        * Q (Unknown beat): 4
            * Unknown beat
            * Represents beats that cannot be classified, typically due to insufficient information or difficulty in classifying the specific beat.

In [6]:
# Data loading parameter settings

# Set the tag table name
table = 'ecg'
# Set the train, test tag names
name_train = quote(tags_train, safe=":/")
name_test = quote(tags_test, safe=":/")
# Set the time format  
timeformat = 'default'
# Set the data start time
start_time = quote('2024-10-14 00:00:00')
# Set the data end time
end_time = quote('2024-12-29 00:03:00')

In [7]:
# Data loading function
def data_load(table, name, start_time, end_time, timeformat):
    
    # Load data  
    df = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rawdata.tql?table={table}&name={name}&start={start_time}&end={end_time}&timeformat={timeformat}')
    
    # Convert to data grouped by the time
    df = df.pivot_table(index='TIME', columns='NAME', values='VALUE', aggfunc='first').reset_index()
    
    # Set TIME column
    df['TIME'] = pd.to_datetime(df['TIME'], format='%Y-%m-%d %H:%M:%S')
    
    # Determine the label column dynamically (for train and test sets)
    label_col = [col for col in df.columns if col.endswith('_label')][0]

    # Sort column names in numerical order, excluding the 'TIME' and the dynamic label column
    df = df.reindex(['TIME'] + sorted([col for col in df.columns if col not in ['TIME', label_col] and col.split('_')[-1].isdigit()], key=lambda x: int(x.split('_')[-1])) + [label_col], axis=1)
    
    # Convert label column data to integer type
    df[label_col] = df[label_col].astype(int)

    return df

In [8]:
# Load data
train = data_load(table, name_train, start_time, end_time, timeformat)
test = data_load(table, name_test, start_time, end_time, timeformat)

In [9]:
# Split the data into train, validation, and test sets
train, valid = train_test_split(train, test_size=0.1, shuffle=False)

train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)
test = test.reset_index(drop=True)

## Data Preprocessing

* 1 MinMax Scaling
* 2 PCA

### 1. Applying MinMaxScaler

In [10]:
# Scaler Setup
scaler = MinMaxScaler()

# Apply Scaler
train_ = scaler.fit_transform(train.iloc[:,1:-1].values)
valid_ = scaler.transform(valid.iloc[:,1:-1].values)
test_ = scaler.transform(test.iloc[:,1:-1].values)

# Set DataFrames
train_scaled = pd.DataFrame(train_)
valid_scaled = pd.DataFrame(valid_)
test_scaled = pd.DataFrame(test_)

### 2. Applying PCA (Principal Component Analysis)

In [11]:
## Applying PCA
# Select principal components explaining 95% of the variance
pca = PCA(n_components=0.95)

# Apply PCA
train_scaled_ = pca.fit_transform(train_scaled)
valid_scaled_ = pca.transform(valid_scaled)
test_scaled_ = pca.transform(test_scaled)

# Set DataFrames
train_scaled_ = pd.DataFrame(train_scaled_)
valid_scaled_ = pd.DataFrame(valid_scaled_)
test_scaled_ = pd.DataFrame(test_scaled_)

# Add labels
train_scaled_['label'] = train['mit_bih_train_label'].values
valid_scaled_['label'] = valid['mit_bih_train_label'].values
test_scaled_['label'] = test['mit_bih_test_label'].values

print(train_scaled_['label'].value_counts())
print(valid_scaled_['label'].value_counts())
print(test_scaled_['label'].value_counts())

label
0    65228
4     5805
2     5184
1     1998
3      582
Name: count, dtype: int64
label
0    7242
4     626
2     604
1     225
3      59
Name: count, dtype: int64
label
0    18117
4     1608
2     1448
1      556
3      162
Name: count, dtype: int64


## Dataset & Loader Setup

In [12]:
class ECG_Dataset(Dataset):

    def __init__(self, df):
        self.freq_data = df.iloc[:,:-1]
        self.label = df.iloc[:,-1:].squeeze()

    def __len__(self):
        return len(self.freq_data)

    def __getitem__(self, index):

        input_time_data = self.freq_data.iloc[index,:]
        input_time_data = torch.Tensor(input_time_data).expand(1, input_time_data.shape[0])
        label = self.label[index]

        return input_time_data, label

In [13]:
# Set up datasets  
train_ = ECG_Dataset(train_scaled_)
valid_ = ECG_Dataset(valid_scaled_)
test_ = ECG_Dataset(test_scaled_)

# Set up data loaders
train_dataloader = DataLoader(train_, batch_size=512, shuffle=True)
valid_dataloader = DataLoader(valid_, batch_size=512, shuffle=True)
test_dataloader = DataLoader(test_, batch_size=1, shuffle=False)

In [14]:
# Verify DataLoader application and check the shape of the input data
print(list(train_dataloader)[0][0].shape)

torch.Size([512, 1, 35])


## Model Configuration
* Using ResNet1d model.

In [15]:
## ResNet 1D Model Setup
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)

        return out

class ResNet1D(nn.Module):
    def __init__(self, block, layers, num_classes=4):
        super(ResNet1D, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv1d(1, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [16]:
# Model configuration parameters
# Learning rate
learning_rate = 0.01

# Model configuration
model = ResNet1D(ResidualBlock, [2, 2, 2, 2], num_classes=5).to(device)

# Adjust weights for each class
class_weights = torch.tensor([1.0, 5.0, 1.0, 5.0, 1.0]).to(device)

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Check the model architecture
print(model)

ResNet1D(
  (conv1): Conv1d(1, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), paddi

## Model Training

* Save the model with the Best F1 Score based on the validation data during training.

In [17]:
# Initialize training loss
train_loss = []
# Initialize training accuracy
train_acc = []
# Initialize total step
total_step = len(train_dataloader)
# Set number of epochs
epoch_in = trange(100, desc='training')
# Initialize best F1 Score value
best_f1= 0

# Start model training
for epoch in epoch_in:
    model.train()
    running_loss = 0.0
    correct = 0
    total=0

    preds_ = []
    targets_ = []

    for batch_idx, train_data in enumerate(train_dataloader):

        inputs = train_data[0].to(device).float()
        labels = train_data[1].to(device).long().squeeze()

        optimizer.zero_grad()

        # Input to the model
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Set label predictions 
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==labels).item()
        total += labels.size(0)
        
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss)}, train acc: {(100 * correct / total):.4f}')
    
    # Perform validation at the end of each epoch and save the model with the best performance
    with torch.no_grad():
        model.eval()
        
        for batch_idx, valid_data in enumerate(valid_dataloader):

            inputs_v = valid_data[0].to(device).float()
            labels_v = valid_data[1].to(device).long().squeeze() 
            
            outputs_v = model(inputs_v)
            
            # Set label predictions
            _,pred_v = torch.max(outputs_v, dim=1)
            target_v = labels_v.view_as(pred_v)
            
            preds_.append(pred_v)
            targets_.append(target_v)
            
        # Combine predictions and labels collected from all batches
        preds_ = torch.cat(preds_).detach().cpu().numpy()
        targets_ = torch.cat(targets_).detach().cpu().numpy()
        
        f1score = f1_score(targets_, preds_,  average='macro')
        if best_f1 < f1score:
            best_f1 = f1score
            # Save the best model 
            with open("./result/ECG_HeartBeat_General.txt", "a") as text_file:
                print('epoch=====',epoch, file=text_file)
                print(classification_report(targets_, preds_, digits=4), file=text_file)
            torch.save(model, f'./result/ECG_HeartBeat_General.pt') 
        epoch_in.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")

training:   0%|          | 0/100 [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass



train loss: 0.5517776709098321, train acc: 88.9666

train loss: 0.41100094370640716, train acc: 94.6356

train loss: 0.3474797205233471, train acc: 95.5442

train loss: 0.3073169313274421, train acc: 96.1610

train loss: 0.28032552522498294, train acc: 96.3920

train loss: 0.25926292981565385, train acc: 96.6839

train loss: 0.2431209825967648, train acc: 96.8704

train loss: 0.22951460080281771, train acc: 97.0545

train loss: 0.21780475341643693, train acc: 97.1661

train loss: 0.20768087327286797, train acc: 97.3413

train loss: 0.19900040455831058, train acc: 97.4098

train loss: 0.19123293478364425, train acc: 97.4263

train loss: 0.1840641477380644, train acc: 97.6154

train loss: 0.1775759693852009, train acc: 97.7030

train loss: 0.17158821093329749, train acc: 97.7448

train loss: 0.16599316247239976, train acc: 97.8692

train loss: 0.16087136379421055, train acc: 97.9822

train loss: 0.15633973515892208, train acc: 98.0063

train loss: 0.15231585096802133, train acc: 97.9403

## Model Testing

In [18]:
# Load the best model
model_ = torch.load(f'./result/ECG_HeartBeat_General.pt')

In [19]:
# Model testing
preds_test = []
target_test = []
with torch.no_grad():
    model_.eval()
    for batch_idx, test_data in enumerate(test_dataloader):
        inputs_t = test_data[0].to(device).float()
        labels_t =  test_data[1].to(device).long().squeeze() 
        
        outputs_t = model_(inputs_t)
        
        _,pred_t = torch.max(outputs_t, dim=1)
        targets_t = labels_t.view_as(pred_t).to(device)

        preds_test.append(pred_t)
        target_test.append(targets_t)
        
    # Combine predictions and labels collected from all batches
    preds_test = torch.cat(preds_test).detach().cpu().numpy()
    target_test = torch.cat(target_test).detach().cpu().numpy()

  return F.conv1d(input, weight, bias, self.stride,


## Model Performance Evaluation

In [20]:
print(classification_report(target_test, preds_test))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     18117
           1       0.80      0.77      0.79       556
           2       0.95      0.92      0.94      1448
           3       0.72      0.76      0.74       162
           4       0.99      0.97      0.98      1608

    accuracy                           0.98     21891
   macro avg       0.89      0.88      0.89     21891
weighted avg       0.98      0.98      0.98     21891

