# ECG Heartbeat Classification

## Import Necessary Libraries

In [1]:
## Import necessary libraries
import pandas as pd
import numpy as np
import random 
from urllib.parse import quote, unquote
from datetime import timedelta
import joblib

## Import libraries for the model
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.notebook import trange
from sklearn.metrics import f1_score, classification_report

## Set path for saving model training results  
import os
os.makedirs('./result', exist_ok=True)

## Set Cuda for computation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

## Set random seed
def set_seed(seed_val):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

# Set seed
seed_val = 77
set_seed(seed_val)

cuda


## Selecting Data Columns
* Tag names are loaded in sequential order.
* The process of selecting the required tag names from the tag name list.

In [2]:
# Function to display tag names
def show_column(URL):
    
    # Load tag name data
    df = pd.read_csv(URL)
    
    # Convert to list format
    df = df.values.reshape(-1)
    
    return df.tolist()

In [3]:
## Set parameters for displaying tag names
table = 'ecg'

NAME_URL = f'http://127.0.0.1:5654/db/tql/datahub/api/v1/get-tag-names.tql?table={table}'

## Generate tag name list 
name = show_column(NAME_URL)

In [4]:
name

['mit_bih_test_0',
 'mit_bih_test_1',
 'mit_bih_test_10',
 'mit_bih_test_100',
 'mit_bih_test_101',
 'mit_bih_test_102',
 'mit_bih_test_103',
 'mit_bih_test_104',
 'mit_bih_test_105',
 'mit_bih_test_106',
 'mit_bih_test_107',
 'mit_bih_test_108',
 'mit_bih_test_109',
 'mit_bih_test_11',
 'mit_bih_test_110',
 'mit_bih_test_111',
 'mit_bih_test_112',
 'mit_bih_test_113',
 'mit_bih_test_114',
 'mit_bih_test_115',
 'mit_bih_test_116',
 'mit_bih_test_117',
 'mit_bih_test_118',
 'mit_bih_test_119',
 'mit_bih_test_12',
 'mit_bih_test_120',
 'mit_bih_test_121',
 'mit_bih_test_122',
 'mit_bih_test_123',
 'mit_bih_test_124',
 'mit_bih_test_125',
 'mit_bih_test_126',
 'mit_bih_test_127',
 'mit_bih_test_128',
 'mit_bih_test_129',
 'mit_bih_test_13',
 'mit_bih_test_130',
 'mit_bih_test_131',
 'mit_bih_test_132',
 'mit_bih_test_133',
 'mit_bih_test_134',
 'mit_bih_test_135',
 'mit_bih_test_136',
 'mit_bih_test_137',
 'mit_bih_test_138',
 'mit_bih_test_139',
 'mit_bih_test_14',
 'mit_bih_test_140',
 

## Converting TAG Name Format
* After checking all the Tag Names from the ecg dataset in the previous step, extract only the columns to be used and convert them into parameter format.
* Use tag names related to the mit_bih.

In [5]:
# Set the desired train, test tag names
tags_train = name[188:376]
tags_test = name[:188]

# Wrap each item in the list with single quotes and separate with commas
tags_train = ",".join(f"'{tag}'" for tag in tags_train)
tags_test = ",".join(f"'{tag}'" for tag in tags_test)

# Check the selected train, test tag names
print(tags_train)
print(tags_test)

'mit_bih_train_0','mit_bih_train_1','mit_bih_train_10','mit_bih_train_100','mit_bih_train_101','mit_bih_train_102','mit_bih_train_103','mit_bih_train_104','mit_bih_train_105','mit_bih_train_106','mit_bih_train_107','mit_bih_train_108','mit_bih_train_109','mit_bih_train_11','mit_bih_train_110','mit_bih_train_111','mit_bih_train_112','mit_bih_train_113','mit_bih_train_114','mit_bih_train_115','mit_bih_train_116','mit_bih_train_117','mit_bih_train_118','mit_bih_train_119','mit_bih_train_12','mit_bih_train_120','mit_bih_train_121','mit_bih_train_122','mit_bih_train_123','mit_bih_train_124','mit_bih_train_125','mit_bih_train_126','mit_bih_train_127','mit_bih_train_128','mit_bih_train_129','mit_bih_train_13','mit_bih_train_130','mit_bih_train_131','mit_bih_train_132','mit_bih_train_133','mit_bih_train_134','mit_bih_train_135','mit_bih_train_136','mit_bih_train_137','mit_bih_train_138','mit_bih_train_139','mit_bih_train_14','mit_bih_train_140','mit_bih_train_141','mit_bih_train_142','mit_bih_

## Load ECG Dataset
* Load the entire dataset upon data loading.

    * Label description:

        * N (Normal): 0
            * Normal heartbeat
            * Indicates a normal heart rhythm, reflecting regular electrical activity in the ECG.
        * S (Supraventricular ectopic beat): 1
            * Supraventricular ectopic beat
            * Abnormal heartbeats originating in the atria or atrioventricular node, representing abnormal beats that start from the upper chambers of the heart.
        * V (Ventricular ectopic beat): 2
            * Ventricular ectopic beat
            * Abnormal heartbeats originating from the ventricles, representing fast or abnormal electrical activity in the ventricles.
        * F (Fusion of ventricular and normal beat): 3
            * Fusion of ventricular and normal beat
            * Occurs when a normal heartbeat and a ventricular ectopic beat happen simultaneously, leading to a fused heartbeat appearance.
        * Q (Unknown beat): 4
            * Unknown beat
            * Represents beats that cannot be classified, typically due to insufficient information or difficulty in classifying the specific beat.

In [6]:
# Data loading function
def data_load(table, name, start_time, end_time, timeformat):
    
    # Load data  
    df = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rawdata.tql?table={table}&name={name}&start={start_time}&end={end_time}&timeformat={timeformat}')
    
    # Convert to data grouped by the time
    df = df.pivot_table(index='TIME', columns='NAME', values='VALUE', aggfunc='first').reset_index()
    
    # Set TIME column
    df['TIME'] = pd.to_datetime(df['TIME'], format='%Y-%m-%d %H:%M:%S')
    
    # Determine the label column dynamically (for train and test sets)
    label_col = [col for col in df.columns if col.endswith('_label')][0]

    # Sort column names in numerical order, excluding the 'TIME' and the dynamic label column
    df = df.reindex(['TIME'] + sorted([col for col in df.columns if col not in ['TIME', label_col] and col.split('_')[-1].isdigit()], key=lambda x: int(x.split('_')[-1])) + [label_col], axis=1)
    
    # Convert label column data to integer type
    df[label_col] = df[label_col].astype(int)

    return df

In [None]:
# Data time loading function
def time_data_load(table, name, start_time, end_time, timeformat):
    
    target = 'TIME'
    
    # Load the data 
    df = pd.read_csv(f"http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rawdata.tql?target={target}&table={table}&name={name}&start={start_time}&end={end_time}&timeformat={timeformat}")
    
    # Create a dummy value column for resampling
    df['value'] = 0
    
    # Perform resampling
    df['TIME'] = pd.to_datetime(df['TIME'])
    df.set_index('TIME', inplace=True)
    # Determine resampling units based on the data and perform resampling
    df = df.resample('1T').mean()
    
    # Remove missing values
    df = df.dropna()
    
    # Remove the dummy value column
    df = df.drop(['value'], axis=1)
    
    return df

In [8]:
# Time update function
# Update start and end times based on batch size
def update_time(time_df, start_time, batch_size):
    
    # Calculate how many data points need to be loaded
    time = batch_size -1 
    
    # Check the index number of the current time
    # If not found, set to the first index as there is no data for the current time
    try:
        index_now = time_df.index.get_loc(start_time)
    except KeyError:
        index_now = 0
    
    # Set the end time for the batch data based on the current time 
    end_time_ = str(time_df.index[index_now + time] + timedelta(seconds=1))
    
    # Set the index number for the next start time
    index_next = index_now + time + 1
    
    # Set the next start time
    next_start_time_ = str(time_df.index[index_next])
    
    # URL encoding
    start_time_ = quote(start_time)
    end_time_ = quote(end_time_)
    next_start_time_ = quote(next_start_time_)
    
    return start_time_, end_time_, next_start_time_, index_next

In [9]:
# Function to calculate the maximum and minimum values for selected tag names
def set_minmax_value(table, name, start_time_train, end_time_train):
    
    # URL encoding
    start = quote(start_time_train)
    end = quote(end_time_train)
    
    # Load Min, Max data
    df_ = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-scale.tql?table={table}&name={name}&start={start}&end={end}')
    
    # Set Min, Max values
    Min = df_.iloc[:,1:-1].T
    Max = df_.iloc[:,2:].T
    
    return Min, Max 

In [10]:
# Function to calculate Mean values for selected tag names
def set_mean_value(table, name, start_time_train, end_time_train, len):
    
    # URL encoding
    start = quote(start_time_train)
    end = quote(end_time_train)

    # Parameter settings
    timeunit = 'min'
    func = 'Sum' 
    timesize = 10

    # Load Sum data
    df = pd.read_csv(f'http://127.0.0.1:5654/db/tql/datahub/api/v1/select-rollup.tql?timeunit={timeunit}&timesize={timesize}&func={func}&table={table}&name={name}&start={start}&end={end}')

    # Convert to data grouped by the time
    df = df.pivot_table(index='mtime', columns='name', values='Sum(value)', aggfunc='first').reset_index()

    # Set TIME column
    df['TIME'] = pd.to_datetime(df['mtime'], format='%Y-%m-%d %H:%M:%S')

    # Create DataFrame
    df = pd.DataFrame(df.iloc[:,1:-2].sum(), columns=['sum']).reset_index()

    # Calculate Global Mean
    df['mean'] = df['sum'] / len

    # Remove the Sum(value) column
    df = df.drop('sum', axis=1)
    df = df.iloc[:, -1:].values.reshape(-1)

    return df

## Data Preprocessing

* 1 MinMax Scaling
* 2 PCA

### 1. Min-Max Scaling Setup
* Set up a Min-Max Scaler that uses the maximum and minimum values, as the entire dataset is not loaded due to the process concept.

In [11]:
# Definition of the MinMaxScaler class
class MinMaxScaler_custom:
    def __init__(self):
        self.min_ = None
        self.max_ = None

    # Set scale values based on the specified parameters
    def transform(self, X, min_values, max_values):
        X = np.array(X)
        self.min_ = np.array(min_values)
        self.max_ = np.array(max_values)
        
        if self.min_ is None or self.max_ is None:
            raise ValueError("Min and Max values are not set.")
        
        scale = (self.max_ - self.min_)
        if np.any(scale == 0):
            raise ValueError("Min and Max values are the same, resulting in a scale of 0.")
        
        return (X - self.min_) / scale
    
    # Normalize data based on calculated scale values
    def fit_transform(self, X, min_values, max_values):
        """Set parameters and then transform X"""
        return self.transform(X, min_values, max_values)

    # Inverse the normalized data back to original values
    def inverse_transform(self, X_scaled):
        """Inverse the transformation and return original values"""
        if self.min_ is None or self.max_ is None:
            raise ValueError("Min and Max values are not set.")
        
        X_scaled = np.array(X_scaled)
        scale = (self.max_ - self.min_)
        
        return X_scaled * scale + self.min_

### 2. PCA(Principal Component Analysis) Setup

In [12]:
class PCA_custom:
    def __init__(self, n_components):
        self.n_components = n_components
        self.mean_ = None
        self.components_ = None

    def fit(self, X, mean=None):
        # Calculate the mean value of the data or use the provided mean value
        if mean is None:
            self.mean_ = np.mean(X, axis=0)
        else:
            self.mean_ = mean
        # Center the data based on the mean
        X_centered = X - self.mean_
        # Calculate the covariance matrix
        cov_matrix = np.cov(X_centered, rowvar=False)
        # Calculate eigenvalues and eigenvectors
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
        # Sort the eigenvalues in descending order
        sorted_indices = np.argsort(eigenvalues)[::-1]
        self.components_ = eigenvectors[:, sorted_indices][:, :self.n_components]

    def transform(self, X):
        # Transform the data using the principal components
        X_centered = X - self.mean_
        return np.dot(X_centered, self.components_)

    def fit_transform(self, X, mean=None):
        self.fit(X, mean)
        return self.transform(X)

## Model Configuration
* Using ResNet1d model.

In [13]:
## ResNet 1D Model Setup
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)

        return out

class ResNet1D(nn.Module):
    def __init__(self, block, layers, num_classes=4):
        super(ResNet1D, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv1d(1, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [14]:
# Model configuration parameters
# Learning rate
learning_rate = 0.01

# Model configuration
model = ResNet1D(ResidualBlock, [2, 2, 2, 2], num_classes=5).to(device)

# Adjust weights for each class
class_weights = torch.tensor([1.0, 5.0, 1.0, 5.0, 1.0]).to(device)

# Configure loss function and optimizer
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Check the model architecture
print(model)

ResNet1D(
  (conv1): Conv1d(1, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResidualBlock(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), paddi

## Model Training
* Proceed by loading only the necessary batch size of data for training.
* Save the model with the highest F1 score based on the validation data during training.

In [15]:
# Model training function
def train(table, name_train, timeformat, model, batch_size, epochs, scaler, Min, Max, Mean, time_df_train, time_df_valid):
    
    # Initialize training loss & accuracy
    train_loss = []
    train_acc = []

    # Initialize best F1 Score value
    best_f1= 0

    # Start model training
    for epoch in epochs:
        model.train()
        
        running_loss = 0.0
        total_step = 0
        correct = 0
        total=0
        
        # Set initial start time
        start_time_ = str(time_df_train.index[0])
        
        # Set end time
        end_time_train = str(time_df_train.index[-1])
        
        # Use a while loop to call data
        while start_time_ < end_time_train:
            
            # Set the time for loading data based on the batch size 
            start_time_, end_time_, next_start_time_, index_next= update_time(time_df_train, start_time_, batch_size)
        
            # Load batch data 
            data = data_load(table, name_train, start_time_, end_time_, timeformat)
            
            # Apply MinMax scaler
            data_scaled = scaler.fit_transform(data.iloc[:, 1:-1].values, Min.iloc[:,:-1], Max.iloc[:,:-1])
            
            # Check if PCA model already exists (For the first iteration)
            if start_time_ == quote(str(time_df_train.index[0])):
                
                # Set the desired number of principal components
                pca = PCA_custom(n_components=35)
                
                # Fit the PCA model
                pca.fit(data_scaled, Mean)
                joblib.dump(pca, f'./result/pca.pkl')
            
            # For subsequent iterations, load the saved PCA model    
            else:  
                
                pca = joblib.load(f'./result/pca.pkl')
                
            # Apply PCA
            data_scaled = pca.transform(data_scaled)
            
            # Set DataFrames
            data_ = pd.DataFrame(data_scaled)  
            data_['label'] = data.iloc[:,-1:].values
            
            # Print if the loaded data is empty
            if len(data_) == 0:
                print("No data available.")
            
            # Input the data into the model when it accumulates to the batch size
            if len(data_) == batch_size:
            
                # Check total batch count
                total_step += 1
                
                # Convert data to numpy arrays
                input_data = np.array(data_.iloc[:, :-1])
                input_target = np.array(data_.iloc[:, -1:])
                
                # Convert data to Tensor
                input_data = torch.tensor(input_data, dtype=torch.float32).to(device).float()
                input_target = torch.tensor(input_target, dtype=torch.float32).to(device).long().squeeze()
                
                # Optimize the optimizer
                optimizer.zero_grad()
                
                # Input to the model
                outputs = model(input_data.unsqueeze(1))
        
                # Calculate loss
                loss = criterion(outputs, input_target)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                
                # Set label predictions
                _,pred = torch.max(outputs, dim=1)
                correct += torch.sum(pred==input_target).item()
                total += input_target.size(0)   
                
                # Reset batch data
                data_ = 0

            # Set the next start time   
            start_time_ = unquote(next_start_time_)
            
            # Prevent fetching beyond the last time
            if index_next + batch_size >= len(time_df_train):
                break
            
        train_acc.append(100 * correct / total)
        train_loss.append(running_loss/total_step)
        print(f'\ntrain loss: {np.mean(train_loss)}, train acc: {(100 * correct / total):.4f}')
        
        # Perform validation at the end of each epoch and save the model with the best performance
        with torch.no_grad():
            
            model.eval()
            
            preds_v = []
            targets_v = []
                
            # Set initial start time
            start_time_v = str(time_df_valid.index[0])
            
            # Set end time
            end_time_valid = str(time_df_valid.index[-1])
            
            # Use a while loop to call data 
            while start_time_v < end_time_valid:
                
                # Set the time for loading data based on the batch size
                start_time_v, end_time_v, next_start_time_v, index_next_v = update_time(time_df_valid, start_time_v, batch_size)
                
                # Load batch data 
                data_v = data_load(table, name_train, start_time_v, end_time_v, timeformat)
                
                # Apply MinMax scaler
                data_scaled_v = scaler.fit_transform(data_v.iloc[:, 1:-1].values, Min.iloc[:,:-1], Max.iloc[:,:-1])
                
                # Apply PCA
                data_scaled_v = pca.transform(data_scaled_v)
                
                # Set DataFrames
                data_ = pd.DataFrame(data_scaled_v)  
                data_['label'] = data_v.iloc[:,-1:].values
                
                # Print if the loaded data is empty
                if len(data_) == 0:
                    print("No data available.")
                
                # Input the data into the model when it accumulates to the batch size
                if len(data_) == batch_size:
                    
                    # Convert data to numpy arrays
                    input_data_v = np.array(data_.iloc[:,:-1])
                    input_target_v = np.array(data_.iloc[:, -1:])

                    # Convert data to Tensor
                    input_data_v = torch.tensor(input_data_v, dtype=torch.float32).to(device).float()
                    input_target_v = torch.tensor(input_target_v, dtype=torch.float32).to(device).long().squeeze()
                    
                    # Input to the model
                    outputs_v = model(input_data_v.unsqueeze(1))
                    
                    # Set label predictions 
                    _,pred_v = torch.max(outputs_v, dim=1)
                    target_v = input_target_v.view_as(pred_v)

                    preds_v.append(pred_v)
                    targets_v.append(target_v)
                    
                    # Reset batch data
                    data_ = 0
                
                # Set the next start time    
                start_time_v = unquote(next_start_time_v)
                
                # Prevent fetching beyond the last time
                if index_next_v + batch_size >= len(time_df_valid):
                    break
            
            # Combine predictions and labels collected from all batches
            preds_v = torch.cat(preds_v).detach().cpu().numpy()
            targets_v = torch.cat(targets_v).detach().cpu().numpy()
            
            f1score = f1_score(targets_v, preds_v,  average='macro')
            if best_f1 < f1score:
                best_f1 = f1score
                # Save the best model 
                with open("./result/ECG_HeartBeat_New_Batch.txt", "a") as text_file:
                    print('epoch=====',epoch, file=text_file)
                    print(classification_report(targets_v, preds_v, digits=4), file=text_file)
                torch.save(model, f'./result/ECG_HeartBeat_New_Batch.pt') 
            epochs.set_postfix_str(f"epoch = {epoch},  f1_score = {f1score}, best_f1 = {best_f1}")
            
    return model, scaler, pca

In [16]:
########################################### Training Parameter Settings ################################################
# Set tag table name
table = 'ecg'
# Set tag name
name_train = quote(tags_train, safe=":/")
# Set time format
timeformat = 'default'
# Set the start time for the train data
start_time_train = '2024-10-14 00:00:00'
# Set the end time for the train data
end_time_train = '2024-12-07 17:16:00'
# Set train batch size
batch_size = 1024
# Set number of epochs
epochs = trange(100, desc='training')
# Set Min, Max value 
Min, Max = set_minmax_value(table, name_train, start_time_train, end_time_train)
# Set scalers
scaler = MinMaxScaler_custom()
# Load training time list 
time_df_train = time_data_load(table, name_train, quote(start_time_train), quote(end_time_train), timeformat)
# Set Mean
Mean = set_mean_value(table, name_train, start_time_train, end_time_train, len(time_df_train))
########################################### validation Parameter Settings ################################################
# Set the start time for the validation data
start_time_valid = '2024-12-07 17:17:00'
# Set the end time for the validation data
end_time_valid = '2024-12-13 19:12:00'
# Load validation time list
time_df_valid = time_data_load(table, name_train, quote(start_time_valid), quote(end_time_valid), timeformat)
########################################### Proceed with training ################################################
model, scaler, pca = train(table, name_train, timeformat, model, batch_size, epochs, scaler, Min, Max, Mean, time_df_train, time_df_valid)

training:   0%|          | 0/100 [00:00<?, ?it/s]

  return F.conv1d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass



train loss: 0.5912496722058246, train acc: 88.8646

train loss: 0.4353831749605505, train acc: 94.4028

train loss: 0.3640984951665527, train acc: 95.6993

train loss: 0.3214868417401847, train acc: 96.1708

train loss: 0.29232934476121475, train acc: 96.4420

train loss: 0.27079266579331535, train acc: 96.4767

train loss: 0.252842793829347, train acc: 96.8159

train loss: 0.2379818110187587, train acc: 96.9752

train loss: 0.225365149401869, train acc: 97.1577

train loss: 0.21424199073624456, train acc: 97.2682

train loss: 0.20469525799101762, train acc: 97.3723

train loss: 0.19621081922709813, train acc: 97.4147

train loss: 0.18859040881186603, train acc: 97.6383

train loss: 0.18149913185367123, train acc: 97.6742

train loss: 0.174977035758396, train acc: 97.8117

train loss: 0.1690489673178251, train acc: 97.7950

train loss: 0.16351410534936392, train acc: 97.9556

train loss: 0.15833528624161294, train acc: 98.0636

train loss: 0.1538212116049226, train acc: 97.9801

train

## Model Testing

In [17]:
# Model testing function  
def test(table, name_test, timeformat, model, batch_size, scaler, Min, Max, pca, time_df_test):
    
            with torch.no_grad():
                
                model.eval()
                
                # Initial settings 
                preds_t = []
                targets_t = []
                    
                # Set the initial start time
                start_time_t = str(time_df_test.index[0])
                
                # Set the end time
                end_time_test = str(time_df_test.index[-1])
                
                # Use a while loop to call data  
                while start_time_t < end_time_test:
                    
                    # Set the time for loading data based on the batch size
                    start_time_t, end_time_t, next_start_time_t, index_next_t = update_time(time_df_test, start_time_t, batch_size)
                    
                    # Load batch data
                    data_t = data_load(table, name_test, start_time_t, end_time_t, timeformat)
                    
                    # Apply MinMax scaler
                    data_scaled_t = scaler.fit_transform(data_t.iloc[:, 1:-1].values, Min.iloc[:,:-1], Max.iloc[:,:-1])

                    # Apply PCA
                    data_scaled_t = pca.transform(data_scaled_t)
                    
                    # Set DataFrames
                    data_ = pd.DataFrame(data_scaled_t)  
                    data_['label'] = data_t.iloc[:,-1:].values

                    # Print if the loaded data is empty
                    if len(data_) == 0:
                        print("No data available.")
                    
                    # Input the data into the model when it accumulates to the batch size
                    if len(data_) == batch_size:
                        
                        # Convert data to numpy arrays
                        input_data_t = np.array(data_.iloc[:,:-1])
                        input_target_t = np.array(data_.iloc[:, -1:])

                        # Convert data to Tensor
                        input_data_t = torch.tensor(input_data_t, dtype=torch.float32).to(device).float()
                        input_target_t = torch.tensor(input_target_t, dtype=torch.float32).to(device).long().squeeze()
                        
                        # Input to the model
                        outputs_t = model(input_data_t.unsqueeze(1))
                        
                        # Set label predictions
                        _,pred_t = torch.max(outputs_t, dim=1)
                        target_t = input_target_t.view_as(pred_t)
        
                        preds_t.append(pred_t)
                        
                        targets_t.append(target_t)
                        
                        # Reset batch data
                        data_ = []
                        
                    # Set the next start time   
                    start_time_t = unquote(next_start_time_t)
                    
                    # Prevent fetching beyond the last time
                    if index_next_t + batch_size >= len(time_df_test):
                        break
                        
                # Combine predictions and labels collected from all batches
                preds_t = torch.cat(preds_t).detach().cpu().numpy()
                targets_t = torch.cat(targets_t).detach().cpu().numpy()
                    
            return targets_t, preds_t

In [18]:
########################################### Test Parameter Settings ################################################
# Load the best model
model_ = torch.load(f'./result/ECG_HeartBeat_New_Batch.pt') 
# Set the start time for the test data
start_time_test = '2024-12-13 19:13:00'
# Set the end time for the test data
end_time_test = '2024-12-29 00:03:00'
# Set the test batch size
batch_size_test = 1024
# Set tag name
name_test = quote(tags_test, safe=":/")
# Load the test time list
time_df_test = time_data_load(table, name_test, quote(start_time_test), quote(end_time_test), timeformat)
pca = joblib.load(f'./result/pca.pkl')
######################################## Proceed with testing #############################################
targets_t, preds_t = test(table, name_test, timeformat, model_, batch_size, scaler, Min, Max, pca, time_df_test)

## Model Performance Evaluation

In [19]:
print(classification_report(targets_t, preds_t))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     17805
           1       0.79      0.77      0.78       549
           2       0.95      0.91      0.93      1421
           3       0.77      0.75      0.76       159
           4       0.99      0.97      0.98      1570

    accuracy                           0.98     21504
   macro avg       0.90      0.88      0.89     21504
weighted avg       0.98      0.98      0.98     21504

