In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
import wfdb
import pandas as pd
import sys
import torch
import os
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import ast
import scipy.stats as stats
import statsmodels
import HiguchiFractalDimension as hfd
import pywt
from statsmodels.tsa.ar_model import AutoReg
import neurokit2 as nk
import wandb
from ssqueezepy import ssq_cwt
import cv2
import tqdm


Function to compute the handcrafted features: AR coefficients, Shannon entropy, wavelet variance, Higuchi fractal dimension, mean, kurtosis, and skewness

In [3]:
def compute_features(signal, ar_order=20, n_bins=256, wavelet='db2'):
    """Build a 1-D handcrafted feature vector for a single-channel signal.

    The returned vector is the concatenation, in this order, of:

    1. the fitted autoregressive parameters (``ar_fit.params``),
    2. the Shannon entropy (in bits) of the amplitude histogram,
    3. the variance of the last detail band returned by ``pywt.wavedec``,
    4. the Higuchi fractal dimension,
    5. the mean,
    6. the kurtosis,
    7. the skewness.

    Parameters
    ----------
    signal : array-like, 1-D
        Samples of the signal to describe. (The name shadows the
        ``scipy.signal`` module inside this function only; the module is not
        used here.)
    ar_order : int, optional
        Number of lags of the autoregressive model. Defaults to 20.
    n_bins : int, optional
        Number of histogram bins used for the entropy estimate. Defaults to 256.
    wavelet : str, optional
        Wavelet name handed to ``pywt.wavedec``. Defaults to ``'db2'``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the features in the order listed above.
    """
    # Auto-regressive parameters. `ar_order` is now actually used for the lag
    # count instead of a second hard-coded 20.
    # NOTE(review): with AutoReg's default trend the parameter vector is
    # expected to contain an intercept in addition to the lag coefficients.
    ar_coeffs = AutoReg(signal, lags=ar_order).fit().params

    # Shannon entropy. The bin counts are normalised to probabilities that sum
    # to one; `density=True` would return probability *densities* (scaled by
    # the bin width), for which -sum(p * log2(p)) is not an entropy.
    counts, _ = np.histogram(signal, bins=n_bins)
    probabilities = counts[counts > 0] / counts.sum()  # drop empty bins: 0*log(0) := 0
    entropy = -np.sum(probabilities * np.log2(probabilities))

    # Wavelet variance: variance of the last coefficient array of the
    # multilevel decomposition.
    wavelet_var = np.var(pywt.wavedec(signal, wavelet)[-1])

    # Higuchi fractal dimension
    fractal_dim = hfd.hfd(signal)

    # Simple distribution statistics
    mean_val = np.mean(signal)
    kurtosis_val = stats.kurtosis(signal)
    skewness_val = stats.skew(signal)

    # Combine all features into a single vector (order is part of the contract)
    return np.concatenate([
        ar_coeffs,
        [entropy],
        [wavelet_var],
        [fractal_dim],
        [mean_val],
        [kurtosis_val],
        [skewness_val],
    ])


In [5]:
# Locations of the PTB-XL dataset root and of its metadata CSV.
# The root may be overridden through the PTBXL_DIR environment variable so the
# notebook is not tied to one machine; the original location stays the default.
# os.path.join(root, '') guarantees the trailing separator that the
# `path + filename` concatenations elsewhere in the notebook rely on.
path = os.path.join(
    os.environ.get('PTBXL_DIR', '/home/abhishek/rashad_internship/Physionet/ptb-xl-1.0.3/'),
    '',
)
# The metadata file is a CSV (despite the variable name) located in the root.
excel = path + 'ptbxl_database.csv'

In [6]:
# Number of leading rows of the metadata CSV to keep.
N_RECORDS = 2000

# Load the metadata table and keep only its first N_RECORDS rows.
df = pd.read_csv(excel).head(N_RECORDS)

AlexNet Model Definition

In [8]:
import torch
import torch.nn as nn
import torchvision.models as models

# Load the pre-trained (ImageNet) AlexNet model.
# Newer torchvision releases select weights through the `weights=` enum and
# emit a deprecation warning for the legacy `pretrained=True` flag. Releases
# without `AlexNet_Weights` only understand the legacy flag, so fall back to it.
_alexnet_weights = getattr(models, 'AlexNet_Weights', None)
if _alexnet_weights is not None:
    alexnet = models.alexnet(weights=_alexnet_weights.IMAGENET1K_V1)
else:
    alexnet = models.alexnet(pretrained=True)

# Modify the model to extract features
# We will keep all layers except the final classifier layers
class AlexNetFeatureExtractor(nn.Module):
    """Backbone truncated after its average-pooling stage.

    ``forward`` runs the backbone's ``features`` stack followed by its
    ``avgpool`` layer and flattens every dimension except the batch
    dimension; the backbone's classifier head is never applied.

    Parameters
    ----------
    backbone : nn.Module, optional
        Object exposing ``features`` and ``avgpool`` sub-modules. When
        omitted, the module-level ``alexnet`` instance is used, which is the
        behaviour of the zero-argument constructor.
    """

    def __init__(self, backbone=None):
        super().__init__()
        if backbone is None:
            # Default: reuse the pre-trained model created at module level.
            backbone = alexnet
        self.features = backbone.features
        self.avgpool = backbone.avgpool

    def forward(self, x):
        """Return the pooled feature maps flattened to shape (batch, -1)."""
        x = self.features(x)
        x = self.avgpool(x)
        return torch.flatten(x, 1)

# Instantiate the feature extractor and switch it to evaluation mode.
# The model is only used for inference, so any layer with train-time
# behaviour (e.g. dropout) must be disabled; nn.Module.eval() returns the
# module itself, so `model` is still the extractor instance.
model = AlexNetFeatureExtractor().eval()

Function to convert a 1D signal into a 2D time–frequency image (CWT scalogram) for the CNN

In [9]:

def onedim_to_twodim(data):
    """Convert a 1-D signal into a normalised 224x224 RGB scalogram tensor.

    The signal is transformed with ``ssq_cwt`` (Morlet wavelet), the magnitude
    of the CWT coefficients is rendered with the 'jet' colour map, and the
    rendered picture is resized to 224x224 and normalised with the ImageNet
    channel statistics.

    The picture is rendered into an in-memory PNG rather than a fixed
    ``temp_spectrogram.png`` on disk, so no file is left behind and
    concurrent runs cannot overwrite each other's image.

    Parameters
    ----------
    data : array-like, 1-D
        Signal samples, passed unchanged to ``ssq_cwt``.

    Returns
    -------
    torch.Tensor
        Tensor of shape (1, 3, 224, 224).

    Raises
    ------
    RuntimeError
        If the rendered PNG cannot be decoded.
    """
    import io  # local import: only needed for the in-memory PNG buffer

    # Continuous wavelet transform; only the CWT coefficients and the scales
    # are used from the five returned values.
    _, _, Wxo, scales_xo, _ = ssq_cwt(data, 'morlet')
    Wxo /= np.sqrt(scales_xo)

    # Render |CWT| on an explicit figure and always release it, even if
    # drawing or saving fails.
    fig, ax = plt.subplots()
    try:
        ax.imshow(np.abs(Wxo), aspect='auto', cmap='jet')
        ax.axis('off')  # Remove axes for a cleaner image
        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

    # Decode the PNG bytes as a 3-channel BGR image (the same flag that
    # cv2.imread uses by default), then convert to RGB.
    encoded = np.frombuffer(png_buffer.getvalue(), dtype=np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        raise RuntimeError('Failed to decode the rendered scalogram image')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Resize the image to 224x224
    resized_image = cv2.resize(image, (224, 224))

    # Convert to a CHW float tensor, normalise and add the batch dimension
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for ImageNet
    ])
    tensor_image = transform(resized_image).unsqueeze(0)  # Add batch dimension

    return tensor_image


Feature extraction from the signals using AlexNet

In [11]:
# Run every selected record through the CNN feature extractor.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
x_data = []  # one model output (batch of 1) per record
y_data = []  # binary label per record: 0 = NORM, 1 = anything else

for idx, row in df.iterrows():
    # Read channel 0 of the `filename_hr` recording as a flat float32 vector.
    # The samples are stored in `ecg` so that the `signal` module imported
    # from scipy at the top of the notebook is not overwritten.
    filename = row['filename_hr']
    ecg, _ = wfdb.rdsamp(path + filename, channels=[0])
    ecg = ecg.flatten().astype(np.float32)

    # Scalogram image -> CNN features
    im = onedim_to_twodim(ecg).to(device)
    with torch.no_grad():  # Disable gradient calculation for inference
        features = model(im)
    features = features.cpu().numpy()
    x_data.append(features)

    # Label: the SCP code with the highest value in `scp_codes` decides
    # whether the record counts as normal (0) or not (1).
    scp_code_dict = ast.literal_eval(row['scp_codes'])
    first_key = max(scp_code_dict, key=scp_code_dict.get)
    label = 0 if first_key == 'NORM' else 1
    y_data.append(label)

  w = np.imag(dWx / Wx) / (2*pi)


In [12]:
# Pack the per-record feature arrays and labels into NumPy arrays.
x_data_np, y_data_np = np.asarray(x_data), np.asarray(y_data)

In [None]:
# Collapse the singleton batch axis:
# (n_records, 1, n_features) -> (n_records, n_features).
# The record count is taken from the array itself instead of a hard-coded
# 1000, so the cell works for any number of loaded records and can be re-run
# safely on an already flattened array.
x_data_np = x_data_np.reshape(x_data_np.shape[0], -1)

Selecting the top 1000 features using the Pearson correlation coefficient (PCC)

In [None]:
# Rank the CNN features by the absolute Pearson correlation with the label
# and keep the k strongest ones.
# NOTE(review): the ranking uses the labels of every record, including the
# ones that later land in the test split, so the reported accuracy is
# optimistically biased. Ideally select features on the training split only.
x_train_df = pd.DataFrame(x_data_np)
y_train_series = pd.Series(y_data_np)

# Pearson correlation of every feature column with the label
correlations = x_train_df.corrwith(y_train_series)

# Select the top k features based on absolute correlation.
# Columns with an undefined (NaN) correlation are placed last by sort_values.
k = 1000  # Number of top features to select
top_k_features = correlations.abs().sort_values(ascending=False).head(k).index

# `top_k_features` holds column *labels*, so select by label (.loc) rather
# than by position (.iloc); the two coincide only for default integer columns.
x_train_selected = x_train_df.loc[:, top_k_features].values

Feature Extraction using Handcrafted features

In [43]:
featureset2 = []  # one handcrafted feature vector per record
y_data_2 = []     # binary label per record: 0 = NORM, 1 = anything else

for idx, row in df.iterrows():
    # Read channel 0 of the `filename_hr` recording as a flat float32 vector.
    # The samples are stored in `ecg` so that the `signal` module imported
    # from scipy at the top of the notebook is not overwritten.
    filename = row['filename_hr']
    ecg, _ = wfdb.rdsamp(path + filename, channels=[0])
    ecg = ecg.flatten().astype(np.float32)

    # Handcrafted features
    features = compute_features(ecg)
    featureset2.append(features)

    # Label: the SCP code with the highest value in `scp_codes` decides
    # whether the record counts as normal (0) or not (1).
    scp_code_dict = ast.literal_eval(row['scp_codes'])
    first_key = max(scp_code_dict, key=scp_code_dict.get)
    label = 0 if first_key == 'NORM' else 1
    y_data_2.append(label)

In [44]:
# Pack the handcrafted feature vectors and their labels into NumPy arrays.
featureset2_np, y_data_2_np = np.asarray(featureset2), np.asarray(y_data_2)

Combining handcrafted (HF) and CNN features

In [54]:
# Join the selected CNN features and the handcrafted features column-wise,
# so that each row holds both feature groups.
feature_groups = [x_train_selected, featureset2_np]
combined_array = np.concatenate(feature_groups, axis=1)

Classifier

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Wrap the combined feature matrix in a DataFrame; the labels are the
# `y_train_series` built in the feature-selection cell.
x_train_selected_df = pd.DataFrame(combined_array)

# Hold out 20% of the rows for testing (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    x_train_selected_df,
    y_train_series,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# SVM with scikit-learn's default settings.
svm = SVC().fit(X_train, y_train)

# Score the predictions on the held-out rows.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.7650
