In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wfdb
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTENC
from sklearn.model_selection import StratifiedKFold


In [2]:
def collect_and_label(dataset): 
    

    df = pd.read_csv(dataset)
    alpha = df['scp_codes'].str.split("'").str[1].str[-2:]=='MI'  
    beta = df['scp_codes'].str.split("'").str[1]=='NORM'       
    df = df[alpha|beta]
    df['label'] = df['scp_codes'].str.split("'").str[1]      
      
    inst_c1 = df[df['label'] == 'NORM']
    inst_c1 = inst_c1.sample(n = 3500, random_state = 1)
    
    inst_c2 = df[df['label'] == 'IMI']
    inst_c2 = inst_c2.sample(n = 2000, random_state = 1)
    

    df_new = pd.concat([inst_c1, inst_c2], ignore_index = True)
    df_new = df_new.sample(frac = 1, random_state=42)
    
    return  df_new

In [3]:
def div_and_label(dataframe):  
    
    X = dataframe[['ecg_id', 'filename_hr']]   
    X = X.to_numpy()
    y = dataframe['label']
    encoder = LabelEncoder()
    encoder.fit(y)
    encoded_y = encoder.transform(y)
    
    return (X, encoded_y)

In [4]:
alpha = collect_and_label("C:/Users/Pushpam/Downloads/ptbxl_database.csv")  
gamma0, gamma1 = div_and_label(alpha)
gamma0 = gamma0[:,1]

print(gamma0.shape)
print(gamma1.shape)

(5500,)
(5500,)


In [5]:
print(gamma0)

['records500/06000/06288_hr' 'records500/18000/18391_hr'
 'records500/04000/04140_hr' ... 'records500/19000/19682_hr'
 'records500/18000/18181_hr' 'records500/07000/07602_hr']


In [6]:
gamma1

array([0, 1, 0, ..., 0, 0, 1])

In [7]:
from scipy import signal
import matplotlib.pyplot as plt

In [8]:
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import neurokit2 as nk
from scipy.signal import butter, filtfilt, detrend

directory = 'D:/Internship/MIDataset/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1'

X = []
for itr in range(gamma0.shape[0]):
    record_name = str(gamma0[itr])
    
    signal, meta_val = wfdb.rdsamp(directory + '/' + record_name)
    value = signal.T
    ecg_signals = value

    X.append(detrend(ecg_signals))
    

In [9]:
X = np.array(X)
X.shape

(5500, 12, 5000)

In [10]:
y_data = gamma1;
print(y_data.shape)

(5500,)


In [11]:
# windowing X in xnew
# xnew = np.zeros((5*(X.shape[0]),12,1000))
xold = X
yold = y_data
xnew = []
ynew = []
patient_ids = []
for i in range(X.shape[0]):
    xnew.append(X[i,:,0:1000])
    xnew.append(X[i,:,1000:2000])
    xnew.append(X[i,:,2000:3000])
    xnew.append(X[i,:,3000:4000])
    xnew.append(X[i,:,4000:5000])
    for j in range(5):
        ynew.append(y_data[i])
        patient_ids.append(i)
X = np.array(xnew)
y_data = np.array(ynew)

In [12]:
import numpy as np
from scipy.signal import coherence
from glob import glob
import scipy.io as sio
import scipy.signal as sig

def hilphase(y1,y2):
    sig1_hill=sig.hilbert(y1)
    sig2_hill=sig.hilbert(y2)
    pdt=(np.inner(sig1_hill,np.conj(sig2_hill))/(np.sqrt(np.inner(sig1_hill,
               np.conj(sig1_hill))*np.inner(sig2_hill,np.conj(sig2_hill)))))
    phase = np.angle(pdt)
    return phase

def hilphaselag(y1,y2):
    sig1_hill=sig.hilbert(y1)
    sig2_hill=sig.hilbert(y2)
    pdt=sum(np.sign(np.angle(sig1_hill[:])-np.angle(sig2_hill[:])))/len(sig1_hill)
    phase = abs(pdt)
    return phase

cross_corr_matrices_list = []
mae_matrices_list = []
rmse_matrices_list = []
coherence_matrices_list = []
# feature_1_matrices_list = []
# feature_2_matrices_list = []

for patient_data in X:
    cross_corr_matrix = np.zeros((12, 12))
    mae_matrix = np.zeros((12, 12))
    rmse_matrix = np.zeros((12, 12))
    coherence_matrix = np.zeros((12, 12))
    feature_1_matrix = np.zeros((12, 12))
    feature_2_matrix = np.zeros((12, 12))
    
    for i in range(12):
        for j in range(i, 12):
            lead_i = patient_data[i]
            lead_j = patient_data[j]
            
            cross_corr = np.corrcoef(lead_i, lead_j)[0, 1]
            
            mae = np.mean(np.abs(lead_i - lead_j))
            rmse = np.sqrt(np.mean((lead_i - lead_j) ** 2))
            
            f, coh = coherence(lead_i, lead_j)  
            coherence_value = np.mean(coh)  # Store the average coherence value
            
#             feature_1 = hilphase(lead_i, lead_j)
#             feature_2 = hilphaselag(lead_i, lead_j)
            
            cross_corr_matrix[i, j] = cross_corr
            cross_corr_matrix[j, i] = cross_corr
            
            mae_matrix[i, j] = mae
            mae_matrix[j, i] = mae
            rmse_matrix[i, j] = rmse
            rmse_matrix[j, i] = rmse
            
            coherence_matrix[i, j] = coherence_value
            coherence_matrix[j, i] = coherence_value
            
#             feature_1_matrix[i, j] = feature_1
#             feature_1_matrix[j, i] = feature_1
            
#             feature_2_matrix[i, j] = feature_2
#             feature_2_matrix[j, i] = feature_2            
    
    # Append the matrices to the corresponding lists
    cross_corr_matrices_list.append(cross_corr_matrix)
    
    mae_matrices_list.append(mae_matrix)
    rmse_matrices_list.append(rmse_matrix)
    
    coherence_matrices_list.append(coherence_matrix)
    
#     feature_1_matrices_list.append(feature_1_matrix)
#     feature_2_matrices_list.append(feature_2_matrix)

In [28]:
def create_K(A_i, B_i, C_i, D_i):
    top_row = np.concatenate((A_i, B_i), axis=1)
    bottom_row = np.concatenate((C_i, D_i), axis=1)
    return np.concatenate((top_row, bottom_row), axis=0)
#     return np.concatenate((A_i, D_i), axis=1)

K_matrices=[]
for i in range(X.shape[0]):
    K_i = create_K(cross_corr_matrices_list[i], feature_1_matrices_list[i], feature_2_matrices_list[i], coherence_matrices_list[i])
    K_matrices.append(K_i)

In [29]:
t = np.array(K_matrices).reshape(X.shape[0],24,24,1)
print(t.shape)

(27500, 24, 12, 1)


In [30]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import scale

In [31]:
# X_train, X_test, y_train, y_test = train_test_split(t, y_data,
#                                                     test_size=0.3,
#                                                     random_state=42)

In [32]:
# X_train.shape

In [33]:
from tensorflow.keras.layers import Flatten, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D, AvgPool2D

In [34]:
# K.clear_session()


# model = Sequential()

# model.add(Conv2D(10, (3, 3), input_shape=(24, 24, 1)))
# model.add(MaxPool2D(pool_size=(2, 2)))
# model.add(Flatten())

# model.add(Dense(128, activation='relu'))
# model.add(Dense(10, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))

# model.compile(loss='BinaryCrossentropy',
#               optimizer='rmsprop',
#               metrics=['accuracy'])

In [35]:
# model.summary()

In [36]:
# y_train[0:10]

In [37]:
# model.fit(X_train, y_train, batch_size=16,
#           epochs=20, verbose=1, validation_split=0.25)

In [38]:
# model.evaluate(X_test, y_test)

In [39]:
# predictions = model.predict(X_test)
# y_pred = np.round(predictions).astype(int).transpose()
# print(y_pred[0,0:10])

In [40]:
patient_ids = np.array(patient_ids)

In [41]:
# Group-k-fold

from sklearn.metrics import accuracy_score

from sklearn.model_selection import GroupKFold

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GroupKFold

import tensorflow as tf
class SilentHistory(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        pass

gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
for train_idx, test_idx in gkf.split(t, y_data, groups=patient_ids):
    X_train, X_test = t[train_idx], t[test_idx]
    y_train, y_test = y_data[train_idx], y_data[test_idx]

    K.clear_session()


    model = Sequential()

    model.add(Conv2D(10, (3, 3), input_shape=(24, 24, 1)))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Flatten())

    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='BinaryCrossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                    batch_size=16,
                    epochs=20,
                    verbose=0,  # Set verbose to 0 to suppress epoch logging
                    validation_split=0.25,
                    callbacks=[SilentHistory()])
    
    predictions = model.predict(X_test)
    y_pred = np.round(predictions).astype(int).transpose()
    y_pred = y_pred[0]

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy
    print(confusion_matrix(y_test_majority, y_pred_majority))
    print(accuracy)

average_accuracy = sum_accuracy / 5
print("Average Accuracy:", average_accuracy)

[[273 121]
 [ 58 648]]
0.8372727272727273
[[312 120]
 [ 77 591]]
0.8209090909090909
[[288 115]
 [ 77 620]]
0.8254545454545454
[[230 157]
 [ 43 670]]
0.8181818181818182
[[272 112]
 [ 77 639]]
0.8281818181818181
Average Accuracy: 0.826
