In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wfdb
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTENC
from sklearn.model_selection import StratifiedKFold


In [2]:
def collect_and_label(dataset): 
    

    df = pd.read_csv(dataset)
    alpha = df['scp_codes'].str.split("'").str[1].str[-2:]=='MI'  
    beta = df['scp_codes'].str.split("'").str[1]=='NORM'       
    df = df[alpha|beta]
    df['label'] = df['scp_codes'].str.split("'").str[1]      
      
    inst_c1 = df[df['label'] == 'NORM']
    inst_c1 = inst_c1.sample(n = 1000, random_state = 1)
    
#     inst_c2 = df[df['label'] == 'ALMI']
#     inst_c2 = inst_c2.sample(n = 164, random_state = 1)

    inst_c3 = df[df['label'] == 'ILMI']
    inst_c3 = inst_c3.sample(n = 393, random_state = 1)

    df_new = pd.concat([inst_c1, inst_c3], ignore_index = True)
    df_new = df_new.sample(frac = 1, random_state=42)
    
    return  df_new

In [3]:
def div_and_label(dataframe):  
    
    X = dataframe[['ecg_id', 'filename_hr']]   
    X = X.to_numpy()
    y = dataframe['label']
    encoder = LabelEncoder()
    encoder.fit(y)
    encoded_y = encoder.transform(y)
    
    return (X, encoded_y)


In [4]:
alpha = collect_and_label("C:/Users/Pushpam/Downloads/ptbxl_database.csv")  
gamma0, gamma1 = div_and_label(alpha)
gamma0 = gamma0[:,1]

print(gamma0.shape)
print(gamma1.shape)

(1393,)
(1393,)


In [5]:
y_data = gamma1;
print(y_data)

[1 0 1 ... 0 1 0]


In [6]:
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import neurokit2 as nk
from scipy.signal import butter, filtfilt, detrend

directory = 'D:/Internship/MIDataset/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1'

X = []
for itr in range(gamma0.shape[0]):
    record_name = str(gamma0[itr])
    
    signal, meta_val = wfdb.rdsamp(directory + '/' + record_name)
    value = signal.T
    ecg_signals = value

    X.append(detrend(ecg_signals))


In [7]:
X = np.array(X)

In [8]:
# # # windowing X in xnew
# xnew = []
# ynew = []
# patient_ids = []
# for i in range(964):
#     xnew.append(X[i,:,0:1000])
#     xnew.append(X[i,:,1000:2000])
#     xnew.append(X[i,:,2000:3000])
#     xnew.append(X[i,:,3000:4000])
#     xnew.append(X[i,:,4000:5000])
#     for j in range(5):
#         ynew.append(y_data[i])
#         patient_ids.append(i)
# patient_ids = np.array(patient_ids)


xnew = []
ynew = []
patient_ids = []
window_size = 1000  

for i in range(X.shape[0]):
    for j in range(5):  
        start = j * window_size
        end = (j + 1) * window_size
        xnew.append(X[i, :, start:end])
        ynew.append(y_data[i]) 
        patient_ids.append(i)

xnew = np.array(xnew)
ynew = np.array(ynew)
patient_ids = np.array(patient_ids)


In [9]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier, RidgeClassifierCV

In [29]:
# rocket=Rocket()
rocket=Rocket(num_kernels=1000)
rocket.fit(xnew)
xt=rocket.transform(xnew)

In [30]:
# np.save(r"xt_ILMI_1000+393_kernels1000.npy", xt)
# np.save(r"ynew_ILMI_1000+393_kernels1000.npy", ynew)
# np.save(r"patient_ids_ILMI_1000+393_kernels1000.npy",patient_ids)

In [12]:
# xt = np.load(r"xt_ALMI_800+164_kernels5000.npy")
# ynew = np.load(r"ynew_ALMI_800+164_kernels5000.npy")
# patient_id = np.load(r"patient_id_ILMI_1000+393.npy")

In [56]:
xt = np.load("xt_ILMI_1000+393_kernels2000.npy")
ynew = np.load("ynew_ILMI_1000+393_kernels2000.npy")
patient_ids = np.load("patient_ids_ILMI_1000+393_kernels2000.npy")

In [57]:
xt.shape

(6965, 4000)

In [48]:
xt = np.concatenate((xt,all_features),axis=1)
print(xt.shape)

(6965, 2576)


In [58]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import SGDClassifier
import numpy as np

gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
sum_sensitivity = 0
sum_specificity = 0
sum_f1 = 0

for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
    X_train, X_test = xt[train_idx], xt[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    classifier = SGDClassifier()
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy

    tn, fp, fn, tp = confusion_matrix(y_test_majority, y_pred_majority).ravel()

    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1 = f1_score(y_test_majority, y_pred_majority)

    sum_sensitivity += sensitivity
    sum_specificity += specificity
    sum_f1 += f1

    print(confusion_matrix(y_test_majority, y_pred_majority))
    print("Accuracy:", accuracy)
    print("Sensitivity:", sensitivity)
    print("Specificity:", specificity)
#     print("F1-Score:", f1)
#     print()

average_accuracy = sum_accuracy / 5
average_sensitivity = sum_sensitivity / 5
average_specificity = sum_specificity / 5
average_f1 = sum_f1 / 5

print("Average Accuracy:", average_accuracy)
print("Average Sensitivity:", average_sensitivity)
print("Average Specificity:", average_specificity)
print("Average F1-Score:", average_f1)


[[ 74   8]
 [  1 196]]
Accuracy: 0.967741935483871
Sensitivity: 0.9949238578680203
Specificity: 0.9024390243902439
[[ 77   1]
 [  8 193]]
Accuracy: 0.967741935483871
Sensitivity: 0.9601990049751243
Specificity: 0.9871794871794872
[[ 84   1]
 [  6 188]]
Accuracy: 0.974910394265233
Sensitivity: 0.9690721649484536
Specificity: 0.9882352941176471
[[ 67   7]
 [  2 202]]
Accuracy: 0.9676258992805755
Sensitivity: 0.9901960784313726
Specificity: 0.9054054054054054
[[ 64  10]
 [  2 202]]
Accuracy: 0.9568345323741008
Sensitivity: 0.9901960784313726
Specificity: 0.8648648648648649
Average Accuracy: 0.9669709393775303
Average Sensitivity: 0.9809174369308687
Average Specificity: 0.9296248151915296
Average F1-Score: 0.9771713231592762


In [20]:
loaded_data = np.load('matrices_data.npz')
cross_corr_loaded = loaded_data['cross_corr']
coherence_loaded = loaded_data['coherence']
pli_loaded = loaded_data['pli']
plv_loaded = loaded_data['plv']

In [22]:
all_features = []
for i in range(6965):
    all_features.append(np.concatenate((cross_corr_loaded[i].ravel(),coherence_loaded[i].ravel(),pli_loaded[i].ravel(),plv_loaded[i].ravel()),axis=0))

In [23]:
np.array(all_features).shape

(6965, 576)

In [16]:
# from sklearn.model_selection import train_test_split, cross_val_score
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.preprocessing import scale

# from sklearn.model_selection import KFold
# kf=KFold(n_splits=5,shuffle=True)
# average = 0;
# for tr_idx, test_idx in kf.split(xt):
#     X_train, X_test=xt.iloc[tr_idx,:],xt.iloc[test_idx,:]
#     y_train, y_test=ynew[tr_idx],ynew[test_idx]

#     classifier=SGDClassifier()
#     classifier.fit(X_train,y_train)

#     ypred=classifier.predict(X_test)

#     from sklearn.metrics import confusion_matrix

#     print(confusion_matrix(y_test,ypred))
#     print('accuracy:', accuracy_score(y_test,ypred))
#     average+=accuracy_score(y_test,ypred)
# print('Avearge accuracy:', average/5)

In [17]:
# np.save('xt_100%_1140+360.npy',np.array(xt))
# np.save('ynew_100%_1140+360.npy',np.array(ynew))


In [60]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GroupKFold
import numpy as np

gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
sum_sensitivity = 0
sum_specificity = 0
sum_f1 = 0

for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
    X_train, X_test = xt[train_idx], xt[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    # Use a Random Forest classifier instead of SGDClassifier
    classifier = RandomForestClassifier(n_estimators=25, random_state=42)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy

    tn, fp, fn, tp = confusion_matrix(y_test_majority, y_pred_majority).ravel()

    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1 = f1_score(y_test_majority, y_pred_majority)

    sum_sensitivity += sensitivity
    sum_specificity += specificity
    sum_f1 += f1

    print(confusion_matrix(y_test_majority, y_pred_majority))
    print("Accuracy:", accuracy)

average_accuracy = sum_accuracy / 5
average_sensitivity = sum_sensitivity / 5
average_specificity = sum_specificity / 5
average_f1 = sum_f1 / 5

print("Average Accuracy:", average_accuracy)
print("Average Sensitivity:", average_sensitivity)
print("Average Specificity:", average_specificity)
print("Average F1-Score:", average_f1)


[[ 68  14]
 [  3 194]]
Accuracy: 0.9390681003584229
[[ 73   5]
 [  3 198]]
Accuracy: 0.9713261648745519
[[ 80   5]
 [  3 191]]
Accuracy: 0.9713261648745519
[[ 65   9]
 [  4 200]]
Accuracy: 0.9532374100719424
[[ 59  15]
 [  5 199]]
Accuracy: 0.9280575539568345
Average Accuracy: 0.9526030788272607
Average Sensitivity: 0.9820529271770271
Average Specificity: 0.8764035749688548
Average F1-Score: 0.9676772006232678


In [62]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GroupKFold
import numpy as np

gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
sum_sensitivity = 0
sum_specificity = 0
sum_f1 = 0

for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
    X_train, X_test = xt[train_idx], xt[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    # Use an SVM classifier instead of RandomForestClassifier
    classifier = SVC(kernel='poly', random_state=42)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy

    tn, fp, fn, tp = confusion_matrix(y_test_majority, y_pred_majority).ravel()

    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    f1 = f1_score(y_test_majority, y_pred_majority)

    sum_sensitivity += sensitivity
    sum_specificity += specificity
    sum_f1 += f1

    print(confusion_matrix(y_test_majority, y_pred_majority))
    print("Accuracy:", accuracy)

average_accuracy = sum_accuracy / 5
average_sensitivity = sum_sensitivity / 5
average_specificity = sum_specificity / 5
average_f1 = sum_f1 / 5

print("Average Accuracy:", average_accuracy)
print("Average Sensitivity:", average_sensitivity)
print("Average Specificity:", average_specificity)



[[ 75   7]
 [  4 193]]
Accuracy: 0.9605734767025089
[[ 74   4]
 [  9 192]]
Accuracy: 0.953405017921147
[[ 83   2]
 [  5 189]]
Accuracy: 0.974910394265233
[[ 67   7]
 [  3 201]]
Accuracy: 0.9640287769784173
[[ 66   8]
 [  4 200]]
Accuracy: 0.9568345323741008
Average Accuracy: 0.9619504396482814
Average Sensitivity: 0.9749664781405223
Average Specificity: 0.9274239961184007
