In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wfdb
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTENC
from sklearn.model_selection import StratifiedKFold


In [2]:
def collect_and_label(dataset): 
    

    df = pd.read_csv(dataset)
    alpha = df['scp_codes'].str.split("'").str[1].str[-2:]=='MI'  
    beta = df['scp_codes'].str.split("'").str[1]=='NORM'       
    df = df[alpha|beta]
    df['label'] = df['scp_codes'].str.split("'").str[1]      
      
    inst_c1 = df[df['label'] == 'NORM']
    inst_c1 = inst_c1.sample(n = 4500, random_state = 1)
    
    inst_c2 = df[df['label'] == 'IMI']
    inst_c2 = inst_c2.sample(n = 1500, random_state = 1)
    

    df_new = pd.concat([inst_c1, inst_c2], ignore_index = True)
    df_new = df_new.sample(frac = 1, random_state=42)
    
    return  df_new

In [3]:
def div_and_label(dataframe):  
    
    X = dataframe[['ecg_id', 'filename_hr']]   
    X = X.to_numpy()
    y = dataframe['label']
    encoder = LabelEncoder()
    encoder.fit(y)
    encoded_y = encoder.transform(y)
    
    return (X, encoded_y)

In [4]:
alpha = collect_and_label("C:/Users/Pushpam/Downloads/ptbxl_database.csv")  
gamma0, gamma1 = div_and_label(alpha)
gamma0 = gamma0[:,1]

print(gamma0.shape)
print(gamma1.shape)

(6000,)
(6000,)


In [5]:
print(gamma0)

['records500/05000/05780_hr' 'records500/08000/08511_hr'
 'records500/02000/02452_hr' ... 'records500/18000/18091_hr'
 'records500/16000/16049_hr' 'records500/07000/07602_hr']


In [6]:
gamma1

array([1, 1, 1, ..., 0, 0, 1])

In [7]:
from scipy import signal
import matplotlib.pyplot as plt

In [8]:
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import neurokit2 as nk
from scipy.signal import butter, filtfilt, detrend

directory = 'D:/Internship/MIDataset/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1'

X = []
for itr in range(gamma0.shape[0]):
    record_name = str(gamma0[itr])
    
    signal, meta_val = wfdb.rdsamp(directory + '/' + record_name)
    value = signal.T
    ecg_signals = value

    X.append(detrend(ecg_signals))
    

In [9]:
X = np.array(X)
X.shape

(6000, 12, 5000)

In [10]:
y_data = gamma1;
print(y_data.shape)

(6000,)


In [11]:
np.count_nonzero(y_data[0:1000])

757

In [12]:
# windowing X in xnew

xnew = []
ynew = []
patient_ids = []
for i in range(X.shape[0]):
    xnew.append(X[i,:,0:1000])
    xnew.append(X[i,:,1000:2000])
    xnew.append(X[i,:,2000:3000])
    xnew.append(X[i,:,3000:4000])
    xnew.append(X[i,:,4000:5000])
    for j in range(5):
        ynew.append(y_data[i])
        patient_ids.append(i)

In [13]:
xnew = np.array(xnew)
ynew = np.array(ynew)
patient_ids = np.array(patient_ids)

In [14]:
xnew.shape

(30000, 12, 1000)

In [15]:
from scipy.signal import coherence
from glob import glob
import scipy.io as sio
import scipy.signal as sig

cross_corr_matrices_list = []
mae_matrices_list = []
rmse_matrices_list = []
coherence_matrices_list = []


for patient_data in xnew:
    cross_corr_matrix = np.zeros((12, 12))
    mae_matrix = np.zeros((12, 12))
    rmse_matrix = np.zeros((12, 12))
    coherence_matrix = np.zeros((12, 12))

    
    for i in range(12):
        for j in range(i, 12):
            lead_i = patient_data[i]
            lead_j = patient_data[j]
            
            cross_corr = np.corrcoef(lead_i, lead_j)[0, 1]
            
            mae = np.mean(np.abs(lead_i - lead_j))
            rmse = np.sqrt(np.mean((lead_i - lead_j) ** 2))
            
            f, coh = coherence(lead_i, lead_j)  
            coherence_value = np.mean(coh)  # Storing the average coherence value
            
            
            cross_corr_matrix[i, j] = cross_corr
            cross_corr_matrix[j, i] = cross_corr
            
            mae_matrix[i, j] = mae
            mae_matrix[j, i] = mae
            
            rmse_matrix[i, j] = rmse
            rmse_matrix[j, i] = rmse
            
            coherence_matrix[i, j] = coherence_value
            coherence_matrix[j, i] = coherence_value      
    
    
    cross_corr_matrices_list.append(cross_corr_matrix)  
    mae_matrices_list.append(mae_matrix)
    rmse_matrices_list.append(rmse_matrix)  
    coherence_matrices_list.append(coherence_matrix)
    
def create_K(A_i, B_i, C_i, D_i):
    top_row = np.concatenate((A_i, B_i), axis=1)
    bottom_row = np.concatenate((C_i, D_i), axis=1)
    return np.concatenate((top_row, bottom_row), axis=0)

K_matrices=[]
for i in range(xnew.shape[0]):
    K_i = create_K(cross_corr_matrices_list[i], mae_matrices_list[i], rmse_matrices_list[i], coherence_matrices_list[i])
    K_matrices.append(K_i)

t = np.array(K_matrices).reshape(xnew.shape[0],24,24,1)
print(t.shape)

(30000, 24, 24, 1)


In [16]:
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier, RidgeClassifierCV

In [17]:
# rocket=Rocket()
rocket=Rocket(num_kernels=500)
rocket.fit(xnew)
xt=rocket.transform(xnew)

In [18]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import scale

In [19]:
# X_train, X_test, y_train, y_test = train_test_split(xt, ynew,
#                                                     test_size=0.3,
#                                                     random_state=42)

In [20]:
# classifier=SGDClassifier()
# classifier.fit(X_train,y_train)

In [50]:
# Only Relational Features Accuracy

from tensorflow.keras.layers import Flatten, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D, AvgPool2D
# Group-k-fold

from sklearn.metrics import accuracy_score

from sklearn.model_selection import GroupKFold

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GroupKFold

import tensorflow as tf

gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
for train_idx, test_idx in gkf.split(t, ynew, groups=patient_ids):
    X_train, X_test = t[train_idx], t[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    K.clear_session()


    model = Sequential()

    model.add(Conv2D(30, (3, 3), input_shape=(24, 24, 1)))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Flatten())

    model.add(Dense(1024, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='BinaryCrossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    model.fit(X_train, y_train,
                    batch_size=16,
                    epochs=10,
                    verbose=1, 
                    validation_split=0.2)

    
    predictions = model.predict(X_test)
    
    y_pred = np.round(predictions).astype(int).transpose()
    y_pred = y_pred[0]

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy
    print(confusion_matrix(y_test_majority, y_pred_majority))
    print(accuracy)

average_accuracy = sum_accuracy / 5
print("Average Accuracy:", average_accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[236  68]
 [104 792]]
0.8566666666666667
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[133 175]
 [ 16 876]]
0.8408333333333333
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[  0 294]
 [  0 906]]
0.755
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[211 102]
 [ 33 854]]
0.8875
Epoch 1/10
Epoch 2/10
  37/1200 [..............................] - ETA: 21s - loss: 0.5631 - accuracy: 0.7551

KeyboardInterrupt: 

In [23]:
(xnew).shape

(30000, 12, 1000)

In [58]:
# Only Rocket Features Accuracy

# Group-k-fold

from sklearn.model_selection import GroupKFold

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GroupKFold
gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
    X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    classifier = SGDClassifier()
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy
    print(confusion_matrix(y_test_majority, y_pred_majority))
    print(accuracy)

average_accuracy = sum_accuracy / 5
print("Average Accuracy:", average_accuracy)

[[221  83]
 [ 26 870]]
0.9091666666666667
[[187 121]
 [  7 885]]
0.8933333333333333
[[285   9]
 [191 715]]
0.8333333333333334
[[298  15]
 [155 732]]
0.8583333333333333
[[130 151]
 [  3 916]]
0.8716666666666667
Average Accuracy: 0.8731666666666668


In [25]:
def extract_upper_triangular(matrix):
    n = len(matrix)
    upper_triangular_array = []

    for i in range(n):
        for j in range(i, n):
            upper_triangular_array.append(matrix[i][j])

    return upper_triangular_array

def concatenate_upper_triangular_vectors(matrix1, matrix2, matrix3, matrix4):
    vector1 = extract_upper_triangular(matrix1)
    vector2 = extract_upper_triangular(matrix2)
    vector3 = extract_upper_triangular(matrix3)
    vector4 = extract_upper_triangular(matrix4)

    concatenated_vector = vector1 + vector2 + vector3 + vector4
    return concatenated_vector

t2=[]
for i in range(xnew.shape[0]):
    K_i = concatenate_upper_triangular_vectors(cross_corr_matrices_list[i], mae_matrices_list[i], rmse_matrices_list[i], coherence_matrices_list[i])
    t2.append(K_i)
t2 = np.array(t2)
print(t2.shape)

(30000, 312)


In [59]:
# Rocket features + Relational features

x_all = np.concatenate((xt,t2), axis=1)

gkf = GroupKFold(n_splits=5)


sum_accuracy = 0
for train_idx, test_idx in gkf.split(x_all, ynew, groups=patient_ids):
    X_train, X_test = x_all[train_idx], x_all[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]

    classifier = SGDClassifier()
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy
    print(confusion_matrix(y_test_majority, y_pred_majority))
    print(accuracy)

average_accuracy = sum_accuracy / 5
print("Average Accuracy:", average_accuracy)

[[183 121]
 [  6 890]]
0.8941666666666667
[[264  44]
 [ 61 831]]
0.9125
[[289   5]
 [218 688]]
0.8141666666666667
[[227  86]
 [ 12 875]]
0.9183333333333333
[[247  34]
 [ 76 843]]
0.9083333333333333
Average Accuracy: 0.8895000000000002


In [None]:
x_all.shape

In [None]:
# from sklearn.svm import SVC
# from sklearn.model_selection import GroupKFold
# from sklearn.metrics import confusion_matrix, accuracy_score

# gkf = GroupKFold(n_splits=5)

# sum_accuracy = 0
# for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
#     X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
#     y_train, y_test = ynew[train_idx], ynew[test_idx]

#     # Use SVC instead of SGDClassifier
#     classifier = SVC(kernel='linear', C=1.0)
#     classifier.fit(X_train, y_train)

#     y_pred = classifier.predict(X_test)

#     y_pred_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_pred_majority.append(majority_vote)

#     y_test_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_test_majority.append(majority_vote)

#     accuracy = accuracy_score(y_test_majority, y_pred_majority)
#     sum_accuracy += accuracy
#     print(confusion_matrix(y_test_majority, y_pred_majority))
#     print(accuracy)

# average_accuracy = sum_accuracy / 5
# print("Average Accuracy:", average_accuracy)


In [None]:
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import GroupKFold
# from sklearn.metrics import confusion_matrix, accuracy_score

# gkf = GroupKFold(n_splits=5)

# sum_accuracy = 0
# for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
#     X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
#     y_train, y_test = ynew[train_idx], ynew[test_idx]

#     # Use RandomForestClassifier instead of SGDClassifier
#     classifier = RandomForestClassifier(n_estimators=150, random_state=42)
#     classifier.fit(X_train, y_train)

#     y_pred = classifier.predict(X_test)

#     y_pred_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_pred_majority.append(majority_vote)

#     y_test_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_test_majority.append(majority_vote)

#     accuracy = accuracy_score(y_test_majority, y_pred_majority)
#     sum_accuracy += accuracy
#     print(confusion_matrix(y_test_majority, y_pred_majority))
#     print(accuracy)

# average_accuracy = sum_accuracy / 5
# print("Average Accuracy:", average_accuracy)


In [None]:
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.model_selection import GroupKFold
# from sklearn.metrics import confusion_matrix, accuracy_score

# gkf = GroupKFold(n_splits=5)

# sum_accuracy = 0
# for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
#     X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
#     y_train, y_test = ynew[train_idx], ynew[test_idx]

#     # Use KNN classifier instead of SGDClassifier
#     classifier = KNeighborsClassifier(n_neighbors=301)  # You can set the number of neighbors here
#     classifier.fit(X_train, y_train)

#     y_pred = classifier.predict(X_test)

#     y_pred_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_pred_majority.append(majority_vote)

#     y_test_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_test_majority.append(majority_vote)

#     accuracy = accuracy_score(y_test_majority, y_pred_majority)
#     sum_accuracy += accuracy
#     print(confusion_matrix(y_test_majority, y_pred_majority))
#     print(accuracy)

# average_accuracy = sum_accuracy / 5
# print("Average Accuracy:", average_accuracy)


In [None]:
np.count_nonzero(ynew[:])

In [73]:
# # # Using DNN for Rocket Features

# X_train, X_test, y_train, y_test = train_test_split(xt, ynew, test_size=0.3, random_state=42)

# K.clear_session()

# model = Sequential()

# # model.add(Conv2D(30, (3, 3)))
# # model.add(MaxPool2D(pool_size=(2, 2)))
# # model.add(Flatten())

# model.add(Dense(256, activation='relu')) 
# model.add(Dense(128, activation='relu')) 
# model.add(Dense(1, activation='sigmoid'))

# model.compile(loss='BinaryCrossentropy', optimizer='rmsprop', metrics=['accuracy'])

# model.fit(X_train, y_train, batch_size=20, epochs=20, verbose=1, 
#           validation_split=0.25)

# model.evaluate(X_test, y_test)







gkf = GroupKFold(n_splits=5)

sum_accuracy = 0
for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
    X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
    y_train, y_test = ynew[train_idx], ynew[test_idx]
    
    K.clear_session()

    model = Sequential()

    # model.add(Conv2D(30, (3, 3)))
    # model.add(MaxPool2D(pool_size=(2, 2)))
    # model.add(Flatten())

    model.add(Dense(256, activation='relu')) 
    model.add(Dense(128, activation='relu')) 
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='BinaryCrossentropy', optimizer='rmsprop', metrics=['accuracy'])

    model.fit(X_train, y_train, batch_size=20, epochs=15, verbose=1, 
              validation_split=0.25)

#     model.evaluate(X_test, y_test)
    

    y_pred = model.predict(X_test)
    y_pred = np.round(y_pred > 0.5 ).astype(int).transpose()
    y_pred = y_pred[0]
    
    y_pred_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_pred_majority.append(majority_vote)

    y_test_majority = []
    for patient_idx in np.unique(patient_ids[test_idx]):
        segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
        majority_vote = np.bincount(segment_predictions).argmax()
        y_test_majority.append(majority_vote)

    accuracy = accuracy_score(y_test_majority, y_pred_majority)
    sum_accuracy += accuracy
    print(confusion_matrix(y_test_majority, y_pred_majority))
    print(accuracy)

average_accuracy = sum_accuracy / 5
print("Average Accuracy:", average_accuracy)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[216  88]
 [ 19 877]]
0.9108333333333334
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[286  22]
 [100 792]]
0.8983333333333333
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[219  75]
 [ 38 868]]
0.9058333333333334
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
[[252  61]
 [ 24 863]]
0.9291666666666667
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
E

In [None]:
# from tensorflow.keras.layers import Flatten, Activation
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
# import tensorflow.keras.backend as K
# from tensorflow.keras.layers import Conv2D
# from tensorflow.keras.layers import MaxPool2D, AvgPool2D


# # Group-k-fold

# from sklearn.metrics import accuracy_score

# from sklearn.model_selection import GroupKFold

# from sklearn.metrics import confusion_matrix
# from sklearn.model_selection import GroupKFold

# import tensorflow as tf

# # class SilentHistory(tf.keras.callbacks.Callback):
# #     def on_epoch_end(self, epoch, logs=None):
# #         pass

# gkf = GroupKFold(n_splits=5)

# sum_accuracy = 0
# for train_idx, test_idx in gkf.split(xt, ynew, groups=patient_ids):
#     X_train, X_test = xt.iloc[train_idx], xt.iloc[test_idx]
#     y_train, y_test = ynew[train_idx], ynew[test_idx]

#     K.clear_session()


#     model = Sequential()

# #     model.add(Conv2D(10, (3, 3), input_shape=(24, 24, 1)))
# #     model.add(MaxPool2D(pool_size=(2, 2)))
# #     model.add(Flatten())

# #     model.add(Dense(1000, activation='relu'))
#     model.add(Dense(150, activation='relu')) 
#     model.add(Dense(100, activation='relu'))
#     model.add(Dense(50, activation='relu')) 
#     model.add(Dense(1, activation='sigmoid'))

#     model.compile(loss='BinaryCrossentropy',
#                   optimizer='rmsprop',
#                   metrics=['accuracy'])

# #     y_train = np.reshape(y_train, (-1, 1))
# #     print(y_train.shape)
# #     print(X_train.shape)
    
# #     history = 
#     model.fit(X_train, y_train,
#                     batch_size=60,
#                     epochs=20,
#                     verbose=1,  # Set verbose to 0 to suppress epoch logging
#                     validation_split=0.25)
# #                     callbacks=[SilentHistory()])
    
#     predictions = model.predict(X_test)
#     y_pred = np.round(predictions>0.5).astype(int).transpose()
# #     print(y_pred)
#     y_pred = y_pred[0]
# #     print(y_pred[0,0:10])

#     y_pred_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_pred[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_pred_majority.append(majority_vote)

#     y_test_majority = []
#     for patient_idx in np.unique(patient_ids[test_idx]):
#         segment_predictions = y_test[patient_ids[test_idx] == patient_idx]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_test_majority.append(majority_vote)

#     accuracy = accuracy_score(y_test_majority, y_pred_majority)
#     sum_accuracy += accuracy
#     print(confusion_matrix(y_test_majority, y_pred_majority))
#     print(accuracy)

# average_accuracy = sum_accuracy / 5
# print("Average Accuracy:", average_accuracy)

In [None]:
# from sklearn.metrics import confusion_matrix
# from sklearn.model_selection import KFold
# kf=KFold(n_splits=5,shuffle=True)

In [None]:
# sum=0
# for tr_idx, test_idx in kf.split(xt):
#     X_train, X_test=xt.iloc[tr_idx,:],xt.iloc[test_idx,:]
#     y_train, y_test=ynew[tr_idx],ynew[test_idx]

#     classifier=SGDClassifier()
#     classifier.fit(X_train,y_train)

#     y_pred=classifier.predict(X_test)
#     y_pred_majority = []
#     for i in range(0, y_data.shape[0], 5):
#         segment_predictions = y_pred[i:i+5]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_pred_majority.append(majority_vote)
        
#     y_test_majority = []
#     for i in range(0, y_data.shape[0], 5):
#         segment_predictions = y_test[i:i+5]
#         majority_vote = np.bincount(segment_predictions).argmax()
#         y_test_majority.append(majority_vote)    
    
#     sum=sum+accuracy_score(y_test_majority,y_pred_majority)
#     print(confusion_matrix(y_test_majority,y_pred_majority))
#     print(accuracy_score(y_test_majority,y_pred_majority))
# print(sum/5)

In [None]:
# sum=0
# for tr_idx, test_idx in kf.split(xt):
#     X_train, X_test=xt.iloc[tr_idx,:],xt.iloc[test_idx,:]
#     y_train, y_test=y_data[tr_idx],y_data[test_idx]

#     classifier=SGDClassifier()
#     classifier.fit(X_train,y_train)

#     ypred=classifier.predict(X_test)
#     sum=sum+accuracy_score(y_test,ypred)
#     print(confusion_matrix(y_test,ypred))
#     print(accuracy_score(y_test,ypred))
# print(sum/5)

In [None]:
# y_pred=classifier.predict(X_test)

In [None]:
# y_pred_majority = []
# for i in range(0, y_data.shape[0], 5):
#     segment_predictions = y_pred[i:i+5]
#     majority_vote = np.bincount(segment_predictions).argmax()
#     y_pred_majority.append(majority_vote)

In [None]:
# y_test_majority = []
# for i in range(0, y_data.shape[0], 5):
#     segment_predictions = y_test[i:i+5]
#     majority_vote = np.bincount(segment_predictions).argmax()
#     y_test_majority.append(majority_vote)

In [None]:
# from sklearn.metrics import confusion_matrix

In [None]:
# accuracy = accuracy_score(y_test_majority, y_pred_majority)
# print(accuracy)

In [None]:
# print(confusion_matrix(y_test,ypred))
# print(accuracy_score(y_test,ypred))

In [None]:
xt.shape

In [52]:
feature_matrix = np.array(K_matrices);
np.save('relational_features.npy',feature_matrix)

In [53]:
loaded_array = np.load('relational_features.npy')

In [55]:
np.array_equal(feature_matrix, loaded_array)

True

In [56]:
np.save('cross_corr_matrix.npy',np.array(cross_corr_matrices_list))
np.save('mae_matrix.npy',np.array(mae_matrices_list))
np.save('rmse_matrix.npy',np.array(rmse_matrices_list))
np.save('coherence_matrix.npy',np.array(coherence_matrices_list))

In [74]:
loaded_array.shape

(30000, 24, 24)