In [1]:
import glob, copy, pywt
import scipy
import wfdb
from sklearn import svm
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from collections import Counter
from ecgdetectors import Detectors
from read_data import ECGSample, ECGDataset
import random

# Load dataset

In [2]:
# Build the project's ECG dataset wrapper (ECGDataset is imported from read_data).
dataset = ECGDataset()

# Train / validation / test split by record: 50% / 25% / 25%

In [3]:
# Seed for the test/validation shuffle. Fixing it makes the split (and every
# number computed from it further down) reproducible under Restart & Run All.
SPLIT_SEED = 42

# Record ids used for training (half of all records).
train = [101, 106, 108, 109, 112, 114, 115, 116, 118, 119, 122,
         124, 201, 203, 205, 207, 208, 209, 215, 220, 223, 230]
# Remaining record ids, shared equally between test and validation.
test_val = [100, 103, 105, 111, 113, 117, 121, 123, 200, 202, 210,
            212, 213, 214, 219, 221, 222, 228, 231, 232, 233, 234]

# Shuffle in place with a dedicated seeded generator so the global `random`
# state is left alone, then cut the shuffled list in half.
random.Random(SPLIT_SEED).shuffle(test_val)
half = len(test_val) // 2
test, val = test_val[:half], test_val[half:]

In [5]:
train_x, train_y, test_x, test_y, val_x, val_y = dataset.get_train_test_sets(train, test, val)

# Report the shape of every split: features first, then labels.
for split in (train_x, train_y, test_x, test_y, val_x, val_y):
    print(split.shape)

# Change labels to binary, in place: 0 = normal beat, 1 = beat with abnormality.
# Label 1 is mapped to 0 first; every label that is still non-zero then becomes 1.
for labels in (train_y, test_y, val_y):
    labels[labels == 1] = 0
    labels[labels != 0] = 1

(51000, 70)
(51000,)
(26584, 70)
(26584,)
(23107, 70)
(23107,)


In [7]:
# Count how many training labels carry each value, printed in the order
# 1, 2, 3, 4, 0. The labels were already collapsed to 0/1 above, so 2-4 are
# expected to print 0. np.count_nonzero counts the boolean mask in one
# vectorised call instead of iterating the array element by element in Python.
for label in (1, 2, 3, 4, 0):
    print(np.count_nonzero(train_y == label))

5186
0
0
0
45814


# Downsample majority class

In [169]:
from sklearn.utils import resample
import pandas as pd

# One row per beat: the feature columns plus the binary label in column "y".
df_train = pd.DataFrame(train_x).assign(y=train_y)

is_normal = df_train["y"] == 0
df_majority = df_train[is_normal]
df_minority = df_train[~is_normal]

# Keep one fifth of the majority class, drawn without replacement with a fixed
# seed. This reduces the imbalance but does not equalise the two classes.
n_majority_kept = len(df_majority) // 5
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=n_majority_kept,
    random_state=123,
)

# Downsampled majority rows followed by all (untouched) minority rows.
df_downsampled = pd.concat([df_majority_downsampled, df_minority])

# Display the new class counts.
df_downsampled["y"].value_counts()

0    9162
1    5186
Name: y, dtype: int64

In [170]:
from sklearn import preprocessing


def _add_channel_axis(samples):
    """Reshape a 2-D array (rows, columns) to (rows, columns, 1)."""
    n_rows, n_cols = samples.shape[0], samples.shape[1]
    return np.reshape(samples, (n_rows, n_cols, 1), "A")


# Training inputs/labels come from the downsampled frame; "y" is the label column.
c = np.array(df_downsampled.drop(columns=["y"]))
X_train_cnn = _add_channel_axis(c)
Y_train_cnn = np.array(df_downsampled["y"])

# Validation and test features get the same trailing axis.
X_val_cnn = _add_channel_axis(val_x)
X_test_cnn = _add_channel_axis(test_x)

# Validation and test labels are used unchanged.
Y_val_cnn = val_y
Y_test_cnn = test_y

# Build 1D CNN for binary classification


In [171]:
from keras.layers import Conv1D, Dense, Flatten, Dropout,MaxPooling1D
from keras.models import Sequential
from keras.utils import to_categorical
import keras

In [172]:
# 1D CNN for binary classification: two Conv1D -> MaxPooling1D -> Dropout
# stages, then a dense layer and a single sigmoid output unit.
model = Sequential([
    Conv1D(filters=128, kernel_size=7, activation="relu", input_shape=(70, 1)),
    MaxPooling1D(2),
    Dropout(rate=0.5),
    Conv1D(filters=64, kernel_size=5, activation="relu"),
    MaxPooling1D(2),
    Dropout(rate=0.50),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid"),
])

# Binary cross-entropy pairs with the sigmoid output; accuracy and AUC are tracked.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy", keras.metrics.AUC()],
)

In [173]:
# Print the per-layer summary (layer type, output shape, parameter count).
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_12 (Conv1D)           (None, 64, 128)           1024      
_________________________________________________________________
max_pooling1d_12 (MaxPooling (None, 32, 128)           0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 32, 128)           0         
_________________________________________________________________
conv1d_13 (Conv1D)           (None, 28, 64)            41024     
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 14, 64)            0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 14, 64)            0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 896)              

In [None]:
# Train on the downsampled training set and evaluate on the validation set
# after every epoch; the returned object keeps the per-epoch metrics.
history = model.fit(
    X_train_cnn,
    Y_train_cnn,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_cnn, Y_val_cnn),
)

Train on 14348 samples, validate on 23107 samples
Epoch 1/5

In [None]:
# Dump the metrics recorded by fit(); the "loss" and "val_loss" entries are plotted next.
print(history.history)

In [None]:
# Learning curves: training vs. validation loss per epoch. Labels, a legend
# and a title let the figure be read without the surrounding code.
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="training loss")
ax.plot(history.history["val_loss"], label="validation loss")
ax.set_xlabel("epoch")
ax.set_ylabel("binary cross-entropy loss")
ax.set_title("Training vs. validation loss")
ax.legend();

In [None]:
# Model outputs for the test set (values of the final sigmoid unit, one per sample).
y_pred_probab = model.predict(X_test_cnn)

In [None]:
# Turn the model outputs into hard 0/1 predictions, overwriting them in place:
# outputs above the cut-off become 1, outputs at or below it become 0.
DECISION_THRESHOLD = 0.8
predicted_abnormal = y_pred_probab > DECISION_THRESHOLD
predicted_normal = y_pred_probab <= DECISION_THRESHOLD
y_pred_probab[predicted_abnormal] = 1
y_pred_probab[predicted_normal] = 0

In [None]:
# Number of test predictions.
len(y_pred_probab)

In [None]:
# Labels are 0/1, so the sum is the number of label-1 beats in the downsampled training set.
sum(Y_train_cnn)

In [None]:
# Number of test samples predicted as class 1 after thresholding.
sum(y_pred_probab==1)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of the thresholded test predictions
# (rows: true label, columns: predicted label).
cm = confusion_matrix(y_true=Y_test_cnn, y_pred=y_pred_probab)

In [None]:
# Display the confusion matrix.
cm

In [None]:
# Fraction of test samples whose thresholded prediction equals the true label.
accuracy_score(Y_test_cnn, y_pred_probab)

In [None]:
# NOTE(review): hard-coded numbers. 26584 matches the test-set size printed
# earlier; 3035 is presumably a count copied from a previous cell's output.
# TODO confirm and compute this ratio from variables so it cannot go stale.
3035/26584