<a href="https://colab.research.google.com/github/priyankachahal/student_research/blob/master/ecg_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!pip install -q https://github.com/neuropsychology/NeuroKit.py/zipball/master

  Building wheel for neurokit (setup.py) ... [?25l[?25hdone


In [0]:
import os
import sys
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
from sklearn.externals import joblib
import neurokit as nk
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.layers import Dense, LSTM, Dropout, Activation
from keras.models import Sequential
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier

In [0]:
# paths for different type of files
ECG_VALUES_FILES_PATH = '/content/drive/My Drive/Final Project/dataset/ecg-bg/data/ECG'
DATA_FRAME_FILE_PATH = '/content/drive/My Drive/Final Project/dataset/ecg-bg/ecg_reading_data_frame.csv'
META_DATA_FILE_PATH = '/content/drive/My Drive/Final Project/dataset/ecg-bg/data/meta.csv'

In [0]:
def extract_filename_from_path(path):
  return (os.path.split(path)[1])

In [0]:
def get_input_dataframe(use_data_frame_file = True):
  # read the labels from meta data and append them in the end
  meta_data_frame = pd.read_csv(META_DATA_FILE_PATH, engine='python',skiprows = 1, names=['Id',	'Date',	'Time',	'Age',	'Gender',	'Height',	'Weight',	'Hr',	'G'])
  ecg_labels_dict = {}
  for index, row in meta_data_frame.iterrows():
    if (int(row["G"]) > 140):
      ecg_labels_dict[row["Id"]] = 1
    else:
      ecg_labels_dict[row["Id"]] = 0
  # read the ecg values either from pre-calculated file or from each file by file.
  ecg_values_data_frame = pd.DataFrame()
  if use_data_frame_file:
      ecg_values_data_frame = pd.read_csv(DATA_FRAME_FILE_PATH, engine='python')
  else:
    all_files = glob.glob(ECG_VALUES_FILES_PATH + '/*.csv')
    li_df = []
    counter = 0
    for filename in all_files:
      df = pd.read_csv(filename, usecols=[1], engine='python')
      df = df.transpose();
      df.insert(0, 'fn', extract_filename_from_path(filename))
      li_df.append(df)
      counter = counter + 1
      if counter % 10 == 0:
        print(counter)
    ecg_values_data_frame = pd.concat(li_df, axis=0, ignore_index=True)
  label_serries_arr = []
  # iterate over file name column so that we can create a corresponding column for labels to be appended to ecg_values_data_frame
  for filename_col in ecg_values_data_frame[['fn']]:
    print(ecg_values_data_frame[filename_col].values.shape)
    for index, x in np.ndenumerate(ecg_values_data_frame[filename_col].values):
      key = x.replace(".csv", "")
      label_serries_arr.append(ecg_labels_dict[key])
  label_serries = pd.Series(data=label_serries_arr)
  ecg_values_data_frame['labels'] = label_serries
  return ecg_values_data_frame

In [0]:
ecg_values_data_frame = get_input_dataframe()

(2238,)


In [0]:
ecg_values_data_frame_deleted = ecg_values_data_frame.drop('fn', 1)

In [0]:
ecg_values = ecg_values_data_frame_deleted.values

In [0]:
def sample_training_dataset(input_values, percentage = 0.7):
  freqmap = np.array(np.unique(input_values[:, [input_values.shape[1] - 1]].astype(int), return_counts=True)).T
  freqmap[:,1] = freqmap[:,1].astype(float) * percentage
  # stores the required freq of each label
  labelCountDict = {};
  for label, freq in freqmap:
    labelCountDict[label] = float(freq)
  print(labelCountDict)
  train_arr, test_arr = [], []
  # go through each row of input array after removing NaN
  for row in input_values:
    label_column_index = row[input_values.shape[1] - 1]
    # if we still have required freq of label, we add to train dataset or add to test dataset
    if labelCountDict[label_column_index] > 0:
      train_arr.append(row)
      labelCountDict[label_column_index] = labelCountDict[label_column_index] - 1
    else:
      test_arr.append(row)
  return np.array(train_arr), np.array(test_arr)

In [0]:
train, test = sample_training_dataset(ecg_values, 0.8)

{0: 1655.0, 1: 135.0}


In [0]:
timestep = 1
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], timestep, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], timestep, test_X.shape[1]))
# keep labels as int
train_y = train_y.astype(str)
test_y = test_y.astype(str)

print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(1790, 1, 51678) (1790,) (448, 1, 51678) (448,)


In [0]:
model = Sequential()
model.add(LSTM(200, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(2, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['mse'])
print(model.summary())

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 200)               41503200  
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 402       
Total params: 41,503,602
Trainable params: 41,503,602
Non-trainable params: 0
_________________________________________________________________
None


In [0]:
history = model.fit(train_X, train_y, epochs=20, batch_size=400, validation_data=(test_X, test_y), verbose=2, shuffle=False)

In [0]:
results = model.predict(test_X)
final_results = np.arange(test_y.shape[0])
index = 0
for c in results:
  ind = np.argmax(c)
  final_results[index] = ind 
  index = index + 1;

In [0]:
print("Actual classes:", test_y[:30])
print("Predicted classes:", final_results[:30])

Actual classes: ['1.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0'
 '0.0' '1.0' '0.0' '0.0' '0.0' '1.0' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0'
 '0.0' '0.0' '0.0' '0.0' '0.0' '0.0']
Predicted classes: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [0]:
correct_classification = 0
total_classification = 0
for d, c in zip(final_results, test_y):
  total_classification = total_classification + 1
  print(d, c)
  if int(d) == int(c):
    correct_classification = correct_classification + 1
print(total_classification, correct_classification)

0 1.0


ValueError: ignored

In [0]:
score = (correct_classification/total_classification) * 100
print('score:', score)

score: 0.0


In [0]:
def getFiducialPoints(data_frame):
  fiducial_list = []
  for index, row in data_frame.iterrows():
    np_temp = row.dropna().to_numpy()
    # https://neurokit.readthedocs.io/en/latest/documentation.html
    processed_ecg = nk.ecg_process(np_temp,sampling_rate=1000,filter_type='FIR',filter_band='bandpass',filter_frequency=[1, 40],segmenter='hamilton',quality_model='default') 
    fiducial_list.append(processed_ecg)
    break
  return fiducial_list


In [0]:
f_list = getFiducialPoints(df)

In [0]:
f_list

[{'ECG': {'Average_Signal_Quality': 0.48124648620512533,
   'Cardiac_Cycles':                                    0          1   ...         59          60
   2020-03-14 17:58:19.751240  99.965553 -42.467348  ... -33.418881  -29.182194
   2020-03-14 17:58:19.752240  99.275064 -41.981878  ... -33.377116  -28.674684
   2020-03-14 17:58:19.753240  98.730623 -41.395626  ... -33.305808  -28.265801
   2020-03-14 17:58:19.754240  98.326059 -40.705797  ... -33.175270  -27.969688
   2020-03-14 17:58:19.755240  98.050688 -39.912908  ... -32.956119  -27.795999
   ...                               ...        ...  ...        ...         ...
   2020-03-14 17:58:20.346240 -43.858608 -27.868854  ... -29.698151  328.262517
   2020-03-14 17:58:20.347240 -43.371081 -28.864988  ... -29.525454  328.090733
   2020-03-14 17:58:20.348240 -42.790887 -29.682371  ... -29.669572  328.088803
   2020-03-14 17:58:20.349240 -42.119473 -30.317894  ... -30.154826  328.267115
   2020-03-14 17:58:20.350240 -41.364746 -30.