In [1]:
'''Reading Data'''
'''
Sampling Frequency = 250hz
Channels = 19
'''

'\nSampling Frequency = 250hz\nChannels = 19\n'

In [2]:
!pip install mne
from glob import glob
import os
import mne
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Collecting mne
  Downloading mne-0.24.1-py3-none-any.whl (7.4 MB)
[K     |████████████████████████████████| 7.4 MB 20.7 MB/s 
Installing collected packages: mne
Successfully installed mne-0.24.1


In [3]:
from google.colab import drive
drive.mount('/content/drive')
PATH = "/content/drive/MyDrive/dataset/"

Mounted at /content/drive


In [4]:
FILE_PATH=glob(PATH+'data/*.edf')
FILE_PATH[0]

'/content/drive/MyDrive/dataset/data/h01.edf'

In [8]:
H_PATH=[i for i in FILE_PATH if 'h' in i.split('/')[6]]
P_PATH=[i for i in FILE_PATH if 's' in i.split('/')[6]]

In [9]:
def read_data(PATH):
  data=mne.io.read_raw_edf(PATH, preload=True) #read data from file path
  data.set_eeg_reference() #by default takes average of all channels
  data.filter(l_freq=0.5,h_freq=45) # to be changed with bandpass filter?
  #segmentation
  epochs=mne.make_fixed_length_epochs(data,duration=5,overlap=1) #overlapping segments
  array=epochs.get_data() #converts mne epoch object to numpy array
  return(array)

In [10]:
%%capture 
#capture is used to prevent the data jargon from printing
healthy_epochs_array=[read_data(i) for i in H_PATH]
patient_epochs_array=[read_data(i) for i in P_PATH]

In [11]:
#creating labels 0 for healthy and 1 for patient
healthy_epoch_labels=[len(i)*[0] for i in healthy_epochs_array]
#print(healthy_epoch_labels)
patient_epoch_labels=[len(i)*[1] for i in patient_epochs_array]
#print(patient_epoch_labels)

In [12]:
data_list=healthy_epochs_array+patient_epochs_array
label_list=healthy_epoch_labels+patient_epoch_labels

In [13]:
%%capture
#splitting 
group_list=[[i]*len(j) for i,j in enumerate(data_list)] #list of 0 for element 1 list of 1 for next etc multiplied bt length
group_list

In [14]:
data_array=np.vstack(data_list) #stack arrays vertically
label_array=np.hstack(label_list)
group_array=np.hstack(group_list)

In [15]:
data_array = np.moveaxis(data_array,1,2)
data_array.shape

(7201, 1250, 19)

In [18]:
from tensorflow.keras.layers import Conv1D, BatchNormalization, LeakyReLU, MaxPool1D, \
GlobalAveragePooling1D, Dense, Dropout, AveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.backend import clear_session

In [19]:
def CNN_Model():
  clear_session()
  model = Sequential()
  model.add(Conv1D(filters=5,kernel_size=3,strides=1,input_shape=(1250,19))) #1
  model.add(BatchNormalization())
  model.add(LeakyReLU())
  model.add(MaxPool1D(pool_size=2, strides=2))#2
  model.add(Conv1D(filters=5,kernel_size=3,strides=1))#3
  model.add(LeakyReLU())
  model.add(MaxPool1D(pool_size=2,strides=2))#4
  model.add(Dropout(0.5))
  model.add(Conv1D(filters=5,kernel_size=3,strides=1))#5
  model.add(LeakyReLU())
  model.add(AveragePooling1D(pool_size=2,strides=2))#6
  model.add(Dropout(0.5))
  model.add(Conv1D(filters=5,kernel_size=3,strides=1))#7
  model.add(LeakyReLU())
  model.add(AveragePooling1D(pool_size=2,strides=2))#8
  model.add(Conv1D(filters=5,kernel_size=3,strides=1))#9
  model.add(LeakyReLU())
  model.add(GlobalAveragePooling1D())#10
  model.add(Dense(1,activation='sigmoid'))#11

  model.compile('adam',loss='binary_crossentropy',metrics=['accuracy'])
  return model

In [20]:
model = CNN_Model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 1248, 5)           290       
                                                                 
 batch_normalization (BatchN  (None, 1248, 5)          20        
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 1248, 5)           0         
                                                                 
 max_pooling1d (MaxPooling1D  (None, 624, 5)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 622, 5)            80        
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 622, 5)            0

In [21]:
from sklearn.model_selection import GroupKFold, LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler

In [22]:
group_k_fold = GroupKFold()

In [25]:
accuracy = []
for train_index, validation_index in group_k_fold.split(data_array,label_array,groups=group_array):
  train_features, train_labels = data_array[train_index], label_array[train_index]
  validation_features, validation_labels = data_array[validation_index], label_array[validation_index]
  
  scaler = StandardScaler()
  train_features = scaler.fit_transform(train_features.reshape(-1,train_features.shape[-1])).reshape(train_features.shape)
  validation_features = scaler.transform(validation_features.reshape(-1,validation_features.shape[-1])).reshape(validation_features.shape)
  model = CNN_Model()
  model.fit(train_features,train_labels,epochs=10,batch_size=100,validation_data=(validation_features,validation_labels))
  accuracy.append(model.evaluate(validation_features,validation_labels)[1])
  break

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
np.mean(accuracy)

0.6719286441802979