In [None]:
'''Reading Data'''

In [None]:
!pip install mne
from glob import glob
import os
import mne
import numpy as np
import pandas
import matplotlib.pyplot as plt


Collecting mne
  Downloading mne-0.24.1-py3-none-any.whl (7.4 MB)
[K     |████████████████████████████████| 7.4 MB 4.2 MB/s 
Installing collected packages: mne
Successfully installed mne-0.24.1


In [42]:
allfile_path=glob('dataset/*.edf')
allfile_path[0]

'dataset/s05.edf'

In [43]:
healthy_path=[i for i in allfile_path if 'h' in i.split('/')[1]]
patient_path=[i for i in allfile_path if 's' in i.split('/')[1]]

In [44]:
def read_data(file_path):
  data=mne.io.read_raw_edf(file_path, preload=True) #read data from file path
  data.set_eeg_reference() #by default takes average of all channels
  data.filter(l_freq=0.5,h_freq=45) # to be changed with bandpass filter?
  #segmentation
  epochs=mne.make_fixed_length_epochs(data,duration=5,overlap=1) #overlapping segments
  array=epochs.get_data() #converts mne epoch object to numpy array
  return(array)

In [None]:
sample_data=read_data(healthy_path[0])

Extracting EDF parameters from /content/dataset/h10.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 278749  =      0.000 ...  1114.996 secs...
EEG channel type selected for re-referencing
Applying average reference.
Applying a custom ('EEG',) reference.
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1651 samples (6.604 sec)

Not setting metadata
Not setting metadata
278 matching events found
No baseline correction applied
0 projection items activa

In [59]:
sample_data.shape # dimension of data => no.of epochs, channels , length of signal


(278, 19, 1250)

In [None]:
%%capture 
#capture is used to prevent the data jargon from printing
healthy_epochs_array=[read_data(i) for i in healthy_path]
patient_epochs_array=[read_data(i) for i in patient_path]

In [60]:
#creating labels 0 for healthy and 1 for patient
healthy_epoch_labels=[len(i)*[0] for i in healthy_epochs_array]
patient_epoch_labels=[len(i)*[1] for i in patient_epochs_array]

In [61]:
data_list=healthy_epochs_array+patient_epochs_array
label_list=healthy_epoch_labels+patient_epoch_labels

In [62]:
%%capture
#splitting 
group_list=[[i]*len(j) for i,j in enumerate(data_list)] #list of 0 for element 1 list of 1 for next etc multiplied bt length
group_list

In [63]:
data_array=np.vstack(data_list) #stack arrays vertically
label_array=np.hstack(label_list)
group_array=np.hstack(group_list)

#7201 epochs of length 1250
print(data_array.shape)

(7201, 19, 1250)


In [None]:
'''Feature Extaction'''

In [64]:
from scipy import stats
def mean(x):
  return(np.mean(x,axis=-1))
def std(x):
  return(np.std(x,axis=-1)) #standard deviation
def ptp(x):
  return(np.ptp(x,axis=-1)) # peak to peak value
def var(x):
  return(np.var(x,axis=-1)) #variance
def minim(x):
  return(np.min(x,axis=-1))
def maxim(x):
  return(np.max(x,axis=-1))
def argminim(x):
  return(np.argmin(x,axis=-1))
def argmaxim(x):
  return(np.argmax(x,axis=-1))
def rms(x):
  return(np.sqrt(np.mean(x**2,axis=-1))) #root mean square
def abs_diff_signal(x):
  return(np.sum(np.abs(np.diff(x,axis=-1)),axis=-1))

def skewness(x):
  return(stats.skew(x,axis=-1)) #to measure symmetry of dataset
def kurtosis(x):
  return(stats.kurtosis(x,axis=-1)) # measure of tailedness=> how heavy dataset's tails are compared to normal distribution

def concatenate_features(x):
  return(np.concatenate((mean(x),std(x),ptp(x),var(x),minim(x),maxim(x),argminim(x),argmaxim(x),rms(x),abs_diff_signal(x),skewness(x),kurtosis(x)),axis=-1))





In [65]:
features=[]
for d in data_array:
  features.append(concatenate_features(d))

In [47]:
abs_diff_signal(d).shape

(19,)

In [66]:
features_array=np.array(features)
features_array.shape

(7201, 228)

In [67]:
#we have 228/19=12 features for classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold,GridSearchCV


In [68]:
clf = LogisticRegression()
gkf=GroupKFold(5)
pipe=Pipeline([('scalar',StandardScaler()),('clf',clf)])
param_grid={'clf__C':[0.1,0.5,0.7,1,3,5,7]}
gscv=GridSearchCV(pipe,param_grid,cv=gkf,n_jobs=12)
gscv.fit(features_array,label_array,groups=group_array)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


GridSearchCV(cv=GroupKFold(n_splits=5),
             estimator=Pipeline(steps=[('scalar', StandardScaler()),
                                       ('clf', LogisticRegression())]),
             n_jobs=12, param_grid={'clf__C': [0.1, 0.5, 0.7, 1, 3, 5, 7]})

In [69]:
gscv.best_score_

0.6749861872931688

In [None]:
'''Convolution'''