In [372]:
import EEGExtract as eeg
import glob
import numpy as np
import scipy.io

In [373]:
# https://pyod.readthedocs.io/en/latest/pyod.models.html
from pyod import models
from pyod.models import hbos,auto_encoder,lof,so_gaal,lscp,vae,abod,ocsvm,xgbod,pca

In [383]:
ls mat-files/

NM_sl.mat  S5_sl.mat  SB_sl.mat  SZ_sl.mat  TS_sl.mat
RZ_sl.mat  S6_sl.mat  SM_sl.mat  TM_sl.mat  VB_sl.mat


In [384]:
eeg_dict = scipy.io.loadmat('./mat-files/RZ_sl.mat')

In [385]:
eegData = eeg_dict['eegmat']

In [387]:
ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)

In [390]:
fs = 100

In [391]:
all_features = []
for file in glob.glob('./mat-files/*.mat'):
    features = []
    print(file)
    eeg_dict = scipy.io.loadmat(file)
    # eegData: 3D np array [chans x ms x epochs]
    eegData = eeg_dict['eegmat']
    #Shannon Entropy
    ShannonRes = eeg.shannonEntropy(eegData, bin_min=-200, bin_max=200, binWidth=2)
    features.append(ShannonRes)
    # Subband Information Quantity
    # delta (0.5–4 Hz)
    eegData_delta = eeg.filt_data(eegData, 0.5, 4, fs)
    ShannonRes_delta = eeg.shannonEntropy(eegData_delta, bin_min=-200, bin_max=200, binWidth=2)
    features.append(ShannonRes_delta)
    # theta (4–8 Hz)
    eegData_theta = eeg.filt_data(eegData, 4, 8, fs)
    ShannonRes_theta = eeg.shannonEntropy(eegData_theta, bin_min=-200, bin_max=200, binWidth=2)
    features.append(ShannonRes_theta)
    # Lyapunov Exponent
    LyapunovRes = eeg.lyapunov(eegData)
    features.append(LyapunovRes)
    # False Nearest Neighbor
    FalseNnRes = eeg.falseNearestNeighbor(eegData)
    features.append(FalseNnRes/(10*np.e**40))
    # δ band Power
    bandPwr_delta = eeg.bandPower(eegData, 0.5, 4, fs)
    features.append(bandPwr_delta/(10*np.e**40))
    # θ band Power
    bandPwr_theta = eeg.bandPower(eegData, 4, 8, fs)
    features.append(bandPwr_theta)
    # Standard Deviation
    std_res = eeg.eegStd(eegData)
    features.append(std_res)
    # Voltage < 5μ
    volt05_res = eeg.eegVoltage(eegData,voltage=5)
    features.append(volt05_res)
    # Voltage < 10μ
    volt10_res = eeg.eegVoltage(eegData,voltage=10)
    features.append(volt10_res)
    # Voltage < 20μ
    volt20_res = eeg.eegVoltage(eegData,voltage=20)
    features.append(volt20_res)
    all_features.append(features)

./mat-files/SZ_sl.mat


  volt_res = np.nanmean(eegFilt,axis=1)


./mat-files/NM_sl.mat
./mat-files/S6_sl.mat
./mat-files/S5_sl.mat
./mat-files/SM_sl.mat
./mat-files/VB_sl.mat
./mat-files/TM_sl.mat
./mat-files/SB_sl.mat
./mat-files/RZ_sl.mat
./mat-files/TS_sl.mat


In [345]:
bandPwr_delta/(10*np.e**20)

array([[3.19880616e-09, 5.53623996e-09, 5.52815181e-12, ...,
        5.18787670e-12, 3.46716102e-11, 9.33216064e-12],
       [6.35868658e-11, 1.44603367e-10, 2.00426935e-11, ...,
        1.23321321e-11, 1.40070448e-11, 1.13501920e-11],
       [1.13186686e-10, 1.72754711e-10, 1.32514677e-11, ...,
        3.80739895e-12, 2.58850732e-12, 1.65031757e-11],
       ...,
       [2.51713704e-09, 4.03991175e-09, 4.23821724e-12, ...,
        1.15672173e-11, 2.44537978e-11, 1.25002145e-11],
       [4.14073400e-08, 7.11197352e-08, 1.30387838e-10, ...,
        4.21345383e-11, 7.69354728e-11, 4.25660590e-11],
       [1.44470158e-09, 1.75749868e-09, 2.94239465e-10, ...,
        2.69309771e-11, 4.01812925e-11, 2.55520661e-11]])

In [361]:
len(all_features)

14

In [362]:
arr_fearures = np.asarray(all_features)

  return array(a, dtype, copy=False, order=order)


ValueError: could not broadcast input array from shape (65,840) into shape (65)

In [392]:
arr_fearures_all = [np.asarray(features_arr) for features_arr in all_features]

In [393]:
arr_fearures_all[2].shape

(11, 65, 840)

In [379]:
import csv
with open("features.csv", "w") as f:
    wr = csv.writer(f)
    wr.writerows(all_features)

In [380]:
from pathlib import Path

In [394]:
for ii,file in enumerate(glob.glob('./mat-files/*.mat')):
    print(Path(file).stem)
    outfile = './feature-files/'+Path(file).stem+'.npy'
    np.save(outfile, np.asarray(all_features[ii]))

SZ_sl
NM_sl
S6_sl
S5_sl
SM_sl
VB_sl
TM_sl
SB_sl
RZ_sl
TS_sl


In [395]:
for ii,file in enumerate(glob.glob('./mat-files/*.mat')):
    B = np.asarray(all_features[ii])
    #B = A[0:11,:,:]
    a,b,ntrials = np.asarray(B).shape
    new_arr = B.reshape(a*b,ntrials)
    infIdx = np.where(abs(B)==np.inf)
    B[infIdx[0],infIdx[1],infIdx[2]] = B[infIdx[0],infIdx[1]-1,infIdx[2]]
    infIdx = np.where(np.isnan(B))
    B[infIdx[0],infIdx[1],infIdx[2]] = np.nanmean(B[infIdx[0],:,infIdx[2]])
    new_arr = B.reshape(a*b,ntrials)
    # pyod expects n_samples x n_features
    new_arr_trans = np.swapaxes(new_arr,0,1)
    clf = auto_encoder.AutoEncoder(epochs=20, contamination=.15)
    clf.fit(new_arr_trans)
    outlier_labels = np.where(clf.labels_)
    outfile = './outlier-files/'+Path(file).stem+'.npy'
    np.save(outfile,outlier_labels[0])
    outfile = './outlier-mat/rjct_'+Path(file).stem+'.mat'
    scipy.io.savemat(outfile, {'reject_list':clf.labels_})

Model: "sequential_177"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1239 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1062 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1240 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1063 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1241 (Dense)           (None, 64)                45824     
_________________________________________________________________
dropout_1064 (Dropout)       (None, 64)                0         
_________________________________________________________________
dense_1242 (Dense)           (None, 32)             

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_180"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1260 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1080 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1261 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1081 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1262 (Dense)           (None, 64)                45824     
______________________________

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_182"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1274 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1092 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1275 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1093 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1276 (Dense)           (None, 64)                45824     
______________________________

Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_184"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1288 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1104 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1289 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1105 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1290 (Dense)           (None, 64)                45824     
_________________________________________

Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  if __name__ == '__main__':


Model: "sequential_186"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1302 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1116 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1303 (Dense)           (None, 715)               511940    
_________________________________________________________________
dropout_1117 (Dropout)       (None, 715)               0         
_________________________________________________________________
dense_1304 (Dense)           (None, 64)                45824     
_________________________________________________________________
dropout_1118 (Dropout)       (None, 64)                0         
_________________________________________________________________
dense_1305 (Dense)           (None, 32)             

In [275]:
ii = 1

In [306]:
A = np.asarray(all_features[ii])
B = A[0:11,:,:]
a,b,ntrials = np.asarray(B).shape
new_arr = B.reshape(a*b,ntrials)

In [307]:
infIdx = np.where(abs(B)==np.inf)

In [308]:
infIdx

(array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3]),
 array([ 6,  9, 15, 16, 17, 18, 18, 19, 19, 21, 22, 26, 26, 29, 33, 34, 35,
        36, 41, 44, 45, 46, 49, 61, 64]),
 array([283, 100, 619,  73, 363, 545, 565, 375, 603, 568,  95, 212, 577,
        524, 308, 694, 618,  21, 719, 143, 153, 541, 500, 333, 156]))

In [310]:
B[0,0,1]

3.908827143945497

In [295]:
B[infIdx[0],infIdx[1],infIdx[2]] = B[infIdx[0],infIdx[1]-1,infIdx[2]]

In [297]:
infIdx = np.where(np.isnan(B))
B[infIdx[0],infIdx[1],infIdx[2]] = np.nanmean(B[infIdx[0],:,infIdx[2]])

In [280]:
B[5,:,:]=B[5,:,:]/(10*np.e**30)
B[6,:,:]=B[6,:,:]/(10*np.e**30)

In [281]:
B.shape

(11, 65, 786)

In [291]:
new_arr = B.reshape(a*b,ntrials)

In [325]:
new_arr_trans = np.swapaxes(new_arr,0,1)

In [326]:
clf = auto_encoder.AutoEncoder(epochs=20, contamination=.15)
clf.fit(new_arr_trans)

Model: "sequential_103"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_721 (Dense)            (None, 715)               511940    
_________________________________________________________________
dropout_618 (Dropout)        (None, 715)               0         
_________________________________________________________________
dense_722 (Dense)            (None, 715)               511940    
_________________________________________________________________
dropout_619 (Dropout)        (None, 715)               0         
_________________________________________________________________
dense_723 (Dense)            (None, 64)                45824     
_________________________________________________________________
dropout_620 (Dropout)        (None, 64)                0         
_________________________________________________________________
dense_724 (Dense)            (None, 32)             

AutoEncoder(batch_size=32, contamination=0.15, dropout_rate=0.2, epochs=20,
      hidden_activation='relu', hidden_neurons=[64, 32, 32, 64],
      l2_regularizer=0.1,
      loss=<function mean_squared_error at 0x7fbd99a8aa60>,
      optimizer='adam', output_activation='sigmoid', preprocessing=True,
      random_state=None, validation_size=0.1, verbose=1)

In [327]:
idx = np.where(clf.labels_)[0]

In [336]:
idx

array([  8,  12,  53,  56,  84,  94, 100, 121, 185, 191, 201, 206, 247,
       261, 269, 271, 282, 288, 295, 298, 301, 303, 306, 308, 313, 328,
       334, 338, 352, 357, 364, 379, 381, 382, 390, 391, 392, 394, 400,
       415, 420, 422, 432, 437, 442, 443, 446, 455, 457, 460, 465, 473,
       475, 478, 481, 484, 490, 502, 504, 511, 517, 520, 524, 532, 544,
       549, 550, 551, 552, 556, 558, 559, 560, 564, 566, 570, 573, 574,
       576, 578, 593, 600, 602, 603, 604, 605, 608, 609, 611, 614, 618,
       621, 628, 629, 630, 655, 667, 668, 669, 671, 672, 678, 680, 683,
       686, 688, 689, 694])

In [337]:
tolist(clf.labels_)

NameError: name 'tolist' is not defined

In [330]:
A=np.zeros(idx.shape[0])

In [331]:
A.shape

(108,)

In [328]:
new_arr_trans.shape

(721, 715)

In [322]:
X_train, X_test, y_train, y_test = utils.data.generate_data(n_train=10, n_test=10, contamination=.1)

In [320]:
from pyod import utils

In [323]:
X_train.shape

(10, 2)