In [1]:
import numpy as np
import importlib 
import keras
from keras.models import Sequential, model_from_json
from keras.layers import Conv2DTranspose, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, UpSampling2D, Reshape

from keras.layers.normalization import BatchNormalization

import pickle
import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import h5py    

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [3]:
!pip install keras

Collecting keras
  Using cached https://files.pythonhosted.org/packages/54/e8/eaff7a09349ae9bd40d3ebaf028b49f5e2392c771f294910f75bb608b241/Keras-2.1.6-py2.py3-none-any.whl
Collecting pyyaml (from keras)
Collecting six>=1.9.0 (from keras)
  Using cached https://files.pythonhosted.org/packages/67/4b/141a581104b1f6397bfa78ac9d43d8ad29a7ca43ea90a2d863fe3056e86a/six-1.11.0-py2.py3-none-any.whl
Collecting scipy>=0.14 (from keras)
  Using cached https://files.pythonhosted.org/packages/2a/f3/de9c1bd16311982711209edaa8c6caa962db30ebb6a8cc6f1dcd2d3ef616/scipy-1.1.0-cp27-cp27mu-manylinux1_x86_64.whl
Collecting h5py (from keras)
  Using cached https://files.pythonhosted.org/packages/24/9e/d68bd01058e748bd5e7c3c6368d1703b4cd882b669e5d993a0237c75af5a/h5py-2.7.1-cp27-cp27mu-manylinux1_x86_64.whl
Collecting numpy>=1.9.1 (from keras)
  Using cached https://files.pythonhosted.org/packages/c0/e7/08f059a00367fd613e4f2875a16c70b6237268a1d6d166c6d36acada8301/numpy-1.14.3-cp27-cp27mu-manylinux1_x86_64.whl
In

In [3]:
def read_h5_file(file_name, scaler = None, preprocess = False):
    h5_file = h5py.File(train_eeg_dir + file_name, 'r')
    a_group_key = list(h5_file.keys())[0]
    eeg_data = np.array(h5_file[a_group_key]).T
    if preprocess:
        eeg_data = scaler.transform(eeg_data)
    return eeg_data

In [4]:
def train_scaler(scaler, train_eeg_names, log = False):
    i = 0
    for eeg_name in train_eeg_names:
        if log:
            print("{} from {}".format(i, len(train_eeg_names)))
            print("reading:{}".format(eeg_name))
        data = read_h5_file(eeg_name)
        i = i+1
        scaler.fit(data)
        if log:
            print("trained on {}".format(eeg_name))
            
def save_scaler(path,scaler):
    pickle.dump(scaler, open(path, 'wb'))
def load_scaler(path):
    scaler = pickle.load(open(path, 'rb'))
    return scaler

In [5]:
train_eeg_dir = "./data/train/"
trained_scaler_path = None

In [6]:
train_eeg_dir = "./data/train/"
all_train_eeg_names = [x for x in os.listdir(train_eeg_dir) 
                 if x[-3:] == ".h5"]
eeg_num = len(all_train_eeg_names)
print("Number of EEG overall:", eeg_num)

('Number of EEG overall:', 32)


In [7]:
if trained_scaler_path:
    scaler = load_scaler(trained_scaler_path)
else:
    scaler = StandardScaler()
    print("Params before training ", scaler.get_params())
    train_scaler(scaler, all_train_eeg_names, log = True)
    print("Params after training ", scaler.get_params())
    save_scaler("./StandardScaler.p", scaler)

('Params before training ', {'copy': True, 'with_mean': True, 'with_std': True})
0 from 32
reading:2003_ivanova_post_eeg_processed.h5
trained on 2003_ivanova_post_eeg_processed.h5
1 from 32
reading:2403_kutuzova_posteeg_processed.h5
trained on 2403_kutuzova_posteeg_processed.h5
2 from 32
reading:miloslavov_22_05_pre_eeg_processed.h5
trained on miloslavov_22_05_pre_eeg_processed.h5
3 from 32
reading:2505_shirokova_post_eeg_processed.h5
trained on 2505_shirokova_post_eeg_processed.h5
4 from 32
reading:2505_shirokova_processed.h5
trained on 2505_shirokova_processed.h5
5 from 32
reading:zavrin_15021500_eyesclosed_post_eeg_processed.h5
trained on zavrin_15021500_eyesclosed_post_eeg_processed.h5
6 from 32
reading:gorin_rest_eeg_post_31011200_processed.h5
trained on gorin_rest_eeg_post_31011200_processed.h5
7 from 32
reading:zavrin_eyes_closed_eeg_15021500_processed.h5
trained on zavrin_eyes_closed_eeg_15021500_processed.h5
8 from 32
reading:gorin_310117_rest_eeg_processed.h5
trained on gorin

In [13]:
window_size = 10
encoding_dim = 50

cnnencoder = Sequential((
    Conv2D(nb_filter=5, kernel_size=(5, 5), activation='relu', padding='valid', input_shape=(window_size, 58, 1)),
    Dropout(0.6),
    MaxPooling2D(),
    
    Flatten(),
    Dense(encoding_dim, activation='relu'),
))
cnnencoder.summary()
print(cnnencoder.output_shape)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 6, 54, 5)          130       
_________________________________________________________________
dropout_4 (Dropout)          (None, 6, 54, 5)          0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 3, 27, 5)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 405)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 50)                20300     
Total params: 20,430
Trainable params: 20,430
Non-trainable params: 0
_________________________________________________________________
(None, 50)


  """


In [14]:
cnndecoder = Sequential((
    Dense(405, activation='relu', input_shape=(encoding_dim,)),
    Reshape((-1, 27, 5)),
    UpSampling2D(),
    Conv2DTranspose(nb_filter=5, kernel_size=(5, 5), activation='relu',  padding='valid'),
    Dense(1, activation='relu')
))
cnndecoder.summary()
print(cnndecoder.output_shape)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 405)               20655     
_________________________________________________________________
reshape_3 (Reshape)          (None, 3, 27, 5)          0         
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 6, 54, 5)          0         
_________________________________________________________________
conv2d_transpose_3 (Conv2DTr (None, 10, 58, 5)         630       
_________________________________________________________________
dense_10 (Dense)             (None, 10, 58, 1)         6         
Total params: 21,291
Trainable params: 21,291
Non-trainable params: 0
_________________________________________________________________
(None, 10, 58, 1)


  """


In [15]:
from keras.models import Model
from keras.layers import Input

input_ = Input(shape=(window_size, 58, 1))

autoencoder = Model(input_, cnndecoder(cnnencoder(input_)), name="autoencoder")
autoencoder.compile(loss='mse', optimizer='adam') # .compile(optimizer='adadelta', loss='binary_crossentropy')

autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 10, 58, 1)         0         
_________________________________________________________________
sequential_6 (Sequential)    (None, 50)                20430     
_________________________________________________________________
sequential_7 (Sequential)    (None, 10, 58, 1)         21291     
Total params: 41,721
Trainable params: 41,721
Non-trainable params: 0
_________________________________________________________________


In [16]:
b1, b2, b3 = 'shuhova_08022017_rest_eeg_processed.h5', 'zavrib_post_eeg_eyesopen15021500_processed.h5', 'zavrin_15021500_eyesclosed_post_eeg_processed.h5'

In [17]:
all_train_eeg_names = np.array(all_train_eeg_names)
all_train_eeg_names = all_train_eeg_names[(all_train_eeg_names != b1) & (all_train_eeg_names != b2) & (all_train_eeg_names != b3)]

In [18]:
overall_epoch_num = 10
file_epoch_num = 1
history_path = "train_hist"

test_eeg_name = np.random.choice(all_train_eeg_names)
train_eeg_names = np.array(all_train_eeg_names)
print("test_eeg_name is ", test_eeg_name)
test_data = read_h5_file(test_eeg_name, scaler, True)

('test_eeg_name is ', 'tsoy_pre_eeg_2504_processed.h5')


In [19]:
learn_file_length = 300000

In [20]:
import threading
class threadsafe_iter:
    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()
    def __iter__(self):
        return self
    def __next__(self):
        with self.lock:
            return next(self.it)

def threadsafe_generator(f):
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g

In [21]:
batch_length = 10
def generate_batch():
    while True:
        cur_files = [[] for _ in range(len(train_eeg_names))]
        batches = [[] for _ in range(len(train_eeg_names))]
        files_count = len(train_eeg_names)
        while files_count > 0:
            file_ind = np.random.choice(np.arange(len(train_eeg_names)))
            if len(cur_files[file_ind]) == 1 and cur_files[file_ind] == -1: continue
            elif len(cur_files[file_ind]) < 1:
                raw = read_h5_file(train_eeg_names[file_ind], scaler, True)
                cur_files[file_ind] = raw
                #print(raw.shape[0] // batch_length)
                batches[file_ind] = np.arange(raw.shape[0] // batch_length)
                
            begin = np.random.choice(np.arange(len(batches[file_ind])))
            
            data = cur_files[file_ind][begin:begin+batch_length, :]
            yield data.reshape(-1, window_size, 58, 1), data.reshape(-1, window_size, 58, 1) # add noise later
            
            batches[file_ind] = np.delete(batches[file_ind], begin)
            if len(batches[file_ind]) == 0:
                cur_files[file_ind] = -1
                files_count -= 1

In [None]:
history = autoencoder.fit_generator(generate_batch(), 
                                    samples_per_epoch=30000, 
                                    verbose=1,
                                    nb_epoch=10,
                                    validation_data=(test_data.reshape(-1, window_size, 58, 1), test_data.reshape(-1, window_size, 58, 1))
                                   )

  """
  """


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [137]:
history_path = "train_hist_51.txt"

In [138]:
cnnencoder.save('CNN_encoder50.p')
autoencoder.save('CNN_autoencoder50.p')
with open(history_path, 'wb') as file:
    pickle.dump(history.history, file)

In [139]:
for epoch in range(overall_epoch_num//file_epoch_num):
    for name in train_eeg_names:
        train_data = read_h5_file(name, scaler, True)
        if len(train_data) > learn_file_length:
            train_data = train_data[:learn_file_length]
        print("epoch: {}, file: {}".format(epoch, name))
        history = autoencoder.fit(train_data.reshape(-1, window_size, 58, 1), train_data.reshape(-1, window_size, 58, 1), 
                                  verbose=1, 
                                  epochs=file_epoch_num,
                                  batch_size = 10,
                                  validation_data=(test_data.reshape(-1, window_size, 58, 1), test_data.reshape(-1, window_size, 58, 1)))
cnnencoder.save('CNN_encoder3.p')
autoencoder.save('CNN_autoencoder3.p')
with open(history_path, 'wb') as file:
    pickle.dump(history.history, file)

epoch: 0, file: 2003_ivanova_post_eeg_processed.h5
Train on 30000 samples, validate on 60495 samples
Epoch 1/1
epoch: 0, file: 2403_kutuzova_posteeg_processed.h5
Train on 30000 samples, validate on 60495 samples
Epoch 1/1

KeyboardInterrupt: 

In [None]:
test_data.reshape(-1, 60, 58, 1)

In [14]:
test_data.shape

(604950, 58)

In [None]:
'shuhova_08022017_rest_eeg_processed.h5', 'zavrib_post_eeg_eyesopen15021500_processed.h5', 