In [2]:
# Load the autoreload extension and enable mode 2 so that edited project
# modules are re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Record the environment this notebook was run in: Python/IPython versions (-v),
# date (-n), machine information (-m) and the versions of the listed packages (-p).
%load_ext watermark
%watermark -v -n -m -p numpy,scipy,sklearn,pandas

Sat Jan 19 2019 

CPython 3.6.6
IPython 6.5.0

numpy 1.15.1
scipy 1.1.0
sklearn 0.19.1
pandas 0.23.4

compiler   : GCC 4.8.2 20140120 (Red Hat 4.8.2-15)
system     : Linux
release    : 4.9.0-7-amd64
machine    : x86_64
processor  : 
CPU cores  : 12
interpreter: 64bit


In [4]:
%matplotlib inline
import os
import sys

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nolds
import mne

# --- Project layout ---------------------------------------------------------
# Every path below is derived from the THESIS_ROOT environment variable.
# Fail right here with a readable message instead of a TypeError deep inside
# os.path.join when the variable is missing.
PROJ_ROOT = os.getenv('THESIS_ROOT')
if not PROJ_ROOT:
    raise RuntimeError(
        'The THESIS_ROOT environment variable is not set; '
        'point it at the project root before running this notebook.')

DATA_ROOT = os.path.abspath(os.path.join(PROJ_ROOT, 'data'))
PROCESSED_ROOT = os.path.abspath(os.path.join(DATA_ROOT, 'processed'))
RAW_ROOT = os.path.abspath(os.path.join(DATA_ROOT, 'raw'))
LABELED_ROOT = os.path.abspath(os.path.join(DATA_ROOT, 'labeled'))
DURATIONS_ROOT = os.path.abspath(os.path.join(DATA_ROOT, 'durations'))
REC_ROOT = os.path.abspath(os.path.join(DATA_ROOT, 'recplots'))
print(PROJ_ROOT)
print(DATA_ROOT)
print(PROCESSED_ROOT)

# Extend sys.path *before* the project-local imports so that they can be
# resolved from <project>/src on a fresh kernel. Appending never changes how
# already-importable modules resolve, and the membership test keeps the entry
# from piling up when the cell is re-run.
SRC_ROOT = os.path.join(PROJ_ROOT, 'src')
if SRC_ROOT not in sys.path:
    sys.path.append(SRC_ROOT)

import data
from data.data_files import CHANNEL_NAMES, DataKind, files_builder
from measures import algorithms as algos

# NOTE: this rebinding replaces the CHANNEL_NAMES imported just above.
CHANNEL_NAMES = ['FP1', 'FP2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1', 'O2',
                 'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'Fz', 'Cz', 'Pz']
META_COLUMN_NAMES = ['freq', 'RESP_4W', 'RESP_FIN', 'REMISE_FIN', 'AGE', 'SEX', 'M_1',
                     'M_4', 'M_F', 'délka léčby', 'lék 1', 'lék 2', 'lék 3', 'lék 4']
META_FILE_NAME = 'DEP-POOL_Final_144.xlsx'
meta_df = pd.read_excel(os.path.join(RAW_ROOT, META_FILE_NAME), index_col='ID', names=META_COLUMN_NAMES)

# --- Sample recording ---------------------------------------------------------
raw_fif = mne.io.read_raw_fif(os.path.join(PROCESSED_ROOT, '50a.fif'))
t = pd.DataFrame(raw_fif.get_data())
# The previous DataFrame/transpose round trip only had one observable effect:
# it raised when the number of rows did not match CHANNEL_NAMES. Check that
# directly and keep the array in its original orientation.
if t.shape[0] != len(CHANNEL_NAMES):
    raise ValueError('Expected {} channels in the recording, found {}'.format(
        len(CHANNEL_NAMES), t.shape[0]))
# NOTE: `data` now refers to this array and no longer to the `data` package
# imported above; `from data.… import …` statements still work because the
# import system resolves them through sys.modules, not through this name.
data = t.values

/home/kovar/thesis_project/
/home/kovar/thesis_project/data
/home/kovar/thesis_project/data/processed
Opening raw data file /home/kovar/thesis_project/data/processed/50a.fif...
This filename (/home/kovar/thesis_project/data/processed/50a.fif) does not conform to MNE naming conventions. All raw files should end with raw.fif, raw_sss.fif, raw_tsss.fif, raw.fif.gz, raw_sss.fif.gz or raw_tsss.fif.gz
Isotrak not found
    Range : 0 ... 16930 =      0.000 ...    67.720 secs
Ready.


  raw_fif = mne.io.read_raw_fif(os.path.join(PROCESSED_ROOT, '50a.fif'))


# Compute recurrence plot

In [5]:
from scipy.spatial.distance import pdist, squareform

def rec_plot(s, eps=0.10, steps=10):
    """Build a quantised distance matrix (un-thresholded recurrence plot).

    Parameters
    ----------
    s : 1-D numpy array
        The signal; every sample is treated as a one-dimensional point.
    eps : float
        Width of one quantisation bin: pairwise distances are divided by
        ``eps`` and rounded down.
    steps : int
        Upper bound for the quantised distance; larger values are capped.

    Returns
    -------
    numpy.ndarray
        Square ``int8`` matrix of shape ``(len(s), len(s))`` whose entry
        ``(i, j)`` is ``min(floor(dist(s[i], s[j]) / eps), steps)``.
    """
    points = s[:, None]                         # column vector: one observation per row
    binned = np.floor(pdist(points) / eps)      # condensed pairwise distances, in units of eps
    capped = np.minimum(binned, steps)          # saturate everything beyond `steps`
    return squareform(capped).astype('int8')    # expand to the full symmetric matrix

In [9]:
def compute_rp(file, minl=1000, maxl=1000):
    """Compute and save one recurrence plot per channel for a recording.

    Parameters
    ----------
    file : object
        Must expose ``df`` (indexable by channel name, each entry offering
        ``.values``), ``id`` and ``trial``; the latter two form the output
        file name ``<id><trial>.npy`` inside ``REC_ROOT``.
    minl : int
        Currently unused; kept so existing calls keep working.
    maxl : int or None
        Maximum number of leading samples used per channel. ``None`` uses the
        whole recording.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(length, length, len(CHANNEL_NAMES))`` where
        ``length`` is ``maxl`` capped at the number of available samples.
        The same array is written to disk with ``np.save``.
    """
    signals = [file.df[channel].values for channel in CHANNEL_NAMES]
    available = min((len(signal) for signal in signals), default=0)
    # Size the result from the samples actually used. Allocating (maxl, maxl)
    # up front failed for maxl=None and for recordings shorter than maxl.
    length = available if maxl is None else min(maxl, available)

    res = np.zeros((length, length, len(CHANNEL_NAMES)))
    for i, signal in enumerate(signals):
        res[:, :, i] = rec_plot(signal[:length])

    # np.save does not create missing directories.
    os.makedirs(REC_ROOT, exist_ok=True)
    np.save(os.path.join(REC_ROOT, ''.join((str(file.id), file.trial, '.npy'))), res)
    return res

In [10]:
import logging

# Only let MNE report errors while the recordings are being processed.
mne.set_log_level(logging.ERROR)

# Compute and store the recurrence plots of every processed recording.
processed_files = files_builder(DataKind('processed'))
for file in processed_files:
    res = compute_rp(file)

# Training

In [4]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.utils import to_categorical
from keras.models import Sequential
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import GridSearchCV, train_test_split
from data.data_files import CHANNEL_NAMES, DataKind, files_builder

In [5]:
# Hyper-parameters shared by the model definition and the training cells.
image_size = 1000      # height and width of the network input
num_channels = 1       # size of the last (channel) axis of the network input
batch_size = 10        # samples per batch produced by the generators
num_epochs = 25        # epochs passed to fit_generator
dropout_rate = 0.5     # default dropout rate of define_model

In [6]:
import numpy as np
from keras.utils import Sequence

class batch_generator(Sequence):
    """Keras ``Sequence`` that loads saved ``.npy`` arrays batch by batch.

    Parameters
    ----------
    filenames : sequence of bytes or str
        Paths of the ``.npy`` files. Byte strings (as returned by
        ``remove_middle``) are decoded as ASCII; anything else is handed to
        ``np.load`` unchanged.
    labels : sequence
        One label (row) per file, in the same order as ``filenames``.
    batch_size : int
        Number of files per batch; the last batch may be smaller.
    """

    def __init__(self, filenames, labels, batch_size):
        self.filenames, self.labels = filenames, labels
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches, counting a trailing partial batch.
        return int(np.ceil(len(self.filenames) / float(self.batch_size)))

    @staticmethod
    def _as_path(file_name):
        """Return ``file_name`` in a form ``np.load`` accepts."""
        # np.bytes_ is a bytes subclass, so structured-array entries land here.
        if isinstance(file_name, bytes):
            return file_name.decode('ascii')
        return file_name

    def __getitem__(self, idx):
        """Return ``(images, labels)`` for batch number ``idx``.

        Every loaded array gets a trailing axis of size one before the batch
        is stacked; the labels are cast to ``int8``.
        """
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        batch_x = self.filenames[window]
        batch_y = self.labels[window]

        # TODO: Here we can select the channels
        images = [np.expand_dims(np.load(self._as_path(file_name)), axis=-1)
                  for file_name in batch_x]
        return np.array(images), np.array(batch_y).astype('int8')

In [7]:
def define_model(dropout_rate=dropout_rate):
    """Build and compile the convolutional two-class classifier.

    Architecture: three ``Conv2D -> ReLU -> MaxPooling2D`` stages (32, 32 and
    64 filters, 3x3 kernels, 2x2 pooling), followed by ``Flatten``, a
    64-unit ReLU layer with dropout, and a 2-unit softmax output.

    Parameters
    ----------
    dropout_rate : float
        Dropout rate applied after the dense hidden layer. The default is the
        value of the module-level ``dropout_rate`` at the time this function
        is defined.

    Returns
    -------
    keras.models.Sequential
        Compiled model taking inputs of shape
        ``(image_size, image_size, num_channels)`` and one-hot targets with
        two columns.
    """
    model = Sequential()
    # The batch dimension is deliberately left unspecified: pinning it to
    # `batch_size` makes it impossible to feed the trailing partial batch the
    # generator produces, or a single sample at prediction time.
    model.add(Conv2D(32, (3, 3),
                     input_shape=(image_size, image_size, num_channels),
                     data_format='channels_last'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

    # The two classes are mutually exclusive and the targets are one-hot
    # encoded, so use softmax with categorical cross-entropy. Two independent
    # sigmoid units with binary cross-entropy do not yield a distribution over
    # the classes and make 'accuracy' an element-wise (per-unit) score.
    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    return model

In [8]:
def remove_middle(filenames, labels):
    """Keep only the samples labelled -1 or 1.

    Parameters
    ----------
    filenames : iterable of str or bytes
        File names, paired position-wise with ``labels``. If the two inputs
        differ in length, the surplus of the longer one is ignored.
    labels : iterable of int
        One label per file name.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The surviving file names as a byte-string array and their labels as
        an ``int8`` array, in the original order.
    """
    pairs = list(zip(filenames, labels))
    # dtype 'S' lets numpy size the byte strings to the longest name; a fixed
    # 'S100' silently cut off anything longer than 100 bytes.
    names = np.array([name for name, _ in pairs], dtype='S')
    marks = np.array([mark for _, mark in pairs], dtype='int8')
    keep = (marks == -1) | (marks == 1)
    return names[keep], marks[keep]

In [9]:
def _label_distribution(values):
    """Return ``{label: count}`` for the given label array."""
    unique, counts = np.unique(values, return_counts=True)
    return dict(zip(unique, counts))


fb = files_builder(DataKind('recplot'))
seed = 123
fns = [fn[1] for fn in fb.file_names(True)]
filenames, labels = remove_middle(fns, fb.get_labels())
print('Overall distribution: ', _label_distribution(labels))

training_filenames, validation_filenames, training_labels, validation_labels = \
    train_test_split(filenames, labels, test_size=0.3, random_state=seed)
print('Training distribution: ', _label_distribution(training_labels))
print('Testing distribution: ', _label_distribution(validation_labels))

# The labels are -1 / 1. to_categorical uses each label as a column index, so
# -1 wraps around to the last column and both classes would receive the same
# one-hot row [0, 1]. Map them to 0 / 1 first (-1 -> 0, 1 -> 1).
training_labels = to_categorical((training_labels == 1).astype('int8'), 2)
validation_labels = to_categorical((validation_labels == 1).astype('int8'), 2)

Overall distribution:  {-1: 62, 1: 56}
Training distribution:  {-1: 45, 1: 37}
Testing distribution:  {-1: 17, 1: 19}


In [None]:
# Stream the training and validation files from disk in batches.
training_batch_generator = batch_generator(training_filenames, training_labels, batch_size)
validation_batch_generator = batch_generator(validation_filenames, validation_labels, batch_size)

# Floor division: a trailing partial batch is not counted in the step totals.
fit_options = dict(
    steps_per_epoch=len(training_filenames) // batch_size,
    epochs=num_epochs,
    verbose=1,
    validation_data=validation_batch_generator,
    validation_steps=len(validation_filenames) // batch_size,
    use_multiprocessing=False,
)

model = define_model()
model.fit_generator(generator=training_batch_generator, **fit_options)

Epoch 1/25


In [None]:

# In-memory training variant: fits a fresh model on arrays and prints the
# evaluation result on a test set.
# NOTE(review): `train_dataset`, `train_labels`, `epochs`, `test_dataset` and
# `test_labels` are not defined anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel — presumably a leftover from an earlier
# experiment; confirm whether it should be removed or wired to real data.
model = define_model()
model.fit(
        train_dataset, train_labels,
        batch_size=batch_size,
#         steps_per_epoch=2000 // batch_size,
        epochs=epochs,
#         validation_data=(valid_dataset, valid_labels),
#         validation_steps=800 // batch_size
        )

# Prints whatever model.evaluate returns for the test arrays.
print(model.evaluate(test_dataset, test_labels, batch_size=batch_size))
# model.save_weights('.h5') 

In [1]:
from keras import backend as K
# Show the GPUs Keras' TensorFlow backend can see; the recorded output below is
# an empty list. `_get_available_gpus` is a private (underscore-prefixed)
# helper, so it may not exist in other Keras versions.
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


[]