In [2]:
import sys
sys.path.insert(0, "/users/sista/jdan/neureka/neureka-codebase/vizualize-seizures")
EDF_ROOT = '/esat/biomeddata/Neureka_challenge/edf/dev/'

import glob
import h5py
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, GRU, LSTM
import numpy as np
import os
import pathlib
import resampy
from scipy.io import loadmat

import nedc
import spir

Using TensorFlow backend.


In [8]:
def load_filenames():
    """Return the recording names stored in the ICLabel prediction file.

    Opens ``evaluation/prediction_test_iclabel.h5`` read-only and returns the
    entries of its ``filenames`` dataset as a Python list.
    """
    with h5py.File('evaluation/prediction_test_iclabel.h5', 'r') as h5_file:
        return list(h5_file['filenames'])


def prepare_file(file_i, filename, classifiers, f_nick, model_type):
    """Stack the predictions of every classifier for one recording.

    Args:
        file_i: Unused. Kept so existing callers keep working; the row of a
            recording is always looked up by ``filename``.
        filename: Recording identifier, as stored in the ``filenames`` dataset
            of the h5 files and used as prefix of the ``.mat`` file names.
        classifiers: List of dicts describing the classifiers. Each dict needs
            a ``'format'`` key:

            * ``'nick'``: predictions are read from
              ``f_nick[classifier['name']]['signals']`` at the row whose
              ``filenames`` entry equals ``filename`` and resampled from 200
              to the module-level ``fs`` with ``downsample``.
            * ``'kaat'``: predictions are read from the ``score_predict``
              variable of the MATLAB file
              ``classifier['dir']/<filename><classifier['file']>``, smoothed
              with a two-tap moving average, and trimmed or zero-padded to the
              length of the first classifier's predictions.
        f_nick: Mapping from classifier name to an open h5 file (or any object
            indexable the same way).
        model_type: ``'lstm'`` and ``'gru'`` get a 3-D input, anything else a
            2-D input.

    Returns:
        ``numpy.ndarray`` of shape ``(n_samples, n_classifiers)``, or
        ``(n_samples, 1, n_classifiers)`` for ``'lstm'``/``'gru'``.

    Raises:
        ValueError: If a classifier has an unknown ``'format'``. Previously
            this silently re-used the predictions of the preceding classifier
            (or failed with a NameError for the first one).
    """
    x = list()
    for classifier in classifiers:
        fmt = classifier['format']
        if fmt == 'nick':
            # Look the recording up by name instead of trusting the caller's
            # index; the h5 files are not guaranteed to share one ordering.
            stored_names = list(f_nick[classifier['name']]['filenames'])
            row = stored_names.index(filename)
            predictions = f_nick[classifier['name']]['signals'][row]
            predictions = downsample(predictions, 200, fs)
        elif fmt == 'kaat':
            predictions = loadmat(os.path.join(classifier['dir'], filename + classifier['file']))
            predictions = predictions['score_predict'][0]
            # Average neighbouring scores; 'valid' drops one sample.
            predictions = np.convolve(predictions, [0.5, 0.5], mode='valid')
            if x:
                # Align to the first classifier so the columns can be stacked.
                n_ref = len(x[0])
                if len(predictions) > n_ref:
                    predictions = predictions[:n_ref]
                elif len(predictions) < n_ref:
                    predictions = np.concatenate((predictions, np.zeros(n_ref - len(predictions))))
            # When this is the first classifier there is nothing to align to:
            # its own length becomes the reference for the following ones.
        else:
            raise ValueError(
                "unknown classifier format {!r} for classifier {!r}".format(
                    fmt, classifier.get('name')))
        x.append(np.array(predictions, dtype=float))

    # (n_classifiers, n_samples) -> (n_samples, n_classifiers)
    x = np.array(x)
    x = np.transpose(x)
    if model_type == 'lstm' or model_type == 'gru':
        # Recurrent models are fed one time step per batch element.
        x = x.reshape((len(x), 1, len(x[0])))

    return x

class AvgModel:
    """Training-free stand-in for a Keras model that averages its inputs.

    It offers the three methods the rest of the notebook calls on a model
    (``fit``, ``reset_states``, ``predict``). The methods are static so they
    behave the same whether they are called on the class itself or on an
    instance.
    """

    @staticmethod
    def fit(*argv, **kwargs):
        """Accept and ignore any arguments; there is nothing to train."""
        return 0

    @staticmethod
    def reset_states(*argv, **kwargs):
        """Accept and ignore any arguments; the model keeps no state."""
        return 0

    @staticmethod
    def predict(x, *argv, **kwargs):
        """Return the mean of ``x`` over axis 1, or ``x`` itself if it is 0-D/1-D.

        Extra positional and keyword arguments (e.g. ``batch_size``) are
        accepted and ignored.
        """
        if np.ndim(x) > 1:
            return np.mean(x, axis=1)
        else:
            return x

def downsample(x, oldFs, newFs):
    """Resample ``x`` from sampling rate ``oldFs`` to ``newFs`` with resampy."""
    resampled = resampy.resample(x, oldFs, newFs)
    return resampled


def findTse(filename):
    """Return the path of the ``.tse`` file belonging to ``filename``.

    The file is searched below ``EDF_ROOT`` with the glob pattern
    ``<EDF_ROOT>/*/<filename[3:6]>/<part0>/<part1>_[0-9_]*/<filename>.tse``
    where ``part0`` and ``part1`` are the first two ``'_'``-separated fields
    of ``filename``. The first match reported by ``glob`` is returned.

    Raises:
        IndexError: If ``filename`` has no ``'_'`` or if no file matches. The
            exception type is the one the bare ``result[0]`` used to raise;
            the message now names the pattern that was searched.
    """
    parts = filename.split('_')
    pattern = os.path.join(EDF_ROOT, '*', filename[3:6], parts[0],
                           parts[1] + '_' + '[0-9_]*', filename + '.tse')
    matches = glob.glob(pattern)
    if not matches:
        raise IndexError('no .tse file found for {!r} (pattern: {})'.format(filename, pattern))
    return matches[0]


def build_model(n_input, model_type, complexity=None):
    """Create the ensemble model that combines ``n_input`` classifier outputs.

    Args:
        n_input: Number of input features (one per classifier).
        model_type: One of

            * ``'lstm'`` / ``'gru'``: stateful bidirectional recurrent layer
              with ``complexity`` units followed by a single sigmoid unit;
              expects inputs of shape ``(1, n_input)`` with batch size 1.
            * ``'dense'``: a single sigmoid unit on ``n_input`` features,
              batch size 1.
            * ``'avg'``: the ``AvgModel`` class (not an instance), which needs
              no training.
        complexity: Number of units of the recurrent layer. Only used for
            ``'lstm'`` and ``'gru'``.

    Returns:
        A compiled Keras ``Sequential`` model (loss ``'mse'``, optimizer
        ``'adam'``), or the ``AvgModel`` class for ``'avg'``.

    Raises:
        ValueError: If ``model_type`` is not one of the values above.
            Previously this surfaced as an UnboundLocalError on ``return``.
    """
    if model_type in ('lstm', 'gru'):
        # Both recurrent variants share the same topology; only the cell differs.
        recurrent_layer = LSTM if model_type == 'lstm' else GRU
        model = Sequential()
        model.add(Bidirectional(recurrent_layer(complexity, stateful=True, return_sequences=False),
                                input_shape=(1, n_input), batch_size=1))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='mse', optimizer='adam')
    elif model_type == 'dense':
        model = Sequential()
        model.add(Dense(1, activation='sigmoid', input_shape=(n_input, ), batch_size=1))
        model.compile(loss='mse', optimizer='adam')
    elif model_type == 'avg':
        # The class itself is returned; its methods are called on the class.
        model = AvgModel
    else:
        raise ValueError(
            "unknown model_type {!r}; expected 'lstm', 'gru', 'dense' or 'avg'".format(model_type))
    return model


def train(model, model_type, classifiers, filenames):
    """Fit ``model`` recording by recording.

    For every entry of ``filenames`` the inputs and targets are obtained from
    ``prepare_file``; the model is fitted for 15 epochs when the targets
    contain any non-zero value and for 1 epoch otherwise, and its states are
    reset afterwards.

    Args:
        model: Object with ``fit`` and ``reset_states`` methods.
        model_type: Model kind, forwarded to ``prepare_file``. For ``'avg'``
            nothing is done.
        classifiers: Classifier descriptions, forwarded to ``prepare_file``.
        filenames: Recordings to train on.

    Returns:
        ``0`` for ``model_type == 'avg'``, otherwise ``None``.

    Raises:
        ValueError: If ``prepare_file`` does not return an ``(x, y)`` pair.

    FIXME(review): ``prepare_file`` as defined in this notebook returns only
    ``x``, so training cannot run until it also provides the targets ``y``.
    The check below turns the resulting unpacking failure (or silent
    mis-unpacking of a two-sample recording) into an explicit error.
    """
    if model_type == 'avg':
        return 0

    # Preload Nick data. The handles are registered one by one inside the
    # ``try`` so that every file that was opened is closed again, even when a
    # later open or the training itself fails.
    f_nick = dict()
    try:
        f_nick['ICA'] = h5py.File('evaluation/prediction_test_iclabel.h5', 'r')
        f_nick['DNN'] = h5py.File('evaluation/prediction_test_raw.h5', 'r')
        f_nick['DNN-wiener'] = h5py.File('evaluation/prediction_test_wiener.h5', 'r')

        # Train
        for i, filename in enumerate(filenames):
            prepared = prepare_file(i, filename, classifiers, f_nick, model_type)
            if not (isinstance(prepared, (tuple, list)) and len(prepared) == 2):
                raise ValueError(
                    'train() needs prepare_file() to return an (x, y) pair, '
                    'got {}'.format(type(prepared).__name__))
            x, y = prepared
            # Recordings with at least one non-zero target get more epochs.
            epochs = 15 if np.any(y) else 1
            model.fit(x, y, batch_size=1, epochs=epochs, verbose=1)
            model.reset_states()
    finally:
        # Close Nick data
        for handle in f_nick.values():
            handle.close()

        
def test(model, modeltype, modelName, classifiers, filenames):
    """Run ``model`` on ``filenames`` and write one event list per threshold.

    Predictions are computed once per recording (resetting the model states
    after each one). Then, for every threshold in ``0.05, 0.10, ..., 0.95``,
    the predictions are binarised, converted to events with
    ``spir.mask2eventList`` (using the module-level ``fs``) and written to
    ``results/<modelName>/<NN>.txt`` with one line
    ``<filename> <event[0]> <event[1]> 1`` per event.

    ``<NN>`` is the threshold in percent, zero-padded to two digits
    (``05``, ``10``, ``15``, ...). The name used to be cut out of
    ``str(threshold)``, which yields ``1.txt`` for 0.1 but ``15.txt`` for
    0.15 and depends on floating-point rounding noise.

    Args:
        model: Object with ``predict`` and ``reset_states`` methods.
        modeltype: Model kind, forwarded to ``prepare_file``.
        modelName: Name of the sub-directory of ``results`` to write to.
        classifiers: Classifier descriptions, forwarded to ``prepare_file``.
        filenames: Recordings to evaluate.
    """
    # Preload Nick data; registered one by one inside the ``try`` so every
    # opened file is closed even if a later open or a prediction fails.
    f_nick = dict()
    try:
        f_nick['ICA'] = h5py.File('evaluation/prediction_test_iclabel.h5', 'r')
        f_nick['DNN'] = h5py.File('evaluation/prediction_test_raw.h5', 'r')
        f_nick['DNN-wiener'] = h5py.File('evaluation/prediction_test_wiener.h5', 'r')

        # Predict probabilities
        results = list()
        for i, filename in enumerate(filenames):
            prepared = prepare_file(i, filename, classifiers, f_nick, modeltype)
            # prepare_file returns the input array only; the targets are not
            # needed here. A (x, y) tuple is tolerated for compatibility.
            x = prepared[0] if isinstance(prepared, tuple) else prepared
            u = model.predict(x, batch_size=1)
            model.reset_states()
            results.append(u)
    finally:
        # Close Nick data
        for h5_handle in f_nick.values():
            h5_handle.close()

    # Build ROC curves
    resultDir = "results/{}".format(modelName)
    pathlib.Path(resultDir).mkdir(parents=True, exist_ok=True)
    for threshold in np.arange(0.05, 1, step=0.05):
        # Save results
        resultFile = "{:02d}.txt".format(int(round(threshold * 100)))
        with open(os.path.join(resultDir, resultFile), 'w') as handle:
            for i, filename in enumerate(filenames):
                events = spir.mask2eventList(results[i].flatten() > threshold, fs)
                for event in events:
                    handle.write('{} {} {} 1\n'.format(filename, event[0], event[1]))

In [9]:
# Rate the classifier outputs are resampled to; also handed to
# spir.mask2eventList when masks are converted to events.
fs = 1

# Every classifier is stored in "nick" format: an h5 file whose predictions
# are looked up by recording name. The dicts are built from (name, file) pairs.
classifiers = [
    {'name': clf_name, 'file': clf_file, 'fs': 200, 'format': 'nick'}
    for clf_name, clf_file in (
        ('ICA', 'evaluation/prediction_test_iclabel.h5'),
        ('DNN', 'evaluation/prediction_test_raw.h5'),
        ('DNN-wiener', 'evaluation/prediction_test_wiener.h5'),
    )
]

# Ensemble model kind and number of recurrent units.
modeltype = 'lstm'
complexity = 4

filenames = load_filenames()

# A previously trained model is loaded instead of building and training one.
# To train from scratch:
#   model = build_model(len(classifiers), modeltype, complexity)
#   train(model, modeltype, classifiers, filenames[0:100])
#   test(model, modeltype, 'all-{}-{}'.format(modeltype, complexity), classifiers, filenames[100:200])
from keras.models import load_model
model = load_model('../vizualize-seizures/results/model-dnn-dnnw-dnnicalbl-lstm-4.h5')


In [10]:
# Open the three prediction files, keyed by classifier name.
# NOTE(review): the handles stay open after this cell; close them once no
# later cell needs ``f_nick`` any more.
f_nick = {
    'ICA': h5py.File('evaluation/prediction_test_iclabel.h5', 'r'),
    'DNN': h5py.File('evaluation/prediction_test_raw.h5', 'r'),
    'DNN-wiener': h5py.File('evaluation/prediction_test_wiener.h5', 'r'),
}

# Predict probabilities: one model output per recording, in the order of
# ``filenames``. The model is stateful, so its states are cleared between
# recordings.
results = []
for i, filename in enumerate(filenames):
    x = prepare_file(i, filename, classifiers, f_nick, modeltype)
    u = model.predict(x, batch_size=1)
    model.reset_states()
    results.append(u)

In [15]:
# Turn the model outputs in ``results`` into the hypothesis file
# evaluation/hyp_lstm.txt (one line per detected event).
pathlib.Path('tmp').mkdir(parents=True, exist_ok=True)
hyp_path = os.path.join('evaluation', 'hyp_lstm.txt')
# Start from a clean file: the events are appended below.
try:
    os.remove(hyp_path)
except FileNotFoundError:
    print('could not remove ref or hyp.txt')

# Detection threshold applied to the median-subtracted model output.
threshold = 0.55
# Second argument of spir.merge_events (presumably the largest gap, in
# seconds, between events that get merged - TODO confirm against spir).
MERGE_DISTANCE = 30
# An event is kept only if its mean output exceeds this fraction of the
# strongest event's mean output in the same recording.
MIN_RELATIVE_AMPLITUDE = 0.82
# Events must be longer than this (in the units of the event boundaries).
MIN_EVENT_DURATION = 15

# The file is opened once for all recordings instead of once per recording.
# Unlike before, it is therefore also created (empty) when ``filenames`` is
# empty.
with open(hyp_path, 'a') as handle:
    for i, filename in enumerate(filenames):
        scores = results[i].flatten()

        # Subtract the recording's median before thresholding.
        hyp = spir.mask2eventList((scores - np.median(scores)) > threshold, fs)
        #hyp = spir.mask2eventList(results[i] > threshold, fs)
        hyp = spir.merge_events(hyp, MERGE_DISTANCE)

        if len(hyp):
            # Mean model output of every event, relative to the strongest one.
            amp = np.array([np.mean(scores[int(event[0]*fs):int(event[1]*fs)])
                            for event in hyp])
            amp /= np.max(amp)

            hyp = list(np.array(hyp)[amp > MIN_RELATIVE_AMPLITUDE])

        for event in hyp:
            if event[1] - event[0] > MIN_EVENT_DURATION:
                # Written boundaries are moved inwards by 1 on each side.
                handle.write('{} {} {} 1.0 15\n'.format(filename, event[0]+1, event[1]-1))
