In [1]:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import preprocess
from sklearn.model_selection import train_test_split
import scipy.io as sio
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from collections import Counter
from imblearn.over_sampling import RandomOverSampler

# Load Data

In [2]:
%%time
rootdir = '../../data.nosync/'
interictal_count = preprocess.count_interictal_files(rootdir)
interictal_files = preprocess.get_interictal_files(rootdir, interictal_count)
preictal_files = preprocess.get_preictal_files(rootdir)
print('Interictal file count:', len(interictal_files))
print('Preictal file count:', len(preictal_files))

Interictal file count: 480
Preictal file count: 24
CPU times: user 3.04 ms, sys: 7.12 ms, total: 10.2 ms
Wall time: 20.4 ms


In [3]:
interictal_y = np.zeros(len(interictal_files))
preictal_y = np.ones(len(preictal_files))
y = np.concatenate((interictal_y, preictal_y), axis=0)
y.shape

(504,)

In [4]:
%%time
X_list = []
X_list.extend(interictal_files)
X_list.extend(preictal_files)
len(X_list)

CPU times: user 52 µs, sys: 8 µs, total: 60 µs
Wall time: 67 µs


504

In [5]:
X = np.array(X_list)
print(type(X))
X.shape

<class 'numpy.ndarray'>


(504,)

In [6]:
# summarize class distribution
print(Counter(y))

Counter({0.0: 480, 1.0: 24})


## Class Imbalance

In [7]:
X = X.reshape(-1, 1)
X.shape

(504, 1)

In [8]:
# define oversampling strategy
oversample = RandomOverSampler(sampling_strategy='minority')
X_over, y_over = oversample.fit_resample(X, y)
print(Counter(y_over))

Counter({0.0: 480, 1.0: 480})


In [9]:
%%time
X_train, X_test, y_train, y_test = train_test_split(X_over, y_over, test_size=0.33, random_state=42)
print(X_train.shape)
print(X_test.shape)

(643, 1)
(317, 1)
CPU times: user 2.35 ms, sys: 950 µs, total: 3.3 ms
Wall time: 4.62 ms


In [20]:
print(X_train.shape)
print(y_train.shape)

(643, 1)
(643,)


# Preprocess

In [10]:
def rescale_ch(signal):
    signal = signal.reshape((len(signal), 1))
    scaler = MinMaxScaler(feature_range=(0,1))
    scaler = scaler.fit(signal)
    rescaled = scaler.transform(signal)
    rescaled = rescaled.reshape((-1,))
    return rescaled

In [21]:
# rescale signal in range [0,1]
X_sig = np.zeros((643, 16, 239766))

for i in tqdm(range(len(X_train))):
    filepath = rootdir + X_train[i].item()
    mat = sio.loadmat(filepath)
    sigbuf = preprocess.get_sig(mat)
    rescaled_sigbuf = np.empty_like(sigbuf)
    for j in range(sigbuf.shape[0]):
        rescaled_sigbuf[j,:] = rescale_ch(sigbuf[j,:])
    X_sig[i] = rescaled_sigbuf

100%|██████████| 643/643 [01:35<00:00,  6.77it/s]


In [22]:
X_sig.shape

(643, 16, 239766)

# Build Model

Architecture is a fully convolutional neural network according to [Wang, 2016](https://arxiv.org/abs/1611.06455).

In [23]:
num_classes = 2

In [24]:
def make_model(input_shape):
    input_layer = keras.layers.Input(input_shape)
    
    conv1 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.ReLU()(conv1)
    
    conv2 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.ReLU()(conv2)
    
    conv3 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.ReLU()(conv3)
    
    gap = keras.layers.GlobalAveragePooling1D()(conv3)
    
    output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap)
    
    return keras.models.Model(inputs=input_layer, outputs=output_layer)

model = make_model(input_shape=X_sig.shape[1:])

# Build Model

In [25]:
X_sig.shape

(643, 16, 239766)

In [26]:
y_train.shape

(643,)

In [27]:
%%time
epochs = 200
batch_size = 32

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

history = model.fit(
    X_sig,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200

KeyboardInterrupt: 

# Evaluate Model

In [28]:
# model = keras.models.load_model("best_model.h5")
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)



ValueError: in user code:

    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1233 test_function  *
        return step_function(self, iterator)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1224 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1217 run_step  **
        outputs = model.test_step(data)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1183 test_step
        y_pred = self(x, training=False)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:424 call
        return self._run_internal_graph(
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:560 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Users/sameenislam/anaconda3/envs/ml/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:234 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer conv1d_3 is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (None, 1)


# Plot Loss

In [None]:
metric = "sparse_categorical_accuracy"
plt.figure()
plt.plot(history.history[metric])
plt.plot(history.history["val_" + metric])
plt.title("model " + metric)
plt.ylabel(metric, fontsize="large")
plt.xlabel("epoch", fontsize="large")
plt.legend(["train", "val"], loc="best")

The loss characteristics is indicative of imbalanced classes. See [accuracy paradox](https://en.wikipedia.org/wiki/Accuracy_paradox).