<a href="https://colab.research.google.com/github/mauricio-ms/motor-imagery-convolutional-recurrent-neural-network/blob/master/notebooks/cnn-1d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Verify that a GPU is enabled

In [1]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != "/device:GPU:0":
  raise SystemError("GPU device not found")
print("Found GPU at: {}".format(device_name))

Found GPU at: /device:GPU:0


Mount Google Drive directory

In [2]:
# Mount Google Drive at /content/drive. Later cells read the TFRecord files
# and save models through the relative path "drive/My Drive/...".
# NOTE(review): that relative path presumably relies on the working directory
# being /content - confirm for the runtime in use.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Install needed packages

In [3]:
!pip install colorlog



In [4]:
import logging
import colorlog


def get_logger(name="__main__", debug=False):
    """Return a named logger with colored console output and a full file log.

    Console formatting is configured through ``colorlog.basicConfig`` and a
    ``logging.FileHandler`` writing to ``app.log`` (relative to the current
    working directory) is attached to the returned logger.

    The function is safe to call repeatedly with the same ``name`` (e.g. when
    a notebook cell is re-run): the ``app.log`` handler is attached only once,
    so records are not duplicated in the file.

    Args:
        name: Name passed to ``logging.getLogger``.
        debug: When true the logger level is ``DEBUG``, otherwise ``INFO``.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    # Local import: this helper is defined before the notebook imports ``os``.
    import os

    log_format = (
        "[%(levelname)s] "
        "%(asctime)s - "
        "%(name)s: "
        "%(funcName)s - "
        "%(message)s"
    )
    bold_seq = "\033[1m"
    colorlog_format = (
        f"{bold_seq} "
        "%(log_color)s "
        f"{log_format}"
    )
    colorlog.basicConfig(format=colorlog_format)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # Output full log. ``logging.getLogger`` returns the same object for the
    # same name, so without this guard every call would stack one more
    # FileHandler on it and each record would be written several times.
    log_path = os.path.abspath("app.log")
    already_attached = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == log_path
        for handler in logger.handlers
    )
    if not already_attached:
        fh = logging.FileHandler("app.log")
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter(log_format))
        logger.addHandler(fh)

    return logger

In [5]:
import os
import numpy as np

# Shared logger for the data-loading helpers defined in the next cells.
LOGGER = get_logger("physionet.py")
# Root of the preprocessed data on the mounted Drive (relative path). It is
# listed to obtain the subjects, and each subject entry is itself a directory
# of TFRecord files.
TFRECORD_FILES_DIR = "drive/My Drive/motor-imagery-convolutional-recurrent-neural-network/preprocessed-tfrecord-files"

In [8]:
def load_data(train_size=0.75, validation_size=None, n_subjects=None, **kwargs):
    """Split the subjects found in ``TFRECORD_FILES_DIR`` and build one dataset per split.

    Subjects are the sorted entries of ``TFRECORD_FILES_DIR``. The split is
    done per subject, so all files of a subject end up in the same set.

    Args:
        train_size: Probability with which each subject is assigned to the
            train split (see ``_train_test_split_subjects``).
        validation_size: When given, the train subjects are split again and
            each one stays in train with probability ``1 - validation_size``.
        n_subjects: When given, the subjects are shuffled and only the first
            ``n_subjects`` are kept before splitting.
        **kwargs: Forwarded unchanged to ``_load_set``.

    Returns:
        ``(train, test)`` datasets, or ``(train, validation, test)`` when
        ``validation_size`` is not ``None``.
    """
    LOGGER.info("Loading Physionet dataset ...")
    subjects = np.array(sorted(os.listdir(TFRECORD_FILES_DIR)))
    if n_subjects is not None:
        np.random.shuffle(subjects)
        subjects = subjects[:n_subjects]

    train_subjects, test_subjects = _train_test_split_subjects(subjects, train_size)

    # Two-way split: nothing more to carve out of the train subjects.
    if validation_size is None:
        LOGGER.info(f"(Train, Test) Subjects = ({len(train_subjects)}, {len(test_subjects)})")
        LOGGER.info(f"Train subjects: {train_subjects}")
        LOGGER.info(f"Test subjects: {test_subjects}")
        train_set = _load_set(train_subjects, **kwargs)
        test_set = _load_set(test_subjects, **kwargs)
        return train_set, test_set

    # Three-way split: the validation subjects are taken from the train subjects.
    keep_in_train = 1-validation_size
    train_subjects, validation_subjects = _train_test_split_subjects(train_subjects, keep_in_train)
    LOGGER.info(f"(Train, Validation, Test) Subjects = "
                f"({len(train_subjects)}, {len(validation_subjects)}, {len(test_subjects)})")
    LOGGER.info(f"Train subjects: {train_subjects}")
    LOGGER.info(f"Validation subjects: {validation_subjects}")
    LOGGER.info(f"Test subjects: {test_subjects}")
    train_set = _load_set(train_subjects, **kwargs)
    validation_set = _load_set(validation_subjects, **kwargs)
    test_set = _load_set(test_subjects, **kwargs)
    return train_set, validation_set, test_set


def _train_test_split_subjects(subjects, train_size):
    train_subjects_mask = np.random.rand(len(subjects)) < train_size
    return subjects[train_subjects_mask], subjects[~train_subjects_mask]


# TODO - convert_to_2d should define the data directory
def _load_set(subjects, n_readers=tf.data.experimental.AUTOTUNE,
              n_parse_threads=tf.data.experimental.AUTOTUNE,
              batch_size=100, convert_to_2d=False, expand_dim=False):
    """Build the ``tf.data`` input pipeline for the given subjects.

    Every file inside each subject's directory under ``TFRECORD_FILES_DIR``
    is read as a GZIP-compressed TFRecord file. Records are interleaved
    across files, batched, parsed with ``_preprocess``, cached and prefetched.

    Args:
        subjects: Iterable of subject directory names.
        n_readers: ``cycle_length`` of the interleave step.
        n_parse_threads: ``num_parallel_calls`` for both the interleave and
            the parsing ``map`` step.
        batch_size: Number of serialized records per batch.
        convert_to_2d: Accepted but currently not used by this function.
        expand_dim: Forwarded to ``_preprocess``.

    Returns:
        A ``tf.data.Dataset`` yielding what ``_preprocess`` returns per batch.
    """
    record_paths = []
    for subject in subjects:
        subject_dir = os.path.join(TFRECORD_FILES_DIR, subject)
        for file_name in sorted(os.listdir(subject_dir)):
            record_paths.append(os.path.join(subject_dir, file_name))

    def _open_records(filepath):
        # One reader per file; the files are stored GZIP-compressed.
        return tf.data.TFRecordDataset(filepath, compression_type="GZIP")

    def _parse_batch(serialized_batch):
        return _preprocess(serialized_batch, expand_dim=expand_dim)

    pipeline = (
        tf.data.Dataset.list_files(record_paths)
        .interleave(_open_records,
                    cycle_length=n_readers,
                    num_parallel_calls=n_parse_threads)
        .batch(batch_size)
        .map(_parse_batch, num_parallel_calls=n_parse_threads)
        .cache()
    )
    return pipeline.prefetch(tf.data.experimental.AUTOTUNE)


@tf.function
def _preprocess(serialized_eeg_records, expand_dim=False):
    """Parse a batch of serialized records into features and labels.

    Each record is expected to carry a float32 feature ``"X"`` of length 64
    and a scalar int64 feature ``"y"``.

    Args:
        serialized_eeg_records: Serialized examples handed to
            ``tf.io.parse_example``.
        expand_dim: When true, a trailing axis of size 1 is appended to ``X``.

    Returns:
        Tuple ``(X, y)`` of parsed tensors.
    """
    n_channels = 64
    record_schema = {
        "X": tf.io.FixedLenFeature([n_channels], tf.float32),
        "y": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_example(serialized_eeg_records, record_schema)

    features, labels = parsed["X"], parsed["y"]
    if expand_dim:
        # Append a trailing axis to the parsed feature tensor.
        features = features[..., tf.newaxis]
    return features, labels

In [9]:
# Two-way subject split with the default train_size. expand_dim=True is
# forwarded to _preprocess, which appends a trailing axis to each 64-value
# record so it matches the model's input_shape of [64, 1].
train_set, test_set = load_data(expand_dim=True)

[1m [32m [INFO] 2020-08-17 11:06:14,814 - physionet.py: load_data - Loading Physionet dataset ...[0m
[1m [32m [INFO] 2020-08-17 11:06:14,820 - physionet.py: load_data - (Train, Test) Subjects = (83, 25)[0m
[1m [32m [INFO] 2020-08-17 11:06:14,823 - physionet.py: load_data - Train subjects: ['S002' 'S006' 'S007' 'S008' 'S010' 'S011' 'S012' 'S013' 'S014' 'S015'
 'S016' 'S017' 'S018' 'S019' 'S021' 'S023' 'S024' 'S025' 'S026' 'S027'
 'S028' 'S029' 'S030' 'S031' 'S032' 'S034' 'S035' 'S036' 'S037' 'S038'
 'S039' 'S040' 'S041' 'S042' 'S043' 'S045' 'S046' 'S047' 'S049' 'S050'
 'S051' 'S053' 'S056' 'S058' 'S059' 'S061' 'S062' 'S063' 'S064' 'S065'
 'S066' 'S067' 'S069' 'S071' 'S072' 'S073' 'S075' 'S076' 'S077' 'S078'
 'S079' 'S080' 'S081' 'S082' 'S083' 'S085' 'S086' 'S087' 'S088' 'S090'
 'S093' 'S094' 'S095' 'S096' 'S097' 'S098' 'S100' 'S102' 'S104' 'S105'
 'S106' 'S107' 'S108'][0m
[1m [32m [INFO] 2020-08-17 11:06:14,825 - physionet.py: load_data - Test subjects: ['S001' 'S003' 'S004' 

In [10]:
from tensorflow import keras

with tf.device("/device:GPU:0"):
  # 1D CNN over a single record: the input is the 64 parsed values with a
  # trailing axis of size 1 (see expand_dim in _preprocess). Three
  # same-padded Conv1D layers are followed by a dense head with dropout and
  # a 5-way softmax output.
  model = keras.models.Sequential([
      keras.layers.Conv1D(32, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME",
                          input_shape=[64, 1]),
      keras.layers.Conv1D(64, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME"),
      keras.layers.Conv1D(128, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME"),
      keras.layers.Flatten(),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(1024, activation="relu",
                        kernel_initializer="he_normal"),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(5, activation="softmax")
  ])

  # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
  optimizer = keras.optimizers.Adam(learning_rate=1e-4)
  # Labels are parsed as scalar integers ("y"), hence the sparse loss.
  model.compile(loss="sparse_categorical_crossentropy",
                optimizer=optimizer,
                metrics=["accuracy"])

In [11]:
# First training run: 10 epochs over the train set, then save a checkpoint of
# the whole model to Drive. No validation data is passed to fit(), so the log
# only reports training loss and accuracy.
LOGGER.info("Starting training CNN 1D model ...")
model.fit(train_set, epochs=10, verbose=2)
model.save("drive/My Drive/motor-imagery-convolutional-recurrent-neural-network/cnn_1d_until_epoch_10.h5")
LOGGER.info("Training CNN 1D model end!")

[1m [32m [INFO] 2020-08-17 11:08:08,600 - physionet.py: <module> - Starting training CNN 1D model ...[0m


Epoch 1/10
105739/105739 - 1145s - loss: 1.6147 - accuracy: 0.5289
Epoch 2/10
105739/105739 - 1114s - loss: 1.3296 - accuracy: 0.5316
Epoch 3/10
105739/105739 - 1113s - loss: 1.3413 - accuracy: 0.5330
Epoch 4/10
105739/105739 - 1113s - loss: 1.3335 - accuracy: 0.5332
Epoch 5/10
105739/105739 - 1109s - loss: 1.3782 - accuracy: 0.5342
Epoch 6/10
105739/105739 - 1109s - loss: 1.3520 - accuracy: 0.5343
Epoch 7/10
105739/105739 - 1108s - loss: 1.3319 - accuracy: 0.5339
Epoch 8/10
105739/105739 - 1109s - loss: 1.3280 - accuracy: 0.5346
Epoch 9/10
105739/105739 - 1109s - loss: 1.3538 - accuracy: 0.5353
Epoch 10/10
105739/105739 - 1105s - loss: 1.3565 - accuracy: 0.5352


[1m [32m [INFO] 2020-08-17 14:13:50,078 - physionet.py: <module> - Training CNN 1D model end![0m


In [None]:
# Second training run of 10 more epochs on the same `model` object; it relies
# on `model` still holding the state left by the previous training cell.
# fit() is called without initial_epoch, so the log numbers these epochs
# 1-10 again, while the checkpoint file name counts them as epochs 11-20.
LOGGER.info("Continue training CNN 1D model ...")
model.fit(train_set, epochs=10, verbose=2)
model.save("drive/My Drive/motor-imagery-convolutional-recurrent-neural-network/cnn_1d_until_epoch_20.h5")
LOGGER.info("Training CNN 1D model end!")

[1m [32m [INFO] 2020-08-17 14:17:51,371 - physionet.py: <module> - Continue training CNN 1D model ...[0m


Epoch 1/10
105739/105739 - 1105s - loss: 1.3398 - accuracy: 0.5353
Epoch 2/10
105739/105739 - 1109s - loss: 1.3427 - accuracy: 0.5354
Epoch 3/10
105739/105739 - 1110s - loss: 1.3342 - accuracy: 0.5356
Epoch 4/10
105739/105739 - 1112s - loss: 1.3339 - accuracy: 0.5348
Epoch 5/10
105739/105739 - 1113s - loss: 1.3476 - accuracy: 0.5348
Epoch 6/10
105739/105739 - 1110s - loss: 1.3342 - accuracy: 0.5351
Epoch 7/10
105739/105739 - 1112s - loss: 1.3386 - accuracy: 0.5354
Epoch 8/10
