<a href="https://colab.research.google.com/github/mauricio-ms/motor-imagery-convolutional-recurrent-neural-network/blob/master/notebooks/cnn-1d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Verify that a GPU is enabled

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != "/device:GPU:0":
  raise SystemError("GPU device not found")
print("Found GPU at: {}".format(device_name))

Found GPU at: /device:GPU:0


Mount Google Drive directory

In [3]:
# Mount Google Drive at /content/drive; the dataset archive extracted in the
# next step is read from a path under this mount.
from google.colab import drive
drive.mount("/content/drive")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


Extract the dataset archive

In [4]:
# Extract the preprocessed CSV archive into the current working directory.
# The archive path is relative, so this assumes the working directory is the
# parent of the "drive" mount point (presumably /content on Colab).
!tar -xf "drive/My Drive/motor-imagery-convolutional-recurrent-neural-network/preprocessed-csv-files.tar.xz"

Install required packages

In [5]:
!pip install colorlog

Collecting colorlog
  Downloading https://files.pythonhosted.org/packages/2a/81/12d77537c82c5d46aa2721dfee25a0e873ef5920ebd0827152f411effb57/colorlog-4.2.1-py2.py3-none-any.whl
Installing collected packages: colorlog
Successfully installed colorlog-4.2.1


In [6]:
import logging
import colorlog


def get_logger(name="__main__", debug=False):
    """Return a named logger with colored console output and a plain-text file log.

    Console formatting is set up through ``colorlog.basicConfig``; in addition,
    every record of the returned logger is written to ``app.log`` in the
    current working directory.

    The function is safe to call repeatedly with the same ``name`` (for
    example when a notebook cell is re-run): the ``app.log`` handler is
    attached only once, so records are not duplicated in the file.

    Args:
        name: Name of the logger, shown as ``%(name)s`` in each record.
        debug: If true the logger level is DEBUG, otherwise INFO.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    # Local import: the cell defining this function only imports logging/colorlog.
    import os

    log_format = (
        "[%(levelname)s] "
        "%(asctime)s - "
        "%(name)s: "
        "%(funcName)s - "
        "%(message)s"
    )
    bold_seq = "\033[1m"
    colorlog_format = (
        f"{bold_seq} "
        "%(log_color)s "
        f"{log_format}"
    )
    colorlog.basicConfig(format=colorlog_format)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # logging.getLogger returns the same object for the same name, so a
    # previous call may already have attached the file handler. FileHandler
    # stores its target as an absolute path in ``baseFilename``.
    log_path = os.path.abspath("app.log")
    already_attached = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == log_path
        for handler in logger.handlers
    )
    if not already_attached:
        # Output full log (without color codes) to the file.
        fh = logging.FileHandler("app.log")
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter(log_format))
        logger.addHandler(fh)

    return logger

In [7]:
import os
import tensorflow as tf
import numpy as np

# Shared logger for the data-loading and training cells; the name only labels
# the records (it is rendered as %(name)s by the log format).
LOGGER = get_logger("physionet.py")
# Number of feature columns per CSV row; each row is parsed as N_CHANNELS
# values followed by one label column.
N_CHANNELS = 64
# Directory (relative to the working directory) that holds one sub-directory
# of CSV files per subject.
PREPROCESSED_CSV_FILES_DIR = "preprocessed-csv-files"

In [8]:
def load_data(train_size=0.75, validation_size=None, n_subjects=None, seed=None, **kwargs):
    """Split the subjects into sets and build one dataset per set.

    Subjects are the (sorted) entries of ``PREPROCESSED_CSV_FILES_DIR``. The
    split is done per subject, so all files of a subject end up in the same
    set. Membership is drawn independently for every subject, which means the
    resulting set sizes only approximate the requested fractions and a set can
    come out empty when there are few subjects.

    Args:
        train_size: Probability that a subject is assigned to the training
            side of the train/test split.
        validation_size: If given, the probability that a training subject is
            moved to a separate validation set.
        n_subjects: If given, only the first ``n_subjects`` subjects are used.
        seed: Optional seed for the subject split. With the same seed and the
            same subject directory the split is reproducible. ``None`` keeps
            the previous behavior of drawing from NumPy's global random state.
        **kwargs: Forwarded unchanged to ``_load_set``.

    Returns:
        ``(train, test)`` when ``validation_size`` is ``None``, otherwise
        ``(train, validation, test)``; each element is what ``_load_set``
        returns for the corresponding subjects.
    """
    LOGGER.info("Loading Physionet dataset ...")
    # One generator shared by both splits so they use different draws.
    rng = None if seed is None else np.random.RandomState(seed)
    subjects = np.array(sorted(os.listdir(PREPROCESSED_CSV_FILES_DIR)))
    if n_subjects is not None:
        subjects = subjects[:n_subjects]
    train_subjects, test_subjects = _train_test_split_subjects(subjects, train_size, rng)
    if validation_size is not None:
        # The validation subjects are carved out of the training subjects.
        train_subjects, validation_subjects = _train_test_split_subjects(
            train_subjects, 1 - validation_size, rng)
        LOGGER.info(f"(Train, Validation, Test) Subjects = "
                    f"({len(train_subjects)}, {len(validation_subjects)}, {len(test_subjects)})")
        return _load_set(train_subjects, **kwargs), \
            _load_set(validation_subjects, **kwargs), \
            _load_set(test_subjects, **kwargs)

    LOGGER.info(f"(Train, Test) Subjects = ({len(train_subjects)}, {len(test_subjects)})")
    return _load_set(train_subjects, **kwargs), _load_set(test_subjects, **kwargs)


def _train_test_split_subjects(subjects, train_size, rng=None):
    """Randomly partition ``subjects`` into a train part and a test part.

    Each subject is independently assigned to the train part with probability
    ``train_size``; the original order is preserved within both parts.

    Args:
        subjects: Sequence of subject names (a NumPy array or a plain list).
        train_size: Probability of a subject landing in the train part.
        rng: Optional ``numpy.random.RandomState`` used for the draws. When
            ``None``, NumPy's global random state is used.

    Returns:
        Tuple ``(train_subjects, test_subjects)`` of NumPy arrays.
    """
    # Boolean-mask indexing below needs an array, not a list.
    subjects = np.asarray(subjects)
    source = np.random if rng is None else rng
    train_subjects_mask = source.rand(len(subjects)) < train_size
    return subjects[train_subjects_mask], subjects[~train_subjects_mask]


def _load_set(subjects, n_readers=5, n_parse_threads=5, batch_size=100,
              convert_to_2d=False, expand_dim=False):
    """Build a batched ``tf.data`` pipeline over all CSV files of ``subjects``.

    Args:
        subjects: Names of the subject sub-directories inside
            ``PREPROCESSED_CSV_FILES_DIR`` whose files should be read.
        n_readers: Number of files that are read interleaved at a time.
        n_parse_threads: Parallelism of the row-parsing step.
        batch_size: Number of parsed rows per batch.
        convert_to_2d: Forwarded to ``_preprocess``.
        expand_dim: Forwarded to ``_preprocess``.

    Returns:
        A dataset yielding batches of what ``_preprocess`` returns per row,
        with one batch prefetched.
    """
    # Collect every file of every subject, files sorted within each subject.
    path_files = []
    for subject in subjects:
        subject_dir = os.path.join(PREPROCESSED_CSV_FILES_DIR, subject)
        for file_name in sorted(os.listdir(subject_dir)):
            path_files.append(os.path.join(subject_dir, file_name))

    def read_rows(filepath):
        # Drop the first line of each file (presumably the CSV header).
        return tf.data.TextLineDataset(filepath).skip(1)

    def parse_row(record):
        return _preprocess(record, convert_to_2d=convert_to_2d,
                           expand_dim=expand_dim)

    files = tf.data.Dataset.list_files(path_files)
    rows = files.interleave(read_rows, cycle_length=n_readers)
    examples = rows.map(parse_row, num_parallel_calls=n_parse_threads)
    batches = examples.batch(batch_size)
    return batches.prefetch(1)


def _preprocess(eeg_record, convert_to_2d=False, expand_dim=False):
    """Parse one CSV line into a ``(features, label)`` pair of tensors.

    The line must contain ``N_CHANNELS + 1`` numeric columns: the feature
    values followed by the label in the last column.

    Args:
        eeg_record: A single CSV line as a string tensor.
        convert_to_2d: Forwarded to ``_get_features``.
        expand_dim: If true, a trailing axis of size 1 is appended to the
            features.

    Returns:
        ``(x, y)`` where ``x`` holds the stacked feature columns and ``y`` is
        the label column cast to ``int32`` and stacked into a 1-element tensor.
    """
    # An empty default marks a column as required: TensorFlow raises an
    # error on a missing value instead of substituting one.
    required_float = tf.constant([], dtype=tf.float32)
    record_defaults = [required_float] * (N_CHANNELS + 1)
    columns = tf.io.decode_csv(eeg_record, record_defaults=record_defaults)

    features = _get_features(columns, convert_to_2d=convert_to_2d)
    if expand_dim:
        features = features[..., tf.newaxis]

    label_column = columns[-1:]
    label = tf.cast(tf.stack(label_column), tf.int32)
    return features, label


def _get_features(fields, convert_to_2d=False):
    """Stack every parsed column except the last one into a single tensor.

    Args:
        fields: Parsed CSV columns; the last entry is the label and is skipped.
        convert_to_2d: Accepted for interface compatibility; this
            implementation does not use it.

    Returns:
        A tensor made by stacking the feature columns.
    """
    feature_columns = fields[:-1]
    return tf.stack(feature_columns)

In [16]:
%tensorflow_version 2.x
from tensorflow import keras

with tf.device('/device:GPU:0'):
  train_set, validation_set, test_set = load_data(expand_dim=True, 
                                                  validation_size=0.20,
                                                  n_readers=108,
                                                  n_parse_threads=108)

  model = keras.models.Sequential([
      keras.layers.Conv1D(32, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME",
                          input_shape=[64, 1]),
      keras.layers.Conv1D(64, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME"),
      keras.layers.Conv1D(128, 3, activation="relu",
                          kernel_initializer="he_normal",
                          padding="SAME"),
      keras.layers.Flatten(),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(1024, activation="relu",
                        kernel_initializer="he_normal"),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(5, activation="softmax")
  ])

  optimizer = keras.optimizers.Adam(lr=1e-4)
  model.compile(loss="sparse_categorical_crossentropy",
                optimizer=optimizer,
                metrics=["accuracy"])
  LOGGER.info("Starting training CNN 1D model ...")
  model.fit(train_set, epochs=2, validation_data=validation_set, verbose=2)
  LOGGER.info("Training CNN 1D model end!")

[1m [32m [INFO] 2020-08-12 22:24:23,088 - physionet.py: load_data - Loading Physionet dataset ...[0m
[1m [32m [INFO] 2020-08-12 22:24:23,090 - physionet.py: load_data - (Train, Validation, Test) Subjects = (68, 14, 26)[0m
[1m [32m [INFO] 2020-08-12 22:24:24,358 - physionet.py: <module> - Starting training CNN 1D model ...[0m


Epoch 1/2
Epoch 2/2

In [19]:
# Run two more epochs on the `model`, `train_set` and `validation_set` objects
# created by the previous training cell (the model is not rebuilt here, so
# this continues from its current weights). verbose=1 shows a per-step
# progress bar instead of one line per epoch.
LOGGER.info("Starting training CNN 1D model ...")
model.fit(train_set, epochs=2, validation_data=validation_set, verbose=1)
LOGGER.info("Training CNN 1D model end!")

[1m [32m [INFO] 2020-08-13 01:24:21,698 - physionet.py: <module> - Starting training CNN 1D model ...[0m


Epoch 1/2
   1305/Unknown - 18s 14ms/step - loss: 0.8682 - accuracy: 0.6533

KeyboardInterrupt: ignored