In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import h5py # Read and write HDF5 files from Python

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ei-dreem-sleep-stages-2020/y_train.csv
/kaggle/input/ei-dreem-sleep-stages-2020/X_train.h5
/kaggle/input/ei-dreem-sleep-stages-2020/X_test.h5
/kaggle/input/ei-dreem-sleep-stages-2020/sample_submission.csv


Now we are going into the Dreem 2 Challenge.
The goal is to use Dreem 2 headband data to perform sleep stage scoring on 30 seconds epochs of biophysiological signals.
https://www.kaggle.com/c/ei-dreem-sleep-stages-2020/data

The training dataset is composed of:
- X_train.h5: input Dreem2 headband data: 30s of biosignals including EEG and accelerometer
- y_train: sleep stages {'Wake':0, 'N1':1, 'N2':2, 'N3':3, 'REM':4} 

The challenge is to submit the sleep stages associated to:
- X_test.h5
(it has to be submitted in the right format, see sample_submission.csv)

Let's have a look:



In [None]:
# filenames
data_path = "/kaggle/input/ei-dreem-sleep-stages-2020/"
file_xtrain = data_path + "X_train.h5"
file_xtest = data_path + "X_test.h5"
file_ytrain = data_path + "y_train.csv"

#

In this TD, we will only work with one EEG channel.
Let's create dataset functions that will be used for training and testing the model:

*EegEpochDataset*: Eeg Class herited from pytorch Dataset to deal with our data

*get_train_validation_dataset*: 
- return train_dataloader and validation_dataloader
- dataloaders will be used during the training and the tests


In [2]:
""" Load project data
    DataLoader and Dataset for single-channel EEG

"""

import torch
from torch.utils.data import Dataset, DataLoader


def normalize_data(eeg_array):
    """normalize signal between 0 and 1"""

    normalized_array = np.clip(eeg_array, -150, 150)
    normalized_array = normalized_array / 150

    return normalized_array


class EegEpochDataset(Dataset):
    """EEG Epochs dataset."""

    def __init__(self, x_data, y_data, transform=None):
        """
        Args:
            x_data (numpy array): Numpy array of input data.
            y_data (list of numpy array): Sleep Stages
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.y_data = y_data
        self.x_data = x_data
        self.transform = transform

        self.x_data = normalize_data(x_data)

    def __len__(self):
        return len(self.y_data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        signal = np.expand_dims(self.x_data[idx], axis=0)
        stage = self.y_data[idx]

        if self.transform:
            signal = self.transform(signal)

        return signal, stage


def get_train_validation_dataset(derivation, batch_size=32, validation_ratio=0.2):
    """
    Return train and validation datasets in Dataloader format
    :param derivation: EEG derivation, from eeg_1 to eeg_7
    :param batch_size: size of the batch, usually 16, 3é or 64
    :param validation_ratio:

    :return:
    train_dataloader
    validation_dataloader
    """

    with h5py.File(file_xtrain, "r") as fi:
        x_data = fi[derivation][()]
    y_data = pd.read_csv(file_ytrain)['sleep_stage'].to_numpy()

    # Creating data indices for training and validation splits:
    dataset_size = len(y_data)
    indices = list(range(dataset_size))
    split = int((1 - validation_ratio) * dataset_size)
    np.random.shuffle(indices)
    train_indices, val_indices = indices[:split], indices[split:]

    x_train, x_validation = x_data[train_indices], x_data[val_indices]
    y_train, y_validation = y_data[train_indices], y_data[val_indices]

    # torch dataset
    train_dataset = EegEpochDataset(x_data=x_train, y_data=y_train)
    val_dataset = EegEpochDataset(x_data=x_validation, y_data=y_validation)

    # to dataloader
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

    return train_dataloader, val_dataloader


# load dataloaders
train_dataloader, validation_dataloader = get_train_validation_dataset('eeg_5', batch_size=32)


Now we create the neural network Model:
- convolutionnal neural network
- Fully conencted layers at the end
- takes only a single channel of EEG signal as input

In [3]:
import torch
import torch.nn as nn


class SingleChannelConvNet(nn.Module):

    def __init__(self):
        super(SingleChannelConvNet, self).__init__()
        self.conv_a = nn.Conv1d(1, 128, 7, stride=2, padding=6, padding_mode='zeros')
        self.conv_b = nn.Conv1d(128, 128, 7, stride=2, padding=6, padding_mode='zeros')
        self.conv_c = nn.Conv1d(128, 256, 7, stride=2, padding=6, padding_mode='zeros')
        self.conv_d = nn.Conv1d(256, 256, 5, stride=2, padding=4, padding_mode='zeros')
        self.conv_e = nn.Conv1d(256, 256, 3, stride=2, padding=2, padding_mode='zeros')

        self.pool = nn.MaxPool1d(2)

        self.activfunc_a = nn.LeakyReLU(negative_slope=0.1)

        self.fc1 = nn.Linear(3 * 256, 100)
        self.fc2 = nn.Linear(100, 5)

    def forward(self, x):

        x = self.activfunc_a(self.conv_a(x))
        for _ in range(5):
            x = self.activfunc_a(self.conv_b(x))
        x = self.activfunc_a(self.conv_c(x))
        for _ in range(3):
            x = self.activfunc_a(self.conv_d(x))
        x = self.activfunc_a(self.conv_e(x))
        x = self.activfunc_a(self.conv_e(x))

        x = x.view(-1, self.num_flat_features(x))
        x = self.activfunc_a(self.fc1(x))
        x = self.fc2(x)

        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

You can now start the training on the train dataloader

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

# device: use GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# parameters
learning_rate = 0.001
n_epoch = 50

# neural network and co
my_net = SingleChannelConvNet()
my_net = my_net.to(device) # model into GPU
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(my_net.parameters(), lr=learning_rate)

print('training...')
for epoch in range(n_epoch):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = my_net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
    print('epoch %d, %d samples, loss: %.3f' % (epoch + 1, (i+1)*train_dataloader.batch_size,running_loss / (i+1)))
    running_loss = 0.0

print('Finished Training')


training...
epoch 1, 19776 samples, loss: 1.461
epoch 2, 19776 samples, loss: 1244.654
epoch 3, 19776 samples, loss: 3.593
epoch 4, 19776 samples, loss: 1.467
epoch 5, 19776 samples, loss: 1.444
epoch 6, 19776 samples, loss: 1.428
epoch 7, 19776 samples, loss: 1.411
epoch 8, 19776 samples, loss: 1.387
epoch 9, 19776 samples, loss: 1.453
epoch 10, 19776 samples, loss: 1.373
epoch 11, 19776 samples, loss: 1.345
epoch 12, 19776 samples, loss: 1.431
epoch 13, 19776 samples, loss: 2.177
epoch 14, 19776 samples, loss: 1.334
epoch 15, 19776 samples, loss: 1.300
epoch 16, 19776 samples, loss: 1.267
epoch 17, 19776 samples, loss: 1.253
epoch 18, 19776 samples, loss: 68.422
epoch 19, 19776 samples, loss: 1.246
epoch 20, 19776 samples, loss: 1.213
epoch 21, 19776 samples, loss: 1.194
epoch 22, 19776 samples, loss: 1.173
epoch 23, 19776 samples, loss: 1.165
epoch 24, 19776 samples, loss: 1.151
epoch 25, 19776 samples, loss: 1.150
epoch 26, 19776 samples, loss: 1.160
epoch 27, 19776 samples, loss: 

Now the training is complete, let's assess its performance on the validation data

In [10]:
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, confusion_matrix

# params
classes = ['Wake', 'N1', 'N2', 'N3', 'REM']

with torch.no_grad():
    prediction_list = torch.empty(0).to(device)
    true_list = torch.empty(0).to(device)
    for data in validation_dataloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        outputs = my_net(inputs)
        _, predicted = torch.max(outputs, 1)
        prediction_list = torch.cat([prediction_list, predicted])
        true_list = torch.cat([true_list, labels])

true_list = true_list.cpu().numpy()
prediction_list = prediction_list.cpu().numpy()
scores = {'balanced_accuracy': balanced_accuracy_score(true_list, prediction_list),
            'cohen_kappa_score': cohen_kappa_score(true_list, prediction_list),
            'confusion_matrix': confusion_matrix(true_list, prediction_list)}

print(scores)

{'balanced_accuracy': 0.5710283075038346, 'cohen_kappa_score': 0.5631847206015818, 'confusion_matrix': array([[ 406,    5,  108,   27,  162],
       [  21,    4,  151,   17,  144],
       [  23,    1, 1392,  223,  197],
       [   5,    0,  116,  943,    0],
       [  24,    2,  329,   17,  621]])}
