# Create EEG Sentences


In [1]:
import numpy as np                
import warnings
from sklearn.preprocessing import scale

contributor_selected = "I"                                 
contributor_train_file_path = '../data/Contributor_' + contributor_selected + '_Train.mat'
contributor_test_file_path = '../data/Contributor_' + contributor_selected + '_Test.mat'
channel_name_file_path = '../data/channels.csv'
channels = [i for i in range(64)]
warnings.filterwarnings('ignore')

In [2]:
from scipy.io import loadmat
from scipy import signal
from bundle.DataCraft import * 


data_train = loadmat(contributor_train_file_path)
signals_train = data_train['Signal']
flashing_train = data_train['Flashing']
stimulus_train = data_train['StimulusType']
word_train = data_train['TargetChar']
sampling_frequency = 240
repetitions = 15
recording_duration_train = (len(signals_train)) * (len(signals_train[0])) / (sampling_frequency * 60)
trials_train = len(word_train[0])

print("Train Data:")
print_data(signals_train, word_train, contributor_selected, sampling_frequency)


Train Data:
Contributor     Sampling Freq. (Hz)  Recording (min)      Trials     Spelled Word                  
I               240.00               46.01                85         EAEVQTDOJG8RBRGONCEDHCTUIDBPUH
                                                                     MEM6OUXOCFOUKWA4VJEFRZROLHYNQD
                                                                     W_EKTLBWXEPOUIKZERYOOTHQI     


In [3]:
# Application of butterworth filter
b, a = signal.butter(4, [0.1 / sampling_frequency, 20 / sampling_frequency], 'bandpass')
for trial in range(trials_train):
    signals_train[trial, :, :] = signal.filtfilt(b, a, signals_train[trial, :, :], axis=0)
    
# Down-sampling of the signals from 240Hz to 120Hz
down_sampling_frequency = 120
SCALE_FACTOR = round(sampling_frequency / down_sampling_frequency)
sampling_frequency = down_sampling_frequency

print("# Samples of EEG signals before downsampling: {}".format(len(signals_train[0])))

signals_train = signals_train[:, 0:-1:SCALE_FACTOR, :]
flashing_train = flashing_train[:, 0:-1:SCALE_FACTOR]
stimulus_train = stimulus_train[:, 0:-1:SCALE_FACTOR]

print("# Samples of EEG signals after downsampling: {}".format(len(signals_train[0])))

# Samples of EEG signals before downsampling: 7794
# Samples of EEG signals after downsampling: 3897


In [4]:
# Number of EEG channels
N_CHANNELS = len(channels)
# Window duration after each flashing [ms]
WINDOW_DURATION = 650
# Number of samples of each window
WINDOW_SAMPLES = round(sampling_frequency * (WINDOW_DURATION / 1000))
# Number of samples for each character in trials
SAMPLES_PER_TRIAL = len(signals_train[0])

train_features = []
train_labels = []

count_positive = 0
count_negative = 0

for trial in range(trials_train):
    for sample in (range(SAMPLES_PER_TRIAL)):
        if (sample == 0) or (flashing_train[trial, sample - 1] == 0 and flashing_train[trial, sample] == 1):
            lower_sample = sample
            upper_sample = sample + WINDOW_SAMPLES
            window = signals_train[trial, lower_sample:upper_sample, :]                
            # Features extraction
            train_features.append(window)
            # Labels extraction
            if stimulus_train[trial, sample] == 1:
                count_positive += 1
                train_labels.append(1) # Class P300
            else:
                count_negative += 1
                train_labels.append(0) # Class no-P300

# Get negative-positive classes ratio
train_ratio = count_negative/count_positive

# Convert lists to numpy arrays
train_features = np.array(train_features)
train_labels = np.array(train_labels)

# 3D Tensor shape (SAMPLES, 64, 78)
dim_train = train_features.shape
print("Features tensor shape: {}".format(dim_train))

# Data normalization Zi = (Xi - mu) / sigma
for pattern in range(len(train_features)):
    train_features[pattern] = scale(train_features[pattern], axis=0)

Features tensor shape: (15300, 78, 64)


# Map each sample to here character

In [5]:
print("Train features shape: {}".format(train_features[0:10]))
print("Train labels shape: {}".format(train_labels[0:10]))

Train features shape: [[[-0.33714175 -0.99835443 -1.3232312  ... -1.1834697  -0.54913986
   -2.146467  ]
  [-0.48710108 -0.99406785 -1.164695   ... -1.0431284  -0.55491173
   -2.1276486 ]
  [-0.5778562  -0.97020304 -0.9978452  ... -0.872261   -0.53857625
   -2.0364172 ]
  ...
  [-1.0796474  -0.7509958  -0.8101242  ... -1.5100381  -1.3592697
   -1.2251196 ]
  [-1.0141851  -0.7882367  -0.8428318  ... -1.5186968  -1.2910079
   -1.3577971 ]
  [-0.9740365  -0.8605112  -0.948646   ... -1.5444146  -1.2291203
   -1.4459876 ]]

 [[ 0.10983305 -0.5912166  -0.13849099 ...  1.2149168   1.2025368
    0.03397313]
  [ 0.2588424  -0.40057978  0.04210154 ...  1.1914977   1.229127
    0.17662951]
  [ 0.4308243  -0.1602149   0.26143023 ...  1.1193072   1.2058556
    0.2788561 ]
  ...
  [-0.6707997  -0.16460212  0.14333402 ... -0.06619225 -0.42303497
   -0.98408854]
  [-0.6886288  -0.17394923  0.050297   ... -0.15256841 -0.5299826
   -1.2219926 ]
  [-0.70042115 -0.17817381 -0.06943767 ... -0.23664345 -0.6