In [2]:
# https://keras.io/examples/timeseries/timeseries_weather_forecasting/
# https://www.researchgate.net/publication/343250071_Recognizing_Emotions_Evoked_by_Music_using_CNN-LSTM_Networks_on_EEG_signals
import pandas as pd
from utils import get_dfs, extract_classes
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras import backend as K
from keras.utils import to_categorical
from tensorflow import keras
from model import run_model
import tensorflow as tf
import tensorflow.keras.layers as tfl

In [3]:
# Recorded sessions to load (paths are relative to the notebook).
hdf_file_path = [
    "../data/20221029-171117.hdf5",
    "../data/20221029-192231.hdf5",
    "../data/20221029-200201.hdf5",
    "../data/20221029-202757.hdf5",
    "../data/othmane_assis_EEG_20221029-231521.hdf5"
]

# Collect the per-file frames and concatenate once after the loop; calling
# pd.concat inside the loop re-copies the accumulated rows on every iteration.
filtered_frames = []
event_frames = []
for path in hdf_file_path:
    # df_events / df_filtered stay bound to the frames of the LAST file once the
    # loop ends; a later cell reads them, so the names must not change.
    _, df_events, df_filtered, _ = get_dfs(path)
    filtered_frames.append(df_filtered)
    event_frames.append(df_events)

result_filtered = pd.concat(filtered_frames)
result_events = pd.concat(event_frames)

# Clean events to have only 3 classes.
# The replacement is applied to every column of the frame, exactly like the
# three separate replace() calls it stands for.
result_events = result_events.replace({
    '"repos"': 'neutral',
    '"calme"': 'positive',
    '"lent"': 'positive',
    '"rapide"': 'negative',
    '"agite"': 'negative',
})

# Drop the marker events that are not classes. '"calme"' and '"agite"' no
# longer occur in the frame after the relabelling above, so filtering on them
# again would be a no-op.
result_events = result_events[~result_events['data'].isin(['"fin"', '"interuption"'])]

In [4]:
def _add_lag_columns(ts_df, n_lags):
    """Return ``ts_df`` with ``n_lags`` shifted copies of every column appended, NaN rows dropped."""
    # Lagged columns are named "<column>_<lag>" with the lag zero-padded to two digits.
    frames = [ts_df] + [
        ts_df.shift(lag).add_suffix(f'_{lag:02d}') for lag in range(1, n_lags + 1)
    ]
    # Every shifted frame shares ts_df's index, so a single column-wise concat
    # lines the rows up without repeated index merges.
    return pd.concat(frames, axis=1).dropna()


def prepare_ts_for_training(ts_df, event_df, n_lags=99, max_rows_per_class=5000):
    """Turn a time series and its event markers into a lagged, labelled frame.

    Each row of the result holds the current sample of every column of
    ``ts_df`` plus its ``n_lags`` previous samples, and a ``'class'`` column
    with the class name (double quotes stripped) active for that row.

    Args:
        ts_df: DataFrame of samples. Its index must be sliceable with the
            index values of ``event_df``.
        event_df: DataFrame with a ``data`` column (class name) and a
            ``label`` column containing ``'start'`` / ``'stop'``.
        n_lags: Number of lagged copies appended per column.
        max_rows_per_class: Upper bound on the rows kept for each class.

    Returns:
        The concatenated per-class segments, or ``None`` when
        ``extract_classes(event_df)`` yields no class.

    Raises:
        IndexError: If a class has no ``'start'`` event.
    """
    lagged_df = _add_lag_columns(ts_df, n_lags)

    segments = []
    for class_name in extract_classes(event_df):
        of_class = event_df.data == class_name
        # Only the first start / stop event of each class is used.
        start = event_df.loc[of_class & (event_df.label == 'start')].index[0]
        stops = event_df.loc[of_class & (event_df.label == 'stop')].index
        # Without a stop event the segment runs to the end of the recording.
        end = stops[0] if len(stops) else lagged_df.index[-1]

        # Copy before adding the label so the assignment never targets a slice
        # of lagged_df (avoids pandas' SettingWithCopyWarning).
        segment = lagged_df[start:end].head(max_rows_per_class).copy()
        segment['class'] = class_name.replace('"', '')
        segments.append(segment)

    if not segments:
        return None
    return pd.concat(segments)


In [5]:
# df_filtered / df_events are the loop variables left over from the loading
# cell, i.e. the frames of the last file in hdf_file_path.
# NOTE(review): the concatenated, relabelled result_filtered / result_events
# are not used here - confirm that training on a single session is intended.
final_df = prepare_ts_for_training(ts_df=df_filtered, event_df=df_events)

In [6]:
# Display the lagged, labelled frame built by prepare_ts_for_training.
final_df

Unnamed: 0,Fpz,Fpz_01,Fpz_02,Fpz_03,Fpz_04,Fpz_05,Fpz_06,Fpz_07,Fpz_08,Fpz_09,...,Fpz_91,Fpz_92,Fpz_93,Fpz_94,Fpz_95,Fpz_96,Fpz_97,Fpz_98,Fpz_99,class
2022-10-29 23:16:35.221257,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,-0.715359,1.636246,...,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,5.224463,5.655671,repos
2022-10-29 23:16:35.231257,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,-0.715359,...,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,5.224463,repos
2022-10-29 23:16:35.241257,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,-0.569524,...,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,4.123820,repos
2022-10-29 23:16:35.251257,-3.855607,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,-0.344298,...,6.376958,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,3.491017,repos
2022-10-29 23:16:35.261257,-7.218886,-3.855607,-2.104169,-1.542927,-2.009272,-2.830380,0.678026,3.171745,-0.076711,-1.959629,...,4.146209,6.376958,8.057441,6.356289,4.751460,5.493750,5.991572,4.044888,2.897071,repos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-29 23:17:25.171257,-2.621689,0.963083,-0.513126,-0.069434,2.917220,0.872808,-1.229614,-0.008977,-1.085612,0.344029,...,0.879487,1.691575,3.727979,6.287959,0.699076,-4.929825,-1.973310,0.858311,-1.174404,repos
2022-10-29 23:17:25.181257,-6.593097,-2.621689,0.963083,-0.513126,-0.069434,2.917220,0.872808,-1.229614,-0.008977,-1.085612,...,-3.808395,0.879487,1.691575,3.727979,6.287959,0.699076,-4.929825,-1.973310,0.858311,repos
2022-10-29 23:17:25.191257,-4.372527,-6.593097,-2.621689,0.963083,-0.513126,-0.069434,2.917220,0.872808,-1.229614,-0.008977,...,-2.513772,-3.808395,0.879487,1.691575,3.727979,6.287959,0.699076,-4.929825,-1.973310,repos
2022-10-29 23:17:25.201257,-3.217128,-4.372527,-6.593097,-2.621689,0.963083,-0.513126,-0.069434,2.917220,0.872808,-1.229614,...,4.466711,-2.513772,-3.808395,0.879487,1.691575,3.727979,6.287959,0.699076,-4.929825,repos


In [7]:
# --- Train / validation split ------------------------------------------------
split_fraction = 0.715  # share of final_df's rows (from the top) used for training
train_split = int(final_df.shape[0] * split_fraction)

# --- Windowing (all values are counted in rows of final_df) -------------------
past = 1000   # rows covered by one input window
future = 300  # extra offset added to `past` to locate a window's label
step = 100    # sampling rate inside a window; past / step rows are kept per window

# --- Training -----------------------------------------------------------------
learning_rate = 0.001  # not referenced by the cells visible in this notebook section
batch_size = 1500
epochs = 10

# Offset of the first training label and one-past-the-last training label.
start = past + future
end = start + train_split


def normalize(data, train_split):
    """Standardise ``data`` column-wise with statistics of its first ``train_split`` rows.

    Only the leading ``train_split`` rows contribute to the mean and standard
    deviation, so the remaining rows are scaled with the same statistics.

    Args:
        data: Numeric array-like that supports row slicing and
            ``mean(axis=0)`` / ``std(axis=0)`` (``load_ds`` passes a NumPy array).
        train_split: Number of leading rows used to compute the statistics.

    Returns:
        ``(data - mean) / std`` with the same shape as ``data``. Columns that
        are constant over the training rows come out centred but unscaled
        instead of NaN/inf.
    """
    train_rows = data[:train_split]
    data_mean = train_rows.mean(axis=0)
    data_std = train_rows.std(axis=0)
    # A column that is constant over the training rows has std == 0 and would
    # turn into NaN/inf after the division. Adding the boolean mask bumps
    # exactly those entries to 1 and leaves every other std untouched.
    data_std = data_std + (data_std == 0)
    return (data - data_mean) / data_std


In [8]:
def load_ds():
    """Build the windowed training and validation datasets from ``final_df``.

    Reads notebook-level names instead of taking parameters: ``final_df``
    (its ``'class'`` column holds the labels, every other column is a
    feature), ``train_split``, ``past``, ``future``, ``step`` and
    ``batch_size``.

    Feature columns are standardised with ``normalize`` using the first
    ``train_split`` rows. Labels are integer-encoded with a ``LabelEncoder``.
    Each window keeps ``past / step`` rows sampled every ``step`` rows, and
    the label paired with a window is the one ``past + future`` rows after
    the window's first row.

    Returns:
        tuple: ``(dataset_train, dataset_val)`` as returned by
        ``keras.preprocessing.timeseries_dataset_from_array``.
    """
    feature_keys = final_df.columns
    print(feature_keys)

    # Encode the labels once, straight from final_df. They are never written
    # into a slice of a frame and never pass through normalize().
    labels = preprocessing.LabelEncoder().fit_transform(final_df['class'])

    # Standardise the signal columns only; normalize() works per column, so
    # leaving the label column out does not change the feature values.
    signal_keys = [key for key in feature_keys if key != 'class']
    signals = normalize(final_df[signal_keys].values, train_split)

    label_offset = past + future
    sequence_length = int(past / step)

    # Training windows start in the first train_split rows.
    x_train = signals[:train_split]
    y_train = labels[label_offset:label_offset + train_split]

    dataset_train = keras.preprocessing.timeseries_dataset_from_array(
        x_train,
        y_train,
        sequence_length=sequence_length,
        sampling_rate=step,
        batch_size=batch_size,
    )

    # Validation windows start in the remaining rows, minus the tail for which
    # no label exists past + future rows ahead.
    val_signals = signals[train_split:]
    # NOTE(review): x_end is negative when fewer than past + future validation
    # rows exist, in which case the slice below counts from the end of the
    # array - confirm the validation split is always long enough.
    x_end = len(val_signals) - label_offset
    x_val = val_signals[:x_end]
    y_val = labels[train_split + label_offset:]

    dataset_val = keras.preprocessing.timeseries_dataset_from_array(
        x_val,
        y_val,
        sequence_length=sequence_length,
        sampling_rate=step,
        batch_size=batch_size,
    )
    return dataset_train, dataset_val

In [9]:
# Build both windowed datasets (load_ds reads the notebook-level configuration).
dataset_train, dataset_val = load_ds()

# Pull one batch so its shapes can be inspected; `inputs` is also handed to
# run_model in the next cell.
for batch in dataset_train.take(1):
    inputs, targets = batch

print(f"Input shape: {inputs.numpy().shape}")
print(f"Target shape: {targets.numpy().shape}")

Index(['Fpz', 'Fpz_01', 'Fpz_02', 'Fpz_03', 'Fpz_04', 'Fpz_05', 'Fpz_06',
       'Fpz_07', 'Fpz_08', 'Fpz_09',
       ...
       'Fpz_91', 'Fpz_92', 'Fpz_93', 'Fpz_94', 'Fpz_95', 'Fpz_96', 'Fpz_97',
       'Fpz_98', 'Fpz_99', 'class'],
      dtype='object', length=101)


2022-10-30 11:12:43.543448: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Input shape: (1500, 10, 100)
Target shape: (1500,)


In [10]:
# Train with run_model (imported from `model`); `inputs` is the sample batch
# taken from dataset_train in the previous cell.
# NOTE(review): num_classes is hard-coded to 3 - confirm it matches the number
# of distinct labels in final_df['class'].
m = run_model(
    dataset_train,
    dataset_val,
    inputs,
    epochs=epochs,
    num_classes=3,
)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 100)]         0         
                                                                 
 conv1d (Conv1D)             (None, 2, 16)             192016    
                                                                 
 re_lu (ReLU)                (None, 2, 16)             0         
                                                                 
 max_pooling1d (MaxPooling1D  (None, 1, 16)            0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 1, 32)             1568      
                                                                 
 re_lu_1 (ReLU)              (None, 1, 32)             0         
                                                             